Archived
1
0
Fork 0

Initial commit

This commit is contained in:
Henrik Hautakoski 2011-06-08 18:46:58 +02:00
commit b2df740514
19 changed files with 1640 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
.*
*.o

21
Makefile Normal file
View file

@ -0,0 +1,21 @@
# Build rules for dlight and its helper tools.
CC = gcc
CFLAGS = -g -I/usr/include/libxml2
# Libraries go in LDLIBS so they are placed AFTER the object files on the
# link line; linkers that resolve symbols left to right drop libraries that
# are named before the objects needing them.  -lcrypto provides the SHA1_*
# functions used by cconf.c.
LDLIBS = -lxml2 -lcurl -lpcre -lcrypto
PROGRAMS = dlight dlight-compile dlight-read-config

.PHONY : all install clean

all : $(PROGRAMS)

install : $(PROGRAMS)
	cp $^ $(HOME)/bin/

# dlight itself is linked by make's built-in "%: %.o" rule, which appends
# $(LDLIBS) after the objects.
dlight : dlight.o env.o http.o rss.o filter.o cconf.o dlhist.o
dlight-compile : compile.o env.o cconf.o
dlight-read-config : read-config.o env.o cconf.o

# dlight-<tool> is built from <tool>.o plus the prerequisites listed above.
dlight-% : %.o
	$(CC) $(LDFLAGS) -o $@ $^ $(LDLIBS)

clean :
	$(RM) *.o $(PROGRAMS)

59
README Normal file
View file

@ -0,0 +1,59 @@
Dlight - automatic feed downloader
--------------------------------------
dlight is a program that checks the items in RSS feeds and downloads those
items/links that match a set of rules.
What sets it apart from other programs of this type is that configuration
of the program should be easy and flexible, without forcing users to write
and maintain large lists of raw regular expressions.
The best way to use dlight is by using time-based scheduling like cron.
--------------------------------------------------------
# Make cron execute dlight every 15 minutes
*/15 * * * * /path/to/dlight >> /path/to/logs/dlight.log
--------------------------------------------------------
dlight is divided into 3 major components: the dlight program,
Configuration files and Compiler.
* dlight
The actual program that checks feeds and downloads items.
The configuration data is read from the compiled config file "~/.dlight/config".
The program first fetches the RSS file (target), then walks through
all items, applying all filters associated with the current target.
If one matches, that item is downloaded to the destination associated
with the target. It does this for all RSS files (targets) in the config.
* Configuration Files
A set of human-readable configuration files through which the user configures
dlight. This is where users define their targets, destinations,
filters and other types of information. (Currently there is only one file,
with a structure similar to the one the compiled format uses.)
* Compiler
A compiler is provided that compiles the configuration files down to a
binary config file used by dlight. One can think of this step as publishing/updating
the configuration used by the program.
This design is used for two main reasons.
First, you may be editing your configuration while dlight is executed by,
for example, cron. If dlight read directly from those files, it is possible
that the configuration files would not be in the desired state, making dlight
do some weird things.
The second reason is that processing all those files every time dlight is invoked
can be quite slow; the compiled format is designed to provide fast I/O reads.
Also, by using a source -> compiler -> output design, errors in the
configuration files can be caught when the user invokes the compiler.
This is a more natural way of notifying the user of such errors than
having dlight abort and log the error: because the program is supposed to be
executed in an automatic manner, the error would not be seen right away.

225
cconf.c Normal file
View file

@ -0,0 +1,225 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <openssl/sha.h>
#include <arpa/inet.h>
#include "cconf.h"
/*
 * Size of a string as stored on disk: its length plus the terminating
 * NUL byte, which is counted as part of the string in the file.
 */
#define strsize(str) (strlen(str) + 1)
/*
 * Feed `size` bytes from `buf` into the running SHA-1 context `ctx`,
 * then write the same bytes to `fd`.
 * Returns the result of write(2), narrowed to int.
 */
static int sha1_write(SHA_CTX *ctx, int fd, void *buf, size_t size) {
    int written;

    SHA1_Update(ctx, buf, size);
    written = write(fd, buf, size);
    return written;
}
/*
 * Hash and write `val` to `fd` in network byte order.
 * The result of the underlying write is not reported.
 */
static void write_int(SHA_CTX *ctx, int fd, int val) {
    int wire = htonl(val);

    sha1_write(ctx, fd, &wire, sizeof wire);
}
/*
 * Decode one network-byte-order unsigned int stored at `buf` into `*out`.
 * Returns a pointer to the first byte after the decoded integer.
 */
static void* read_entry_nr(void *buf, unsigned int *out) {
    unsigned int wire;

    /* memcpy: the value is not necessarily aligned inside the buffer */
    memcpy(&wire, buf, sizeof(wire));
    *out = ntohl(wire);
    return (char *) buf + sizeof(wire);
}
/*
 * Release everything owned by `c`, but not the struct itself (callers that
 * heap-allocated it free it themselves; compile.c uses a static instance).
 *
 * Two ownership models exist:
 *  - map.buf set (config loaded by cconf_read): src/dest/filter strings
 *    point into the mapping and must not be freed individually; only the
 *    per-target filter pointer arrays and the target array are heap memory.
 *  - map.buf unset (config built with cconf_new_target/cconf_add_filter):
 *    every string is heap-allocated and owned by the config.
 *
 * Passing NULL is a no-op. The fields are reset afterwards so a second
 * call is harmless.
 */
void cconf_free(struct cconf *c) {
    unsigned int i, j;
    int mapped;

    if (!c)
        return;

    mapped = c->map.buf != NULL;
    if (c->target) {
        for (i = 0; i < c->nr; i++) {
            struct target *t = c->target + i;

            if (!mapped) {
                free(t->src);
                free(t->dest);
                for (j = 0; t->filter && j < t->nr; j++)
                    free(t->filter[j]);
            }
            /* the pointer array itself is heap memory in both models */
            free(t->filter);
        }
        free(c->target);
    }
    if (mapped)
        munmap(c->map.buf, c->map.size);

    c->target = NULL;
    c->nr = 0;
    c->map.buf = NULL;
    c->map.size = 0;
}
/*
 * Append a zero-initialised target to `c` and return a pointer to it.
 * Returns NULL (leaving `c` unchanged) if the array cannot be grown.
 * The returned pointer is invalidated by the next call, because the
 * target array may be moved by realloc.
 */
struct target* cconf_new_target(struct cconf *c) {
    struct target *grown, *t;

    /* keep the old array reachable if realloc fails */
    grown = realloc(c->target, sizeof(*grown) * (c->nr + 1));
    if (!grown)
        return NULL;
    c->target = grown;

    t = grown + (c->nr++);
    memset(t, 0, sizeof(*t));
    return t;
}
/*
 * Append `filter` to the filter list of target `t`.
 * Ownership of `filter` passes to the target (cconf_free releases it).
 * A NULL filter is ignored. If the list cannot be grown the filter is
 * released, an error is reported on stderr and the list is left unchanged.
 */
void cconf_add_filter(struct target *t, char *filter) {
    char **grown;

    if (!filter)
        return;

    /* element size is sizeof(char *), i.e. sizeof(*t->filter) */
    grown = realloc(t->filter, sizeof(*grown) * (t->nr + 1));
    if (!grown) {
        perror("cconf_add_filter");
        free(filter);
        return;
    }
    t->filter = grown;
    t->filter[t->nr++] = filter;
}
/*
 * Decode the filter list of one target from `buf`: a count followed by
 * that many NUL-terminated strings. The filter pointers reference the
 * strings inside `buf`; only the pointer array is allocated.
 * Returns the number of bytes consumed.
 */
static size_t parse_filter(void *buf, struct target *target) {
    char *base = buf;
    char *cursor = read_entry_nr(buf, &target->nr);
    unsigned int k;

    if (target->nr) {
        target->filter = malloc(sizeof(char *) * target->nr);
        for (k = 0; k < target->nr; k++) {
            target->filter[k] = cursor;
            cursor += strsize(cursor);
        }
    }
    return cursor - base;
}
/*
 * Decode the source and destination of one target from `buf`: two
 * consecutive NUL-terminated strings. Both pointers reference `buf`.
 * Returns the number of bytes consumed.
 */
static size_t parse_target(void *buf, struct target *target) {
    char *src = buf;
    char *dest = src + strsize(src);

    target->src = src;
    target->dest = dest;
    return (size_t) (dest - src) + strsize(dest);
}
/*
 * Build a struct cconf from a mapped, already validated config image.
 * On success the returned config takes ownership of the mapping
 * (cconf_free unmaps it). On allocation failure the mapping is released
 * here and NULL is returned, so the caller never has to unmap.
 */
static struct cconf* parse(void *buf, size_t size) {
    struct cconf *c = calloc(1, sizeof(*c));
    char *cursor;
    unsigned int i;

    if (!c)
        goto fail;

    /* the target count follows directly after the header */
    cursor = read_entry_nr((char *) buf + sizeof(struct cconf_header), &c->nr);

    c->target = calloc(c->nr, sizeof(struct target));
    if (!c->target && c->nr)
        goto fail;

    /* from here on the config owns the mapping */
    c->map.buf = buf;
    c->map.size = size;

    for (i = 0; i < c->nr; i++) {
        struct target *target = c->target + i;

        cursor += parse_target(cursor, target);
        cursor += parse_filter(cursor, target);
    }
    return c;

fail:
    free(c);
    munmap(buf, size);
    return NULL;
}
/*
 * Check the header of a config image of `size` bytes starting at `hdr`:
 * signature, version, and the SHA-1 stored in hdr->crc, which covers the
 * header fields preceding `crc` plus everything after the header.
 * Returns 0 if the image is valid, -1 otherwise.
 */
static int validate_hdr(struct cconf_header *hdr, size_t size) {
    unsigned char digest[20];
    SHA_CTX sha;
    int magic_ok = hdr->signature == htonl(CCONF_SIGNATURE);
    int version_ok = hdr->version == htonl(1);

    if (!magic_ok || !version_ok)
        return -1;

    SHA1_Init(&sha);
    SHA1_Update(&sha, hdr, offsetof(struct cconf_header, crc));
    SHA1_Update(&sha, hdr + 1, size - sizeof(*hdr));
    SHA1_Final(digest, &sha);

    return memcmp(digest, hdr->crc, sizeof(hdr->crc)) ? -1 : 0;
}
/*
 * Load the compiled config stored in `file`.
 * The file is mapped read-only, its header and checksum are validated and
 * the content is parsed into a newly allocated struct cconf.
 * Returns NULL if the file cannot be opened or mapped, is not larger than
 * the header, or fails validation. Release the result with cconf_free().
 */
struct cconf* cconf_read(const char *file) {
    struct stat st;
    void *buf;
    int fd;

    fd = open(file, O_RDONLY);
    if (fd < 0)
        return NULL;

    /* one fstat is enough; a usable image must hold more than the header */
    if (fstat(fd, &st) < 0 || st.st_size < 0 ||
        (size_t) st.st_size <= sizeof(struct cconf_header)) {
        close(fd);
        return NULL;
    }

    buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    /* the mapping stays valid after the descriptor is closed */
    close(fd);
    if (buf == MAP_FAILED)
        return NULL;

    if (validate_hdr(buf, st.st_size) < 0) {
        munmap(buf, st.st_size);
        return NULL;
    }
    return parse(buf, st.st_size);
}
/*
 * Serialise `c` to `fd` in the compiled config format:
 *   header | target count | per target: src, dest, filter count, filters
 * Integers are written in network byte order, strings including their
 * terminating NUL. The header is written last because its checksum covers
 * the body.
 *
 * Returns 0 on success, -1 if a target lacks src or dest (checked before
 * anything is written) or if any seek or write fails or is short.
 */
int cconf_write(int fd, struct cconf *c) {
    unsigned int i, j;
    SHA_CTX ctx;
    struct cconf_header hdr;
    off_t expected = sizeof(hdr);

    /* validate first so an incomplete config never produces partial output */
    for (i = 0; i < c->nr; i++) {
        if (!c->target[i].src || !c->target[i].dest)
            return -1;
    }

    /* zero the struct so no uninitialised padding reaches the file */
    memset(&hdr, 0, sizeof(hdr));
    hdr.signature = htonl(CCONF_SIGNATURE);
    hdr.version = htonl(1);

    SHA1_Init(&ctx);
    SHA1_Update(&ctx, &hdr, offsetof(struct cconf_header, crc));

    /* leave room for the header; it is written once the checksum is known */
    if (lseek(fd, sizeof(hdr), SEEK_SET) < 0)
        return -1;

    /* number of targets */
    write_int(&ctx, fd, c->nr);
    expected += sizeof(int);

    for (i = 0; i < c->nr; i++) {
        struct target *target = c->target + i;

        sha1_write(&ctx, fd, target->src, strsize(target->src));
        sha1_write(&ctx, fd, target->dest, strsize(target->dest));
        expected += strsize(target->src) + strsize(target->dest);

        /* number of filters, then the filters themselves */
        write_int(&ctx, fd, target->nr);
        expected += sizeof(int);
        for (j = 0; j < target->nr; j++) {
            sha1_write(&ctx, fd, target->filter[j],
                strsize(target->filter[j]));
            expected += strsize(target->filter[j]);
        }
    }
    SHA1_Final(hdr.crc, &ctx);

    /*
     * A failed write does not advance the file offset and a short write
     * advances it too little, so the offset equals `expected` only if
     * every write above completed in full.
     */
    if (lseek(fd, 0, SEEK_CUR) != expected)
        return -1;

    /* the header is not part of its own checksum: plain write, no hashing */
    if (lseek(fd, 0, SEEK_SET) < 0 ||
        write(fd, &hdr, sizeof(hdr)) != (ssize_t) sizeof(hdr))
        return -1;
    return 0;
}

43
cconf.h Normal file
View file

@ -0,0 +1,43 @@
#ifndef CCONF_H
#define CCONF_H
/*
 * Data structures and API for the 'Dlight compiled config' file format.
 */
/* File signature: the bytes 0xE8 (decimal 232), 'D', 'C', 'C'. */
#define CCONF_SIGNATURE 0xe8444343
/*
 * On-disk header. cconf.c writes this struct to the file as-is, with
 * signature and version converted by htonl().
 */
struct cconf_header {
unsigned int signature; /* CCONF_SIGNATURE */
unsigned int version; /* format version; cconf.c reads and writes 1 */
unsigned char crc[20]; /* SHA-1 over the fields above and the data after the header */
};
/* One feed to check, with its download destination and filters. */
struct target {
char *src; /* source (URL) */
char *dest; /* destination, path on filesystem */
char **filter; /* array of `nr` filter strings */
unsigned int nr; /* number of entries in `filter` */
};
/* A whole configuration: a list of targets. */
struct cconf {
struct target *target; /* array of `nr` targets */
unsigned int nr; /* number of entries in `target` */
/* Set by cconf_read(): the mapped file that the target strings point into. */
struct {
void *buf;
unsigned long size;
} map;
};
/* Release the memory owned by `c`; the struct itself is not freed. */
void cconf_free(struct cconf *c);
/* Append a zeroed target to `c` and return it. */
struct target* cconf_new_target(struct cconf *c);
/* Append `filter` to `t`; the pointer is stored, not copied. NULL is ignored. */
void cconf_add_filter(struct target *t, char *filter);
/* Write `c` to `fd` in compiled form; returns 0 on success, -1 on error. */
int cconf_write(int fd, struct cconf *c);
/* Read and validate a compiled config file; returns NULL on failure. */
struct cconf* cconf_read(const char *filename);
#endif /* CCONF_H */

283
compile.c Normal file
View file

@ -0,0 +1,283 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include "env.h"
#include "cconf.h"
/* Print a printf-style message to stderr, prefixed with "error: ". */
#define error(...) fprintf(stderr, "error: " __VA_ARGS__)
/* True for characters allowed in a destination alias: alphanumerics and '-'. */
#define isalias(x) (isalnum(x) || (x) == '-')
/* Size of the buffer that holds an alias name in parse_alias(). */
#define MAXNAME 1024
/* Number of entries in dest_table. */
static int dest_table_nr;
/*
 * Destination aliases defined by ':' lines in the config file.
 * `key` is the alias name, `value` the destination text that followed it.
 */
static struct dest_table {
char *key;
char *value;
} *dest_table;
/* The configuration being built; written out by main(). */
static struct cconf cconf;
/* Line counter maintained by get_next_ch(), used in parse error messages. */
static int config_lineno = 1;
/* The config file currently being parsed. */
static FILE *config_fd;
/*
 * Read the next character from the config file, bumping the line counter
 * whenever a newline is consumed. Returns the character or EOF.
 */
static int get_next_ch(void) {
    int ch = getc(config_fd);

    config_lineno += (ch == '\n');
    return ch;
}
/*
 * Look up alias `key` in the destination table.
 * Returns its index, or -1 if no entry has that key.
 */
static int find_destination(const char *key) {
    int pos = 0;

    while (pos < dest_table_nr) {
        if (strcmp(dest_table[pos].key, key) == 0)
            return pos;
        pos++;
    }
    return -1;
}
/*
 * Return the destination stored for alias `key`. An unknown alias falls
 * back to the first entry of the table, so the table must not be empty.
 */
static char* fetch_destination(char *key) {
    int pos = find_destination(key);

    return dest_table[pos < 0 ? 0 : pos].value;
}
/* Release the key and value strings of one destination table entry. */
static void free_destination(struct dest_table *entry) {
    /* free(NULL) is a no-op, so unset fields need no special casing */
    free(entry->key);
    free(entry->value);
}
static void insert_destination(const char *key, const char *value) {
int index = find_destination(key);
if (index < 0) {
dest_table = realloc(dest_table,
sizeof(struct dest_table) * (dest_table_nr + 1));
index = dest_table_nr++;
} else {
free_destination(&dest_table[index]);
}
if (!value)
value = "";
dest_table[index].key = strdup(key);
dest_table[index].value = strdup(value);
}
/*
 * Read the rest of the current line as a value.
 * Leading and trailing whitespace is dropped; every whitespace character
 * inside the value is stored as a single ' '.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call, or NULL if the value does not fit into the buffer.
 */
static char* parse_value() {
    static char value[1024];
    size_t len = 0, space = 0;
    int c;

    for(;;) {
        c = get_next_ch();
        if (c == EOF || c == '\n')
            break;
        if (isspace(c)) {
            /* only remember whitespace once the value has started */
            if (len)
                space++;
            continue;
        }
        /* pending spaces + this character + the terminator must fit */
        if (len + space + 1 >= sizeof(value)) {
            error("value is too long\n");
            return NULL;
        }
        for(; space; space--)
            value[len++] = ' ';
        value[len++] = c;
    }
    value[len] = '\0';
    return value;
}
/*
 * Parse an alias definition (the text after a leading ':'):
 *     <name> [<value>]
 * The name is lower-cased and may only contain alias characters; the
 * value is the rest of the line. The pair is stored in the destination
 * table. Returns 0 on success, -1 on a syntax error or oversized input.
 */
static int parse_alias() {
    static char name[MAXNAME];
    const char *value = NULL;
    size_t len = 0;
    int c;

    for(;;) {
        c = get_next_ch();
        if (c == EOF || isspace(c))
            break;
        if (!isalias(c)) {
            error("Invalid character '%c' in alias\n", c);
            return -1;
        }
        /* keep one byte free for the terminating NUL */
        if (len >= sizeof(name) - 1)
            return -1;
        name[len++] = tolower(c);
    }
    name[len] = '\0';

    /* a name that ends the line has no value */
    if (c != '\n') {
        value = parse_value();
        if (!value)
            return -1;
    }
    insert_destination(name, value);
    return 0;
}
/*
 * Parse the rest of the current line as a filter and add a copy of it to
 * `target`. Returns 0 on success, -1 if the value cannot be read or
 * copied.
 *
 * NOTE(review): a line holding only whitespace yields an empty filter
 * string; whether that is intended is not visible here.
 */
static int parse_filter(struct target *target) {
    char *value = parse_value();
    char *copy;

    if (!value)
        return -1;
    /* cconf_add_filter ignores NULL, so report a failed copy here */
    copy = strdup(value);
    if (!copy)
        return -1;
    cconf_add_filter(target, copy);
    return 0;
}
/*
 * Parse a target line:
 *     <url> [<alias>]
 * and fill in target->src and target->dest. Without an alias the first
 * defined destination is used; an alias is resolved through
 * fetch_destination(). Returns 0 on success, -1 on error.
 */
static int parse_target(struct target *target) {
    char src[4096], alias[4096];
    size_t len = 0;
    int c, trailing_space = 0;
    const char *dest;

    for(;;) {
        c = get_next_ch();
        if (c == EOF || isspace(c))
            break;
        /* keep one byte free for the terminating NUL */
        if (len >= sizeof(src) - 1)
            return -1;
        src[len++] = c;
    }
    src[len] = '\0';

    /*
     * Next, get the alias. If the URL was terminated by the end of the
     * line there is none, and reading on would consume the following
     * line (typically the first filter) as the alias.
     */
    len = 0;
    while (c != EOF && c != '\n') {
        c = get_next_ch();
        if (c == EOF || c == '\n')
            break;
        if (isspace(c)) {
            if (len)
                trailing_space = 1;
            continue;
        }
        if (!isalias(c)) {
            error("Invalid character '%c' in alias\n", c);
            return -1;
        }
        if (trailing_space) {
            error("Space not allowed in alias\n");
            return -1;
        }
        if (len >= sizeof(alias) - 1)
            return -1;
        alias[len++] = tolower(c);
    }
    alias[len] = '\0';

    /*
     * Both the default and the unknown-alias fallback read the first
     * table entry, so at least one destination must exist either way.
     */
    if (!dest_table_nr) {
        error("No destination found for target '%s'\n", src);
        return -1;
    }

    dest = len ? fetch_destination(alias) : dest_table[0].value;
    target->src = strdup(src);
    target->dest = strdup(dest);
    if (!target->src || !target->dest)
        return -1;
    return 0;
}
/*
 * Parse the human-readable config `file` into the global `cconf`.
 * Line types, decided by the first character:
 *   ':'   alias definition
 *   '\t'  filter for the most recently started target
 *   other non-blank text starts a new target
 * Returns 0 when the whole file was parsed, -1 on error (after printing
 * the line number). The file is closed in both cases.
 */
static int parse_config_file(const char *file) {
    struct target *target = NULL;
    int rc = -1;

    config_fd = fopen(file, "r");
    if (!config_fd) {
        perror(file);
        return -1;
    }

    for(;;) {
        int c = get_next_ch();

        if (c == EOF) {
            rc = 0;
            break;
        }
        if (c == ':') {
            if (parse_alias() < 0)
                break;
            continue;
        }
        if (target && c == '\t') {
            if (parse_filter(target) < 0)
                break;
            continue;
        }
        if (isspace(c))
            continue;

        target = cconf_new_target(&cconf);
        if (!target)
            break;
        /* the character belongs to the URL; hand it back to the parser */
        ungetc(c, config_fd);
        if (parse_target(target) < 0)
            break;
    }

    if (rc < 0)
        error("failed to parse line %i in %s\n", config_lineno, file);
    fclose(config_fd);
    config_fd = NULL;
    return rc;
}
/*
 * Publish a lock file by renaming "<name>.lock" to "<name>".
 * Returns the result of rename(2), or -1 if `file` does not end in
 * ".lock", has no name before the suffix, or is too long for the local
 * buffer.
 */
static int commit_lock(const char *file) {
    static const char suffix[] = ".lock";
    const size_t suffix_len = sizeof(suffix) - 1;
    char target[4096];
    size_t len;

    if (!file)
        return -1;
    len = strlen(file);
    if (len <= suffix_len || strcmp(file + len - suffix_len, suffix) != 0)
        return -1;

    len -= suffix_len;
    if (len >= sizeof(target))
        return -1;
    memcpy(target, file, len);
    target[len] = '\0';
    return rename(file, target);
}
int main(int argc, char **argv) {
int lockfd;
char lockfile[4096];
snprintf(lockfile, sizeof(lockfile), "%s/%s",
env_get_dir(), "config.lock");
/* Remove lockfile if forced */
if (argc > 1 && !strcmp(argv[1], "-f"))
unlink(lockfile);
lockfd = open(lockfile, O_WRONLY | O_CREAT | O_EXCL, 0600);
if (lockfd < 0) {
if (errno == EEXIST) {
error("config is locked\n");
} else {
perror("unable to create new configfile");
}
return 1;
}
if (parse_config_file("./config") < 0)
goto error;
if (!cconf_write(lockfd, &cconf) &&
!commit_lock(lockfile))
return 0;
error:
unlink(lockfile);
return 1;
}

13
config.sample Normal file
View file

@ -0,0 +1,13 @@
:dest1 /path/to/dest1
:dest2 /path/to/dest2
url1
regex1
regex2
regex3
regex4
url2 dest2
regex5
regex6

319
dlhist.c Normal file
View file

@ -0,0 +1,319 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include "env.h"
#include "dlhist.h"
/*
 * Download history: remembers which URLs were already handled, in a hash
 * table that is loaded from and flushed to a file in the dlight directory.
 *
 * TODO: maybe better to store hashes instead of strings in the file
 * to 1, make records fixed size. 2, faster memory copy.
 */
/* File signature: the bytes 0xC3 (decimal 195), 'D', 'L', 'H'. */
#define SIGNATURE 0xC3444C48
/* Name of the history file inside the directory from env_get_dir(). */
#define STORAGE_FILE "dlhist"
/* Smallest number of slots the table is ever sized to. */
#define TABLE_MIN_SIZE 128
/* Load factor count/size as a double; a size of 0 is treated as 1. */
#define HASH_TABLE_LOAD(c, s) ((double) (c) / ((s) ? (s) : 1))
/* On-disk file header; all fields are stored in network byte order. */
struct header {
unsigned int signature; /* SIGNATURE */
unsigned int version; /* format version; this file reads and writes 1 */
unsigned int size; /* table_size at the time of the flush */
};
/* One table slot; a NULL key marks the slot as empty. */
struct hash_entry {
unsigned int time; /* time(NULL) of the last dlhist_update() for this key */
char *key; /* heap-allocated URL */
};
/* True if `x` is NULL or refers to an unused slot. */
#define he_empty(x) (!(x) || (x)->key == NULL)
/* Descriptor of the open history file, -1 while closed. */
static int fd = -1;
/* The open-addressing hash table, its slot count and number of used slots. */
static struct hash_entry *table;
static unsigned int table_size;
static unsigned int table_count;
/*
 * Hash a URL-like string.
 * Each character is folded in as h = c + (h << 6) + (h << 16) - h, with
 * two special cases:
 *  - on "://" the hash restarts (continuing from the second '/'), so the
 *    text before it does not influence the result;
 *  - a '/' that is the last character of the string ends the hashing, so
 *    a single trailing slash does not change the result.
 */
static unsigned hash(const char *s) {
    unsigned h = 0;

    while (*s) {
        if (strncmp(s, "://", 3) == 0) {
            h = 0;
            s += 2;
        } else if (strncmp(s, "/", 2) == 0) {
            /* comparing 2 bytes includes the NUL: matches only a final '/' */
            break;
        }
        h = ((unsigned)*s) + (h << 6) + (h << 16) - h;
        s++;
    }
    return h;
}
/*
 * Find the slot for `key` using linear probing: either the slot that
 * already holds `key`, or the first empty slot of its probe sequence.
 * The table must contain at least one empty slot, otherwise the probe
 * for an absent key never terminates.
 */
static struct hash_entry* lookup(const char *key) {
    unsigned slot;

    for (slot = hash(key) % table_size;
         table[slot].key != NULL;
         slot = (slot + 1) % table_size) {
        if (strcmp(table[slot].key, key) == 0)
            break;
    }
    return &table[slot];
}
/*
 * Claim the empty slot `he` for a copy of `key` and count it.
 * A slot that is already in use is left untouched.
 */
static inline void he_set(struct hash_entry *he, const char *key) {
    if (he_empty(he)) {
        he->key = strdup(key);
        table_count++;
    }
}
/*
 * Insert a copy of the entry `he` (the key pointer is taken over, not
 * duplicated) into the table.
 * Returns 1 if it was stored, 0 if the key is already present, in which
 * case the table is unchanged and the caller keeps ownership of the key.
 */
static int he_insert(struct hash_entry *he) {
    struct hash_entry *slot = lookup(he->key);

    if (!he_empty(slot))
        return 0;

    *slot = *he;
    table_count++;
    return 1;
}
/*
 * Free the key of `he`, mark the slot empty and decrement the count.
 * The slot is simply cleared; lookup() stops probing at an empty slot,
 * so entries stored behind it in the same probe run are affected.
 */
static void he_remove(struct hash_entry *he) {
    free(he->key);
    table_count--;
    memset(he, 0, sizeof(*he));
}
static void resize_table() {
unsigned int load, i, old_size = table_size;
struct hash_entry *old = table;
load = HASH_TABLE_LOAD(table_count, table_size);
/* check if resize should be done */
if ((load < 0.5 && table_size <= TABLE_MIN_SIZE) ||
(load >= 0.5 && load <= 0.75))
return;
/*
* set size to a load factor that is in the
* middle in the valid range.
*/
table_size = table_count / 0.625;
if (table_size < TABLE_MIN_SIZE)
table_size = TABLE_MIN_SIZE;
table_count = 0;
table = calloc(sizeof(*table), table_size);
for(i=0; i < old_size; i++) {
struct hash_entry *he = old + i;
if (!he_empty(he))
he_insert(he);
}
free(old);
}
static void build_table(const char *buf, size_t len) {
size_t offset = 0;
table = calloc(sizeof(*table), table_size);
while(offset < len) {
unsigned int keylen;
struct hash_entry entry;
memcpy(&entry.time, buf + offset, sizeof(entry.time));
offset += sizeof(entry.time);
memcpy(&keylen, buf + offset, sizeof(keylen));
offset += sizeof(keylen);
entry.time = ntohl(entry.time);
keylen = ntohl(keylen);
entry.key = malloc(keylen);
memcpy(entry.key, buf + offset, keylen);
offset += keylen;
if (he_insert(&entry) < 0)
free(entry.key);
}
}
int dlhist_open() {
char filename[4096], *buf = NULL;
unsigned offset = 0;
struct stat st;
struct header *hdr;
/* Open file */
snprintf(filename, sizeof(filename),
"%s/%s", env_get_dir(), STORAGE_FILE);
fd = open(filename, O_CREAT | O_RDWR, 0600);
if (fd < 0 || fstat(fd, &st) < 0) {
perror("dlhist_open");
goto error;
}
if (st.st_size >= sizeof(*hdr)) {
buf = malloc(st.st_size);
if (!buf)
goto error;
read(fd, buf, st.st_size);
/* Validate header */
hdr = (struct header *) buf;
if (hdr->signature != htonl(SIGNATURE) ||
hdr->version != htonl(1)) {
fprintf(stderr, "dlhist_open: Invalid header\n");
goto error;
}
/* Get current table size */
table_size = htonl(hdr->size);
offset = sizeof(*hdr);
}
if (table_size < 1)
table_size = TABLE_MIN_SIZE;
build_table(buf + offset, st.st_size - offset);
if (buf)
free(buf);
return 0;
error:
if (buf)
free(buf);
if (fd >= 0)
close(fd);
fd = -1;
return -1;
}
/*
 * Returns non-zero if `url` is recorded in the history, 0 otherwise
 * (always 0 while no table is loaded).
 */
int dlhist_lookup(const char *url) {
    struct hash_entry *slot;

    if (table_size < 1)
        return 0;

    slot = lookup(url);
    return !he_empty(slot);
}
/*
 * Record `url` in the history with the current time. An existing entry
 * only has its timestamp refreshed. Does nothing while no table is loaded.
 */
void dlhist_update(const char *url) {
    struct hash_entry *slot;

    if (table_size < 1)
        return;

    /*
     * The timestamp is written before any resize, because the slot
     * pointer is no longer valid once the table has been rebuilt.
     */
    slot = lookup(url);
    slot->time = time(NULL);
    if (!he_empty(slot))
        return;

    he_set(slot, url);
    resize_table();
}
void dlhist_purge(unsigned int timestamp) {
unsigned int i, t = 0, now = time(NULL);
if (now < timestamp)
return;
t = now - timestamp;
for(i=0; i < table_size; i++) {
struct hash_entry *entry = table + i;
if (entry->key && entry->time <= t)
he_remove(entry);
}
resize_table();
}
/*
 * Rewrite the history file from the in-memory table: the header followed
 * by one record (time, key length, key including its NUL) per used slot,
 * integers in network byte order. Does nothing if the file is not open.
 * An I/O failure is reported on stderr and ends the flush.
 */
void dlhist_flush() {
    unsigned int i;
    struct header hdr;

    if (fd < 0)
        return;

    if (ftruncate(fd, 0) < 0 || lseek(fd, 0, SEEK_SET) < 0)
        goto fail;

    /* Write header */
    hdr.signature = htonl(SIGNATURE);
    hdr.version = htonl(1);
    hdr.size = htonl(table_size);
    if (write(fd, &hdr, sizeof(hdr)) != (ssize_t) sizeof(hdr))
        goto fail;

    /* Write hash entries */
    for(i=0; i < table_size; i++) {
        struct hash_entry *entry = table + i;
        unsigned int when, keylen;
        size_t size;

        if (he_empty(entry))
            continue;

        size = strlen(entry->key) + 1;
        when = htonl(entry->time);
        keylen = htonl(size);

        if (write(fd, &when, sizeof(when)) != (ssize_t) sizeof(when) ||
            write(fd, &keylen, sizeof(keylen)) != (ssize_t) sizeof(keylen) ||
            write(fd, entry->key, size) != (ssize_t) size)
            goto fail;
    }

    /* Make sure we flush to disk */
    if (fsync(fd) < 0)
        goto fail;
    return;

fail:
    perror("dlhist_flush");
}
/*
 * Flush the history to disk, close the file and release the table.
 * Afterwards the module is back in its initial, unloaded state.
 */
void dlhist_close() {
    unsigned int slot;

    dlhist_flush();

    if (fd >= 0)
        close(fd);
    fd = -1;

    if (table) {
        /* empty slots hold a NULL key, which free() ignores */
        for (slot = 0; slot < table_size; slot++)
            free(table[slot].key);
        free(table);
    }

    table = NULL;
    table_size = 0;
    table_count = 0;
}

17
dlhist.h Normal file
View file

@ -0,0 +1,17 @@
#ifndef DLHIST_H
#define DLHIST_H
/* Open the history file and load it; returns 0 on success, -1 on error. */
int dlhist_open();
/* Returns non-zero if `url` is recorded in the history. */
int dlhist_lookup(const char *url);
/* Record `url` with the current time (refreshes the time if present). */
void dlhist_update(const char *url);
/* Drop entries that are at least `timestamp` seconds old. */
void dlhist_purge(unsigned int timestamp);
/* Write the in-memory history to the history file. */
void dlhist_flush();
/* Flush, close the file and free the in-memory history. */
void dlhist_close();
#endif /* DLHIST_H */

87
dlight.c Normal file
View file

@ -0,0 +1,87 @@
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include "env.h"
#include "cconf.h"
#include "dlhist.h"
#include "filter.h"
#include "http.h"
#include "rss.h"
#define error(...) fprintf(stderr, "error: " __VA_ARGS__)
/*
 * Walk all items of `rss` and download those whose title matches one of
 * the target's filters and whose link is not yet in the history.
 * A download that fails with EEXIST is treated like a success: the link
 * is recorded in the history either way. Other failures are reported and
 * the item is left unrecorded.
 */
static void process_items(rss_t rss, struct target *t) {
    struct rss_item item;

    while (rss_walk_next(rss, &item)) {
        int wanted = filter_match_list(t->filter, t->nr, item.title)
            && !dlhist_lookup(item.link);

        if (!wanted)
            continue;

        if (http_download_file(item.link, t->dest) >= 0 || errno == EEXIST)
            dlhist_update(item.link);
        else
            error("download failed: %s\n", strerror(errno));
    }
}
/*
 * Run one pass over the configuration: purge history entries older than
 * 7200 seconds, then fetch every target's feed, parse it and process its
 * items. A target whose feed cannot be fetched or parsed is skipped.
 */
static void process(struct cconf *config) {
    unsigned int i;

    dlhist_purge(7200);

    for(i=0; i < config->nr; i++) {
        struct target *t = config->target + i;
        struct http_data *data;
        rss_t rss;

        data = http_fetch_page(t->src);
        if (!data)
            continue;

        rss = rss_parse(data->block, data->len);
        if (!rss) {
            error("failed to parse rss: %s\n", t->src);
        } else {
            process_items(rss, t);
            rss_free(rss);
        }
        /* release the page on both paths */
        http_free(data);
    }
}
/*
 * dlight: read the compiled config from the dlight directory, open the
 * download history and process all targets once.
 * Exit status is 0 on success, 1 if the config or the history cannot be
 * loaded.
 */
int main(int argc, char *argv[]) {
    struct cconf *config;
    char configfile[4096];

    (void) argc;
    (void) argv;

    snprintf(configfile, sizeof(configfile), "%s/%s",
        env_get_dir(), "config");

    /*
     * cconf_read() also fails for files that exist but are not a valid
     * compiled config; errno is only meaningful for system call failures,
     * so clear it first to tell the two cases apart.
     */
    errno = 0;
    config = cconf_read(configfile);
    if (!config) {
        if (errno)
            perror(configfile);
        else
            error("%s: invalid or corrupt config\n", configfile);
        return 1;
    }

    if (dlhist_open() < 0) {
        perror("dlhist");
        cconf_free(config);
        return 1;
    }

    process(config);

    dlhist_close();
    cconf_free(config);
    return 0;
}

40
env.c Normal file
View file

@ -0,0 +1,40 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <errno.h>
#include "env.h"
/* Cached path of the dlight directory; empty until get_base() has filled it. */
static char base[4096];
/*
 * Fill `base` with "$HOME/.dlight", using "." when HOME is not set.
 * If the result would not fit, `base` is left unchanged.
 */
static void get_base() {
    const char *home = getenv("HOME");
    int len;

    if (!home)
        home = ".";

    len = strlen(home);
    /* 9 = strlen("/.dlight") plus the terminating NUL */
    if (len >= sizeof(base) - 9)
        return;

    memcpy(base, home, len);
    memcpy(base + len, "/.dlight", 9);
}
/*
 * Return the path of the dlight directory. On first use the path is
 * computed and the directory is created with mode 0700; if it cannot be
 * created (and does not already exist) a message is printed and the
 * process exits with status 1.
 */
const char* env_get_dir() {
    if (*base)
        return base;

    get_base();
    if (mkdir(base, 0700) < 0 && errno != EEXIST) {
        fprintf(stderr, "unable to create '%s': %s\n",
            base, strerror(errno));
        exit(1);
    }
    return base;
}

7
env.h Normal file
View file

@ -0,0 +1,7 @@
#ifndef ENV_H
#define ENV_H
/*
 * Path of the dlight directory ("$HOME/.dlight"); the directory is
 * created on first use.
 */
const char* env_get_dir();
#endif /* ENV_H */

76
filter.c Normal file
View file

@ -0,0 +1,76 @@
#include <assert.h>
#include <pcre.h>
#include <stdio.h>
#include <string.h>
#include "filter.h"
/*
 * Compile `pattern` with default options.
 * Returns the compiled expression (release with pcre_free), or NULL after
 * printing a message if the pattern is invalid.
 */
static inline pcre* compile(const char *pattern) {
    const char *errmsg;
    int erroffset;
    pcre *re = pcre_compile(pattern, 0, &errmsg, &erroffset, NULL);

    if (re == NULL)
        fprintf(stderr, "Error compiling expression\n");
    return re;
}
/*
 * Run the compiled expression against `subject`.
 * Returns a positive value if it matches, a negative PCRE error code
 * (including "no match") otherwise.
 */
static inline int match(pcre *re, const char *subject) {
    /*
     * pcre_exec() takes the vector size as a number of ints (a multiple
     * of 3), not bytes. Three ints are enough to learn whether the
     * expression matched at all.
     */
    int ovector[3];
    int rc;

    rc = pcre_exec(re, NULL, subject, strlen(subject), 0, 0,
        ovector, sizeof(ovector) / sizeof(ovector[0]));

    /*
     * 0 means "matched, but the vector is too small for all captured
     * substrings", which happens for any pattern with capture groups.
     * That is still a match.
     */
    return rc == 0 ? 1 : rc;
}
/*
 * Check whether `pattern` is a valid regular expression.
 * Returns 1 if it compiles, 0 otherwise (after printing the reason).
 */
int filter_check_syntax(const char *pattern) {
    const char *error;
    int eoffset;
    pcre *regex;

    regex = pcre_compile(pattern, 0, &error, &eoffset, NULL);
    if (!regex) {
        fprintf(stderr, "filter: error in expression '%s': %s\n",
            pattern, error);
        return 0;
    }
    /* only the verdict is needed; do not leak the compiled expression */
    pcre_free(regex);
    return 1;
}
/*
 * Returns 1 if `subject` matches the regular expression `pattern`,
 * 0 if it does not, if either argument is NULL, or if the pattern does
 * not compile.
 */
int filter_match(const char *pattern, const char *subject) {
    pcre *regex;
    int matched = 0;

    if (pattern && subject && (regex = compile(pattern)) != NULL) {
        matched = match(regex, subject) > 0;
        pcre_free(regex);
    }
    return matched;
}
/*
 * Returns 1 as soon as `subject` matches one of the `n` patterns,
 * 0 if none of them matches.
 */
int filter_match_list(char **patterns, unsigned n, const char *subject) {
    unsigned idx = 0;

    while (idx < n) {
        if (filter_match(patterns[idx], subject))
            return 1;
        idx++;
    }
    return 0;
}

11
filter.h Normal file
View file

@ -0,0 +1,11 @@
#ifndef FILTER_H
#define FILTER_H
/* Returns 1 if `pattern` is a valid expression, 0 (with a message) if not. */
int filter_check_syntax(const char *pattern);
/* Returns 1 if `subject` matches `pattern`, 0 otherwise. */
int filter_match(const char *pattern, const char *subject);
/* Returns 1 if `subject` matches any of the `n` patterns, 0 otherwise. */
int filter_match_list(char **patterns, unsigned n, const char *subject);
#endif /* FILTER_H */

208
http.c Normal file
View file

@ -0,0 +1,208 @@
#include <curl/curl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include "http.h"
/*
 * Search for `needle` within the first `size` bytes of `str`, stopping
 * early at a NUL byte.
 * Returns a pointer to the byte right AFTER the first occurrence, `str`
 * itself if `needle` is NULL or empty, and NULL if there is no occurrence
 * (or `str` is NULL).
 */
static char* strnstrr(const char *str, const char *needle, size_t size) {
    size_t nlen, i;

    if (!needle || !*needle)
        return (char *) str;
    if (!str)
        return NULL;

    nlen = strlen(needle);
    /* only positions where the whole needle still fits inside `size` */
    for (i = 0; i + nlen <= size && str[i]; i++) {
        if (!strncmp(str + i, needle, nlen))
            return (char *) str + i + nlen;
    }
    return NULL;
}
/*
 * Extract the last path component of `url`, ignoring one trailing '/'.
 * Returns a newly allocated string the caller must free, or NULL if the
 * component is empty (e.g. "" or "/"), `url` is NULL, or memory is
 * exhausted.
 */
static char* url_filename(const char *url) {
    const char *start, *end;
    size_t len;
    char *name;

    if (!url)
        return NULL;

    end = url + strlen(url);
    /* "dir/" names "dir" */
    if (end > url && end[-1] == '/')
        end--;

    /* walk back to the character after the previous '/' */
    for (start = end; start > url && start[-1] != '/'; start--)
        ;

    len = end - start;
    if (!len)
        return NULL;

    name = malloc(len + 1);
    if (!name)
        return NULL;
    memcpy(name, start, len);
    name[len] = '\0';
    return name;
}
#define HDR_CONDISP "Content-Disposition:"
/*
 * Header callback: extract the quoted filename from a
 *     Content-Disposition: ...; filename="<name>"
 * header line. `data` points to a char* that receives a newly allocated
 * copy of the name; once it is set, further headers are ignored.
 *
 * Returns the number of bytes handled (`size`), or 0 if the opening quote
 * of the filename is never closed within the header line.
 *
 * NOTE(review): the name is taken from the server as-is and later used to
 * build a local path; it is not checked for '/' or ".." here.
 */
static size_t hdr_fname_cb(void *src, size_t smemb, size_t nmemb, void *data) {
    const size_t prefix = sizeof(HDR_CONDISP) - 1;
    size_t size = smemb * nmemb;
    size_t start, pos;
    char *hdr = (char *) src;
    char **filename = (char **) data;
    char *value;

    if (*filename || size < prefix ||
        memcmp(hdr, HDR_CONDISP, prefix))
        return size;

    /* search only the bytes that remain after the header name */
    value = strnstrr(hdr + prefix, "filename=\"", size - prefix);
    if (!value)
        return size;

    /* find the closing quote; a quote preceded by '\' does not count */
    start = value - hdr;
    for (pos = start; ; pos++) {
        /* check the bound before touching the byte */
        if (pos >= size)
            return 0;
        if (hdr[pos] == '"' && hdr[pos-1] != '\\')
            break;
    }

    if (pos > start)
        *filename = strndup(hdr + start, pos - start);
    return size;
}
/*
 * Body callback: append the received chunk to the http_data buffer that
 * `data` points to.
 * Returns the number of bytes consumed, or 0 if the buffer cannot be
 * grown; the data collected so far stays intact and owned by the buffer
 * in that case.
 */
static size_t write_cb(void *src, size_t smemb, size_t nmemb, void *data) {
    struct http_data *dest = (struct http_data *) data;
    size_t size = smemb * nmemb;
    void *grown;

    /* keep the old block reachable if realloc fails */
    grown = realloc(dest->block, dest->len + size);
    if (grown == NULL) {
        printf("out of memory\n");
        return 0;
    }
    dest->block = grown;

    memcpy((char *) dest->block + dest->len, src, size);
    dest->len += size;
    return size;
}
/*
 * Create a curl easy handle for `url` with the options shared by all
 * requests: user agent, a 10 second timeout, and TLS verification
 * switched off.
 * Returns NULL if the handle cannot be created.
 *
 * NOTE(review): with VERIFYHOST and VERIFYPEER set to 0 the identity of
 * HTTPS servers is not checked. Left as in the original; confirm this is
 * intended.
 */
static CURL* setup_connection(const char *url) {
    CURL *handle = curl_easy_init();

    if (!handle)
        return NULL;

    curl_easy_setopt(handle, CURLOPT_URL, url);
    curl_easy_setopt(handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");
    /* numeric options are read as long from the variadic arguments */
    curl_easy_setopt(handle, CURLOPT_SSL_VERIFYHOST, 0L);
    curl_easy_setopt(handle, CURLOPT_SSL_VERIFYPEER, 0L);
    curl_easy_setopt(handle, CURLOPT_TIMEOUT, 10L);
    return handle;
}
/*
 * Download `url` into memory.
 * Returns a newly allocated http_data holding the response body (release
 * it with http_free), or NULL on failure; transfer errors are printed.
 */
struct http_data* http_fetch_page(const char *url) {
    CURL *handle;
    CURLcode res;
    struct http_data *data;

    data = malloc(sizeof(*data));
    if (!data)
        return NULL;
    data->block = NULL;
    data->len = 0;

    /* setup_connection() creates the only handle this function needs */
    handle = setup_connection(url);
    if (!handle)
        goto error;

    curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, write_cb);
    curl_easy_setopt(handle, CURLOPT_WRITEDATA, data);

    res = curl_easy_perform(handle);
    if (res != CURLE_OK) {
        printf("curl: (%s) %s\n", url, curl_easy_strerror(res));
        goto error;
    }
    curl_easy_cleanup(handle);
    return data;

error:
    curl_easy_cleanup(handle);
    http_free(data);
    return NULL;
}
/*
 * Download `url` and store it as a new file inside directory `dir`.
 * The file name comes from the Content-Disposition header if the server
 * sends one, otherwise from the last path component of the URL. An
 * existing file is never overwritten.
 *
 * Returns 0 on success, -1 on failure with errno set:
 *   EEXIST        the destination file already exists
 *   EIO           the transfer failed (details are printed)
 *   EINVAL        no file name could be determined
 *   ENAMETOOLONG  the destination path does not fit the local buffer
 *   or whatever malloc/open/write/close reported.
 * A partially written file is removed again.
 */
int http_download_file(const char *url, const char *dir) {
    int fd, err, n;
    char *filename = NULL;
    char path[4096];
    const char *cursor;
    size_t left;
    CURL *handle = NULL;
    CURLcode res;
    struct http_data *data;

    data = malloc(sizeof(*data));
    if (!data)
        return -1;
    data->block = NULL;
    data->len = 0;

    handle = setup_connection(url);
    if (!handle) {
        errno = ENOMEM;
        goto error;
    }

    curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, write_cb);
    curl_easy_setopt(handle, CURLOPT_WRITEDATA, data);
    curl_easy_setopt(handle, CURLOPT_HEADERFUNCTION, hdr_fname_cb);
    curl_easy_setopt(handle, CURLOPT_HEADERDATA, &filename);

    res = curl_easy_perform(handle);
    if (res != CURLE_OK) {
        printf("curl: (%s) %s\n", url, curl_easy_strerror(res));
        /* callers inspect errno; never leave a stale value behind */
        errno = EIO;
        goto error;
    }

    if (!filename) {
        filename = url_filename(url);
    }
    if (!filename) {
        errno = EINVAL;
        goto error;
    }

    /* now, write to file */
    n = snprintf(path, sizeof(path), "%s/%s", dir, filename);
    if (n < 0 || (size_t) n >= sizeof(path)) {
        errno = ENAMETOOLONG;
        goto error;
    }

    fd = open(path, O_CREAT | O_EXCL | O_WRONLY, 0644);
    if (fd < 0)
        goto error;

    cursor = data->block;
    left = data->len;
    while (left > 0) {
        ssize_t written = write(fd, cursor, left);

        if (written < 0) {
            if (errno == EINTR)
                continue;
            break;
        }
        cursor += written;
        left -= written;
    }

    if (left > 0) {
        err = errno;
        close(fd);
        /* an incomplete file would make later attempts fail with EEXIST */
        unlink(path);
        errno = err;
        goto error;
    }
    if (close(fd) < 0) {
        err = errno;
        unlink(path);
        errno = err;
        goto error;
    }

    free(filename);
    http_free(data);
    curl_easy_cleanup(handle);
    return 0;

error:
    err = errno;
    free(filename);
    http_free(data);
    curl_easy_cleanup(handle);
    errno = err;
    return -1;
}
/* Release an http_data object and the block it holds; NULL is accepted. */
void http_free(struct http_data *data) {
    if (data) {
        /* free(NULL) is a no-op, so an empty block needs no check */
        free(data->block);
        free(data);
    }
}

20
http.h Normal file
View file

@ -0,0 +1,20 @@
#ifndef HTTP_H
#define HTTP_H
#include <stddef.h>
/* A downloaded response body held in memory. */
struct http_data {
void *block; /* heap buffer with the received bytes, NULL if empty */
size_t len; /* number of bytes in `block` */
};
/* Download `url` into memory; returns NULL on failure. Free with http_free(). */
struct http_data* http_fetch_page(const char *url);
/* NOTE(review): declared here, but no definition is visible in http.c. */
int http_download_page(const char *url, const char *file);
/* Download `url` into a new file inside `dir`; returns 0 or -1 (errno set). */
int http_download_file(const char *url, const char *dir);
/* Release an http_data object; NULL is accepted. */
void http_free(struct http_data *data);
#endif

50
read-config.c Normal file
View file

@ -0,0 +1,50 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "cconf.h"
#include "env.h"
static char *usage = "dlight-read-config [ <file> | -h ]\n";
/*
 * dlight-read-config: print the contents of a compiled config file.
 * Reads the file named by the first argument, or "<dlight dir>/config"
 * when no argument is given; "-h" prints the usage text.
 * Exit status is 0 on success, 1 on usage or read errors.
 */
int main(int argc, char **argv) {
    unsigned int i, j;
    struct cconf *c;
    char file[4096];
    int n;

    if (argc > 1) {
        if (!strcmp(argv[1], "-h")) {
            /* fputs: the text is data, not a format string */
            fputs(usage, stderr);
            return 1;
        }
        /* snprintf always terminates the buffer, unlike strncpy */
        n = snprintf(file, sizeof(file), "%s", argv[1]);
    } else {
        n = snprintf(file, sizeof(file), "%s/config", env_get_dir());
    }
    if (n < 0 || (size_t) n >= sizeof(file)) {
        fprintf(stderr, "path is too long\n");
        return 1;
    }

    c = cconf_read(file);
    if (!c) {
        perror(file);
        return 1;
    }

    printf("--- Config file: %s ---\n", file);
    for(i=0; i < c->nr; i++) {
        struct target *t = c->target + i;

        printf("src: %s\n", t->src);
        printf("dest: %s\n", t->dest);
        for(j=0; j < t->nr; j++)
            printf("filter: %s\n", t->filter[j]);
        printf("---\n");
    }

    /* cconf_free() releases the contents, free() the struct itself */
    cconf_free(c);
    free(c);
    return 0;
}

134
rss.c Normal file
View file

@ -0,0 +1,134 @@
#include <libxml/parser.h>
#include <libxml/tree.h>
#include "rss.h"
/*
 * Sidestep warnings about signedness (xmlChar = unsigned char):
 * these wrappers cast plain char strings to xmlChar* for the libxml calls.
 */
#define xmlStrcmp(a, b) xmlStrcmp((xmlChar *)a, (xmlChar *)b)
#define xmlGetProp(n, p) xmlGetProp(n, (xmlChar *) p)
/* Iteration state of the walking interface. */
struct __walk_info {
xmlNodePtr current; /* next item rss_walk_next() will return, NULL when done */
};
/* The parsed document and the nodes located by validate(). */
struct __rssdoc {
xmlDocPtr xmldoc; /* the libxml document, owned by this struct's holder */
xmlNodePtr channel; /* the <channel> element */
xmlNodePtr firstitem; /* the first <item> element of the channel */
};
/* The object behind the opaque rss_t handle. */
struct __rss {
struct __rssdoc doc;
struct __walk_info info;
};
/*
 * Return the first direct child of `node` whose name equals `name`,
 * or NULL if there is none (or `node` is NULL).
 */
static xmlNodePtr getchild(xmlNodePtr node, const char *name) {
    xmlNodePtr child;

    if (!node)
        return NULL;

    child = node->children;
    while (child && xmlStrcmp(child->name, name))
        child = child->next;
    return child;
}
/*
 * Return the text of `node`: for an element, the content of its first
 * child if that is a text or CDATA node; for a text or CDATA node, its
 * own content. Returns "" in every other case, including a NULL node and
 * an element without children. The string belongs to the document.
 */
static const char* getnodetext(xmlNodePtr node) {
    if (node && node->type == XML_ELEMENT_NODE)
        node = node->children;

    /* an empty element has no children: node may be NULL here */
    if (node && node->content &&
        (node->type == XML_TEXT_NODE ||
         node->type == XML_CDATA_SECTION_NODE))
        return (const char *) node->content;
    return "";
}
/*
 * Check that the parsed document looks like an RSS feed and record the
 * nodes the walker needs in `doc`:
 *   - the root element is <rss> and carries a "version" attribute,
 *   - its first element child is <channel>,
 *   - the channel has an <item> child, and every element sibling that
 *     follows the first item is an <item> too.
 * Returns 0 if all checks pass, -1 otherwise.
 */
static int validate(struct __rssdoc *doc) {
    xmlChar *attr;
    xmlNodePtr node;

    if (!doc->xmldoc)
        return -1;

    /* the root element, skipping comments and the like before it */
    node = xmlDocGetRootElement(doc->xmldoc);
    if (!node || xmlStrcmp(node->name, "rss"))
        return -1;

    /* only the presence of the attribute matters; the copy is ours to free */
    attr = xmlGetProp(node, "version");
    if (!attr)
        return -1;
    xmlFree(attr);

    /* get channel node */
    node = xmlFirstElementChild(node);
    if (!node || xmlStrcmp(node->name, "channel"))
        return -1;
    doc->channel = node;

    /* get first item */
    node = getchild(node, "item");
    if (!node)
        return -1;
    doc->firstitem = node;

    while(node) {
        if (xmlStrcmp(node->name, "item"))
            return -1;
        node = xmlNextElementSibling(node);
    }
    return 0;
}
/*
 * Parse the `size` bytes at `buf` as an RSS document.
 * Returns a handle positioned at the first item (release it with
 * rss_free), or NULL if memory is exhausted or the data is not a valid
 * feed.
 */
rss_t rss_parse(void *buf, size_t size) {
    rss_t rss = malloc(sizeof(struct __rss));

    if (!rss)
        return NULL;

    rss->doc.xmldoc = xmlReadMemory(buf, size, "noname.xml", NULL, 0);
    if (validate(&rss->doc) < 0) {
        rss_free(rss);
        return NULL;
    }
    rss->info.current = rss->doc.firstitem;
    return rss;
}
/* Release a handle from rss_parse() and its document; NULL is accepted. */
void rss_free(rss_t r) {
    if (r) {
        if (r->doc.xmldoc)
            xmlFreeDoc(r->doc.xmldoc);
        free(r);
    }
}
/*
 * Store the title and link of the current item in `item` and advance to
 * the next element sibling. The strings point into the document.
 * Returns 1 if an item was delivered, 0 when there are no more items
 * (or `rss` is NULL).
 */
int rss_walk_next(rss_t rss, struct rss_item *item) {
    xmlNodePtr node;

    if (!rss || !rss->info.current)
        return 0;

    /* fill item */
    node = rss->info.current;
    item->title = getnodetext(getchild(node, "title"));
    item->link = getnodetext(getchild(node, "link"));

    rss->info.current = xmlNextElementSibling(node);
    return 1;
}
/*
 * Restart the walk at the first item.
 * Returns 1 on success, 0 if `rss` is NULL.
 */
int rss_walk_reset(rss_t rss) {
    if (!rss)
        return 0;

    rss->info.current = rss->doc.firstitem;
    return 1;
}

25
rss.h Normal file
View file

@ -0,0 +1,25 @@
#ifndef RSS_ITEM_H
#define RSS_ITEM_H
#include <stddef.h>
/* Opaque handle to a parsed RSS document. */
typedef struct __rss* rss_t;
typedef struct __walk_info* rss_walk_info;
/* Title and link of one feed item, as filled in by rss_walk_next(). */
struct rss_item {
const char *title;
const char *link;
};
/* Parse `size` bytes at `buf` as RSS; returns NULL if it is not a valid feed. */
rss_t rss_parse(void *buf, size_t size);
/* Release a handle returned by rss_parse(); NULL is accepted. */
void rss_free(rss_t r);
/* walking interface */
/* Fetch the next item into `item`; returns 1, or 0 when no items are left. */
int rss_walk_next(rss_t rss, struct rss_item *item);
/* Restart the walk at the first item; returns 1, or 0 if `rss` is NULL. */
int rss_walk_reset(rss_t rss);
#endif