commit b2df7405147471d1503e676d28d002d3e62705c6 Author: Henrik Hautakoski Date: Wed Jun 8 18:46:58 2011 +0200 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a14c3bb --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.* +*.o \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..ea8a713 --- /dev/null +++ b/Makefile @@ -0,0 +1,21 @@ + +CC = gcc +LDFLAGS = -lxml2 -lcurl -lpcre +CFLAGS = -g -I/usr/include/libxml2 + +PROGRAMS = dlight dlight-compile dlight-read-config + +all : $(PROGRAMS) + +install : $(PROGRAMS) + cp $^ $(HOME)/bin/ + +dlight : dlight.o env.o http.o rss.o filter.o cconf.o dlhist.o +dlight-compile : compile.o env.o cconf.o +dlight-read-config : read-config.o env.o cconf.o + +dlight-% : %.o + $(CC) $(LDFLAGS) -o $@ $^ + +clean : + $(RM) *.o $(PROGRAMS) diff --git a/README b/README new file mode 100644 index 0000000..d36662f --- /dev/null +++ b/README @@ -0,0 +1,59 @@ + + Dlight - automatic feed downloader + + -------------------------------------- + +dlight is a program that checks items in rss feeds and download those +items/links that are matched against a set of rules. +What this does different than other programs of this type is that configuration +of the program should be easy and flexible. Not forcing users to write +and maintain large lists of raw regular expressions. + +The best way to use dlight is by using time-based scheduling like cron. +-------------------------------------------------------- +# Make cron execute dlight every 15 minutes +*/15 * * * * /path/to/dlight >> /path/to/logs/dlight.log +-------------------------------------------------------- + + +dlight is divided into 3 major components: the dlight program, +Configuration files and Compiler. + + * dlight + +The actual program that checks feeds and download items. +The configuration data is read from the compiled config file "~/.dlight/config". 
+
+The program first fetches the rss file (target) and walks through
+all items, applying all filters associated with the current target.
+If one matches, that item is downloaded to the destination associated
+with the target. It does this for all rss files (targets) in the config.
+
+ * Configuration Files
+
+A set of human-readable configuration files through which the user
+configures dlight. This is where users define their targets, destinations,
+filters and other types of information. (Currently there is only one file,
+with a structure similar to the one the compiled format uses.)
+
+ * Compiler
+
+A compiler is provided that compiles the configuration files down to a
+binary config file used by dlight. One can think of this step as
+publishing/updating the configuration used by the program.
+
+
+This design is used for two main reasons.
+
+First, dlight may be executed (for example by cron) while you are editing
+your configuration. If dlight read directly from those files, it could
+find them in a half-edited state and
+do some weird things.
+
+The second reason is that processing all those files every time dlight is invoked
+can be quite slow; the compiled format is designed to provide fast I/O reads.
+Also, by using a source -> compiler -> output design, errors in the
+configuration files can be caught when the user invokes the compiler,
+which is a more natural way of notifying the user of such errors than
+having dlight abort and log the error: because the program is supposed to be
+executed in an automatic manner, the error would not be seen right away.
diff --git a/cconf.c b/cconf.c new file mode 100644 index 0000000..9019afd --- /dev/null +++ b/cconf.c @@ -0,0 +1,225 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cconf.h" + +/* we count NULL as part of the string ondisk */ +#define strsize(str) (strlen(str) + 1) + +static int sha1_write(SHA_CTX *ctx, int fd, void *buf, size_t size) { + + SHA1_Update(ctx, buf, size); + return write(fd, buf, size); +} + +static void write_int(SHA_CTX *ctx, int fd, int val) { + + val = htonl(val); + sha1_write(ctx, fd, &val, sizeof val); +} + +static void* read_entry_nr(void *buf, unsigned int *out) { + + memcpy(out, buf, sizeof(*out)); + *out = ntohl(*out); + return buf + sizeof(*out); +} + +void cconf_free(struct cconf *c) { + + int i, j; + + if (!c) + return; + + if (c->map.buf) { + free(c->target); + munmap(c->map.buf, c->map.size); + } else if (c->nr) { + for(i=0; i < c->nr; i++) { + struct target *t = c->target + i; + free(t->src); + free(t->dest); + for(j=0; j < t->nr; j++) + free(t->filter[j]); + free(t->filter); + } + free(c->target); + } +} + +struct target* cconf_new_target(struct cconf *c) { + + struct target *t; + + c->target = realloc(c->target, (sizeof(struct target) * (c->nr + 1))); + + t = c->target + (c->nr++); + memset(t, 0, sizeof(*t)); + + return t; +} + +void cconf_add_filter(struct target *t, char *filter) { + + if (!filter) + return; + + t->filter = realloc(t->filter, sizeof(t->filter) * (t->nr + 1)); + t->filter[t->nr++] = filter; +} + +static size_t parse_filter(void *buf, struct target *target) { + + size_t offset = read_entry_nr(buf, &target->nr) - buf; + + if (target->nr) { + int i; + + target->filter = malloc(sizeof(char *) * target->nr); + + for(i=0; i < target->nr; i++) { + target->filter[i] = (char *) buf + offset; + offset += strsize(buf + offset); + } + } + return offset; +} + +static size_t parse_target(void *buf, struct target *target) { + + size_t offset; + + target->src = 
(char *) buf; + offset = strsize(buf); + + target->dest = (char *) buf + offset; + offset += strsize(buf + offset); + + return offset; +} + +static struct cconf* parse(void *buf, size_t size) { + + struct cconf *c = calloc(1, sizeof(struct cconf)); + int i; + + /* move! */ + c->map.buf = buf; + c->map.size = size; + + buf = read_entry_nr(buf + sizeof(struct cconf_header), &c->nr); + + c->target = calloc(sizeof(struct target), c->nr); + + for(i=0; i < c->nr; i++) { + struct target *target = c->target + i; + + buf += parse_target(buf, target); + buf += parse_filter(buf, target); + } + return c; +} + +static int validate_hdr(struct cconf_header *hdr, size_t size) { + + SHA_CTX ctx; + unsigned char sha1[20]; + + if (hdr->signature != htonl(CCONF_SIGNATURE) || + hdr->version != htonl(1)) + return -1; + SHA1_Init(&ctx); + SHA1_Update(&ctx, hdr, offsetof(struct cconf_header, crc)); + SHA1_Update(&ctx, hdr + 1, size - sizeof(*hdr)); + SHA1_Final(sha1, &ctx); + if (memcmp(sha1, hdr->crc, sizeof(hdr->crc))) + return -1; + return 0; +} + +struct cconf* cconf_read(const char *file) { + + int fd; + struct stat st; + void *buf; + + fd = open(file, O_RDONLY); + if (fd < 0) + return NULL; + if (fstat(fd, &st) < 0) { + close(fd); + return NULL; + } + + buf = MAP_FAILED; + if (!fstat(fd, &st) && st.st_size > sizeof(struct cconf_header)) { + buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + } + close(fd); + + if (buf == MAP_FAILED) + return NULL; + + if (validate_hdr(buf, st.st_size) < 0) + goto error; + + return parse(buf, st.st_size); +error: + munmap(buf, st.st_size); + return NULL; +} + +int cconf_write(int fd, struct cconf *c) { + + int i; + SHA_CTX ctx; + struct cconf_header hdr; + + hdr.signature = htonl(CCONF_SIGNATURE); + hdr.version = htonl(1); + + SHA1_Init(&ctx); + SHA1_Update(&ctx, &hdr, offsetof(struct cconf_header, crc)); + + /* leave room for the header to be written later as CRC + will be calculated as we write the rest of the data */ + lseek(fd, 
sizeof(hdr), SEEK_SET); + + /* put number of targets */ + write_int(&ctx, fd, c->nr); + + for(i = 0; i < c->nr; i++) { + int j; + struct target *target = c->target + i; + + if (!target->src || !target->dest) + return -1; + sha1_write(&ctx, fd, target->src, strsize(target->src)); + sha1_write(&ctx, fd, target->dest, strsize(target->dest)); + + /* write number of filters */ + write_int(&ctx, fd, target->nr); + + for(j=0; j < target->nr; j++) { + sha1_write(&ctx, fd, target->filter[j], + strsize(target->filter[j])); + } + } + + SHA1_Final(hdr.crc, &ctx); + + /* write header */ + lseek(fd, 0, SEEK_SET); + sha1_write(&ctx, fd, &hdr, sizeof(hdr)); + + return 0; +} diff --git a/cconf.h b/cconf.h new file mode 100644 index 0000000..3f43ad0 --- /dev/null +++ b/cconf.h @@ -0,0 +1,43 @@ + +#ifndef CCONF_H +#define CCONF_H + +/* + * data structure for 'Dlight compiled config' file format. + */ + +/* \232 D C C */ +#define CCONF_SIGNATURE 0xe8444343 +struct cconf_header { + unsigned int signature; + unsigned int version; + unsigned char crc[20]; +}; + +struct target { + char *src; /* source. (url) */ + char *dest; /* destination, path on filesystem */ + char **filter; + unsigned int nr; +}; + +struct cconf { + struct target *target; + unsigned int nr; + struct { + void *buf; + unsigned long size; + } map; +}; + +void cconf_free(struct cconf *c); + +struct target* cconf_new_target(struct cconf *c); + +void cconf_add_filter(struct target *t, char *filter); + +int cconf_write(int fd, struct cconf *c); + +struct cconf* cconf_read(const char *filename); + +#endif /* CCONF_H */ diff --git a/compile.c b/compile.c new file mode 100644 index 0000000..83b0f9a --- /dev/null +++ b/compile.c @@ -0,0 +1,283 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "env.h" +#include "cconf.h" + +#define error(...) 
fprintf(stderr, "error: " __VA_ARGS__)
+
+/* characters accepted in a destination alias */
+#define isalias(x) (isalnum(x) || (x) == '-')
+
+#define MAXNAME 1024
+
+static int dest_table_nr;
+
+/* alias -> destination path, kept in order of first definition */
+static struct dest_table {
+    char *key;
+    char *value;
+} *dest_table;
+
+static struct cconf cconf;
+static int config_lineno = 1;
+static FILE *config_fd;
+
+/* Read one character from the config file, counting lines for error reports. */
+static int get_next_ch(void) {
+
+    int c = getc(config_fd);
+    if (c == '\n')
+        config_lineno++;
+    return c;
+}
+
+/* Return the index of alias `key` in dest_table, or -1 when it is not defined. */
+static int find_destination(const char *key) {
+
+    int i;
+    for(i=0; i < dest_table_nr; i++)
+        if (!strcmp(dest_table[i].key, key))
+            return i;
+    return -1;
+}
+
+/*
+ * Return the destination path stored for alias `key`; an unknown alias
+ * falls back to the first destination in the table.
+ * NOTE(review): this indexes dest_table[0] even when the table is empty,
+ * so callers have to make sure at least one destination is defined.
+ */
+static char* fetch_destination(char *key) {
+
+    int index = find_destination(key);
+    if (index < 0)
+        index = 0;
+    return dest_table[index].value;
+}
+
+/* Release the strings owned by one table entry (free(NULL) is a no-op). */
+static void free_destination(struct dest_table *entry) {
+
+    free(entry->key);
+    free(entry->value);
+}
+
+
+/*
+ * Define alias `key` as `value`, replacing an earlier definition of the
+ * same alias. A NULL value is stored as the empty string.
+ */
+static void insert_destination(const char *key, const char *value) {
+
+
+    int index = find_destination(key);
+
+    if (index < 0) {
+        dest_table = realloc(dest_table,
+            sizeof(struct dest_table) * (dest_table_nr + 1));
+        index = dest_table_nr++;
+    } else {
+        free_destination(&dest_table[index]);
+    }
+
+    if (!value)
+        value = "";
+
+    dest_table[index].key = strdup(key);
+    dest_table[index].value = strdup(value);
+}
+
+/*
+ * Read the rest of the current line into a static buffer. Leading and
+ * trailing whitespace is dropped, inner whitespace is kept as blanks.
+ * Returns the buffer (overwritten by the next call), or NULL when the
+ * value does not fit.
+ */
+static char* parse_value() {
+
+    static char value[1024];
+    int c, len = 0, space = 0;
+
+    for(;;) {
+        c = get_next_ch();
+        if (c == EOF || c == '\n')
+            break;
+        if (isspace(c)) {
+            if (len)
+                space++;
+            continue;
+        }
+        /* pending blanks + this character + the NUL must all fit */
+        if ((size_t) len + space + 1 >= sizeof(value)) {
+            error("value too long (max %lu characters)\n",
+                (unsigned long) sizeof(value) - 1);
+            return NULL;
+        }
+        for(; space; space--)
+            value[len++] = ' ';
+        value[len++] = c;
+    }
+    value[len] = '\0';
+
+    return value;
+}
+
+
+/*
+ * Parse an alias definition: a lower-cased name followed by an optional
+ * value on the same line. Returns 0 on success, -1 on a syntax error.
+ */
+static int parse_alias() {
+
+    static char name[MAXNAME];
+    const char *value;
+    int c, len = 0;
+
+    for(;;) {
+        c = get_next_ch();
+        if (c == EOF || isspace(c))
+            break;
+        if (!isalias(c)) {
+            error("Invalid character '%c' in alias\n", c);
+            return -1;
+        }
+        /* keep one byte free for the NUL written after the loop */
+        if ((size_t) len >= sizeof(name) - 1)
+            return -1;
+        name[len++] = tolower(c);
+    }
+    name[len] = '\0';
+
+    value = 
NULL; + if (c != '\n') { + value = parse_value(); + if (!value) + return -1; + } + + insert_destination(name, value); + + return 0; +} + +static int parse_filter(struct target *target) { + + char *value = parse_value(); + if (!value) + return -1; + cconf_add_filter(target, strdup(value)); + return 0; +} + +static int parse_target(struct target *target) { + + char src[4096], alias[4096]; + int c, len = 0, trailing_space = 0; + + for(;;) { + c = get_next_ch(); + if (c == EOF || isspace(c)) + break; + if (len >= sizeof(src)) + return -1; + src[len++] = c; + } + src[len] = '\0'; + + /* next, get alias */ + len = 0; + for(;;) { + c = get_next_ch(); + if (c == EOF || c == '\n') + break; + if (isspace(c)) { + if (len) + trailing_space = 1; + continue; + } + if (!isalias(c)) { + error("Invalid character '%c' in alias\n", c); + return -1; + } + if (trailing_space) { + error("Space not allowed in alias\n"); + return -1; + } + if (len >= sizeof(alias)) + return -1; + alias[len++] = tolower(c); + } + alias[len] = '\0'; + + if (!len && !dest_table_nr) { + error("No destination found for target '%s'\n", src); + return -1; + } + + target->src = strdup(src); + target->dest = strdup(len ? 
fetch_destination(alias) : + dest_table[0].value); + + return 0; +} + +static int parse_config_file(const char *file) { + + struct target *target = NULL; + + config_fd = fopen(file, "r"); + if (!config_fd) { + perror(file); + return -1; + } + + for(;;) { + int c = get_next_ch(); + if (c == EOF) + return 0; + if (c == ':') { + if (parse_alias() < 0) + break; + continue; + } + if (target && c == '\t') { + if (parse_filter(target) < 0) + break; + continue; + } + if (isspace(c)) + continue; + target = cconf_new_target(&cconf); + ungetc(c, config_fd); + if (parse_target(target) < 0) + break; + } + error("failed to parse line %i in %s\n", config_lineno, file); + + fclose(config_fd); + return -1; +} + +static int commit_lock(const char *file) { + + char target[4096]; + int len; + + len = strlen(file) - 5; /* .lock */ + + memcpy(target, file, len); + target[len] = '\0'; + + return rename(file, target); +} + +int main(int argc, char **argv) { + + int lockfd; + char lockfile[4096]; + + snprintf(lockfile, sizeof(lockfile), "%s/%s", + env_get_dir(), "config.lock"); + + /* Remove lockfile if forced */ + if (argc > 1 && !strcmp(argv[1], "-f")) + unlink(lockfile); + + lockfd = open(lockfile, O_WRONLY | O_CREAT | O_EXCL, 0600); + if (lockfd < 0) { + if (errno == EEXIST) { + error("config is locked\n"); + } else { + perror("unable to create new configfile"); + } + return 1; + } + + if (parse_config_file("./config") < 0) + goto error; + + if (!cconf_write(lockfd, &cconf) && + !commit_lock(lockfile)) + return 0; +error: + unlink(lockfile); + return 1; +} diff --git a/config.sample b/config.sample new file mode 100644 index 0000000..269570b --- /dev/null +++ b/config.sample @@ -0,0 +1,13 @@ + +:dest1 /path/to/dest1 +:dest2 /path/to/dest2 + +url1 + regex1 + regex2 + regex3 + regex4 + +url2 dest2 + regex5 + regex6 diff --git a/dlhist.c b/dlhist.c new file mode 100644 index 0000000..03a9b4f --- /dev/null +++ b/dlhist.c @@ -0,0 +1,319 @@ + +#include +#include +#include +#include +#include 
+#include
+#include
+#include
+#include
+#include "env.h"
+#include "dlhist.h"
+
+/*
+ * TODO: maybe better to store hashes instead of strings in the file
+ * to 1, make records fixed size. 2, faster memory copy.
+ */
+
+/* \195 D L H */
+#define SIGNATURE 0xC3444C48
+#define STORAGE_FILE "dlhist"
+
+#define TABLE_MIN_SIZE 128
+
+/* load factor as a double; a zero size is treated as 1 to avoid dividing by 0 */
+#define HASH_TABLE_LOAD(c, s) ((double) (c) / ((s) ? (s) : 1))
+
+/* on-disk file header, fields stored in network byte order */
+struct header {
+    unsigned int signature;
+    unsigned int version;
+    unsigned int size;
+};
+
+/* one slot of the open-addressing table; key == NULL marks a free slot */
+struct hash_entry {
+    unsigned int time;
+    char *key;
+};
+
+#define he_empty(x) (!(x) || (x)->key == NULL)
+
+static int fd = -1;
+
+static struct hash_entry *table;
+static unsigned int table_size;
+static unsigned int table_count;
+
+/*
+ * Hash an url. The hash restarts after a "://" separator and a single
+ * trailing '/' is not hashed.
+ */
+static unsigned hash(const char *s) {
+
+    unsigned h;
+
+    for(h=0; *s; s++) {
+        if (!strncmp(s, "://", 3)) {
+            h = 0;
+            s += 2;
+        } else if (!strncmp(s, "/", 2)) {
+            /* comparing 2 bytes includes the NUL: true only for a final '/' */
+            break;
+        }
+        h = ((unsigned)*s) + (h << 6) + (h << 16) - h;
+    }
+    return h;
+}
+
+/*
+ * Return the slot holding `key`, or the free slot where it would be stored.
+ * table_size must be non-zero and the table must contain a free slot,
+ * otherwise the probe loop cannot terminate.
+ */
+static struct hash_entry* lookup(const char *key) {
+
+    unsigned index = hash(key) % table_size;
+
+    /* linear probing */
+    while(table[index].key) {
+        if (!strcmp(table[index].key, key))
+            break;
+        index = (index + 1) % table_size;
+    }
+    return table + index;
+}
+
+/* Store a copy of `key` in a free slot; occupied slots are left untouched. */
+static inline void he_set(struct hash_entry *he, const char *key) {
+
+    if (!he_empty(he))
+        return;
+    he->key = strdup(key);
+    table_count++;
+}
+
+/*
+ * Copy `he` (taking over its key pointer) into the table.
+ * Returns 1 when inserted, 0 when the key is already present.
+ */
+static int he_insert(struct hash_entry *he) {
+
+    struct hash_entry *dest = lookup(he->key);
+
+    if (he_empty(dest)) {
+        memcpy(dest, he, sizeof(*he));
+        table_count++;
+        return 1;
+    }
+    return 0;
+}
+
+/* Free the key of an occupied slot and mark the slot as free. */
+static void he_remove(struct hash_entry *he) {
+
+    free(he->key);
+    memset(he, 0, sizeof(*he));
+    table_count--;
+}
+
+/*
+ * Rebuild the table with a new size when the load factor has left the
+ * 0.5 .. 0.75 range; tables at the minimum size are never shrunk.
+ */
+static void resize_table() {
+
+    unsigned int i, old_size = table_size;
+    struct hash_entry *old = table;
+    /* must be floating point: an integer truncates every load below 1.0 to 0 */
+    double load;
+
+    load = HASH_TABLE_LOAD(table_count, table_size);
+
+    /* check if resize should be done */
+    if ((load < 0.5 && table_size <= TABLE_MIN_SIZE) ||
+        (load >= 0.5 && load 
<= 0.75))
+        return;
+
+    /*
+     * set size to a load factor that is in the
+     * middle in the valid range.
+     */
+    table_size = table_count / 0.625;
+    if (table_size < TABLE_MIN_SIZE)
+        table_size = TABLE_MIN_SIZE;
+
+    table = calloc(table_size, sizeof(*table));
+    if (!table) {
+        /* out of memory: keep working with the old table */
+        table = old;
+        table_size = old_size;
+        return;
+    }
+    table_count = 0;
+
+    for(i=0; i < old_size; i++) {
+        struct hash_entry *he = old + i;
+        if (!he_empty(he))
+            he_insert(he);
+    }
+    free(old);
+}
+
+/*
+ * Allocate the table (table_size slots) and fill it from the record area
+ * of the history file. Each record is a 4-byte time, a 4-byte key length
+ * (both in network byte order) and the key bytes including the NUL.
+ * Parsing stops at the first truncated or corrupt record. `table` is left
+ * NULL when the allocation fails.
+ */
+static void build_table(const char *buf, size_t len) {
+
+    size_t offset = 0;
+
+    table = calloc(table_size, sizeof(*table));
+    if (!table)
+        return;
+
+    while(len - offset >= 2 * sizeof(unsigned int)) {
+        unsigned int keylen;
+        struct hash_entry entry;
+
+        memcpy(&entry.time, buf + offset, sizeof(entry.time));
+        offset += sizeof(entry.time);
+
+        memcpy(&keylen, buf + offset, sizeof(keylen));
+        offset += sizeof(keylen);
+
+        entry.time = ntohl(entry.time);
+        keylen = ntohl(keylen);
+
+        /* never read past the end of buf */
+        if (keylen < 1 || keylen > len - offset)
+            break;
+        /* lookup() needs at least one free slot to terminate */
+        if (table_count + 1 >= table_size)
+            break;
+
+        entry.key = malloc(keylen);
+        if (!entry.key)
+            break;
+        memcpy(entry.key, buf + offset, keylen);
+        entry.key[keylen - 1] = '\0';
+        offset += keylen;
+
+        /* he_insert() returns 0 when the key is already in the table */
+        if (!he_insert(&entry))
+            free(entry.key);
+    }
+}
+
+/*
+ * Open (creating it if needed) the history file in the dlight directory
+ * and load it into the in-memory table.
+ * Returns 0 on success, -1 on error.
+ */
+int dlhist_open() {
+
+    char filename[4096], *buf = NULL;
+    size_t offset = 0, datalen = 0;
+    struct stat st;
+    struct header *hdr;
+
+    /* Open file */
+    snprintf(filename, sizeof(filename),
+        "%s/%s", env_get_dir(), STORAGE_FILE);
+
+    fd = open(filename, O_CREAT | O_RDWR, 0600);
+    if (fd < 0 || fstat(fd, &st) < 0) {
+        perror("dlhist_open");
+        goto error;
+    }
+
+    /* anything shorter than a header is treated as an empty history */
+    if ((size_t) st.st_size >= sizeof(*hdr)) {
+
+        buf = malloc(st.st_size);
+        if (!buf)
+            goto error;
+
+        if (read(fd, buf, st.st_size) != st.st_size) {
+            fprintf(stderr, "dlhist_open: unable to read history\n");
+            goto error;
+        }
+
+        /* Validate header */
+        hdr = (struct header *) buf;
+        if (hdr->signature != htonl(SIGNATURE) ||
+            hdr->version != htonl(1)) {
+            fprintf(stderr, "dlhist_open: Invalid header\n");
+            goto error;
+        }
+
+        /* Get current table size */
+        table_size = ntohl(hdr->size);
+
+        offset = sizeof(*hdr);
+        datalen = st.st_size - offset;
+    }
+
+    if (table_size < 1)
+        table_size = TABLE_MIN_SIZE;
+
+    build_table(buf ? buf + offset : NULL, datalen);
+
+    free(buf);
+    buf = NULL;
+
+    if (!table) {
+        /* allocation failed: make lookups and updates no-ops */
+        table_size = 0;
+        goto error;
+    }
+    return 0;
+error:
+    if (buf)
+        free(buf);
+    if (fd >= 0)
+        close(fd);
+    fd = -1;
+    return -1; 
+} + +int dlhist_lookup(const char *url) { + + if (table_size < 1) + return 0; + return !he_empty(lookup(url)); +} + +void dlhist_update(const char *url) { + + struct hash_entry *he; + + if (table_size < 1) + return; + + /* + * set time and key before resize, + * hash_entry pointer is invalid after that operation. + */ + he = lookup(url); + he->time = time(NULL); + if (he_empty(he)) { + he_set(he, url); + resize_table(); + } +} + +void dlhist_purge(unsigned int timestamp) { + + unsigned int i, t = 0, now = time(NULL); + + if (now < timestamp) + return; + + t = now - timestamp; + for(i=0; i < table_size; i++) { + struct hash_entry *entry = table + i; + + if (entry->key && entry->time <= t) + he_remove(entry); + } + + resize_table(); +} + +void dlhist_flush() { + + int i; + struct header hdr; + + if (fd < 0) + return; + + ftruncate(fd, 0); + lseek(fd, 0, SEEK_SET); + + /* Write header */ + hdr.signature = htonl(SIGNATURE); + hdr.version = htonl(1); + hdr.size = htonl(table_size); + + write(fd, &hdr, sizeof(hdr)); + + /* Write hash entries */ + for(i=0; i < table_size; i++) { + unsigned int keylen; + struct hash_entry ondisk, *entry = table + i; + + if (he_empty(entry)) + continue; + + ondisk.time = htonl(entry->time); + keylen = htonl(strlen(entry->key) + 1); + + write(fd, &ondisk.time, sizeof(ondisk.time)); + write(fd, &keylen, sizeof(keylen)); + write(fd, entry->key, strlen(entry->key) + 1); + } + + /* Make sure we flush to disk */ + fsync(fd); +} + +void dlhist_close() { + + int i; + + dlhist_flush(); + + if (fd >= 0) + close(fd); + fd = -1; + + if (table) { + for(i=0; i < table_size; i++) { + struct hash_entry *he = table + i; + if (!he_empty(he)) + free(he->key); + } + free(table); + } + table = NULL; + table_count = table_size = 0; +} diff --git a/dlhist.h b/dlhist.h new file mode 100644 index 0000000..45cf98d --- /dev/null +++ b/dlhist.h @@ -0,0 +1,17 @@ + +#ifndef DLHIST_H +#define DLHIST_H + +int dlhist_open(); + +int dlhist_lookup(const char *url); + +void 
dlhist_update(const char *url);
+
+void dlhist_purge(unsigned int timestamp);
+
+void dlhist_flush();
+
+void dlhist_close();
+
+#endif /* DLHIST_H */
\ No newline at end of file
diff --git a/dlight.c b/dlight.c
new file mode 100644
index 0000000..b69e80b
--- /dev/null
+++ b/dlight.c
@@ -0,0 +1,87 @@
+
+#include
+#include
+#include
+#include "env.h"
+#include "cconf.h"
+#include "dlhist.h"
+#include "filter.h"
+#include "http.h"
+#include "rss.h"
+
+#define error(...) fprintf(stderr, "error: " __VA_ARGS__)
+
+/*
+ * Download every item of `rss` whose title matches one of the target's
+ * filters and whose link is not yet in the download history.
+ */
+static void process_items(rss_t rss, struct target *t) {
+
+    struct rss_item item;
+
+    while(rss_walk_next(rss, &item)) {
+
+        if (!filter_match_list(t->filter, t->nr, item.title)
+            || dlhist_lookup(item.link)) {
+            continue;
+        }
+
+        /* an already existing file counts as downloaded */
+        if (http_download_file(item.link, t->dest) < 0 &&
+            errno != EEXIST) {
+            error("download failed: %s\n", strerror(errno));
+            continue;
+        }
+
+        dlhist_update(item.link);
+    }
+}
+
+/* Fetch and process the feed of every target in the configuration. */
+static void process(struct cconf *config) {
+
+    int i;
+    struct http_data *data;
+
+    dlhist_purge(7200);
+
+    for(i=0; i < config->nr; i++) {
+        struct target *t = config->target + i;
+        rss_t rss;
+
+        data = http_fetch_page(t->src);
+        if (!data)
+            continue;
+
+        rss = rss_parse(data->block, data->len);
+        if (!rss) {
+            error("failed to parse rss: %s\n", t->src);
+            /* the fetched page is still ours to release */
+            http_free(data);
+            continue;
+        }
+
+        process_items(rss, t);
+        rss_free(rss);
+        http_free(data);
+    }
+}
+
+int main(int argc, char *argv[]) {
+
+    struct cconf *config;
+    char configfile[4096];
+
+    snprintf(configfile, sizeof(configfile), "%s/%s",
+        env_get_dir(), "config");
+
+    config = cconf_read(configfile);
+    if (!config) {
+        perror(configfile);
+        return 1;
+    }
+
+    if (dlhist_open() < 0) {
+        perror("dlhist");
+        cconf_free(config);
+        return 1;
+    }
+
+    process(config);
+
+    dlhist_close();
+    cconf_free(config);
+
+    return 0;
+}
diff --git a/env.c b/env.c
new file mode 100644
index 0000000..14639eb
--- /dev/null
+++ b/env.c
@@ -0,0 +1,40 @@
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "env.h"
+
+static char 
base[4096];
+
+/* Fill `base` with "$HOME/.dlight" ("./.dlight" when HOME is not set). */
+static void get_base() {
+
+    char *ptr;
+    size_t len;
+
+    ptr = getenv("HOME");
+    if (!ptr)
+        ptr = ".";
+
+    len = strlen(ptr);
+    /* 9 = strlen("/.dlight") + NUL */
+    if (len >= sizeof(base) - 9) {
+        fprintf(stderr, "HOME is too long\n");
+        exit(1);
+    }
+    memcpy(base, ptr, len);
+    memcpy(base+len, "/.dlight", 9);
+}
+
+/*
+ * Return the dlight directory, creating it on first use.
+ * Exits the program when the directory cannot be created.
+ */
+const char* env_get_dir() {
+
+    if (!*base) {
+        get_base();
+        if (mkdir(base, 0700) < 0 && errno != EEXIST) {
+            fprintf(stderr, "unable to create '%s': %s\n",
+                base, strerror(errno));
+            exit(1);
+        }
+    }
+    return base;
+}
diff --git a/env.h b/env.h
new file mode 100644
index 0000000..483da32
--- /dev/null
+++ b/env.h
@@ -0,0 +1,7 @@
+
+#ifndef ENV_H
+#define ENV_H
+
+const char* env_get_dir();
+
+#endif /* ENV_H */
\ No newline at end of file
diff --git a/filter.c b/filter.c
new file mode 100644
index 0000000..f625166
--- /dev/null
+++ b/filter.c
@@ -0,0 +1,76 @@
+
+#include
+#include
+#include
+#include
+#include "filter.h"
+
+/* Compile `pattern`; returns NULL (after printing a message) on error. */
+static inline pcre* compile(const char *pattern) {
+
+    const char *error;
+    int eoffset;
+    pcre *regex;
+
+    regex = pcre_compile(pattern, 0, &error, &eoffset, NULL);
+    if (!regex) {
+        fprintf(stderr, "Error compiling expression\n");
+        return NULL;
+    }
+
+    return regex;
+}
+
+/*
+ * Run `pcre` against `subject`.
+ * Returns a value >= 0 when the pattern matches, a negative PCRE error
+ * code (PCRE_ERROR_NOMATCH included) otherwise.
+ */
+static inline int match(pcre *pcre, const char *subject) {
+
+    /*
+     * Only the yes/no answer is needed, so no offset vector is passed;
+     * pcre_exec() then reports a match as 0 instead of the capture count.
+     */
+    return pcre_exec(pcre, NULL, subject, strlen(subject), 0, 0,
+        NULL, 0);
+}
+
+/* Returns 1 when `pattern` compiles, 0 (after printing the error) otherwise. */
+int filter_check_syntax(const char *pattern) {
+
+    const char *error;
+    int eoffset;
+    pcre *regex;
+
+    regex = pcre_compile(pattern, 0, &error, &eoffset, NULL);
+    if (!regex) {
+        fprintf(stderr, "filter: error in expression '%s': %s\n",
+            pattern, error);
+        return 0;
+    }
+    /* the compiled form was only needed for the check */
+    pcre_free(regex);
+    return 1;
+}
+
+/*
+ * Returns 1 when `subject` matches `pattern`, 0 when it does not, when
+ * the pattern does not compile or when either argument is NULL.
+ */
+int filter_match(const char *pattern, const char *subject) {
+
+    pcre *regex;
+    int rc;
+
+    if (!pattern || !subject)
+        return 0;
+
+    regex = compile(pattern);
+    if (!regex)
+        return 0;
+
+    rc = match(regex, subject);
+
+    pcre_free(regex);
+
+    return rc >= 0;
+}
+
+/* Returns 1 when `subject` matches any of the `n` patterns, else 0. */
+int filter_match_list(char **patterns, unsigned n, const char *subject) {
+
+    unsigned i;
+
+    for(i=0; i < n; i++) 
{
+
+        /* return true at the first matching pattern */
+        if (filter_match(patterns[i], subject))
+            return 1;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/filter.h b/filter.h
new file mode 100644
index 0000000..603a573
--- /dev/null
+++ b/filter.h
@@ -0,0 +1,11 @@
+
+#ifndef FILTER_H
+#define FILTER_H
+
+int filter_check_syntax(const char *pattern);
+
+int filter_match(const char *pattern, const char *subject);
+
+int filter_match_list(char **patterns, unsigned n, const char *subject);
+
+#endif /* FILTER_H */
diff --git a/http.c b/http.c
new file mode 100644
index 0000000..b0ed3af
--- /dev/null
+++ b/http.c
@@ -0,0 +1,208 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "http.h"
+
+/*
+ * Search the first `size` bytes of `str` for `needle`.
+ * Returns a pointer to the byte right after the first occurrence, NULL
+ * when there is none. An empty or NULL needle returns `str` itself.
+ * `str` is treated as a plain buffer and need not be NUL-terminated.
+ */
+static char* strnstrr(const char *str, const char *needle, size_t size) {
+
+    const char *ptr = str;
+    const char *end = str + size;
+    size_t len;
+
+    if (!needle || !*needle)
+        return (char *) str;
+
+    len = strlen(needle);
+    while((size_t) (end - ptr) >= len) {
+        /* only positions that still leave room for the whole needle */
+        ptr = memchr(ptr, *needle, (size_t) (end - ptr) - len + 1);
+        if (!ptr)
+            break;
+        if (!memcmp(ptr, needle, len))
+            return (char *) ptr + len;
+        ptr++;
+    }
+    return NULL;
+}
+
+/*
+ * Return a malloc'ed copy of the last path component of `url`, ignoring
+ * trailing slashes. Returns NULL when there is no such component or when
+ * memory runs out. The caller frees the result.
+ */
+static char* url_filename(const char *url) {
+
+    const char *start, *end;
+    char *name;
+    size_t size;
+
+    end = url + strlen(url);
+    while(end > url && end[-1] == '/')
+        end--;
+
+    start = end;
+    while(start > url && start[-1] != '/')
+        start--;
+
+    size = end - start;
+    if (!size)
+        return NULL;
+
+    name = malloc(size + 1);
+    if (!name)
+        return NULL;
+    memcpy(name, start, size);
+    name[size] = '\0';
+
+    return name;
+}
+
+#define HDR_CONDISP "Content-Disposition:"
+
+/*
+ * Header callback: stores the quoted filename of the first
+ * Content-Disposition header in *data (a char **).
+ */
+static size_t hdr_fname_cb(void *src, size_t smemb, size_t nmemb, void *data) {
+
+    int pos, size = smemb * nmemb;
+    char *ptr = (char *) src;
+    char **filename = (char**) data;
+
+    if (*filename || size < sizeof(HDR_CONDISP)-1 ||
+        memcmp(ptr, HDR_CONDISP, sizeof(HDR_CONDISP)-1))
+        return size;
+
+    pos = sizeof(HDR_CONDISP)-1;
+    /* only size - pos bytes are left after the header name */
+    ptr = strnstrr(ptr + pos, "filename=\"", size - pos);
+    if (ptr) {
+        int start, len;
+        start = 
pos = ptr - ((char*) src); + ptr = (char *) src; + for(len=0;;len++) { + if (ptr[pos] == '"' && ptr[pos-1] != '\\') + break; + if (++pos > size) + return 0; + } + if (len) + *filename = strndup(ptr + start, len); + } + return size; +} + +static size_t write_cb(void *src, size_t smemb, size_t nmemb, void *data) { + + struct http_data *dest = (struct http_data *) data; + size_t size = smemb * nmemb; + + dest->block = realloc(dest->block, dest->len + size); + if (dest->block == NULL) { + printf("out of memory\n"); + return 0; + } + memcpy(dest->block + dest->len, src, size); + dest->len += size; + + return size; +} + +static CURL* setup_connection(const char *url) { + + CURL *handle = curl_easy_init(); + + curl_easy_setopt(handle, CURLOPT_URL, url); + curl_easy_setopt(handle, CURLOPT_USERAGENT, "libcurl-agent/1.0"); + curl_easy_setopt(handle, CURLOPT_SSL_VERIFYHOST, 0); + curl_easy_setopt(handle, CURLOPT_SSL_VERIFYPEER, 0); + curl_easy_setopt(handle, CURLOPT_TIMEOUT, 10); + + return handle; +} + +struct http_data* http_fetch_page(const char *url) { + + CURL *handle = curl_easy_init(); + CURLcode res; + struct http_data *data = malloc(sizeof(struct http_data)); + + data->block = NULL; + data->len = 0; + + handle = setup_connection(url); + + curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, write_cb); + curl_easy_setopt(handle, CURLOPT_WRITEDATA, data); + + res = curl_easy_perform(handle); + + if (res != CURLE_OK) { + printf("curl: (%s) %s\n", url, curl_easy_strerror(res)); + goto error; + } + + curl_easy_cleanup(handle); + + return data; +error: + curl_easy_cleanup(handle); + http_free(data); + return NULL; +} + +int http_download_file(const char *url, const char *dir) { + + int fd, err; + char *filename = NULL; + char path[4096]; + CURL *handle; + CURLcode res; + struct http_data *data = malloc(sizeof(struct http_data)); + + data->block = NULL; + data->len = 0; + + handle = setup_connection(url); + + curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, write_cb); + 
curl_easy_setopt(handle, CURLOPT_WRITEDATA, data);
+
+    curl_easy_setopt(handle, CURLOPT_HEADERFUNCTION, hdr_fname_cb);
+    curl_easy_setopt(handle, CURLOPT_HEADERDATA, &filename);
+
+    res = curl_easy_perform(handle);
+
+    if (res != CURLE_OK) {
+        printf("curl: (%s) %s\n", url, curl_easy_strerror(res));
+        /* curl does not report through errno; do not leak a stale value */
+        errno = EIO;
+        goto error;
+    }
+
+    if (!filename) {
+        filename = url_filename(url);
+    }
+    if (!filename) {
+        /* neither the headers nor the url provide a name */
+        errno = EINVAL;
+        goto error;
+    }
+
+    /* now, write to file */
+    {
+        /*
+         * The name may come from a server supplied header: keep only
+         * its last component so the file cannot end up outside dir.
+         */
+        const char *base = strrchr(filename, '/');
+        int n;
+
+        base = base ? base + 1 : filename;
+        if (!*base || !strcmp(base, ".") || !strcmp(base, "..")) {
+            errno = EINVAL;
+            goto error;
+        }
+        n = snprintf(path, sizeof(path), "%s/%s", dir, base);
+        if (n < 0 || (size_t) n >= sizeof(path)) {
+            errno = ENAMETOOLONG;
+            goto error;
+        }
+    }
+    fd = open(path, O_CREAT | O_EXCL | O_WRONLY, 0644);
+    if (fd < 0)
+        goto error;
+
+    {
+        ssize_t written = write(fd, data->block, data->len);
+
+        if (written < 0 || (size_t) written != data->len) {
+            /* do not leave a truncated file behind */
+            err = written < 0 ? errno : EIO;
+            close(fd);
+            unlink(path);
+            errno = err;
+            goto error;
+        }
+    }
+
+    close(fd);
+
+    free(filename);
+    http_free(data);
+    curl_easy_cleanup(handle);
+
+    return 0;
+error:
+    err = errno;
+    if (filename)
+        free(filename);
+    http_free(data);
+    curl_easy_cleanup(handle);
+    errno = err;
+    return -1;
+}
+
+/* Release a result of http_fetch_page(); NULL is accepted. */
+void http_free(struct http_data *data) {
+
+    if (!data)
+        return;
+    if (data->block)
+        free(data->block);
+    free(data);
+}
diff --git a/http.h b/http.h
new file mode 100644
index 0000000..68651dd
--- /dev/null
+++ b/http.h
@@ -0,0 +1,20 @@
+
+#ifndef HTTP_H
+#define HTTP_H
+
+#include
+
+/* a downloaded body: `len` bytes at `block`, not NUL-terminated */
+struct http_data {
+    void *block;
+    size_t len;
+};
+
+struct http_data* http_fetch_page(const char *url);
+
+int http_download_page(const char *url, const char *file);
+
+int http_download_file(const char *url, const char *dir);
+
+void http_free(struct http_data *data);
+
+#endif
diff --git a/read-config.c b/read-config.c
new file mode 100644
index 0000000..8dcf520
--- /dev/null
+++ b/read-config.c
@@ -0,0 +1,50 @@
+
+#include
+#include
+#include
+#include "cconf.h"
+#include "env.h"
+
+static char *usage = "dlight-read-config [ <file> | -h ]\n";
+
+/* Print the contents of a compiled config file in readable form. */
+int main(int argc, char **argv) {
+
+    int i;
+    struct cconf *c;
+    char file[4096];
+
+    if (argc > 1) {
+        if (!strcmp(argv[1], "-h")) {
+            fprintf(stderr, "%s", usage);
+            return 1;
+        }
+        /* snprintf always terminates, strncpy does not */
+        snprintf(file, sizeof(file), "%s", argv[1]);
+    } else {
+        snprintf(file, sizeof(file), "%s/config", env_get_dir());
+    }
+
+    c = cconf_read(file);
+    if (!c) { 
perror(file);
+        return 1;
+    }
+
+    printf("--- Config file: %s ---\n", file);
+    for(i=0; i < c->nr; i++) {
+        int j;
+        struct target *t = c->target + i;
+
+        printf("src: %s\n", t->src);
+        printf("dest: %s\n", t->dest);
+
+        for(j=0; j < t->nr; j++)
+            printf("filter: %s\n", t->filter[j]);
+
+        printf("---\n");
+    }
+
+    /* cconf_free() releases the contents, the struct itself is ours */
+    cconf_free(c);
+    free(c);
+
+    return 0;
+}
diff --git a/rss.c b/rss.c
new file mode 100644
index 0000000..07ac35e
--- /dev/null
+++ b/rss.c
@@ -0,0 +1,134 @@
+
+#include
+#include
+#include "rss.h"
+
+/* Sidestep warnings about signedness (xmlChar = unsigned char) */
+#define xmlStrcmp(a, b) xmlStrcmp((xmlChar *)a, (xmlChar *)b)
+#define xmlGetProp(n, p) xmlGetProp(n, (xmlChar *) p)
+
+/* position of the item walk */
+struct __walk_info {
+    xmlNodePtr current;
+};
+
+/* parsed document plus the nodes located by validate() */
+struct __rssdoc {
+    xmlDocPtr xmldoc;
+    xmlNodePtr channel;
+    xmlNodePtr firstitem;
+};
+
+struct __rss {
+    struct __rssdoc doc;
+    struct __walk_info info;
+};
+
+/* Return the first child of `node` named `name`, or NULL. */
+static xmlNodePtr getchild(xmlNodePtr node, const char *name) {
+
+    if (node) {
+        xmlNodePtr it;
+
+        for(it = node->children; it; it = it->next) {
+            if (!xmlStrcmp(it->name, name))
+                return it;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Return the text of `node` (for an element: of its first child).
+ * Returns "" when there is no node or no text; the string is owned by
+ * the document.
+ */
+static const char* getnodetext(xmlNodePtr node) {
+
+    /* an empty element has no children at all */
+    if (node && node->type == XML_ELEMENT_NODE)
+        node = node->children;
+
+    if (node && node->content &&
+        (node->type == XML_TEXT_NODE ||
+         node->type == XML_CDATA_SECTION_NODE))
+        return (const char *) node->content;
+
+    return "";
+}
+
+/*
+ * Check that the document is an <rss version=...> document whose first
+ * element is a <channel> containing at least one <item>, and remember
+ * the channel and first item nodes. Returns 0 when valid, -1 otherwise.
+ */
+static int validate(struct __rssdoc *doc) {
+
+    xmlChar *attr;
+    xmlNodePtr node;
+
+    if (!doc->xmldoc)
+        return -1;
+
+    /* the first child of the document may be a comment or DTD node */
+    node = xmlDocGetRootElement(doc->xmldoc);
+
+    if (!node || xmlStrcmp(node->name, "rss"))
+        return -1;
+    attr = xmlGetProp(node, "version");
+    if (!attr)
+        return -1;
+    /* only the presence matters; the returned copy must be released */
+    xmlFree(attr);
+
+    /* get channel node */
+    node = xmlFirstElementChild(node);
+
+    if (!node || xmlStrcmp(node->name, "channel"))
+        return -1;
+    doc->channel = node;
+
+    /* get first item */
+    node = getchild(node, "item");
+    if (!node)
+        return -1;
+
+    doc->firstitem = node;
+
+    while(node) {
+        if (xmlStrcmp(node->name, "item"))
+            return -1;
+
+        node = 
xmlNextElementSibling(node); + } + return 0; +} + +rss_t rss_parse(void *buf, size_t size) { + + rss_t rss = malloc(sizeof(struct __rss)); + + rss->doc.xmldoc = xmlReadMemory(buf, size, "noname.xml", NULL, 0); + + if (validate(&rss->doc) < 0) { + rss_free(rss); + return NULL; + } + + rss->info.current = rss->doc.firstitem; + + return rss; +} + +void rss_free(rss_t r) { + + if (!r) + return; + if (r->doc.xmldoc) + xmlFreeDoc(r->doc.xmldoc); + free(r); +} + +int rss_walk_next(rss_t rss, struct rss_item *item) { + + if (rss && rss->info.current) { + /* fill item */ + xmlNodePtr cur = rss->info.current; + item->title = getnodetext(getchild(cur, "title")); + item->link = getnodetext(getchild(cur, "link")); + + rss->info.current = xmlNextElementSibling(rss->info.current); + return 1; + } + return 0; +} + +int rss_walk_reset(rss_t rss) { + + if (rss) { + rss->info.current = rss->doc.firstitem; + return 1; + } + return 0; +} diff --git a/rss.h b/rss.h new file mode 100644 index 0000000..0ed677e --- /dev/null +++ b/rss.h @@ -0,0 +1,25 @@ + +#ifndef RSS_ITEM_H +#define RSS_ITEM_H + +#include + +typedef struct __rss* rss_t; +typedef struct __walk_info* rss_walk_info; + +struct rss_item { + const char *title; + const char *link; +}; + +rss_t rss_parse(void *buf, size_t size); + +void rss_free(rss_t r); + +/* walking interface */ + +int rss_walk_next(rss_t rss, struct rss_item *item); + +int rss_walk_reset(rss_t rss); + +#endif