Archived
1
0
Fork 0
This repository was archived on 2026-05-10. You can view files and clone it, but you cannot make any changes to its state, such as pushing and creating new issues, pull requests or comments.
dlight/dlight.c
Henrik Hautakoski 1350330dd2 dlhist: rename to proc-cache.
A new data structure is about to take dlhist's place. dlhist is currently
implemented as a mixture of a "process cache" that should record which
rss items have been processed (that is why the url is used as a unique
identifier), but right now it only stores a url if it has been
downloaded. A new data structure that should be the "download history"
shall be implemented, which will keep track of each title and where
it has been downloaded to. This will make it possible to only
download an rss title to a location once.

Splitting this data structure into two separate structures is trivial,
as a "process cache" will treat URLs as a unique identifier and
a "download history" will treat the title in an rss item as a
unique identifier (and also track its destinations).

This commit does not change any functionality, I just rename
this to keep the "dlhist" prefix and source files clear for
when implementing the real dlhist.
2011-11-14 16:11:18 +01:00

130 lines
2.3 KiB
C

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <limits.h>
#include "env.h"
#include "error.h"
#include "cconf.h"
#include "proc-cache.h"
#include "filter.h"
#include "http.h"
#include "rss.h"
/*
 * Argument handed to proc_cache_purge() at the start of every process() run.
 * NOTE(review): presumably the age after which cache entries are dropped;
 * confirm against proc-cache.h.
 */
#define PROC_CACHE_PURGE_INTERVAL (60*60*6) /* 6 hours (in seconds) */
static int write_http_file(struct http_file *file, const char *dest) {
char path[4096];
int rc, fd;
snprintf(path, sizeof(path), "%s/%s",
dest, file->filename);
fd = open(path, O_WRONLY | O_CREAT | O_EXCL, 0664);
if (fd < 0 && errno != EEXIST) {
error("failed to write file: %s", path);
return -1;
}
rc = write(fd, file->data.block, file->data.len);
close(fd);
return rc;
}
/*
 * Walk every item of a parsed feed and download the ones that match a
 * filter of target `t`.
 *
 * Items whose link is already known to the process cache are skipped. For
 * each remaining item the filters are tried in order against the item's
 * title; the file behind item.link is fetched at most once and then written
 * to the destination of every matching filter. After each successful write
 * the link is recorded with proc_cache_update().
 */
static void process_items(rss_t rss, struct target *t) {
	struct rss_item item;
	int i;

	while (rss_walk_next(rss, &item)) {
		struct http_file *file = NULL;

		if (proc_cache_lookup(item.link))
			continue;

		for (i = 0; i < t->nr; i++) {
			struct filter *filter = &t->filter[i];

			if (!filter_match(filter->pattern, item.title))
				continue;

			/* fetch the file if we haven't already. */
			if (file == NULL) {
				file = http_fetch_file(item.link);
				if (file == NULL) {
					error("download failed");
					/*
					 * Give up on this item: trying the remaining
					 * filters would only repeat the same failed
					 * download once per matching filter.
					 */
					break;
				}
			}

			/* save file to disk. */
			if (write_http_file(file, filter->dest) < 0)
				continue;

			printf("Downloaded: %s (%s) to %s\n",
				item.title, item.link, filter->dest);
			proc_cache_update(item.link);
		}

		/* file is still NULL here when nothing matched or the fetch failed. */
		http_free_file(file);
	}
}
/*
 * Run one pass over all configured targets.
 *
 * Purges the process cache with PROC_CACHE_PURGE_INTERVAL, then for each
 * target fetches its source page, parses it as rss and hands the result to
 * process_items(). Targets whose page cannot be fetched or parsed are
 * skipped.
 */
static void process(struct cconf *config) {
	int i;

	proc_cache_purge(PROC_CACHE_PURGE_INTERVAL);

	for (i = 0; i < config->nr; i++) {
		struct target *t = config->target + i;
		struct buffer *data;
		rss_t rss;

		data = http_fetch_page(t->src);
		if (!data)
			continue;

		rss = rss_parse(data->block, data->len);
		if (!rss) {
			error("failed to parse rss: %s", t->src);
			/* The page buffer must be released on this path too. */
			http_free(data);
			continue;
		}

		process_items(rss, t);

		rss_free(rss);
		http_free(data);
	}
}
/*
 * Program entry point.
 *
 * Reads "<env_get_dir()>/config", opens the process cache, processes every
 * configured target once and releases both again. Command line arguments
 * are not used.
 *
 * Returns 0 on success and 1 if the config path is too long, the config
 * cannot be read, or the process cache cannot be opened.
 */
int main(int argc, char *argv[]) {
	struct cconf *config;
	char configfile[4096];
	int n;

	(void)argc;
	(void)argv;

	n = snprintf(configfile, sizeof(configfile), "%s/%s",
		env_get_dir(), "config");
	if (n < 0 || (size_t)n >= sizeof(configfile)) {
		/* A truncated path would make us read the wrong file. */
		fprintf(stderr, "config path too long\n");
		return 1;
	}

	config = cconf_read(configfile);
	if (!config) {
		perror(configfile);
		return 1;
	}

	if (proc_cache_open() < 0) {
		/* Release the config on the failure path as well. */
		cconf_free(config);
		return 1;
	}

	process(config);

	proc_cache_close();
	cconf_free(config);
	return 0;
}