diff --git a/configure.in b/configure.in
index e75ff81..2eaef30 100644
--- a/configure.in
+++ b/configure.in
@@ -428,6 +428,34 @@ elif test "x$force_ipv6" = "xyes"; then
   AC_MSG_ERROR([IPv6 support requested but not found; aborting])
 fi
 
+dnl
+dnl Extended Attribute support
+dnl
+AC_ARG_ENABLE(xattr,
+[  --disable-xattr         disable support for POSIX Extended Attributes],
+ENABLE_XATTR=$enableval, ENABLE_XATTR=yes)
+
+case "$host_os" in
+  *linux* | *darwin*) xattr_syscalls="fsetxattr fremovexattr" ;;
+  *irix*) xattr_syscalls="attr_setf attr_removef" ;;
+  *) AC_MSG_NOTICE([Disabling Extended Attribute support: your system is
+    not known to support extended attributes.])
+    ENABLE_XATTR=no
+esac
+
+for syscall in $xattr_syscalls; do
+  if test "X${ENABLE_XATTR}" = "Xyes"; then
+    AC_CHECK_FUNCS($syscall, [], [
+      AC_MSG_NOTICE([Disabling Extended Attribute support: your system does
+        not support $syscall(2)])
+      ENABLE_XATTR=no
+    ])
+  fi
+done
+
+test x"${ENABLE_XATTR}" = xyes && AC_DEFINE([ENABLE_XATTR], 1,
+  [Define if you want file metadata storing into PEA compiled in.])
+
 
 dnl
 dnl Set of available languages.
diff --git a/src/http.c b/src/http.c
index 3b254de..0dabb30 100644
--- a/src/http.c
+++ b/src/http.c
@@ -63,6 +63,26 @@ as that of the covered work.
*/
 #include "convert.h"
 #include "spider.h"
 
+#ifdef ENABLE_XATTR
+#ifdef __linux
+#include <sys/xattr.h>  /* fsetxattr, fremovexattr */
+#endif
+#ifdef __APPLE__
+#include <sys/xattr.h>  /* Darwin variants take an extra position/options arg */
+#define fsetxattr(file, name, buffer, size, flags) \
+  fsetxattr((file), (name), (buffer), (size), 0, (flags))
+#define fremovexattr(file, name) \
+  fremovexattr((file), (name), 0)
+#endif
+#ifdef __sgi
+#include <sys/attributes.h>  /* attr_setf, attr_removef -- TODO confirm header */
+#define fsetxattr(file, name, buffer, size, flags) \
+  attr_setf((file), (name), (buffer), (size), (flags))
+#define fremovexattr(file, name) \
+  attr_removef((file), (name), 0)
+#endif
+#endif
+
 #ifdef TESTING
 #include "test.h"
 #endif
@@ -1348,6 +1368,178 @@ free_hstat (struct http_stat *hs)
 #define ALLOW_CLOBBER (opt.noclobber || opt.always_rest || opt.timestamping \
                        || opt.dirstruct || opt.output_document)
 
+#ifdef ENABLE_XATTR
+/* Store extended attribute NAME with string VALUE on the file open as FD;
+ * a NULL VALUE removes the attribute instead.  Returns 0 on success, -1 on
+ * error (logged).  Removing an attribute that does not exist (ENODATA, or
+ * ENOATTR where defined) or is unsupported (ENOTSUP) is not an error.  */
+int resetpea(const int fd, const char *name, const char *value) {
+  if (!name) return -1;
+  if (value)
+    {
+      if (fsetxattr(fd, name, value, strlen(value), 0))
+        {
+          logprintf (LOG_NOTQUIET, _("Saving PEA %s=`%s' failed: %s\n"),
+                  name, value, strerror(errno));
+          return -1;
+        }
+      return 0;
+    }
+  if (fremovexattr(fd, name) && errno!=ENODATA && errno!=ENOTSUP
+#ifdef ENOATTR
+      && errno!=ENOATTR
+#endif
+     )
+    {
+      logprintf(LOG_NOTQUIET, _("Removing PEA %s failed: %s\n"),
+              name, strerror(errno));
+      return -1;
+    }
+  return 0;
+}
+
+/* Searches parameter name in header. Name of the parameter consists of lower
+ * or upper letters with preserved offset. The lower and upper has to have the
+ * same length. (Because strcasestr(3) is non-standard and is locale dependent.)
+ * Quoted-string values (with backslash escapes) are skipped as a whole.
+ * Returns pointer to the start of the parameter name if found, NULL
+ * otherwise; also NULL when LOWER and UPPER differ in length.  */
+const char *findparameter(const char *header, const char *lower,
+    const char *upper) {
+  const char *pos = header, *delim;
+  size_t namelen, i;
+
+  if (!header || !lower || !upper) return NULL;
+  /* LOWER and UPPER are indexed in lock-step; reject a length mismatch. */
+  namelen = strlen(lower);
+  if (strlen(upper) != namelen) return NULL;
+
+  while (*pos != '\0')
+    {
+      /* skip leading spaces */
+      while (ISSPACE(*pos)) pos++;
+
+      /* check for name */
+      for (i=0; i < namelen &&
+          (pos[i] == lower[i] || pos[i] == upper[i]); i++);
+      if (i == namelen && pos[i] == '=')
+        return pos;
+
+      /* Not it: skip to the next semicolon outside a quoted-string. */
+      delim = strpbrk(pos+i, "\";");
+      if (!delim)
+        return NULL;
+      if (*delim == '"')
+        {
+          /* Find the closing quote; `\' escapes the next character. */
+          for (delim++; *delim != '\0' && *delim != '"'; delim++)
+            if (*delim == '\\' && delim[1] != '\0') delim++;
+          if (*delim == '\0')
+            {
+              logprintf(LOG_ALWAYS,
+                  _("Unclosed quoted-string. The HTTP header "
+                    "Content-Type is invalid: `%s''\n"), header);
+              return NULL;
+            }
+          delim = strchr(delim+1, ';');
+          if (!delim)
+            return NULL;
+        }
+      /* Move behind the semicolon */
+      pos=delim+1;
+    }
+  /* Reached when the header is empty or ends right after a semicolon. */
+  return NULL;
+}
+
+
+/* Save long term metadata (Content-Type, Content-Encoding, charset) from HTTP
+ * response to user POSIX Extended Attributes of retrieved file.
+ * Returns 0 on success, -1 on error.
+ *
+ * For more details about user PEA namespace see
+ * [http://freedesktop.org/wiki/CommonExtendedAttributes] and
+ * [http://0pointer.de/lennart/projects/mod_mime_xattr/].
+ * The MIME type is stored without its parameters and the charset value
+ * unquoted; a missing header or charset removes the matching attribute.  */
+int savepea(const struct response *resp, const int fd) {
+  char *type, *encoding;
+  char *eo_mimetype, *so_charset=NULL, *eo_charset;
+  int retval;
+
+  if (!resp) return -1;
+
+  encoding = resp_header_strdup (resp, "Content-Encoding");
+  type = resp_header_strdup (resp, "Content-Type");
+
+  if (type)
+    {
+      char *semicolon = strchr (type, ';');
+      if (semicolon)
+        {
+          /* Cut off parameters (like charset) from MIME type */
+          eo_mimetype = semicolon;
+          while (eo_mimetype > type && ISSPACE (eo_mimetype[-1]))
+            --eo_mimetype;
+          *eo_mimetype = '\0';
+
+          /* Find the charset, see RFC 2068 */
+          so_charset = (char *) findparameter(
+              semicolon+1, "charset", "CHARSET");
+          if (so_charset)
+            {
+              so_charset += sizeof "charset=" - 1; /* skip name and `=' */
+
+              if (*so_charset == '"')
+                /* parameter value can be quoted-string */
+                {
+                  so_charset++;
+                  eo_charset = strchr(so_charset, '"');
+                  if (eo_charset)
+                    *eo_charset = '\0';
+                  else
+                    {
+                      logprintf(LOG_ALWAYS,
+                          _("Unclosed charset parameter value in "
+                            "Content-Type header: `%s'\n"),
+                          semicolon+1);
+                      so_charset = NULL;
+                    }
+                }
+
+              else
+                /* or a plain word ending at the next semicolon or at the
+                 * end of the string; trailing spaces are dropped either way */
+                {
+                  eo_charset = strchr(so_charset, ';');
+                  if (!eo_charset)
+                    eo_charset = so_charset + strlen(so_charset);
+                  while (eo_charset > so_charset &&
+                      ISSPACE (eo_charset[-1]))
+                    --eo_charset;
+                  *eo_charset = '\0';
+                }
+            }
+        }
+    }
+
+
+  /* Set the attributes. Error is not fatal but will be reported. */
+  retval =
+    resetpea(fd, "user.mime_type", type) |
+    resetpea(fd, "user.mime_encoding", encoding) |
+    resetpea(fd, "user.charset", so_charset);
+
+  xfree_null (type);
+  type = NULL;
+  xfree_null (encoding);
+  encoding = NULL;
+
+  return retval;
+}
+#endif
+
+
 /* Retrieve a document through HTTP protocol.  It recognizes status
    code, and correctly handles redirections.  It closes the network
    socket.
If it receives an error from the functions below it, it @@ -2074,7 +2266,6 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); contlen = last_byte_pos - first_byte_pos + 1; } } - resp_free (resp); /* 20x responses are counted among successful by default. */ if (H_20X (statcode)) @@ -2278,6 +2469,14 @@ File `%s' already there; not retrieving.\n\n"), hs->local_file); logprintf (LOG_NOTQUIET, _("Saving to: `%s'\n"), HYPHENP (hs->local_file) ? "STDOUT" : hs->local_file); } + +#ifdef ENABLE_XATTR + /* Save long term metatdata (Content-Type, Encoding, Charset) from HTTP + * response to user POSIX Extended Attributes */ + savepea(resp, fileno(fp)); +#endif + /* We don't need response anymore */ + resp_free (resp); /* This confuses the timestamping code that checks for file size. #### The timestamping code should be smarter about file size. */