From cfa333370a7950cd348757bfc20f25f43a9b161a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?=
Date: Sun, 4 Sep 2011 21:25:37 +0200
Subject: [PATCH] PEA support

It allows wget to store some file meta-data retrieved from HTTP into
POSIX extended attributes.

Current implementation does following mapping:

Content-Type                        => user.mime_type
Content-Type with charset parameter => user.charset
Content-Encoding                    => user.mime_encoding
---
Notes for the reviewer (text in this section is ignored by git-am):
 - The system header names in the src/http.c include hunk were missing
   from the copy under review; <sys/xattr.h> (Linux, Mac OS X) and
   <sys/attributes.h> (IRIX) are a reconstruction -- please confirm.
 - Leading whitespace of the context lines was reconstructed as well;
   apply with `patch -l' or `git apply --ignore-whitespace' if needed.
   The `index' lines still carry the blob ids of the original submission.
 - resp_free (resp) now runs only after savepea ().  Please check that
   every return between the old and the new location frees RESP.

 configure.ac    |   30 ++++++++
 src/config.h.in |    3 +
 src/http.c      |  237 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 269 insertions(+), 1 deletions(-)

diff --git a/configure.ac b/configure.ac
index 9007a0e..bd848e6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -517,6 +517,36 @@ AM_CONDITIONAL([IRI_IS_ENABLED], [test "X$iri" != "Xno"])
 
 
 dnl
+dnl Extended Attribute support
+dnl
+
+AC_ARG_ENABLE(xattr,
+[  --disable-xattr         disable support for POSIX Extended Attributes],
+ENABLE_XATTR=$enableval, ENABLE_XATTR=yes)
+
+case "$host_os" in
+  *linux* | *darwin*) xattr_syscalls="fsetxattr fremovexattr" ;;
+  *irix*) xattr_syscalls="attr_setf attr_removef" ;;
+  *) AC_MSG_NOTICE([Disabling Extended Attribute support: your system is
+     not known to support extended attributes.])
+     ENABLE_XATTR=no
+esac
+
+for syscall in $xattr_syscalls; do
+  if test "X${ENABLE_XATTR}" = "Xyes"; then
+    AC_CHECK_FUNCS($syscall, [], [
+      AC_MSG_NOTICE([Disabling Extended Attribute support: your system does
+      not support $syscall(2)])
+      ENABLE_XATTR=no
+    ])
+  fi
+done
+
+test x"${ENABLE_XATTR}" = xyes && AC_DEFINE([ENABLE_XATTR], 1,
+  [Define if you want support for storing file meta-data into PEA compiled in.])
+
+
+dnl
 dnl Create output
 dnl
 AC_CONFIG_FILES([Makefile src/Makefile doc/Makefile util/Makefile
diff --git a/src/config.h.in b/src/config.h.in
index 8a013ca..478a312 100644
--- a/src/config.h.in
+++ b/src/config.h.in
@@ -41,6 +41,9 @@
 /* Define if IRI support is enabled. */
 #undef ENABLE_IRI
 
+/* Define if you want support for storing file meta-data into PEA compiled in. */
+#undef ENABLE_XATTR
+
 /* Define to 1 if translation of program messages to the user's native
    language is requested. */
 #undef ENABLE_NLS
diff --git a/src/http.c b/src/http.c
index fc88118..ac68516 100644
--- a/src/http.c
+++ b/src/http.c
@@ -59,6 +59,26 @@ as that of the covered work.  */
 #include "convert.h"
 #include "spider.h"
 
+#ifdef ENABLE_XATTR
+#ifdef __linux
+#include <sys/xattr.h>
+#endif /* def __linux */
+#ifdef __APPLE__
+#include <sys/xattr.h>
+#define fsetxattr(file, name, buffer, size, flags) \
+  fsetxattr((file), (name), (buffer), (size), 0, (flags))
+#define fremovexattr(file, name) \
+  fremovexattr((file), (name), 0)
+#endif /* def __APPLE__ */
+#ifdef __sgi
+#include <sys/attributes.h>
+#define fsetxattr(file, name, buffer, size, flags) \
+  attr_setf((file), (name), (buffer), (size), (flags))
+#define fremovexattr(file, name) \
+  attr_removef((file), (name), 0)
+#endif /* def __sgi */
+#endif /* def ENABLE_XATTR */
+
 #ifdef TESTING
 #include "test.h"
 #endif
@@ -1495,6 +1515,214 @@ File %s already there; not retrieving.\n\n"), quote (filename));
 #define ALLOW_CLOBBER (opt.noclobber || opt.always_rest || opt.timestamping \
                        || opt.dirstruct || opt.output_document)
 
+#ifdef ENABLE_XATTR
+/* Store VALUE as the POSIX extended attribute (PEA) NAME of the file
+   open on descriptor FD.  If VALUE is NULL, remove the attribute
+   instead.
+
+   Returns 0 on success, -1 on error.  A removal that fails because the
+   attribute does not exist or because extended attributes are not
+   supported is not treated as an error.  */
+static int
+resetpea (const int fd, const char *name, const char *value)
+{
+  if (!name)
+    return -1;
+
+  if (value)
+    {
+      if (fsetxattr (fd, name, value, strlen (value), 0))
+        {
+          /* Save errno before calling anything that might change it.  */
+          const int err = errno;
+          logprintf (LOG_NOTQUIET, _("Saving PEA %s=%s failed: %s\n"),
+                     name, quote (value), strerror (err));
+          return -1;
+        }
+    }
+  else if (fremovexattr (fd, name))
+    {
+      const int err = errno;
+      bool benign = (err == ENODATA || err == ENOTSUP);
+#ifdef ENOATTR
+      /* Some systems (e.g. Mac OS X) report a missing attribute as ENOATTR.  */
+      benign = benign || err == ENOATTR;
+#endif
+      if (!benign)
+        {
+          logprintf (LOG_NOTQUIET, _("Removing PEA %s failed: %s\n"),
+                     name, strerror (err));
+          return -1;
+        }
+    }
+
+  return 0;
+}
+
+
+/* Search HEADER, a semicolon-separated list of name=value parameters,
+   for the parameter whose name is spelled LOWER in lower case and UPPER
+   in upper case; each character of the name may match either spelling.
+   LOWER and UPPER must have the same length, otherwise nothing is
+   matched.  (The comparison is done by hand because strcasestr(3) is
+   non-standard and locale dependent.)
+
+   Returns a pointer to the start of the parameter name inside HEADER if
+   found, NULL otherwise.  */
+static const char *
+findparameter (const char *header, const char *lower, const char *upper)
+{
+  const char *pos = header;
+  size_t namelen;
+
+  if (!header || !lower || !upper)
+    return NULL;
+
+  /* Refuse mismatched spellings rather than read past the end of UPPER.  */
+  namelen = strlen (lower);
+  if (strlen (upper) != namelen)
+    return NULL;
+
+  while (*pos != '\0')
+    {
+      size_t i;
+      const char *delim;
+
+      /* Skip leading spaces.  */
+      while (c_isspace (*pos))
+        pos++;
+
+      /* Check for the name immediately followed by `='.  */
+      for (i = 0;
+           i < namelen && (pos[i] == lower[i] || pos[i] == upper[i]);
+           i++)
+        ;
+      if (i == namelen && pos[i] == '=')
+        return pos;
+
+      /* This token is not the parameter.  Skip to the next one by
+         looking for a semicolon that is not inside a quoted-string.  */
+      pos += i;
+      delim = strpbrk (pos, "\";");
+      if (!delim)
+        return NULL;            /* no next parameter */
+      if (*delim == '"')
+        {
+          /* The current parameter has a quoted-string value.  */
+          delim = strchr (delim + 1, '"');
+          if (!delim)
+            {
+              logprintf (LOG_ALWAYS,
+                         _("Unclosed quoted-string. The HTTP header "
+                           "Content-Type is invalid: `%s'\n"), header);
+              return NULL;
+            }
+          delim = strchr (delim + 1, ';');
+          if (!delim)
+            return NULL;        /* no next parameter */
+        }
+
+      /* Move behind the semicolon.  */
+      pos = delim + 1;
+    }
+
+  /* Reached when HEADER is empty or ends with a semicolon.  */
+  return NULL;
+}
+
+
+/* Save long-term metadata (Content-Type, its charset parameter and
+   Content-Encoding) from the HTTP response RESP into user POSIX
+   Extended Attributes of the file open on descriptor FD.  An attribute
+   whose value is missing from RESP is removed from the file.
+
+   Returns 0 on success, -1 if RESP is NULL or an attribute could not be
+   updated.
+
+   For more details about the user PEA namespace see
+   [http://freedesktop.org/wiki/CommonExtendedAttributes] and
+   [http://0pointer.de/lennart/projects/mod_mime_xattr/].  */
+static int
+savepea (const struct response *resp, const int fd)
+{
+  char *type, *encoding;
+  char *charset = NULL;
+  int retval;
+
+  if (!resp)
+    return -1;
+
+  encoding = resp_header_strdup (resp, "Content-Encoding");
+  type = resp_header_strdup (resp, "Content-Type");
+
+  if (type)
+    {
+      char *semicolon = strchr (type, ';');
+      if (semicolon)
+        {
+          /* Cut off parameters (like charset) from the MIME type.  */
+          char *eo_mimetype = semicolon;
+          while (eo_mimetype > type && c_isspace (eo_mimetype[-1]))
+            --eo_mimetype;
+          *eo_mimetype = '\0';
+
+          /* Find the charset parameter, see RFC 2068.  CHARSET points
+             into TYPE's buffer, so TYPE must stay allocated until the
+             attributes have been written.  */
+          charset = (char *) findparameter (semicolon + 1,
+                                            "charset", "CHARSET");
+          if (charset)
+            {
+              char *eo_charset;
+
+              charset += strlen ("charset=");
+              if (*charset == '"')
+                {
+                  /* The parameter value is a quoted-string.  */
+                  charset++;
+                  eo_charset = strchr (charset, '"');
+                  if (eo_charset)
+                    *eo_charset = '\0';
+                  else
+                    {
+                      logprintf (LOG_ALWAYS,
+                                 _("Unclosed charset parameter value in "
+                                   "Content-Type header: `%s'\n"),
+                                 semicolon + 1);
+                      charset = NULL;
+                    }
+                }
+              else
+                {
+                  /* The parameter value is a plain word that ends at the
+                     next semicolon or at the end of the string.  */
+                  eo_charset = strchr (charset, ';');
+                  if (eo_charset)
+                    {
+                      while (eo_charset > charset
+                             && c_isspace (eo_charset[-1]))
+                        --eo_charset;
+                      *eo_charset = '\0';
+                    }
+                }
+            }
+        }
+    }
+
+  /* Set the attributes.  A failure is not fatal; resetpea reports it
+     and the remaining attributes are still attempted.  */
+  retval = resetpea (fd, "user.mime_type", type);
+  retval |= resetpea (fd, "user.mime_encoding", encoding);
+  retval |= resetpea (fd, "user.charset", charset);
+
+  xfree_null (type);
+  xfree_null (encoding);
+
+  return retval;
+}
+#endif /* def ENABLE_XATTR */
+
+
 /* Retrieve a document through HTTP protocol.  It recognizes status
    code, and correctly handles redirections.  It closes the network
    socket.  If it receives an error from the functions below it, it
@@ -2303,7 +2531,6 @@ read_header:
           contlen = last_byte_pos - first_byte_pos + 1;
         }
     }
-  resp_free (resp);
 
   /* 20x responses are counted among successful by default.  */
   if (H_20X (statcode))
@@ -2560,6 +2787,14 @@ read_header:
                  HYPHENP (hs->local_file) ? quote ("STDOUT") : quote (hs->local_file));
     }
 
+#ifdef ENABLE_XATTR
+  /* Save long-term metadata (Content-Type, Content-Encoding, charset)
+     from the HTTP response to user POSIX Extended Attributes.  */
+  savepea (resp, fileno (fp));
+#endif
+  /* We don't need the response anymore.  */
+  resp_free (resp);
+
   /* This confuses the timestamping code that checks for file size.
      #### The timestamping code should be smarter about file size.  */
   if (opt.save_headers && hs->restval == 0)
-- 
1.7.3.4