From 19bd8afa6b63dccadc2006292154f1633e2a29c9 Mon Sep 17 00:00:00 2001 From: Andreas Baumann Date: Sat, 6 May 2017 16:36:16 +0200 Subject: some rework: - added old style 'redir' function and options along to a new libcurl internal 'follow' parameter 'curl' - moved picohttpparser to it's own subdirectory - added uriparser to be used instead of the home-grown parser in 'redir' --- .gitignore | 13 +- ACKNOWLEDGEMENTS | 13 + configure.ac | 38 +- plugins/Makefile.am | 12 +- plugins/check_curl.c | 320 ++++- plugins/picohttpparser.c | 620 --------- plugins/picohttpparser.h | 89 -- plugins/picohttpparser/Makefile.am | 3 + plugins/picohttpparser/picohttpparser.c | 620 +++++++++ plugins/picohttpparser/picohttpparser.h | 89 ++ plugins/uriparser/Makefile.am | 9 + plugins/uriparser/Uri.h | 777 +++++++++++ plugins/uriparser/UriBase.h | 197 +++ plugins/uriparser/UriCommon.c | 567 ++++++++ plugins/uriparser/UriCommon.h | 104 ++ plugins/uriparser/UriCompare.c | 168 +++ plugins/uriparser/UriDefsAnsi.h | 82 ++ plugins/uriparser/UriDefsConfig.h | 101 ++ plugins/uriparser/UriDefsUnicode.h | 82 ++ plugins/uriparser/UriEscape.c | 453 +++++++ plugins/uriparser/UriFile.c | 226 ++++ plugins/uriparser/UriIp4.c | 329 +++++ plugins/uriparser/UriIp4.h | 92 ++ plugins/uriparser/UriIp4Base.c | 96 ++ plugins/uriparser/UriIp4Base.h | 59 + plugins/uriparser/UriNormalize.c | 728 ++++++++++ plugins/uriparser/UriNormalizeBase.c | 119 ++ plugins/uriparser/UriNormalizeBase.h | 53 + plugins/uriparser/UriParse.c | 2241 +++++++++++++++++++++++++++++++ plugins/uriparser/UriParseBase.c | 90 ++ plugins/uriparser/UriParseBase.h | 55 + plugins/uriparser/UriQuery.c | 460 +++++++ plugins/uriparser/UriRecompose.c | 577 ++++++++ plugins/uriparser/UriResolve.c | 316 +++++ plugins/uriparser/UriShorten.c | 320 +++++ 35 files changed, 9370 insertions(+), 748 deletions(-) delete mode 100644 plugins/picohttpparser.c delete mode 100644 plugins/picohttpparser.h create mode 100644 plugins/picohttpparser/Makefile.am create mode 100644 plugins/picohttpparser/picohttpparser.c create mode 100644 plugins/picohttpparser/picohttpparser.h create mode 100644 plugins/uriparser/Makefile.am create mode 100644 plugins/uriparser/Uri.h create mode 100644 plugins/uriparser/UriBase.h create mode 100644 plugins/uriparser/UriCommon.c create mode 100644 plugins/uriparser/UriCommon.h create mode 100644 plugins/uriparser/UriCompare.c create mode 100644 plugins/uriparser/UriDefsAnsi.h create mode 100644 plugins/uriparser/UriDefsConfig.h create mode 100644 plugins/uriparser/UriDefsUnicode.h create mode 100644 plugins/uriparser/UriEscape.c create mode 100644 plugins/uriparser/UriFile.c create mode 100644 plugins/uriparser/UriIp4.c create mode 100644 plugins/uriparser/UriIp4.h create mode 100644 plugins/uriparser/UriIp4Base.c create mode 100644 plugins/uriparser/UriIp4Base.h create mode 100644 plugins/uriparser/UriNormalize.c create mode 100644 plugins/uriparser/UriNormalizeBase.c create mode 100644 plugins/uriparser/UriNormalizeBase.h create mode 100644 plugins/uriparser/UriParse.c create mode 100644 plugins/uriparser/UriParseBase.c create mode 100644 plugins/uriparser/UriParseBase.h create mode 100644 plugins/uriparser/UriQuery.c create mode 100644 plugins/uriparser/UriRecompose.c create mode 100644 plugins/uriparser/UriResolve.c create mode 100644 plugins/uriparser/UriShorten.c diff --git a/.gitignore b/.gitignore index 5fa4c813..1c8cbdec 100644 --- a/.gitignore +++ b/.gitignore @@ -202,7 +202,18 @@ NP-VERSION-FILE /plugins/negate /plugins/stamp-h* /plugins/urlize -/plugins/libpicohttpparser.a + +# /plugins/picohttpparser +/plugins/picohttpparser/Makefile +/plugins/picohttpparser/Makefile.in +/plugins/picohttpparser/.deps +/plugins/picohttpparser/libpicohttpparser.a + +# /plugins/uriparser +/plugins/uriparser/Makefile +/plugins/uriparser/Makefile.in +/plugins/uriparser/.deps +/plugins/uriparser/liburiparser.a # /plugins/t/ /plugins/t/*.tmp diff --git a/ACKNOWLEDGEMENTS b/ACKNOWLEDGEMENTS index 50c714c3..06e84e25 100644 --- a/ACKNOWLEDGEMENTS +++ b/ACKNOWLEDGEMENTS @@ -31,3 +31,16 @@ Gnulib team Copyright (C) 2001, 2003, 2004, 2006 Free Software Foundation, Inc http://www.gnu.org/software/gnulib/ Use of lib files that originally were used from coreutils + +Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase, + Shigeo Mitsunari +picohttpparser +https://github.com/h2o/picohttpparser +Use of the library for HTTP header parsing in check_curl. + +Copyright (C) 2007, Weijia Song +Copyright (C) 2007, Sebastian Pipping +All rights reserved. +uriparser - RFC 3986 URI parsing library +http://uriparser.sourceforge.net/ +Use of the library for URL parsing in check_curl. diff --git a/configure.ac b/configure.ac index 04ec5bde..c56163c1 100644 --- a/configure.ac +++ b/configure.ac @@ -385,19 +385,31 @@ if test "$ac_cv_header_wtsapi32_h" = "yes"; then AC_SUBST(WTSAPI32LIBS) fi -dnl Check for cURL library -LIBCURL_CHECK_CONFIG(yes, 7.15.2, [ - EXTRAS="$EXTRAS check_curl\$(EXEEXT)" - LIBCURLINCLUDE="$LIBCURL_CPPFLAGS" - LIBCURLLIBS="$LIBCURL" - LIBCURLCFLAGS="$LIBCURL_CPPFLAGS" - AC_SUBST(LIBCURLINCLUDE) - AC_SUBST(LIBCURLLIBS) - AC_SUBST(LIBCURLCFLAGS) -], [ - AC_MSG_WARN([Skipping curl plugin]) - AC_MSG_WARN([install libcurl libs to compile this plugin (see REQUIREMENTS).]) -]) +AC_ARG_ENABLE(check-curl, + AC_HELP_STRING([--enable-check-curl], + [Enables compilation of check_curl (default: no)]), + [enable_check_curl=$enableval], + [enable_check_curl=no]) +if test "$enable_check_curl" = "yes" ; then + dnl Check for cURL library + LIBCURL_CHECK_CONFIG(yes, 7.15.2, [ + EXTRAS="$EXTRAS check_curl\$(EXEEXT)" + LIBCURLINCLUDE="$LIBCURL_CPPFLAGS" + LIBCURLLIBS="$LIBCURL" + LIBCURLCFLAGS="$LIBCURL_CPPFLAGS" + AC_SUBST(LIBCURLINCLUDE) + AC_SUBST(LIBCURLLIBS) + AC_SUBST(LIBCURLCFLAGS) + AC_SUBST(PICOHTTPPARSER_DIR, picohttpparser) + AC_SUBST(URIPARSER_DIR, uriparser) + ], [ + AC_MSG_WARN([Skipping curl plugin]) + AC_MSG_WARN([install libcurl libs to compile this plugin (see REQUIREMENTS).]) + ]) +fi +AM_CONDITIONAL([WITH_CHECK_CURL], [test "$enable_check_curl" = "yes"]) +AM_COND_IF([WITH_CHECK_CURL], + [AC_CONFIG_FILES([plugins/picohttpparser/Makefile plugins/uriparser/Makefile])]) dnl Fallback to who(1) if the system doesn't provide an utmpx(5) interface if test "$ac_cv_header_utmpx_h" = "no" -a "$ac_cv_header_wtsapi32_h" = "no" diff --git a/plugins/Makefile.am b/plugins/Makefile.am index ff745c39..34a7da8b 100644 --- a/plugins/Makefile.am +++ b/plugins/Makefile.am @@ -40,17 +40,17 @@ EXTRA_PROGRAMS = check_mysql check_radius check_pgsql check_snmp check_hpjd \ check_nagios check_by_ssh check_dns check_nt check_ide_smart \ check_procs check_mysql_query check_apt check_dbi check_curl +SUBDIRS = @PICOHTTPPARSER_DIR@ @URIPARSER_DIR@ + EXTRA_DIST = t tests PLUGINHDRS = common.h -noinst_LIBRARIES = libnpcommon.a libpicohttpparser.a +noinst_LIBRARIES = libnpcommon.a libnpcommon_a_SOURCES = utils.c netutils.c sslutils.c runcmd.c \ popen.c utils.h netutils.h popen.h common.h runcmd.c runcmd.h -libpicohttpparser_a_SOURCES = picohttpparser.c - BASEOBJS = libnpcommon.a ../lib/libmonitoringplug.a ../gl/libgnu.a NETOBJS = $(BASEOBJS) $(EXTRA_NETOBLS) NETLIBS = $(NETOBJS) $(SOCKETLIBS) @@ -71,9 +71,9 @@ test-debug: check_apt_LDADD = $(BASEOBJS) check_cluster_LDADD = $(BASEOBJS) -check_curl_CFLAGS = $(AM_CFLAGS) $(LIBCURLCFLAGS) -check_curl_CPPFLAGS = $(AM_CPPFLAGS) $(LIBCURLINCLUDE) -check_curl_LDADD = $(NETLIBS) $(LIBCURLLIBS) $(SSLOBJS) libpicohttpparser.a +check_curl_CFLAGS = $(AM_CFLAGS) $(LIBCURLCFLAGS) -Ipicohttpparser -Iuriparser +check_curl_CPPFLAGS = $(AM_CPPFLAGS) $(LIBCURLINCLUDE) -Ipicohttpparser -Iuriparser +check_curl_LDADD = $(NETLIBS) $(LIBCURLLIBS) $(SSLOBJS) picohttpparser/libpicohttpparser.a uriparser/liburiparser.a check_dbi_LDADD = $(NETLIBS) $(DBILIBS) check_dig_LDADD = $(NETLIBS) check_disk_LDADD = $(BASEOBJS) diff --git a/plugins/check_curl.c b/plugins/check_curl.c index 5f6905f9..72db27ad 100644 --- a/plugins/check_curl.c +++ b/plugins/check_curl.c @@ -51,17 +51,35 @@ const char *email = "devel@monitoring-plugins.org"; #include "picohttpparser.h" +#include "uriparser/Uri.h" + +#include + #define MAKE_LIBCURL_VERSION(major, minor, patch) ((major)*0x10000 + (minor)*0x100 + (patch)) #define DEFAULT_BUFFER_SIZE 2048 #define DEFAULT_SERVER_URL "/" #define HTTP_EXPECT "HTTP/1." +#define DEFAULT_MAX_REDIRS 15 +#define INET_ADDR_MAX_SIZE INET6_ADDRSTRLEN enum { + MAX_IPV4_HOSTLENGTH = 255, HTTP_PORT = 80, HTTPS_PORT = 443, MAX_PORT = 65535 }; +enum { + STICKY_NONE = 0, + STICKY_HOST = 1, + STICKY_PORT = 2 +}; + +enum { + FOLLOW_HTTP_CURL = 0, + FOLLOW_LIBCURL = 1 +}; + /* for buffers for header and body */ typedef struct { char *buf; @@ -128,6 +146,8 @@ int verbose = 0; int show_extended_perfdata = FALSE; int min_page_len = 0; int max_page_len = 0; +int redir_depth = 0; +int max_depth = DEFAULT_MAX_REDIRS; char *http_method = NULL; char *http_post_data = NULL; char *http_content_type = NULL; @@ -155,8 +175,12 @@ char string_expect[MAX_INPUT_BUFFER] = ""; char server_expect[MAX_INPUT_BUFFER] = HTTP_EXPECT; int server_expect_yn = 0; char user_auth[MAX_INPUT_BUFFER] = ""; +char **http_opt_headers; +int http_opt_headers_count = 0; int display_html = FALSE; int onredirect = STATE_OK; +int followmethod = FOLLOW_HTTP_CURL; +int followsticky = STICKY_NONE; int use_ssl = FALSE; int use_sni = TRUE; int check_cert = FALSE; @@ -181,6 +205,7 @@ curlhelp_ssl_library ssl_library = CURLHELP_SSL_LIBRARY_UNKNOWN; int process_arguments (int, char**); void handle_curl_option_return_code (CURLcode res, const char* option); int check_http (void); +void redir (curlhelp_write_curlbuf*); void print_help (void); void print_usage (void); void print_curl_version (void); @@ -295,6 +320,8 @@ check_http (void) { int result = STATE_OK; int page_len = 0; + int i; + char *force_host_header = NULL; /* initialize curl */ if (curl_global_init (CURL_GLOBAL_DEFAULT) != CURLE_OK) @@ -371,6 +398,28 @@ check_http (void) snprintf (http_header, DEFAULT_BUFFER_SIZE, "Connection: close"); header_list = curl_slist_append (header_list, http_header); + /* check if Host header is explicitly set in options */ + if (http_opt_headers_count) { + for (i = 0; i < http_opt_headers_count ; i++) { + if (strncmp(http_opt_headers[i], "Host:", 5) == 0) { + force_host_header = http_opt_headers[i]; + } + } + } + + /* attach additional headers supplied by the user */ + /* optionally send any other header tag */ + if (http_opt_headers_count) { + for (i = 0; i < http_opt_headers_count ; i++) { + if (force_host_header != http_opt_headers[i]) { + header_list = curl_slist_append (header_list, http_opt_headers[i]); + } + } + /* This cannot be free'd here because a redirection will then try to access this and segfault */ + /* Covered in a testcase in tests/check_http.t */ + /* free(http_opt_headers); */ + } + /* set HTTP headers */ handle_curl_option_return_code (curl_easy_setopt( curl, CURLOPT_HTTPHEADER, header_list ), "CURLOPT_HTTPHEADER"); @@ -481,13 +530,29 @@ check_http (void) /* handle redirections */ if (onredirect == STATE_DEPENDENT) { - handle_curl_option_return_code (curl_easy_setopt (curl, CURLOPT_FOLLOWLOCATION, 1), "CURLOPT_FOLLOWLOCATION"); - /* TODO: handle the following aspects of redirection - CURLOPT_POSTREDIR: method switch - CURLINFO_REDIRECT_URL: custom redirect option - CURLOPT_REDIRECT_PROTOCOLS - CURLINFO_REDIRECT_COUNT - */ + if( followmethod == FOLLOW_LIBCURL ) { + handle_curl_option_return_code (curl_easy_setopt (curl, CURLOPT_FOLLOWLOCATION, 1), "CURLOPT_FOLLOWLOCATION"); + + /* default -1 is infinite, not good, could lead to zombie plugins! + Setting it to one bigger than maximal limit to handle errors nicely below + */ + handle_curl_option_return_code (curl_easy_setopt (curl, CURLOPT_MAXREDIRS, max_depth+1), "CURLOPT_MAXREDIRS"); + + /* for now allow only http and https (we are a http(s) check plugin in the end) */ +#if LIBCURL_VERSION_NUM >= MAKE_LIBCURL_VERSION(7, 19, 4) + handle_curl_option_return_code (curl_easy_setopt (curl, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS), "CURLOPT_REDIRECT_PROTOCOLS"); +#endif /* LIBCURL_VERSION_NUM >= MAKE_LIBCURL_VERSION(7, 19, 4) */ + + /* TODO: handle the following aspects of redirection, make them + * command line options too later: + CURLOPT_POSTREDIR: method switch + CURLINFO_REDIRECT_URL: custom redirect option + CURLOPT_REDIRECT_PROTOCOLS: allow people to step outside safe protocols + CURLINFO_REDIRECT_COUNT: get the number of redirects, print it, maybe a range option here is nice like for expected page size? + */ + } else { + // old style redirection is handled below + } } /* no-body */ @@ -533,8 +598,8 @@ check_http (void) printf ("**** REQUEST CONTENT ****\n%s\n", http_post_data); /* free header and server IP resolve lists, we don't need it anymore */ - curl_slist_free_all (header_list); - curl_slist_free_all (server_ips); + curl_slist_free_all (header_list); header_list = NULL; + curl_slist_free_all (server_ips); server_ips = NULL; /* Curl errors, result in critical Nagios state */ if (res != CURLE_OK) { @@ -691,18 +756,39 @@ GOT_FIRST_CERT: /* check redirected page if specified */ } else if (code >= 300) { if (onredirect == STATE_DEPENDENT) { - code = status_line.http_code; + if( followmethod == FOLLOW_LIBCURL ) { + code = status_line.http_code; + } else { + /* old check_http style redirection, if we come + * back here, we are in the same status as with + * the libcurl method + */ + redir (&header_buf); + } + } else { + /* this is a specific code in the command line to + * be returned when a redirection is encoutered + */ } result = max_state_alt (onredirect, result); - /* TODO: make sure the last status line has been - parsed into the status_line structure - */ /* all other codes are considered ok */ } else { result = STATE_OK; } } + /* libcurl redirection internally, handle error states here */ + if( followmethod == FOLLOW_LIBCURL ) { + handle_curl_option_return_code (curl_easy_getinfo (curl, CURLINFO_REDIRECT_COUNT, &redir_depth), "CURLINFO_REDIRECT_COUNT"); + if (verbose >= 2) + printf(_("* curl LIBINFO_REDIRECT_COUNT is %d\n"), redir_depth); + if (redir_depth > max_depth) { + snprintf (msg, DEFAULT_BUFFER_SIZE, "maximum redirection depth %d exceeded in libcurl", + max_depth); + die (STATE_WARNING, "HTTP WARNING - %s", msg); + } + } + /* check status codes, set exit status accordingly */ if( status_line.http_code != code ) { die (STATE_CRITICAL, _("HTTP CRITICAL HTTP/%d.%d %d %s - different HTTP codes (cUrl has %ld)\n"), @@ -813,6 +899,188 @@ GOT_FIRST_CERT: return result; } +int +uri_strcmp (const UriTextRangeA range, const char* s) +{ + if (!range.first) return -1; + if (range.afterLast - range.first < strlen (s)) return -1; + return strncmp (s, range.first, min( range.afterLast - range.first, strlen (s))); +} + +char* +uri_string (const UriTextRangeA range, char* buf, size_t buflen) +{ + if (!range.first) return "(null)"; + strncpy (buf, range.first, max (buflen, range.afterLast - range.first)); + buf[max (buflen, range.afterLast - range.first)] = '\0'; + buf[range.afterLast - range.first] = '\0'; + return buf; +} + +void +redir (curlhelp_write_curlbuf* header_buf) +{ + char *location = NULL; + curlhelp_statusline status_line; + struct phr_header headers[255]; + size_t nof_headers = 255; + size_t msglen; + char buf[DEFAULT_BUFFER_SIZE]; + char ipstr[INET_ADDR_MAX_SIZE]; + int new_port; + char *new_host; + char *new_url; + + int res = phr_parse_response (header_buf->buf, header_buf->buflen, + &status_line.http_minor, &status_line.http_code, &status_line.msg, &msglen, + headers, &nof_headers, 0); + + location = get_header_value (headers, nof_headers, "location"); + + if (verbose >= 2) + printf(_("* Seen redirect location %s\n"), location); + + if (++redir_depth > max_depth) + die (STATE_WARNING, + _("HTTP WARNING - maximum redirection depth %d exceeded - %s\n"), + max_depth, location, (display_html ? "" : "")); + + UriParserStateA state; + UriUriA uri; + state.uri = &uri; + if (uriParseUriA (&state, location) != URI_SUCCESS) { + if (state.errorCode == URI_ERROR_SYNTAX) { + die (STATE_UNKNOWN, + _("HTTP UNKNOWN - Could not parse redirect location '%s'%s\n"), + location, (display_html ? "" : "")); + } else if (state.errorCode == URI_ERROR_MALLOC) { + die (STATE_UNKNOWN, _("HTTP UNKNOWN - Could not allocate URL\n")); + } + } + + if (verbose >= 2) { + printf (_("** scheme: %s\n"), + uri_string (uri.scheme, buf, DEFAULT_BUFFER_SIZE)); + printf (_("** host: %s\n"), + uri_string (uri.hostText, buf, DEFAULT_BUFFER_SIZE)); + printf (_("** port: %s\n"), + uri_string (uri.portText, buf, DEFAULT_BUFFER_SIZE)); + if (uri.hostData.ip4) { + inet_ntop (AF_INET, uri.hostData.ip4->data, ipstr, sizeof (ipstr)); + printf (_("** IPv4: %s\n"), ipstr); + } + if (uri.hostData.ip6) { + inet_ntop (AF_INET, uri.hostData.ip6->data, ipstr, sizeof (ipstr)); + printf (_("** IPv6: %s\n"), ipstr); + } + if (uri.pathHead) { + printf (_("** path: ")); + const UriPathSegmentA* p = uri.pathHead; + for (; p; p = p->next) { + printf ("/%s", uri_string (p->text, buf, DEFAULT_BUFFER_SIZE)); + } + puts (""); + } + if (uri.query.first) { + printf (_("** query: %s\n"), + uri_string (uri.query, buf, DEFAULT_BUFFER_SIZE)); + } + if (uri.fragment.first) { + printf (_("** fragment: %s\n"), + uri_string (uri.fragment, buf, DEFAULT_BUFFER_SIZE)); + } + } + + use_ssl = !uri_strcmp (uri.scheme, "https"); + + /* we do a sloppy test here only, because uriparser would have failed + * above, if the port would be invalid, we just check for MAX_PORT + */ + if (uri.portText.first) { + new_port = atoi (uri_string (uri.portText, buf, DEFAULT_BUFFER_SIZE)); + } else { + new_port = HTTP_PORT; + if (use_ssl) + new_port = HTTPS_PORT; + } + if (new_port > MAX_PORT) + die (STATE_UNKNOWN, + _("HTTP UNKNOWN - Redirection to port above %d - %s\n"), + MAX_PORT, location, display_html ? "" : ""); + + /* by RFC 7231 relative URLs in Location should be taken relative to + * the original URL, so wy try to form a new absolute URL here + */ + if (!uri.scheme.first && !uri.hostText.first) { + /* TODO: implement */ + die (STATE_UNKNOWN, _("HTTP UNKNOWN - non-absolute location, not implemented yet!\n")); + new_host = strdup (host_name ? host_name : server_address); + } else { + new_host = strdup (uri_string (uri.hostText, buf, DEFAULT_BUFFER_SIZE)); + } + + new_url = (char *)calloc( 1, DEFAULT_BUFFER_SIZE); + if (uri.pathHead) { + const UriPathSegmentA* p = uri.pathHead; + for (; p; p = p->next) { + strncat (new_url, "/", DEFAULT_BUFFER_SIZE); + strncat (new_url, uri_string (p->text, buf, DEFAULT_BUFFER_SIZE), DEFAULT_BUFFER_SIZE); + } + } + + if (server_port==new_port && + !strncmp(server_address, new_host, MAX_IPV4_HOSTLENGTH) && + (host_name && !strncmp(host_name, new_host, MAX_IPV4_HOSTLENGTH)) && + !strcmp(server_url, new_url)) + die (STATE_WARNING, + _("HTTP WARNING - redirection creates an infinite loop - %s://%s:%d%s%s\n"), + use_ssl ? "https" : "http", new_host, new_port, new_url, (display_html ? "" : "")); + + /* set new values for redirected request */ + + free (host_name); + host_name = strndup (new_host, MAX_IPV4_HOSTLENGTH); + free(new_host); + + server_port = (unsigned short)new_port; + + /* reset virtual port */ + virtual_port = server_port; + + if (!(followsticky & STICKY_HOST)) { + free (server_address); + server_address = strndup (new_host, MAX_IPV4_HOSTLENGTH); + } + if (!(followsticky & STICKY_PORT)) { + server_port = new_port; + } + + free (server_url); + server_url = new_url; + + uriFreeUriMembersA (&uri); + + if (verbose) + printf (_("Redirection to %s://%s:%d%s\n"), use_ssl ? "https" : "http", + host_name ? host_name : server_address, server_port, server_url); + + /* TODO: the hash component MUST be taken from the original URL and + * attached to the URL in Location + */ + + check_http (); +} + +#if 0 + +int main(int argc, char *argv[]) { + + for (; i < argc; i++) { + + } + printf("\n"); +#endif + /* check whether a file exists */ void test_file (char *path) @@ -974,7 +1242,11 @@ process_arguments (int argc, char **argv) snprintf (user_agent, DEFAULT_BUFFER_SIZE, optarg); break; case 'k': /* Additional headers */ - header_list = curl_slist_append(header_list, optarg); + if (http_opt_headers_count == 0) + http_opt_headers = malloc (sizeof (char *) * (++http_opt_headers_count)); + else + http_opt_headers = realloc (http_opt_headers, sizeof (char *) * (++http_opt_headers_count)); + http_opt_headers[http_opt_headers_count - 1] = optarg; break; case 'L': /* show html link */ display_html = TRUE; @@ -1121,6 +1393,14 @@ process_arguments (int argc, char **argv) onredirect = STATE_UNKNOWN; else if (!strcmp (optarg, "follow")) onredirect = STATE_DEPENDENT; + else if (!strcmp (optarg, "stickyport")) + onredirect = STATE_DEPENDENT, followmethod = FOLLOW_HTTP_CURL, followsticky = STICKY_HOST|STICKY_PORT; + else if (!strcmp (optarg, "sticky")) + onredirect = STATE_DEPENDENT, followmethod = FOLLOW_HTTP_CURL, followsticky = STICKY_HOST; + else if (!strcmp (optarg, "follow")) + onredirect = STATE_DEPENDENT, followmethod = FOLLOW_HTTP_CURL, followsticky = STICKY_NONE; + else if (!strcmp (optarg, "curl")) + onredirect = STATE_DEPENDENT, followmethod = FOLLOW_LIBCURL; else usage2 (_("Invalid onredirect option"), optarg); if (verbose >= 2) printf(_("* Following redirects set to %s\n"), state_text(onredirect)); @@ -1264,7 +1544,6 @@ print_help (void) print_revision (progname, NP_VERSION); printf ("Copyright (c) 1999 Ethan Galstad \n"); - printf ("Copyright (c) 2017 Andreas Baumann \n"); printf (COPYRIGHT, copyright, email); printf ("%s\n", _("This plugin tests the HTTP service on the specified host. It can test")); @@ -1365,8 +1644,11 @@ print_help (void) printf (" %s\n", _("Print additional performance data")); printf (" %s\n", "-L, --link"); printf (" %s\n", _("Wrap output in HTML link (obsoleted by urlize)")); - printf (" %s\n", "-f, --onredirect="); - printf (" %s\n", _("How to handle redirected pages.")); + printf (" %s\n", "-f, --onredirect="); + printf (" %s\n", _("How to handle redirected pages. sticky is like follow but stick to the")); + printf (" %s\n", _("specified IP address. stickyport also ensures port stays the same.")); + printf (" %s\n", _("follow uses the old redirection algorithm of check_http.")); + printf (" %s\n", _("curl uses CURL_FOLLOWLOCATION built into libcurl.")); printf (" %s\n", "-m, --pagesize=INTEGER<:INTEGER>"); printf (" %s\n", _("Minimum page size required (bytes) : Maximum page size required (bytes)")); @@ -1436,7 +1718,7 @@ print_usage (void) printf (" %s -H | -I [-u ] [-p ]\n",progname); printf (" [-J ] [-K ] [--ca-cert ]\n"); printf (" [-w ] [-c ] [-t ] [-L] [-E] [-a auth]\n"); - printf (" [-f ]\n"); + printf (" [-f ]\n"); printf (" [-e ] [-d string] [-s string] [-l] [-r | -R ]\n"); printf (" [-P string] [-m :] [-4|-6] [-N] [-M ]\n"); printf (" [-A string] [-k string] [-S ] [--sni] [-C [,]]\n"); @@ -1739,7 +2021,7 @@ get_content_length (const curlhelp_write_curlbuf* header_buf, const curlhelp_wri content_length_s += strspn (content_length_s, " \t"); content_length = atoi (content_length_s); if (content_length != body_buf->buflen) { - /* TODO: should we warn if the actual and the reported body length doen't match? */ + /* TODO: should we warn if the actual and the reported body length don't match? */ } if (content_length_s) free (content_length_s); diff --git a/plugins/picohttpparser.c b/plugins/picohttpparser.c deleted file mode 100644 index 6a2d872d..00000000 --- a/plugins/picohttpparser.c +++ /dev/null @@ -1,620 +0,0 @@ -/* - * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase, - * Shigeo Mitsunari - * - * The software is licensed under either the MIT License (below) or the Perl - * license. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#ifdef __SSE4_2__ -#ifdef _MSC_VER -#include -#else -#include -#endif -#endif -#include "picohttpparser.h" - -/* $Id: a707070d11d499609f99d09f97535642cec910a8 $ */ - -#if __GNUC__ >= 3 -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) -#else -#define likely(x) (x) -#define unlikely(x) (x) -#endif - -#ifdef _MSC_VER -#define ALIGNED(n) _declspec(align(n)) -#else -#define ALIGNED(n) __attribute__((aligned(n))) -#endif - -#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u) - -#define CHECK_EOF() \ - if (buf == buf_end) { \ - *ret = -2; \ - return NULL; \ - } - -#define EXPECT_CHAR_NO_CHECK(ch) \ - if (*buf++ != ch) { \ - *ret = -1; \ - return NULL; \ - } - -#define EXPECT_CHAR(ch) \ - CHECK_EOF(); \ - EXPECT_CHAR_NO_CHECK(ch); - -#define ADVANCE_TOKEN(tok, toklen) \ - do { \ - const char *tok_start = buf; \ - static const char ALIGNED(16) ranges2[] = "\000\040\177\177"; \ - int found2; \ - buf = findchar_fast(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2); \ - if (!found2) { \ - CHECK_EOF(); \ - } \ - while (1) { \ - if (*buf == ' ') { \ - break; \ - } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \ - if ((unsigned char)*buf < '\040' || *buf == '\177') { \ - *ret = -1; \ - return NULL; \ - } \ - } \ - ++buf; \ - CHECK_EOF(); \ - } \ - tok = tok_start; \ - toklen = buf - tok_start; \ - } while (0) - -static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" - "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0" - "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1" - "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0" - "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" - "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" - "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" - "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; - -static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found) -{ - *found = 0; -#if __SSE4_2__ - if (likely(buf_end - buf >= 16)) { - __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges); - - size_t left = (buf_end - buf) & ~15; - do { - __m128i b16 = _mm_loadu_si128((const __m128i *)buf); - int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS); - if (unlikely(r != 16)) { - buf += r; - *found = 1; - break; - } - buf += 16; - left -= 16; - } while (likely(left != 0)); - } -#else - /* suppress unused parameter warning */ - (void)buf_end; - (void)ranges; - (void)ranges_size; -#endif - return buf; -} - -static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret) -{ - const char *token_start = buf; - -#ifdef __SSE4_2__ - static const char ranges1[] = "\0\010" - /* allow HT */ - "\012\037" - /* allow SP and up to but not including DEL */ - "\177\177" - /* allow chars w. MSB set */ - ; - int found; - buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found); - if (found) - goto FOUND_CTL; -#else - /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */ - while (likely(buf_end - buf >= 8)) { -#define DOIT() \ - do { \ - if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \ - goto NonPrintable; \ - ++buf; \ - } while (0) - DOIT(); - DOIT(); - DOIT(); - DOIT(); - DOIT(); - DOIT(); - DOIT(); - DOIT(); -#undef DOIT - continue; - NonPrintable: - if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) { - goto FOUND_CTL; - } - ++buf; - } -#endif - for (;; ++buf) { - CHECK_EOF(); - if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { - if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) { - goto FOUND_CTL; - } - } - } -FOUND_CTL: - if (likely(*buf == '\015')) { - ++buf; - EXPECT_CHAR('\012'); - *token_len = buf - 2 - token_start; - } else if (*buf == '\012') { - *token_len = buf - token_start; - ++buf; - } else { - *ret = -1; - return NULL; - } - *token = token_start; - - return buf; -} - -static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret) -{ - int ret_cnt = 0; - buf = last_len < 3 ? buf : buf + last_len - 3; - - while (1) { - CHECK_EOF(); - if (*buf == '\015') { - ++buf; - CHECK_EOF(); - EXPECT_CHAR('\012'); - ++ret_cnt; - } else if (*buf == '\012') { - ++buf; - ++ret_cnt; - } else { - ++buf; - ret_cnt = 0; - } - if (ret_cnt == 2) { - return buf; - } - } - - *ret = -2; - return NULL; -} - -#define PARSE_INT(valp_, mul_) \ - if (*buf < '0' || '9' < *buf) { \ - buf++; \ - *ret = -1; \ - return NULL; \ - } \ - *(valp_) = (mul_) * (*buf++ - '0'); - -#define PARSE_INT_3(valp_) \ - do { \ - int res_ = 0; \ - PARSE_INT(&res_, 100) \ - *valp_ = res_; \ - PARSE_INT(&res_, 10) \ - *valp_ += res_; \ - PARSE_INT(&res_, 1) \ - *valp_ += res_; \ - } while (0) - -/* returned pointer is always within [buf, buf_end), or null */ -static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret) -{ - /* we want at least [HTTP/1.] to try to parse */ - if (buf_end - buf < 9) { - *ret = -2; - return NULL; - } - EXPECT_CHAR_NO_CHECK('H'); - EXPECT_CHAR_NO_CHECK('T'); - EXPECT_CHAR_NO_CHECK('T'); - EXPECT_CHAR_NO_CHECK('P'); - EXPECT_CHAR_NO_CHECK('/'); - EXPECT_CHAR_NO_CHECK('1'); - EXPECT_CHAR_NO_CHECK('.'); - PARSE_INT(minor_version, 1); - return buf; -} - -static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers, - size_t max_headers, int *ret) -{ - for (;; ++*num_headers) { - CHECK_EOF(); - if (*buf == '\015') { - ++buf; - EXPECT_CHAR('\012'); - break; - } else if (*buf == '\012') { - ++buf; - break; - } - if (*num_headers == max_headers) { - *ret = -1; - return NULL; - } - if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) { - /* parsing name, but do not discard SP before colon, see - * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */ - headers[*num_headers].name = buf; - static const char ALIGNED(16) ranges1[] = "\x00 " /* control chars and up to SP */ - "\"\"" /* 0x22 */ - "()" /* 0x28,0x29 */ - ",," /* 0x2c */ - "//" /* 0x2f */ - ":@" /* 0x3a-0x40 */ - "[]" /* 0x5b-0x5d */ - "{\377"; /* 0x7b-0xff */ - int found; - buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found); - if (!found) { - CHECK_EOF(); - } - while (1) { - if (*buf == ':') { - break; - } else if (!token_char_map[(unsigned char)*buf]) { - *ret = -1; - return NULL; - } - ++buf; - CHECK_EOF(); - } - if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) { - *ret = -1; - return NULL; - } - ++buf; - for (;; ++buf) { - CHECK_EOF(); - if (!(*buf == ' ' || *buf == '\t')) { - break; - } - } - } else { - headers[*num_headers].name = NULL; - headers[*num_headers].name_len = 0; - } - if ((buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value, &headers[*num_headers].value_len, ret)) == NULL) { - return NULL; - } - } - return buf; -} - -static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path, - size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, - size_t max_headers, int *ret) -{ - /* skip first empty line (some clients add CRLF after POST content) */ - CHECK_EOF(); - if (*buf == '\015') { - ++buf; - EXPECT_CHAR('\012'); - } else if (*buf == '\012') { - ++buf; - } - - /* parse request line */ - ADVANCE_TOKEN(*method, *method_len); - ++buf; - ADVANCE_TOKEN(*path, *path_len); - ++buf; - if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) { - return NULL; - } - if (*buf == '\015') { - ++buf; - EXPECT_CHAR('\012'); - } else if (*buf == '\012') { - ++buf; - } else { - *ret = -1; - return NULL; - } - - return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret); -} - -int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path, - size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len) -{ - const char *buf = buf_start, *buf_end = buf_start + len; - size_t max_headers = *num_headers; - int r; - - *method = NULL; - *method_len = 0; - *path = NULL; - *path_len = 0; - *minor_version = -1; - *num_headers = 0; - - /* if last_len != 0, check if the request is complete (a fast countermeasure - againt slowloris */ - if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { - return r; - } - - if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers, - &r)) == NULL) { - return r; - } - - return (int)(buf - buf_start); -} - -static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg, - size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret) -{ - /* parse "HTTP/1.x" */ - if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) { - return NULL; - } - /* skip space */ - if (*buf++ != ' ') { - *ret = -1; - return NULL; - } - /* parse status code, we want at least [:digit:][:digit:][:digit:] to try to parse */ - if (buf_end - buf < 4) { - *ret = -2; - return NULL; - } - PARSE_INT_3(status); - - /* skip space */ - if (*buf++ != ' ') { - *ret = -1; - return NULL; - } - /* get message */ - if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) { - return NULL; - } - - return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret); -} - -int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len, - struct phr_header *headers, size_t *num_headers, size_t last_len) -{ - const char *buf = buf_start, *buf_end = buf + len; - size_t max_headers = *num_headers; - int r; - - *minor_version = -1; - *status = 0; - *msg = NULL; - *msg_len = 0; - *num_headers = 0; - - /* if last_len != 0, check if the response is complete (a fast countermeasure - against slowloris */ - if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { - return r; - } - - if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) { - return r; - } - - return (int)(buf - buf_start); -} - -int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len) -{ - const char *buf = buf_start, *buf_end = buf + len; - size_t max_headers = *num_headers; - int r; - - *num_headers = 0; - - /* if last_len != 0, check if the response is complete (a fast countermeasure - against slowloris */ - if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { - return r; - } - - if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) { - return r; - } - - return (int)(buf - buf_start); -} - -enum { - CHUNKED_IN_CHUNK_SIZE, - CHUNKED_IN_CHUNK_EXT, - CHUNKED_IN_CHUNK_DATA, - CHUNKED_IN_CHUNK_CRLF, - CHUNKED_IN_TRAILERS_LINE_HEAD, - CHUNKED_IN_TRAILERS_LINE_MIDDLE -}; - -static int decode_hex(int ch) -{ - if ('0' <= ch && ch <= '9') { - return ch - '0'; - } else if ('A' <= ch && ch <= 'F') { - return ch - 'A' + 0xa; - } else if ('a' <= ch && ch <= 'f') { - return ch - 'a' + 0xa; - } else { - return -1; - } -} - -ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz) -{ - size_t dst = 0, src = 0, bufsz = *_bufsz; - ssize_t ret = -2; /* incomplete */ - - while (1) { - switch (decoder->_state) { - case CHUNKED_IN_CHUNK_SIZE: - for (;; ++src) { - int v; - if (src == bufsz) - goto Exit; - if ((v = decode_hex(buf[src])) == -1) { - if (decoder->_hex_count == 0) { - ret = -1; - goto Exit; - } - break; - } - if (decoder->_hex_count == sizeof(size_t) * 2) { - ret = -1; - goto Exit; - } - decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v; - ++decoder->_hex_count; - } - decoder->_hex_count = 0; - decoder->_state = CHUNKED_IN_CHUNK_EXT; - /* fallthru */ - case CHUNKED_IN_CHUNK_EXT: - /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */ - for (;; ++src) { - if (src == bufsz) - goto Exit; - if (buf[src] == '\012') - break; - } - ++src; - if (decoder->bytes_left_in_chunk == 0) { - if (decoder->consume_trailer) { - decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; - break; - } else { - goto Complete; - } - } - decoder->_state = CHUNKED_IN_CHUNK_DATA; - /* fallthru */ - case CHUNKED_IN_CHUNK_DATA: { - size_t avail = bufsz - src; - if (avail < decoder->bytes_left_in_chunk) { - if (dst != src) - memmove(buf + dst, buf + src, avail); - src += avail; - dst += avail; - decoder->bytes_left_in_chunk -= avail; - goto Exit; - } - if (dst != src) - memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk); - src += decoder->bytes_left_in_chunk; - dst += decoder->bytes_left_in_chunk; - decoder->bytes_left_in_chunk = 0; - decoder->_state = CHUNKED_IN_CHUNK_CRLF; - } - /* fallthru */ - case CHUNKED_IN_CHUNK_CRLF: - for (;; ++src) { - if (src == bufsz) - goto Exit; - if (buf[src] != '\015') - break; - } - if (buf[src] != '\012') { - ret = -1; - goto Exit; - } - ++src; - decoder->_state = CHUNKED_IN_CHUNK_SIZE; - break; - case CHUNKED_IN_TRAILERS_LINE_HEAD: - for (;; ++src) { - if (src == bufsz) - goto Exit; - if (buf[src] != '\015') - break; - } - if (buf[src++] == '\012') - goto Complete; - decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE; - /* fallthru */ - case CHUNKED_IN_TRAILERS_LINE_MIDDLE: - for (;; ++src) { - if (src == bufsz) - goto Exit; - if (buf[src] == '\012') - break; - } - ++src; - decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; - break; - default: - assert(!"decoder is corrupt"); - } - } - -Complete: - ret = bufsz - src; -Exit: - if (dst != src) - memmove(buf + dst, buf + src, bufsz - src); - *_bufsz = dst; - return ret; -} - -int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder) -{ - return decoder->_state == CHUNKED_IN_CHUNK_DATA; -} - -#undef CHECK_EOF -#undef EXPECT_CHAR -#undef ADVANCE_TOKEN diff --git a/plugins/picohttpparser.h b/plugins/picohttpparser.h deleted file mode 100644 index a8fad71d..00000000 --- a/plugins/picohttpparser.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase, - * Shigeo Mitsunari - * - * The software is licensed under either the MIT License (below) or the Perl - * license. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef picohttpparser_h -#define picohttpparser_h - -#include - -#ifdef _MSC_VER -#define ssize_t intptr_t -#endif - -/* $Id: 67fd3ee74103ada60258d8a16e868f483abcca87 $ */ - -#ifdef __cplusplus -extern "C" { -#endif - -/* contains name and value of a header (name == NULL if is a continuing line - * of a multiline header */ -struct phr_header { - const char *name; - size_t name_len; - const char *value; - size_t value_len; -}; - -/* returns number of bytes consumed if successful, -2 if request is partial, - * -1 if failed */ -int phr_parse_request(const char *buf, size_t len, const char **method, size_t *method_len, const char **path, size_t *path_len, - int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len); - -/* ditto */ -int phr_parse_response(const char *_buf, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len, - struct phr_header *headers, size_t *num_headers, size_t last_len); - -/* ditto */ -int phr_parse_headers(const char *buf, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len); - -/* should be zero-filled before start */ -struct phr_chunked_decoder { - size_t bytes_left_in_chunk; /* number of bytes left in current chunk */ - char consume_trailer; /* if trailing headers should be consumed */ - char _hex_count; - char _state; -}; - -/* the function rewrites the buffer given as (buf, bufsz) removing the chunked- - * encoding headers. When the function returns without an error, bufsz is - * updated to the length of the decoded data available. Applications should - * repeatedly call the function while it returns -2 (incomplete) every time - * supplying newly arrived data. If the end of the chunked-encoded data is - * found, the function returns a non-negative number indicating the number of - * octets left undecoded at the tail of the supplied buffer. Returns -1 on - * error. - */ -ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *bufsz); - -/* returns if the chunked decoder is in middle of chunked data */ -int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/plugins/picohttpparser/Makefile.am b/plugins/picohttpparser/Makefile.am new file mode 100644 index 00000000..5a610278 --- /dev/null +++ b/plugins/picohttpparser/Makefile.am @@ -0,0 +1,3 @@ +noinst_LIBRARIES = libpicohttpparser.a + +libpicohttpparser_a_SOURCES = picohttpparser.c diff --git a/plugins/picohttpparser/picohttpparser.c b/plugins/picohttpparser/picohttpparser.c new file mode 100644 index 00000000..6a2d872d --- /dev/null +++ b/plugins/picohttpparser/picohttpparser.c @@ -0,0 +1,620 @@ +/* + * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase, + * Shigeo Mitsunari + * + * The software is licensed under either the MIT License (below) or the Perl + * license. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#ifdef __SSE4_2__ +#ifdef _MSC_VER +#include +#else +#include +#endif +#endif +#include "picohttpparser.h" + +/* $Id: a707070d11d499609f99d09f97535642cec910a8 $ */ + +#if __GNUC__ >= 3 +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#else +#define likely(x) (x) +#define unlikely(x) (x) +#endif + +#ifdef _MSC_VER +#define ALIGNED(n) _declspec(align(n)) +#else +#define ALIGNED(n) __attribute__((aligned(n))) +#endif + +#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u) + +#define CHECK_EOF() \ + if (buf == buf_end) { \ + *ret = -2; \ + return NULL; \ + } + +#define EXPECT_CHAR_NO_CHECK(ch) \ + if (*buf++ != ch) { \ + *ret = -1; \ + return NULL; \ + } + +#define EXPECT_CHAR(ch) \ + CHECK_EOF(); \ + EXPECT_CHAR_NO_CHECK(ch); + +#define ADVANCE_TOKEN(tok, toklen) \ + do { \ + const char *tok_start = buf; \ + static const char ALIGNED(16) ranges2[] = "\000\040\177\177"; \ + int found2; \ + buf = findchar_fast(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2); \ + if (!found2) { \ + CHECK_EOF(); \ + } \ + while (1) { \ + if (*buf == ' ') { \ + break; \ + } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \ + if ((unsigned char)*buf < '\040' || *buf == '\177') { \ + *ret = -1; \ + return NULL; \ + } \ + } \ + ++buf; \ + CHECK_EOF(); \ + } \ + tok = tok_start; \ + toklen = buf - tok_start; \ + } while (0) + +static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0" + "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1" + "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; + +static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found) +{ + *found = 0; +#if __SSE4_2__ + if (likely(buf_end - buf >= 16)) { + __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges); + + size_t left = (buf_end - buf) & ~15; + do { + __m128i b16 = _mm_loadu_si128((const __m128i *)buf); + int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS); + if (unlikely(r != 16)) { + buf += r; + *found = 1; + break; + } + buf += 16; + left -= 16; + } while (likely(left != 0)); + } +#else + /* suppress unused parameter warning */ + (void)buf_end; + (void)ranges; + (void)ranges_size; +#endif + return buf; +} + +static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret) +{ + const char *token_start = buf; + +#ifdef __SSE4_2__ + static const char ranges1[] = "\0\010" + /* allow HT */ + "\012\037" + /* allow SP and up to but not including DEL */ + "\177\177" + /* allow chars w. MSB set */ + ; + int found; + buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found); + if (found) + goto FOUND_CTL; +#else + /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */ + while (likely(buf_end - buf >= 8)) { +#define DOIT() \ + do { \ + if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \ + goto NonPrintable; \ + ++buf; \ + } while (0) + DOIT(); + DOIT(); + DOIT(); + DOIT(); + DOIT(); + DOIT(); + DOIT(); + DOIT(); +#undef DOIT + continue; + NonPrintable: + if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) { + goto FOUND_CTL; + } + ++buf; + } +#endif + for (;; ++buf) { + CHECK_EOF(); + if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { + if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) { + goto FOUND_CTL; + } + } + } +FOUND_CTL: + if (likely(*buf == '\015')) { + ++buf; + EXPECT_CHAR('\012'); + *token_len = buf - 2 - token_start; + } else if (*buf == '\012') { + *token_len = buf - token_start; + ++buf; + } else { + *ret = -1; + return NULL; + } + *token = token_start; + + return buf; +} + +static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret) +{ + int ret_cnt = 0; + buf = last_len < 3 ? buf : buf + last_len - 3; + + while (1) { + CHECK_EOF(); + if (*buf == '\015') { + ++buf; + CHECK_EOF(); + EXPECT_CHAR('\012'); + ++ret_cnt; + } else if (*buf == '\012') { + ++buf; + ++ret_cnt; + } else { + ++buf; + ret_cnt = 0; + } + if (ret_cnt == 2) { + return buf; + } + } + + *ret = -2; + return NULL; +} + +#define PARSE_INT(valp_, mul_) \ + if (*buf < '0' || '9' < *buf) { \ + buf++; \ + *ret = -1; \ + return NULL; \ + } \ + *(valp_) = (mul_) * (*buf++ - '0'); + +#define PARSE_INT_3(valp_) \ + do { \ + int res_ = 0; \ + PARSE_INT(&res_, 100) \ + *valp_ = res_; \ + PARSE_INT(&res_, 10) \ + *valp_ += res_; \ + PARSE_INT(&res_, 1) \ + *valp_ += res_; \ + } while (0) + +/* returned pointer is always within [buf, buf_end), or null */ +static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret) +{ + /* we want at least [HTTP/1.] to try to parse */ + if (buf_end - buf < 9) { + *ret = -2; + return NULL; + } + EXPECT_CHAR_NO_CHECK('H'); + EXPECT_CHAR_NO_CHECK('T'); + EXPECT_CHAR_NO_CHECK('T'); + EXPECT_CHAR_NO_CHECK('P'); + EXPECT_CHAR_NO_CHECK('/'); + EXPECT_CHAR_NO_CHECK('1'); + EXPECT_CHAR_NO_CHECK('.'); + PARSE_INT(minor_version, 1); + return buf; +} + +static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers, + size_t max_headers, int *ret) +{ + for (;; ++*num_headers) { + CHECK_EOF(); + if (*buf == '\015') { + ++buf; + EXPECT_CHAR('\012'); + break; + } else if (*buf == '\012') { + ++buf; + break; + } + if (*num_headers == max_headers) { + *ret = -1; + return NULL; + } + if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) { + /* parsing name, but do not discard SP before colon, see + * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */ + headers[*num_headers].name = buf; + static const char ALIGNED(16) ranges1[] = "\x00 " /* control chars and up to SP */ + "\"\"" /* 0x22 */ + "()" /* 0x28,0x29 */ + ",," /* 0x2c */ + "//" /* 0x2f */ + ":@" /* 0x3a-0x40 */ + "[]" /* 0x5b-0x5d */ + "{\377"; /* 0x7b-0xff */ + int found; + buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found); + if (!found) { + CHECK_EOF(); + } + while (1) { + if (*buf == ':') { + break; + } else if (!token_char_map[(unsigned char)*buf]) { + *ret = -1; + return NULL; + } + ++buf; + CHECK_EOF(); + } + if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) { + *ret = -1; + return NULL; + } + ++buf; + for (;; ++buf) { + CHECK_EOF(); + if (!(*buf == ' ' || *buf == '\t')) { + break; + } + } + } else { + headers[*num_headers].name = NULL; + headers[*num_headers].name_len = 0; + } + if ((buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value, &headers[*num_headers].value_len, ret)) == NULL) { + return NULL; + } + } + return buf; +} + +static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path, + size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, + size_t max_headers, int *ret) +{ + /* skip first empty line (some clients add CRLF after POST content) */ + CHECK_EOF(); + if (*buf == '\015') { + ++buf; + EXPECT_CHAR('\012'); + } else if (*buf == '\012') { + ++buf; + } + + /* parse request line */ + ADVANCE_TOKEN(*method, *method_len); + ++buf; + ADVANCE_TOKEN(*path, *path_len); + ++buf; + if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) { + return NULL; + } + if (*buf == '\015') { + ++buf; + EXPECT_CHAR('\012'); + } else if (*buf == '\012') { + ++buf; + } else { + *ret = -1; + return NULL; + } + + return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret); +} + +int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path, + size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len) +{ + const char *buf = buf_start, *buf_end = buf_start + len; + size_t max_headers = *num_headers; + int r; + + *method = NULL; + *method_len = 0; + *path = NULL; + *path_len = 0; + *minor_version = -1; + *num_headers = 0; + + /* if last_len != 0, check if the request is complete (a fast countermeasure + againt slowloris */ + if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { + return r; + } + + if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers, + &r)) == NULL) { + return r; + } + + return (int)(buf - buf_start); +} + +static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg, + size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret) +{ + /* parse "HTTP/1.x" */ + if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) { + return NULL; + } + /* skip space */ + if (*buf++ != ' ') { + *ret = -1; + return NULL; + } + /* parse status code, we want at least [:digit:][:digit:][:digit:] to try to parse */ + if (buf_end - buf < 4) { + *ret = -2; + return NULL; + } + PARSE_INT_3(status); + + /* skip space */ + if (*buf++ != ' ') { + *ret = -1; + return NULL; + } + /* get message */ + if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) { + return NULL; + } + + return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret); +} + +int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len, + struct phr_header *headers, size_t *num_headers, size_t last_len) +{ + const char *buf = buf_start, *buf_end = buf + len; + size_t max_headers = *num_headers; + int r; + + *minor_version = -1; + *status = 0; + *msg = NULL; + *msg_len = 0; + *num_headers = 0; + + /* if last_len != 0, check if the response is complete (a fast countermeasure + against slowloris */ + if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { + return r; + } + + if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) { + return r; + } + + return (int)(buf - buf_start); +} + +int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len) +{ + const char *buf = buf_start, *buf_end = buf + len; + size_t max_headers = *num_headers; + int r; + + *num_headers = 0; + + /* if last_len != 0, check if the response is complete (a fast countermeasure + against slowloris */ + if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { + return r; + } + + if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) { + return r; + } + + return (int)(buf - buf_start); +} + +enum { + CHUNKED_IN_CHUNK_SIZE, + CHUNKED_IN_CHUNK_EXT, + CHUNKED_IN_CHUNK_DATA, + CHUNKED_IN_CHUNK_CRLF, + CHUNKED_IN_TRAILERS_LINE_HEAD, + CHUNKED_IN_TRAILERS_LINE_MIDDLE +}; + +static int decode_hex(int ch) +{ + if ('0' <= ch && ch <= '9') { + return ch - '0'; + } else if ('A' <= ch && ch <= 'F') { + return ch - 'A' + 0xa; + } else if ('a' <= ch && ch <= 'f') { + return ch - 'a' + 0xa; + } else { + return -1; + } +} + +ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz) +{ + size_t dst = 0, src = 0, bufsz = *_bufsz; + ssize_t ret = -2; /* incomplete */ + + while (1) { + switch (decoder->_state) { + case CHUNKED_IN_CHUNK_SIZE: + for (;; ++src) { + int v; + if (src == bufsz) + goto Exit; + if ((v = decode_hex(buf[src])) == -1) { + if (decoder->_hex_count == 0) { + ret = -1; + goto Exit; + } + break; + } + if (decoder->_hex_count == sizeof(size_t) * 2) { + ret = -1; + goto Exit; + } + decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v; + ++decoder->_hex_count; + } + decoder->_hex_count = 0; + decoder->_state = CHUNKED_IN_CHUNK_EXT; + /* fallthru */ + case CHUNKED_IN_CHUNK_EXT: + /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */ + for (;; ++src) { + if (src == bufsz) + goto Exit; + if (buf[src] == '\012') + break; + } + ++src; + if (decoder->bytes_left_in_chunk == 0) { + if (decoder->consume_trailer) { + decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; + break; + } else { + goto Complete; + } + } + decoder->_state = CHUNKED_IN_CHUNK_DATA; + /* fallthru */ + case CHUNKED_IN_CHUNK_DATA: { + size_t avail = bufsz - src; + if (avail < decoder->bytes_left_in_chunk) { + if (dst != src) + memmove(buf + dst, buf + src, avail); + src += avail; + dst += avail; + decoder->bytes_left_in_chunk -= avail; + goto Exit; + } + if (dst != src) + memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk); + src += decoder->bytes_left_in_chunk; + dst += decoder->bytes_left_in_chunk; + decoder->bytes_left_in_chunk = 0; + decoder->_state = CHUNKED_IN_CHUNK_CRLF; + } + /* fallthru */ + case CHUNKED_IN_CHUNK_CRLF: + for (;; ++src) { + if (src == bufsz) + goto Exit; + if (buf[src] != '\015') + break; + } + if (buf[src] != '\012') { + ret = -1; + goto Exit; + } + ++src; + decoder->_state = CHUNKED_IN_CHUNK_SIZE; + break; + case CHUNKED_IN_TRAILERS_LINE_HEAD: + for (;; ++src) { + if (src == bufsz) + goto Exit; + if (buf[src] != '\015') + break; + } + if (buf[src++] == '\012') + goto Complete; + decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE; + /* fallthru */ + case CHUNKED_IN_TRAILERS_LINE_MIDDLE: + for (;; ++src) { + if (src == bufsz) + goto Exit; + if (buf[src] == '\012') + break; + } + ++src; + decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; + break; + default: + assert(!"decoder is corrupt"); + } + } + +Complete: + ret = bufsz - src; +Exit: + if (dst != src) + memmove(buf + dst, buf + src, bufsz - src); + *_bufsz = dst; + return ret; +} + +int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder) +{ + return decoder->_state == CHUNKED_IN_CHUNK_DATA; +} + +#undef CHECK_EOF +#undef EXPECT_CHAR +#undef ADVANCE_TOKEN diff --git a/plugins/picohttpparser/picohttpparser.h b/plugins/picohttpparser/picohttpparser.h new file mode 100644 index 00000000..a8fad71d --- /dev/null +++ b/plugins/picohttpparser/picohttpparser.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase, + * Shigeo Mitsunari + * + * The software is licensed under either the MIT License (below) or the Perl + * license. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef picohttpparser_h +#define picohttpparser_h + +#include + +#ifdef _MSC_VER +#define ssize_t intptr_t +#endif + +/* $Id: 67fd3ee74103ada60258d8a16e868f483abcca87 $ */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* contains name and value of a header (name == NULL if is a continuing line + * of a multiline header */ +struct phr_header { + const char *name; + size_t name_len; + const char *value; + size_t value_len; +}; + +/* returns number of bytes consumed if successful, -2 if request is partial, + * -1 if failed */ +int phr_parse_request(const char *buf, size_t len, const char **method, size_t *method_len, const char **path, size_t *path_len, + int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len); + +/* ditto */ +int phr_parse_response(const char *_buf, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len, + struct phr_header *headers, size_t *num_headers, size_t last_len); + +/* ditto */ +int phr_parse_headers(const char *buf, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len); + +/* should be zero-filled before start */ +struct phr_chunked_decoder { + size_t bytes_left_in_chunk; /* number of bytes left in current chunk */ + char consume_trailer; /* if trailing headers should be consumed */ + char _hex_count; + char _state; +}; + +/* the function rewrites the buffer given as (buf, bufsz) removing the chunked- + * encoding headers. When the function returns without an error, bufsz is + * updated to the length of the decoded data available. Applications should + * repeatedly call the function while it returns -2 (incomplete) every time + * supplying newly arrived data. If the end of the chunked-encoded data is + * found, the function returns a non-negative number indicating the number of + * octets left undecoded at the tail of the supplied buffer. Returns -1 on + * error. + */ +ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *bufsz); + +/* returns if the chunked decoder is in middle of chunked data */ +int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/plugins/uriparser/Makefile.am b/plugins/uriparser/Makefile.am new file mode 100644 index 00000000..592e694c --- /dev/null +++ b/plugins/uriparser/Makefile.am @@ -0,0 +1,9 @@ +noinst_LIBRARIES = liburiparser.a + +liburiparser_a_SOURCES = UriCommon.c UriCompare.c \ + UriEscape.c UriFile.c UriIp4.c \ + UriIp4Base.c UriNormalize.c \ + UriNormalizeBase.c UriParse.c \ + UriParseBase.c UriQuery.c \ + UriRecompose.c UriResolve.c \ + UriShorten.c diff --git a/plugins/uriparser/Uri.h b/plugins/uriparser/Uri.h new file mode 100644 index 00000000..4a185808 --- /dev/null +++ b/plugins/uriparser/Uri.h @@ -0,0 +1,777 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file Uri.h + * Holds the RFC 3986 %URI parser interface. + * NOTE: This header includes itself twice. + */ + +#if (defined(URI_PASS_ANSI) && !defined(URI_H_ANSI)) \ + || (defined(URI_PASS_UNICODE) && !defined(URI_H_UNICODE)) \ + || (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* What encodings are enabled? */ +#include "UriDefsConfig.h" +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "Uri.h" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "Uri.h" +# undef URI_PASS_UNICODE +# endif +/* Only one pass for each encoding */ +#elif (defined(URI_PASS_ANSI) && !defined(URI_H_ANSI) \ + && defined(URI_ENABLE_ANSI)) || (defined(URI_PASS_UNICODE) \ + && !defined(URI_H_UNICODE) && defined(URI_ENABLE_UNICODE)) +# ifdef URI_PASS_ANSI +# define URI_H_ANSI 1 +# include "UriDefsAnsi.h" +# else +# define URI_H_UNICODE 1 +# include "UriDefsUnicode.h" +# endif + + + +#ifdef __cplusplus +extern "C" { +#endif + + + +#ifndef URI_DOXYGEN +# include "UriBase.h" +#endif + + + +/** + * Specifies a range of characters within a string. + * The range includes all characters from first + * to one before afterLast. So if both are + * non-NULL the difference is the length of the text range. + * + * @see UriUriA + * @see UriPathSegmentA + * @see UriHostDataA + * @since 0.3.0 + */ +typedef struct URI_TYPE(TextRangeStruct) { + const URI_CHAR * first; /**< Pointer to first character */ + const URI_CHAR * afterLast; /**< Pointer to character after the last one still in */ +} URI_TYPE(TextRange); /**< @copydoc UriTextRangeStructA */ + + + +/** + * Represents a path segment within a %URI path. + * More precisely it is a node in a linked + * list of path segments. + * + * @see UriUriA + * @since 0.3.0 + */ +typedef struct URI_TYPE(PathSegmentStruct) { + URI_TYPE(TextRange) text; /**< Path segment name */ + struct URI_TYPE(PathSegmentStruct) * next; /**< Pointer to the next path segment in the list, can be NULL if last already */ + + void * reserved; /**< Reserved to the parser */ +} URI_TYPE(PathSegment); /**< @copydoc UriPathSegmentStructA */ + + + +/** + * Holds structured host information. + * This is either a IPv4, IPv6, plain + * text for IPvFuture or all zero for + * a registered name. + * + * @see UriUriA + * @since 0.3.0 + */ +typedef struct URI_TYPE(HostDataStruct) { + UriIp4 * ip4; /**< IPv4 address */ + UriIp6 * ip6; /**< IPv6 address */ + URI_TYPE(TextRange) ipFuture; /**< IPvFuture address */ +} URI_TYPE(HostData); /**< @copydoc UriHostDataStructA */ + + + +/** + * Represents an RFC 3986 %URI. + * Missing components can be {NULL, NULL} ranges. + * + * @see uriParseUriA + * @see uriFreeUriMembersA + * @see UriParserStateA + * @since 0.3.0 + */ +typedef struct URI_TYPE(UriStruct) { + URI_TYPE(TextRange) scheme; /**< Scheme (e.g. "http") */ + URI_TYPE(TextRange) userInfo; /**< User info (e.g. "user:pass") */ + URI_TYPE(TextRange) hostText; /**< Host text (set for all hosts, excluding square brackets) */ + URI_TYPE(HostData) hostData; /**< Structured host type specific data */ + URI_TYPE(TextRange) portText; /**< Port (e.g. "80") */ + URI_TYPE(PathSegment) * pathHead; /**< Head of a linked list of path segments */ + URI_TYPE(PathSegment) * pathTail; /**< Tail of the list behind pathHead */ + URI_TYPE(TextRange) query; /**< Query without leading "?" */ + URI_TYPE(TextRange) fragment; /**< Query without leading "#" */ + UriBool absolutePath; /**< Absolute path flag, distincting "a" and "/a" */ + UriBool owner; /**< Memory owner flag */ + + void * reserved; /**< Reserved to the parser */ +} URI_TYPE(Uri); /**< @copydoc UriUriStructA */ + + + +/** + * Represents a state of the %URI parser. + * Missing components can be NULL to reflect + * a components absence. + * + * @see uriFreeUriMembersA + * @since 0.3.0 + */ +typedef struct URI_TYPE(ParserStateStruct) { + URI_TYPE(Uri) * uri; /**< Plug in the %URI structure to be filled while parsing here */ + int errorCode; /**< Code identifying the occured error */ + const URI_CHAR * errorPos; /**< Pointer to position in case of a syntax error */ + + void * reserved; /**< Reserved to the parser */ +} URI_TYPE(ParserState); /**< @copydoc UriParserStateStructA */ + + + +/** + * Represents a query element. + * More precisely it is a node in a linked + * list of query elements. + * + * @since 0.7.0 + */ +typedef struct URI_TYPE(QueryListStruct) { + const URI_CHAR * key; /**< Key of the query element */ + const URI_CHAR * value; /**< Value of the query element, can be NULL */ + + struct URI_TYPE(QueryListStruct) * next; /**< Pointer to the next key/value pair in the list, can be NULL if last already */ +} URI_TYPE(QueryList); /**< @copydoc UriQueryListStructA */ + + + +/** + * Parses a RFC 3986 URI. + * + * @param state INOUT: Parser state with set output %URI, must not be NULL + * @param first IN: Pointer to the first character to parse, must not be NULL + * @param afterLast IN: Pointer to the character after the last to parse, must not be NULL + * @return 0 on success, error code otherwise + * + * @see uriParseUriA + * @see uriToStringA + * @since 0.3.0 + */ +int URI_FUNC(ParseUriEx)(URI_TYPE(ParserState) * state, + const URI_CHAR * first, const URI_CHAR * afterLast); + + + +/** + * Parses a RFC 3986 %URI. + * + * @param state INOUT: Parser state with set output %URI, must not be NULL + * @param text IN: Text to parse, must not be NULL + * @return 0 on success, error code otherwise + * + * @see uriParseUriExA + * @see uriToStringA + * @since 0.3.0 + */ +int URI_FUNC(ParseUri)(URI_TYPE(ParserState) * state, + const URI_CHAR * text); + + + +/** + * Frees all memory associated with the members + * of the %URI structure. Note that the structure + * itself is not freed, only its members. + * + * @param uri INOUT: %URI structure whose members should be freed + * + * @since 0.3.0 + */ +void URI_FUNC(FreeUriMembers)(URI_TYPE(Uri) * uri); + + + +/** + * Percent-encodes all unreserved characters from the input string and + * writes the encoded version to the output string. + * Be sure to allocate 3 times the space of the input buffer for + * the output buffer for normalizeBreaks == URI_FALSE and 6 times + * the space for normalizeBreaks == URI_TRUE + * (since e.g. "\x0d" becomes "%0D%0A" in that case) + * + * @param inFirst IN: Pointer to first character of the input text + * @param inAfterLast IN: Pointer after the last character of the input text + * @param out OUT: Encoded text destination + * @param spaceToPlus IN: Wether to convert ' ' to '+' or not + * @param normalizeBreaks IN: Wether to convert CR and LF to CR-LF or not. + * @return Position of terminator in output string + * + * @see uriEscapeA + * @see uriUnescapeInPlaceExA + * @since 0.5.2 + */ +URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst, + const URI_CHAR * inAfterLast, URI_CHAR * out, + UriBool spaceToPlus, UriBool normalizeBreaks); + + + +/** + * Percent-encodes all unreserved characters from the input string and + * writes the encoded version to the output string. + * Be sure to allocate 3 times the space of the input buffer for + * the output buffer for normalizeBreaks == URI_FALSE and 6 times + * the space for normalizeBreaks == URI_TRUE + * (since e.g. "\x0d" becomes "%0D%0A" in that case) + * + * @param in IN: Text source + * @param out OUT: Encoded text destination + * @param spaceToPlus IN: Wether to convert ' ' to '+' or not + * @param normalizeBreaks IN: Wether to convert CR and LF to CR-LF or not. + * @return Position of terminator in output string + * + * @see uriEscapeExA + * @see uriUnescapeInPlaceA + * @since 0.5.0 + */ +URI_CHAR * URI_FUNC(Escape)(const URI_CHAR * in, URI_CHAR * out, + UriBool spaceToPlus, UriBool normalizeBreaks); + + + +/** + * Unescapes percent-encoded groups in a given string. + * E.g. "%20" will become " ". Unescaping is done in place. + * The return value will be point to the new position + * of the terminating zero. Use this value to get the new + * length of the string. NULL is only returned if inout + * is NULL. + * + * @param inout INOUT: Text to unescape/decode + * @param plusToSpace IN: Whether to convert '+' to ' ' or not + * @param breakConversion IN: Line break conversion mode + * @return Pointer to new position of the terminating zero + * + * @see uriUnescapeInPlaceA + * @see uriEscapeExA + * @since 0.5.0 + */ +const URI_CHAR * URI_FUNC(UnescapeInPlaceEx)(URI_CHAR * inout, + UriBool plusToSpace, UriBreakConversion breakConversion); + + + +/** + * Unescapes percent-encoded groups in a given string. + * E.g. "%20" will become " ". Unescaping is done in place. + * The return value will be point to the new position + * of the terminating zero. Use this value to get the new + * length of the string. NULL is only returned if inout + * is NULL. + * + * NOTE: '+' is not decoded to ' ' and line breaks are not converted. + * Use the more advanced UnescapeInPlaceEx for that features instead. + * + * @param inout INOUT: Text to unescape/decode + * @return Pointer to new position of the terminating zero + * + * @see uriUnescapeInPlaceExA + * @see uriEscapeA + * @since 0.3.0 + */ +const URI_CHAR * URI_FUNC(UnescapeInPlace)(URI_CHAR * inout); + + + +/** + * Performs reference resolution as described in + * section 5.2.2 of RFC 3986. + * NOTE: On success you have to call uriFreeUriMembersA on \p absoluteDest manually later. + * + * @param absoluteDest OUT: Result %URI + * @param relativeSource IN: Reference to resolve + * @param absoluteBase IN: Base %URI to apply + * @return Error code or 0 on success + * + * @see uriRemoveBaseUriA, uriAddBaseUriExA + * @since 0.4.0 + */ +int URI_FUNC(AddBaseUri)(URI_TYPE(Uri) * absoluteDest, + const URI_TYPE(Uri) * relativeSource, + const URI_TYPE(Uri) * absoluteBase); + + + +/** + * Performs reference resolution as described in + * section 5.2.2 of RFC 3986. + * NOTE: On success you have to call uriFreeUriMembersA on \p absoluteDest manually later. + * + * @param absoluteDest OUT: Result %URI + * @param relativeSource IN: Reference to resolve + * @param absoluteBase IN: Base %URI to apply + * @param options IN: Configuration to apply + * @return Error code or 0 on success + * + * @see uriRemoveBaseUriA, uriAddBaseUriA + * @since 0.8.1 + */ +int URI_FUNC(AddBaseUriEx)(URI_TYPE(Uri) * absoluteDest, + const URI_TYPE(Uri) * relativeSource, + const URI_TYPE(Uri) * absoluteBase, + UriResolutionOptions options); + + + +/** + * Tries to make a relative %URI (a reference) from an + * absolute %URI and a given base %URI. This can only work if + * the absolute %URI shares scheme and authority with + * the base %URI. If it does not the result will still be + * an absolute URI (with scheme part if necessary). + * NOTE: On success you have to call uriFreeUriMembersA on + * \p dest manually later. + * + * @param dest OUT: Result %URI + * @param absoluteSource IN: Absolute %URI to make relative + * @param absoluteBase IN: Base %URI + * @param domainRootMode IN: Create %URI with path relative to domain root + * @return Error code or 0 on success + * + * @see uriAddBaseUriA, uriAddBaseUriExA + * @since 0.5.2 + */ +int URI_FUNC(RemoveBaseUri)(URI_TYPE(Uri) * dest, + const URI_TYPE(Uri) * absoluteSource, + const URI_TYPE(Uri) * absoluteBase, + UriBool domainRootMode); + + + +/** + * Checks two URIs for equivalence. Comparison is done + * the naive way, without prior normalization. + * NOTE: Two NULL URIs are equal as well. + * + * @param a IN: First %URI + * @param b IN: Second %URI + * @return URI_TRUE when equal, URI_FAlSE else + * + * @since 0.4.0 + */ +UriBool URI_FUNC(EqualsUri)(const URI_TYPE(Uri) * a, const URI_TYPE(Uri) * b); + + + +/** + * Calculates the number of characters needed to store the + * string representation of the given %URI excluding the + * terminator. + * + * @param uri IN: %URI to measure + * @param charsRequired OUT: Length of the string representation in characters excluding terminator + * @return Error code or 0 on success + * + * @see uriToStringA + * @since 0.5.0 + */ +int URI_FUNC(ToStringCharsRequired)(const URI_TYPE(Uri) * uri, + int * charsRequired); + + + +/** + * Converts a %URI structure back to text as described in + * section 5.3 of RFC 3986. + * + * @param dest OUT: Output destination + * @param uri IN: %URI to convert + * @param maxChars IN: Maximum number of characters to copy including terminator + * @param charsWritten OUT: Number of characters written, can be lower than maxChars even if the %URI is too long! + * @return Error code or 0 on success + * + * @see uriToStringCharsRequiredA + * @since 0.4.0 + */ +int URI_FUNC(ToString)(URI_CHAR * dest, const URI_TYPE(Uri) * uri, int maxChars, int * charsWritten); + + + +/** + * Determines the components of a %URI that are not normalized. + * + * @param uri IN: %URI to check + * @return Normalization job mask + * + * @see uriNormalizeSyntaxA + * @since 0.5.0 + */ +unsigned int URI_FUNC(NormalizeSyntaxMaskRequired)(const URI_TYPE(Uri) * uri); + + + +/** + * Normalizes a %URI using a normalization mask. + * The normalization mask decides what components are normalized. + * + * NOTE: If necessary the %URI becomes owner of all memory + * behind the text pointed to. Text is duplicated in that case. + * + * @param uri INOUT: %URI to normalize + * @param mask IN: Normalization mask + * @return Error code or 0 on success + * + * @see uriNormalizeSyntaxA + * @see uriNormalizeSyntaxMaskRequiredA + * @since 0.5.0 + */ +int URI_FUNC(NormalizeSyntaxEx)(URI_TYPE(Uri) * uri, unsigned int mask); + + + +/** + * Normalizes all components of a %URI. + * + * NOTE: If necessary the %URI becomes owner of all memory + * behind the text pointed to. Text is duplicated in that case. + * + * @param uri INOUT: %URI to normalize + * @return Error code or 0 on success + * + * @see uriNormalizeSyntaxExA + * @see uriNormalizeSyntaxMaskRequiredA + * @since 0.5.0 + */ +int URI_FUNC(NormalizeSyntax)(URI_TYPE(Uri) * uri); + + + +/** + * Converts a Unix filename to a %URI string. + * The destination buffer must be large enough to hold 7 + 3 * len(filename) + 1 + * characters in case of an absolute filename or 3 * len(filename) + 1 in case + * of a relative filename. + * + * EXAMPLE + * Input: "/bin/bash" + * Output: "file:///bin/bash" + * + * @param filename IN: Unix filename to convert + * @param uriString OUT: Destination to write %URI string to + * @return Error code or 0 on success + * + * @see uriUriStringToUnixFilenameA + * @see uriWindowsFilenameToUriStringA + * @since 0.5.2 + */ +int URI_FUNC(UnixFilenameToUriString)(const URI_CHAR * filename, + URI_CHAR * uriString); + + + +/** + * Converts a Windows filename to a %URI string. + * The destination buffer must be large enough to hold 8 + 3 * len(filename) + 1 + * characters in case of an absolute filename or 3 * len(filename) + 1 in case + * of a relative filename. + * + * EXAMPLE + * Input: "E:\\Documents and Settings" + * Output: "file:///E:/Documents%20and%20Settings" + * + * @param filename IN: Windows filename to convert + * @param uriString OUT: Destination to write %URI string to + * @return Error code or 0 on success + * + * @see uriUriStringToWindowsFilenameA + * @see uriUnixFilenameToUriStringA + * @since 0.5.2 + */ +int URI_FUNC(WindowsFilenameToUriString)(const URI_CHAR * filename, + URI_CHAR * uriString); + + + +/** + * Extracts a Unix filename from a %URI string. + * The destination buffer must be large enough to hold len(uriString) + 1 - 7 + * characters in case of an absolute %URI or len(uriString) + 1 in case + * of a relative %URI. + * + * @param uriString IN: %URI string to convert + * @param filename OUT: Destination to write filename to + * @return Error code or 0 on success + * + * @see uriUnixFilenameToUriStringA + * @see uriUriStringToWindowsFilenameA + * @since 0.5.2 + */ +int URI_FUNC(UriStringToUnixFilename)(const URI_CHAR * uriString, + URI_CHAR * filename); + + + +/** + * Extracts a Windows filename from a %URI string. + * The destination buffer must be large enough to hold len(uriString) + 1 - 8 + * characters in case of an absolute %URI or len(uriString) + 1 in case + * of a relative %URI. + * + * @param uriString IN: %URI string to convert + * @param filename OUT: Destination to write filename to + * @return Error code or 0 on success + * + * @see uriWindowsFilenameToUriStringA + * @see uriUriStringToUnixFilenameA + * @since 0.5.2 + */ +int URI_FUNC(UriStringToWindowsFilename)(const URI_CHAR * uriString, + URI_CHAR * filename); + + + +/** + * Calculates the number of characters needed to store the + * string representation of the given query list excluding the + * terminator. It is assumed that line breaks are will be + * normalized to "%0D%0A". + * + * @param queryList IN: Query list to measure + * @param charsRequired OUT: Length of the string representation in characters excluding terminator + * @return Error code or 0 on success + * + * @see uriComposeQueryCharsRequiredExA + * @see uriComposeQueryA + * @since 0.7.0 + */ +int URI_FUNC(ComposeQueryCharsRequired)(const URI_TYPE(QueryList) * queryList, + int * charsRequired); + + + +/** + * Calculates the number of characters needed to store the + * string representation of the given query list excluding the + * terminator. + * + * @param queryList IN: Query list to measure + * @param charsRequired OUT: Length of the string representation in characters excluding terminator + * @param spaceToPlus IN: Wether to convert ' ' to '+' or not + * @param normalizeBreaks IN: Wether to convert CR and LF to CR-LF or not. + * @return Error code or 0 on success + * + * @see uriComposeQueryCharsRequiredA + * @see uriComposeQueryExA + * @since 0.7.0 + */ +int URI_FUNC(ComposeQueryCharsRequiredEx)(const URI_TYPE(QueryList) * queryList, + int * charsRequired, UriBool spaceToPlus, UriBool normalizeBreaks); + + + +/** + * Converts a query list structure back to a query string. + * The composed string does not start with '?', + * on the way ' ' is converted to '+' and line breaks are + * normalized to "%0D%0A". + * + * @param dest OUT: Output destination + * @param queryList IN: Query list to convert + * @param maxChars IN: Maximum number of characters to copy including terminator + * @param charsWritten OUT: Number of characters written, can be lower than maxChars even if the query list is too long! + * @return Error code or 0 on success + * + * @see uriComposeQueryExA + * @see uriComposeQueryMallocA + * @see uriComposeQueryCharsRequiredA + * @see uriDissectQueryMallocA + * @since 0.7.0 + */ +int URI_FUNC(ComposeQuery)(URI_CHAR * dest, + const URI_TYPE(QueryList) * queryList, int maxChars, int * charsWritten); + + + +/** + * Converts a query list structure back to a query string. + * The composed string does not start with '?'. + * + * @param dest OUT: Output destination + * @param queryList IN: Query list to convert + * @param maxChars IN: Maximum number of characters to copy including terminator + * @param charsWritten OUT: Number of characters written, can be lower than maxChars even if the query list is too long! + * @param spaceToPlus IN: Wether to convert ' ' to '+' or not + * @param normalizeBreaks IN: Wether to convert CR and LF to CR-LF or not. + * @return Error code or 0 on success + * + * @see uriComposeQueryA + * @see uriComposeQueryMallocExA + * @see uriComposeQueryCharsRequiredExA + * @see uriDissectQueryMallocExA + * @since 0.7.0 + */ +int URI_FUNC(ComposeQueryEx)(URI_CHAR * dest, + const URI_TYPE(QueryList) * queryList, int maxChars, int * charsWritten, + UriBool spaceToPlus, UriBool normalizeBreaks); + + + +/** + * Converts a query list structure back to a query string. + * Memory for this string is allocated internally. + * The composed string does not start with '?', + * on the way ' ' is converted to '+' and line breaks are + * normalized to "%0D%0A". + * + * @param dest OUT: Output destination + * @param queryList IN: Query list to convert + * @return Error code or 0 on success + * + * @see uriComposeQueryMallocExA + * @see uriComposeQueryA + * @see uriDissectQueryMallocA + * @since 0.7.0 + */ +int URI_FUNC(ComposeQueryMalloc)(URI_CHAR ** dest, + const URI_TYPE(QueryList) * queryList); + + + +/** + * Converts a query list structure back to a query string. + * Memory for this string is allocated internally. + * The composed string does not start with '?'. + * + * @param dest OUT: Output destination + * @param queryList IN: Query list to convert + * @param spaceToPlus IN: Wether to convert ' ' to '+' or not + * @param normalizeBreaks IN: Wether to convert CR and LF to CR-LF or not. + * @return Error code or 0 on success + * + * @see uriComposeQueryMallocA + * @see uriComposeQueryExA + * @see uriDissectQueryMallocExA + * @since 0.7.0 + */ +int URI_FUNC(ComposeQueryMallocEx)(URI_CHAR ** dest, + const URI_TYPE(QueryList) * queryList, + UriBool spaceToPlus, UriBool normalizeBreaks); + + + +/** + * Constructs a query list from the raw query string of a given URI. + * On the way '+' is converted back to ' ', line breaks are not modified. + * + * @param dest OUT: Output destination + * @param itemCount OUT: Number of items found, can be NULL + * @param first IN: Pointer to first character after '?' + * @param afterLast IN: Pointer to character after the last one still in + * @return Error code or 0 on success + * + * @see uriDissectQueryMallocExA + * @see uriComposeQueryA + * @see uriFreeQueryListA + * @since 0.7.0 + */ +int URI_FUNC(DissectQueryMalloc)(URI_TYPE(QueryList) ** dest, int * itemCount, + const URI_CHAR * first, const URI_CHAR * afterLast); + + + +/** + * Constructs a query list from the raw query string of a given URI. + * + * @param dest OUT: Output destination + * @param itemCount OUT: Number of items found, can be NULL + * @param first IN: Pointer to first character after '?' + * @param afterLast IN: Pointer to character after the last one still in + * @param plusToSpace IN: Whether to convert '+' to ' ' or not + * @param breakConversion IN: Line break conversion mode + * @return Error code or 0 on success + * + * @see uriDissectQueryMallocA + * @see uriComposeQueryExA + * @see uriFreeQueryListA + * @since 0.7.0 + */ +int URI_FUNC(DissectQueryMallocEx)(URI_TYPE(QueryList) ** dest, int * itemCount, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriBool plusToSpace, UriBreakConversion breakConversion); + + + +/** + * Frees all memory associated with the given query list. + * The structure itself is freed as well. + * + * @param queryList INOUT: Query list to free + * + * @since 0.7.0 + */ +void URI_FUNC(FreeQueryList)(URI_TYPE(QueryList) * queryList); + + + +#ifdef __cplusplus +} +#endif + + + +#endif +#endif diff --git a/plugins/uriparser/UriBase.h b/plugins/uriparser/UriBase.h new file mode 100644 index 00000000..bc63b059 --- /dev/null +++ b/plugins/uriparser/UriBase.h @@ -0,0 +1,197 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriBase.h + * Holds definitions independent of the encoding pass. + */ + +#ifndef URI_BASE_H +#define URI_BASE_H 1 + + + +/* Version helper macro */ +#define URI_ANSI_TO_UNICODE(x) L##x + + + +/* Version */ +#define URI_VER_MAJOR 0 +#define URI_VER_MINOR 8 +#define URI_VER_RELEASE 4 +#define URI_VER_SUFFIX_ANSI "" +#define URI_VER_SUFFIX_UNICODE URI_ANSI_TO_UNICODE(URI_VER_SUFFIX_ANSI) + + + +/* More version helper macros */ +#define URI_INT_TO_ANSI_HELPER(x) #x +#define URI_INT_TO_ANSI(x) URI_INT_TO_ANSI_HELPER(x) + +#define URI_INT_TO_UNICODE_HELPER(x) URI_ANSI_TO_UNICODE(#x) +#define URI_INT_TO_UNICODE(x) URI_INT_TO_UNICODE_HELPER(x) + +#define URI_VER_ANSI_HELPER(ma, mi, r, s) \ + URI_INT_TO_ANSI(ma) "." \ + URI_INT_TO_ANSI(mi) "." \ + URI_INT_TO_ANSI(r) \ + s + +#define URI_VER_UNICODE_HELPER(ma, mi, r, s) \ + URI_INT_TO_UNICODE(ma) L"." \ + URI_INT_TO_UNICODE(mi) L"." \ + URI_INT_TO_UNICODE(r) \ + s + + + +/* Full version strings */ +#define URI_VER_ANSI URI_VER_ANSI_HELPER(URI_VER_MAJOR, URI_VER_MINOR, URI_VER_RELEASE, URI_VER_SUFFIX_ANSI) +#define URI_VER_UNICODE URI_VER_UNICODE_HELPER(URI_VER_MAJOR, URI_VER_MINOR, URI_VER_RELEASE, URI_VER_SUFFIX_UNICODE) + + + +/* Unused parameter macro */ +#ifdef __GNUC__ +# define URI_UNUSED(x) unused_##x __attribute__((unused)) +#else +# define URI_UNUSED(x) x +#endif + + + +typedef int UriBool; /**< Boolean type */ + +#define URI_TRUE 1 +#define URI_FALSE 0 + + + +/* Shared errors */ +#define URI_SUCCESS 0 +#define URI_ERROR_SYNTAX 1 /* Parsed text violates expected format */ +#define URI_ERROR_NULL 2 /* One of the params passed was NULL + although it mustn't be */ +#define URI_ERROR_MALLOC 3 /* Requested memory could not be allocated */ +#define URI_ERROR_OUTPUT_TOO_LARGE 4 /* Some output is to large for the receiving buffer */ +#define URI_ERROR_NOT_IMPLEMENTED 8 /* The called function is not implemented yet */ +#define URI_ERROR_RANGE_INVALID 9 /* The parameters passed contained invalid ranges */ + + +/* Errors specific to ToString */ +#define URI_ERROR_TOSTRING_TOO_LONG URI_ERROR_OUTPUT_TOO_LARGE /* Deprecated, test for URI_ERROR_OUTPUT_TOO_LARGE instead */ + +/* Errors specific to AddBaseUri */ +#define URI_ERROR_ADDBASE_REL_BASE 5 /* Given base is not absolute */ + +/* Errors specific to RemoveBaseUri */ +#define URI_ERROR_REMOVEBASE_REL_BASE 6 /* Given base is not absolute */ +#define URI_ERROR_REMOVEBASE_REL_SOURCE 7 /* Given base is not absolute */ + + + +#ifndef URI_DOXYGEN +# include /* For NULL, snprintf */ +# include /* For wchar_t */ +# include /* For strlen, memset, memcpy */ +# include /* For malloc */ +#endif /* URI_DOXYGEN */ + + + +/** + * Holds an IPv4 address. + */ +typedef struct UriIp4Struct { + unsigned char data[4]; /**< Each octet in one byte */ +} UriIp4; /**< @copydoc UriIp4Struct */ + + + +/** + * Holds an IPv6 address. + */ +typedef struct UriIp6Struct { + unsigned char data[16]; /**< Each quad in two bytes */ +} UriIp6; /**< @copydoc UriIp6Struct */ + + + +/** + * Specifies a line break conversion mode. + */ +typedef enum UriBreakConversionEnum { + URI_BR_TO_LF, /**< Convert to Unix line breaks ("\\x0a") */ + URI_BR_TO_CRLF, /**< Convert to Windows line breaks ("\\x0d\\x0a") */ + URI_BR_TO_CR, /**< Convert to Macintosh line breaks ("\\x0d") */ + URI_BR_TO_UNIX = URI_BR_TO_LF, /**< @copydoc UriBreakConversionEnum::URI_BR_TO_LF */ + URI_BR_TO_WINDOWS = URI_BR_TO_CRLF, /**< @copydoc UriBreakConversionEnum::URI_BR_TO_CRLF */ + URI_BR_TO_MAC = URI_BR_TO_CR, /**< @copydoc UriBreakConversionEnum::URI_BR_TO_CR */ + URI_BR_DONT_TOUCH /**< Copy line breaks unmodified */ +} UriBreakConversion; /**< @copydoc UriBreakConversionEnum */ + + + +/** + * Specifies which component of a %URI has to be normalized. + */ +typedef enum UriNormalizationMaskEnum { + URI_NORMALIZED = 0, /**< Do not normalize anything */ + URI_NORMALIZE_SCHEME = 1 << 0, /**< Normalize scheme (fix uppercase letters) */ + URI_NORMALIZE_USER_INFO = 1 << 1, /**< Normalize user info (fix uppercase percent-encodings) */ + URI_NORMALIZE_HOST = 1 << 2, /**< Normalize host (fix uppercase letters) */ + URI_NORMALIZE_PATH = 1 << 3, /**< Normalize path (fix uppercase percent-encodings and redundant dot segments) */ + URI_NORMALIZE_QUERY = 1 << 4, /**< Normalize query (fix uppercase percent-encodings) */ + URI_NORMALIZE_FRAGMENT = 1 << 5 /**< Normalize fragment (fix uppercase percent-encodings) */ +} UriNormalizationMask; /**< @copydoc UriNormalizationMaskEnum */ + + + +/** + * Specifies how to resolve %URI references. + */ +typedef enum UriResolutionOptionsEnum { + URI_RESOLVE_STRICTLY = 0, /**< Full RFC conformance */ + URI_RESOLVE_IDENTICAL_SCHEME_COMPAT = 1 << 0 /**< Treat %URI to resolve with identical scheme as having no scheme */ +} UriResolutionOptions; /**< @copydoc UriResolutionOptionsEnum */ + + + +#endif /* URI_BASE_H */ diff --git a/plugins/uriparser/UriCommon.c b/plugins/uriparser/UriCommon.c new file mode 100644 index 00000000..37d6b39d --- /dev/null +++ b/plugins/uriparser/UriCommon.c @@ -0,0 +1,567 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriCommon.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriCommon.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include "UriCommon.h" +#endif + + + +/*extern*/ const URI_CHAR * const URI_FUNC(SafeToPointTo) = _UT("X"); +/*extern*/ const URI_CHAR * const URI_FUNC(ConstPwd) = _UT("."); +/*extern*/ const URI_CHAR * const URI_FUNC(ConstParent) = _UT(".."); + + + +void URI_FUNC(ResetUri)(URI_TYPE(Uri) * uri) { + memset(uri, 0, sizeof(URI_TYPE(Uri))); +} + + + +/* Compares two text ranges for equal text content */ +int URI_FUNC(CompareRange)( + const URI_TYPE(TextRange) * a, + const URI_TYPE(TextRange) * b) { + int diff; + + /* NOTE: Both NULL means equal! */ + if ((a == NULL) || (b == NULL)) { + return ((a == NULL) ? 0 : 1) - ((b == NULL) ? 0 : 1); + } + + /* NOTE: Both NULL means equal! */ + if ((a->first == NULL) || (b->first == NULL)) { + return ((a->first == NULL) ? 0 : 1) - ((b->first == NULL) ? 0 : 1); + } + + diff = ((int)(a->afterLast - a->first) - (int)(b->afterLast - b->first)); + if (diff > 0) { + return 1; + } else if (diff < 0) { + return -1; + } + + diff = URI_STRNCMP(a->first, b->first, (a->afterLast - a->first)); + + if (diff > 0) { + return 1; + } else if (diff < 0) { + return -1; + } + + return diff; +} + + + +/* Properly removes "." and ".." path segments */ +UriBool URI_FUNC(RemoveDotSegments)(URI_TYPE(Uri) * uri, + UriBool relative) { + if (uri == NULL) { + return URI_TRUE; + } + return URI_FUNC(RemoveDotSegmentsEx)(uri, relative, uri->owner); +} + + + +UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri, + UriBool relative, UriBool pathOwned) { + URI_TYPE(PathSegment) * walker; + if ((uri == NULL) || (uri->pathHead == NULL)) { + return URI_TRUE; + } + + walker = uri->pathHead; + walker->reserved = NULL; /* Prev pointer */ + do { + UriBool removeSegment = URI_FALSE; + int len = (int)(walker->text.afterLast - walker->text.first); + switch (len) { + case 1: + if ((walker->text.first)[0] == _UT('.')) { + /* "." segment -> remove if not essential */ + URI_TYPE(PathSegment) * const prev = walker->reserved; + URI_TYPE(PathSegment) * const nextBackup = walker->next; + + /* Is this dot segment essential? */ + removeSegment = URI_TRUE; + if (relative && (walker == uri->pathHead) && (walker->next != NULL)) { + const URI_CHAR * ch = walker->next->text.first; + for (; ch < walker->next->text.afterLast; ch++) { + if (*ch == _UT(':')) { + removeSegment = URI_FALSE; + break; + } + } + } + + if (removeSegment) { + /* Last segment? */ + if (walker->next != NULL) { + /* Not last segment */ + walker->next->reserved = prev; + + if (prev == NULL) { + /* First but not last segment */ + uri->pathHead = walker->next; + } else { + /* Middle segment */ + prev->next = walker->next; + } + + if (pathOwned && (walker->text.first != walker->text.afterLast)) { + free((URI_CHAR *)walker->text.first); + } + free(walker); + } else { + /* Last segment */ + if (pathOwned && (walker->text.first != walker->text.afterLast)) { + free((URI_CHAR *)walker->text.first); + } + + if (prev == NULL) { + /* Last and first */ + if (URI_FUNC(IsHostSet)(uri)) { + /* Replace "." with empty segment to represent trailing slash */ + walker->text.first = URI_FUNC(SafeToPointTo); + walker->text.afterLast = URI_FUNC(SafeToPointTo); + } else { + free(walker); + + uri->pathHead = NULL; + uri->pathTail = NULL; + } + } else { + /* Last but not first, replace "." with empty segment to represent trailing slash */ + walker->text.first = URI_FUNC(SafeToPointTo); + walker->text.afterLast = URI_FUNC(SafeToPointTo); + } + } + + walker = nextBackup; + } + } + break; + + case 2: + if (((walker->text.first)[0] == _UT('.')) + && ((walker->text.first)[1] == _UT('.'))) { + /* Path ".." -> remove this and the previous segment */ + URI_TYPE(PathSegment) * const prev = walker->reserved; + URI_TYPE(PathSegment) * prevPrev; + URI_TYPE(PathSegment) * const nextBackup = walker->next; + + removeSegment = URI_TRUE; + if (relative) { + if (prev == NULL) { + removeSegment = URI_FALSE; + } else if ((prev != NULL) + && ((prev->text.afterLast - prev->text.first) == 2) + && ((prev->text.first)[0] == _UT('.')) + && ((prev->text.first)[1] == _UT('.'))) { + removeSegment = URI_FALSE; + } + } + + if (removeSegment) { + if (prev != NULL) { + /* Not first segment */ + prevPrev = prev->reserved; + if (prevPrev != NULL) { + /* Not even prev is the first one */ + prevPrev->next = walker->next; + if (walker->next != NULL) { + walker->next->reserved = prevPrev; + } else { + /* Last segment -> insert "" segment to represent trailing slash, update tail */ + URI_TYPE(PathSegment) * const segment = malloc(1 * sizeof(URI_TYPE(PathSegment))); + if (segment == NULL) { + if (pathOwned && (walker->text.first != walker->text.afterLast)) { + free((URI_CHAR *)walker->text.first); + } + free(walker); + + if (pathOwned && (prev->text.first != prev->text.afterLast)) { + free((URI_CHAR *)prev->text.first); + } + free(prev); + + return URI_FALSE; /* Raises malloc error */ + } + memset(segment, 0, sizeof(URI_TYPE(PathSegment))); + segment->text.first = URI_FUNC(SafeToPointTo); + segment->text.afterLast = URI_FUNC(SafeToPointTo); + prevPrev->next = segment; + uri->pathTail = segment; + } + + if (pathOwned && (walker->text.first != walker->text.afterLast)) { + free((URI_CHAR *)walker->text.first); + } + free(walker); + + if (pathOwned && (prev->text.first != prev->text.afterLast)) { + free((URI_CHAR *)prev->text.first); + } + free(prev); + + walker = nextBackup; + } else { + /* Prev is the first segment */ + if (walker->next != NULL) { + uri->pathHead = walker->next; + walker->next->reserved = NULL; + + if (pathOwned && (walker->text.first != walker->text.afterLast)) { + free((URI_CHAR *)walker->text.first); + } + free(walker); + } else { + /* Re-use segment for "" path segment to represent trailing slash, update tail */ + URI_TYPE(PathSegment) * const segment = walker; + if (pathOwned && (segment->text.first != segment->text.afterLast)) { + free((URI_CHAR *)segment->text.first); + } + segment->text.first = URI_FUNC(SafeToPointTo); + segment->text.afterLast = URI_FUNC(SafeToPointTo); + uri->pathHead = segment; + uri->pathTail = segment; + } + + if (pathOwned && (prev->text.first != prev->text.afterLast)) { + free((URI_CHAR *)prev->text.first); + } + free(prev); + + walker = nextBackup; + } + } else { + URI_TYPE(PathSegment) * const anotherNextBackup = walker->next; + /* First segment -> update head pointer */ + uri->pathHead = walker->next; + if (walker->next != NULL) { + walker->next->reserved = NULL; + } else { + /* Last segment -> update tail */ + uri->pathTail = NULL; + } + + if (pathOwned && (walker->text.first != walker->text.afterLast)) { + free((URI_CHAR *)walker->text.first); + } + free(walker); + + walker = anotherNextBackup; + } + } + } + break; + + } + + if (!removeSegment) { + if (walker->next != NULL) { + walker->next->reserved = walker; + } else { + /* Last segment -> update tail */ + uri->pathTail = walker; + } + walker = walker->next; + } + } while (walker != NULL); + + return URI_TRUE; +} + + + +/* Properly removes "." and ".." path segments */ +UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri) { + const UriBool ABSOLUTE = URI_FALSE; + return URI_FUNC(RemoveDotSegments)(uri, ABSOLUTE); +} + + + +unsigned char URI_FUNC(HexdigToInt)(URI_CHAR hexdig) { + switch (hexdig) { + case _UT('0'): + case _UT('1'): + case _UT('2'): + case _UT('3'): + case _UT('4'): + case _UT('5'): + case _UT('6'): + case _UT('7'): + case _UT('8'): + case _UT('9'): + return (unsigned char)(9 + hexdig - _UT('9')); + + case _UT('a'): + case _UT('b'): + case _UT('c'): + case _UT('d'): + case _UT('e'): + case _UT('f'): + return (unsigned char)(15 + hexdig - _UT('f')); + + case _UT('A'): + case _UT('B'): + case _UT('C'): + case _UT('D'): + case _UT('E'): + case _UT('F'): + return (unsigned char)(15 + hexdig - _UT('F')); + + default: + return 0; + } +} + + + +URI_CHAR URI_FUNC(HexToLetter)(unsigned int value) { + /* Uppercase recommended in section 2.1. of RFC 3986 * + * http://tools.ietf.org/html/rfc3986#section-2.1 */ + return URI_FUNC(HexToLetterEx)(value, URI_TRUE); +} + + + +URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase) { + switch (value) { + case 0: return _UT('0'); + case 1: return _UT('1'); + case 2: return _UT('2'); + case 3: return _UT('3'); + case 4: return _UT('4'); + case 5: return _UT('5'); + case 6: return _UT('6'); + case 7: return _UT('7'); + case 8: return _UT('8'); + case 9: return _UT('9'); + + case 10: return (uppercase == URI_TRUE) ? _UT('A') : _UT('a'); + case 11: return (uppercase == URI_TRUE) ? _UT('B') : _UT('b'); + case 12: return (uppercase == URI_TRUE) ? _UT('C') : _UT('c'); + case 13: return (uppercase == URI_TRUE) ? _UT('D') : _UT('d'); + case 14: return (uppercase == URI_TRUE) ? _UT('E') : _UT('e'); + default: return (uppercase == URI_TRUE) ? _UT('F') : _UT('f'); + } +} + + + +/* Checks if a URI has the host component set. */ +UriBool URI_FUNC(IsHostSet)(const URI_TYPE(Uri) * uri) { + return (uri != NULL) + && ((uri->hostText.first != NULL) + || (uri->hostData.ip4 != NULL) + || (uri->hostData.ip6 != NULL) + || (uri->hostData.ipFuture.first != NULL) + ); +} + + + +/* Copies the path segment list from one URI to another. */ +UriBool URI_FUNC(CopyPath)(URI_TYPE(Uri) * dest, + const URI_TYPE(Uri) * source) { + if (source->pathHead == NULL) { + /* No path component */ + dest->pathHead = NULL; + dest->pathTail = NULL; + } else { + /* Copy list but not the text contained */ + URI_TYPE(PathSegment) * sourceWalker = source->pathHead; + URI_TYPE(PathSegment) * destPrev = NULL; + do { + URI_TYPE(PathSegment) * cur = malloc(sizeof(URI_TYPE(PathSegment))); + if (cur == NULL) { + /* Fix broken list */ + if (destPrev != NULL) { + destPrev->next = NULL; + } + return URI_FALSE; /* Raises malloc error */ + } + + /* From this functions usage we know that * + * the dest URI cannot be uri->owner */ + cur->text = sourceWalker->text; + if (destPrev == NULL) { + /* First segment ever */ + dest->pathHead = cur; + } else { + destPrev->next = cur; + } + destPrev = cur; + sourceWalker = sourceWalker->next; + } while (sourceWalker != NULL); + dest->pathTail = destPrev; + dest->pathTail->next = NULL; + } + + dest->absolutePath = source->absolutePath; + return URI_TRUE; +} + + + +/* Copies the authority part of an URI over to another. */ +UriBool URI_FUNC(CopyAuthority)(URI_TYPE(Uri) * dest, + const URI_TYPE(Uri) * source) { + /* From this functions usage we know that * + * the dest URI cannot be uri->owner */ + + /* Copy userInfo */ + dest->userInfo = source->userInfo; + + /* Copy hostText */ + dest->hostText = source->hostText; + + /* Copy hostData */ + if (source->hostData.ip4 != NULL) { + dest->hostData.ip4 = malloc(sizeof(UriIp4)); + if (dest->hostData.ip4 == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + *(dest->hostData.ip4) = *(source->hostData.ip4); + dest->hostData.ip6 = NULL; + dest->hostData.ipFuture.first = NULL; + dest->hostData.ipFuture.afterLast = NULL; + } else if (source->hostData.ip6 != NULL) { + dest->hostData.ip4 = NULL; + dest->hostData.ip6 = malloc(sizeof(UriIp6)); + if (dest->hostData.ip6 == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + *(dest->hostData.ip6) = *(source->hostData.ip6); + dest->hostData.ipFuture.first = NULL; + dest->hostData.ipFuture.afterLast = NULL; + } else { + dest->hostData.ip4 = NULL; + dest->hostData.ip6 = NULL; + dest->hostData.ipFuture = source->hostData.ipFuture; + } + + /* Copy portText */ + dest->portText = source->portText; + + return URI_TRUE; +} + + + +UriBool URI_FUNC(FixAmbiguity)(URI_TYPE(Uri) * uri) { + URI_TYPE(PathSegment) * segment; + + if ( /* Case 1: absolute path, empty first segment */ + (uri->absolutePath + && (uri->pathHead != NULL) + && (uri->pathHead->text.afterLast == uri->pathHead->text.first)) + + /* Case 2: relative path, empty first and second segment */ + || (!uri->absolutePath + && (uri->pathHead != NULL) + && (uri->pathHead->next != NULL) + && (uri->pathHead->text.afterLast == uri->pathHead->text.first) + && (uri->pathHead->next->text.afterLast == uri->pathHead->next->text.first))) { + /* NOOP */ + } else { + return URI_TRUE; + } + + segment = malloc(1 * sizeof(URI_TYPE(PathSegment))); + if (segment == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + + /* Insert "." segment in front */ + segment->next = uri->pathHead; + segment->text.first = URI_FUNC(ConstPwd); + segment->text.afterLast = URI_FUNC(ConstPwd) + 1; + uri->pathHead = segment; + return URI_TRUE; +} + + + +void URI_FUNC(FixEmptyTrailSegment)(URI_TYPE(Uri) * uri) { + /* Fix path if only one empty segment */ + if (!uri->absolutePath + && !URI_FUNC(IsHostSet)(uri) + && (uri->pathHead != NULL) + && (uri->pathHead->next == NULL) + && (uri->pathHead->text.first == uri->pathHead->text.afterLast)) { + free(uri->pathHead); + uri->pathHead = NULL; + uri->pathTail = NULL; + } +} + + + +#endif diff --git a/plugins/uriparser/UriCommon.h b/plugins/uriparser/UriCommon.h new file mode 100644 index 00000000..f6bc2ccf --- /dev/null +++ b/plugins/uriparser/UriCommon.h @@ -0,0 +1,104 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if (defined(URI_PASS_ANSI) && !defined(URI_COMMON_H_ANSI)) \ + || (defined(URI_PASS_UNICODE) && !defined(URI_COMMON_H_UNICODE)) \ + || (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriCommon.h" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriCommon.h" +# undef URI_PASS_UNICODE +# endif +/* Only one pass for each encoding */ +#elif (defined(URI_PASS_ANSI) && !defined(URI_COMMON_H_ANSI) \ + && defined(URI_ENABLE_ANSI)) || (defined(URI_PASS_UNICODE) \ + && !defined(URI_COMMON_H_UNICODE) && defined(URI_ENABLE_UNICODE)) +# ifdef URI_PASS_ANSI +# define URI_COMMON_H_ANSI 1 +# include +# else +# define URI_COMMON_H_UNICODE 1 +# include +# endif + + + +/* Used to point to from empty path segments. + * X.first and X.afterLast must be the same non-NULL value then. */ +extern const URI_CHAR * const URI_FUNC(SafeToPointTo); +extern const URI_CHAR * const URI_FUNC(ConstPwd); +extern const URI_CHAR * const URI_FUNC(ConstParent); + + + +void URI_FUNC(ResetUri)(URI_TYPE(Uri) * uri); + +int URI_FUNC(CompareRange)( + const URI_TYPE(TextRange) * a, + const URI_TYPE(TextRange) * b); + +UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri); +UriBool URI_FUNC(RemoveDotSegments)(URI_TYPE(Uri) * uri, UriBool relative); +UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri, + UriBool relative, UriBool pathOwned); + +unsigned char URI_FUNC(HexdigToInt)(URI_CHAR hexdig); +URI_CHAR URI_FUNC(HexToLetter)(unsigned int value); +URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase); + +UriBool URI_FUNC(IsHostSet)(const URI_TYPE(Uri) * uri); + +UriBool URI_FUNC(CopyPath)(URI_TYPE(Uri) * dest, const URI_TYPE(Uri) * source); +UriBool URI_FUNC(CopyAuthority)(URI_TYPE(Uri) * dest, const URI_TYPE(Uri) * source); + +UriBool URI_FUNC(FixAmbiguity)(URI_TYPE(Uri) * uri); +void URI_FUNC(FixEmptyTrailSegment)(URI_TYPE(Uri) * uri); + + +#endif +#endif diff --git a/plugins/uriparser/UriCompare.c b/plugins/uriparser/UriCompare.c new file mode 100644 index 00000000..6896f64e --- /dev/null +++ b/plugins/uriparser/UriCompare.c @@ -0,0 +1,168 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriCompare.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriCompare.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include +# include "UriCommon.h" +#endif + + + +UriBool URI_FUNC(EqualsUri)(const URI_TYPE(Uri) * a, + const URI_TYPE(Uri) * b) { + /* NOTE: Both NULL means equal! */ + if ((a == NULL) || (b == NULL)) { + return ((a == NULL) && (b == NULL)) ? URI_TRUE : URI_FALSE; + } + + /* scheme */ + if (URI_FUNC(CompareRange)(&(a->scheme), &(b->scheme))) { + return URI_FALSE; + } + + /* absolutePath */ + if ((a->scheme.first == NULL)&& (a->absolutePath != b->absolutePath)) { + return URI_FALSE; + } + + /* userInfo */ + if (URI_FUNC(CompareRange)(&(a->userInfo), &(b->userInfo))) { + return URI_FALSE; + } + + /* Host */ + if (((a->hostData.ip4 == NULL) != (b->hostData.ip4 == NULL)) + || ((a->hostData.ip6 == NULL) != (b->hostData.ip6 == NULL)) + || ((a->hostData.ipFuture.first == NULL) + != (b->hostData.ipFuture.first == NULL))) { + return URI_FALSE; + } + + if (a->hostData.ip4 != NULL) { + if (memcmp(a->hostData.ip4->data, b->hostData.ip4->data, 4)) { + return URI_FALSE; + } + } + + if (a->hostData.ip6 != NULL) { + if (memcmp(a->hostData.ip6->data, b->hostData.ip6->data, 16)) { + return URI_FALSE; + } + } + + if (a->hostData.ipFuture.first != NULL) { + if (URI_FUNC(CompareRange)(&(a->hostData.ipFuture), &(b->hostData.ipFuture))) { + return URI_FALSE; + } + } + + if ((a->hostData.ip4 == NULL) + && (a->hostData.ip6 == NULL) + && (a->hostData.ipFuture.first == NULL)) { + if (URI_FUNC(CompareRange)(&(a->hostText), &(b->hostText))) { + return URI_FALSE; + } + } + + /* portText */ + if (URI_FUNC(CompareRange)(&(a->portText), &(b->portText))) { + return URI_FALSE; + } + + /* Path */ + if ((a->pathHead == NULL) != (b->pathHead == NULL)) { + return URI_FALSE; + } + + if (a->pathHead != NULL) { + URI_TYPE(PathSegment) * walkA = a->pathHead; + URI_TYPE(PathSegment) * walkB = b->pathHead; + do { + if (URI_FUNC(CompareRange)(&(walkA->text), &(walkB->text))) { + return URI_FALSE; + } + if ((walkA->next == NULL) != (walkB->next == NULL)) { + return URI_FALSE; + } + walkA = walkA->next; + walkB = walkB->next; + } while (walkA != NULL); + } + + /* query */ + if (URI_FUNC(CompareRange)(&(a->query), &(b->query))) { + return URI_FALSE; + } + + /* fragment */ + if (URI_FUNC(CompareRange)(&(a->fragment), &(b->fragment))) { + return URI_FALSE; + } + + return URI_TRUE; /* Equal*/ +} + + + +#endif diff --git a/plugins/uriparser/UriDefsAnsi.h b/plugins/uriparser/UriDefsAnsi.h new file mode 100644 index 00000000..deaefaa5 --- /dev/null +++ b/plugins/uriparser/UriDefsAnsi.h @@ -0,0 +1,82 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriDefsAnsi.h + * Holds definitions for the ANSI pass. + * NOTE: This header is included N times, not once. + */ + +/* Allow multi inclusion */ +#include "UriDefsConfig.h" + + + +#undef URI_CHAR +#define URI_CHAR char + +#undef _UT +#define _UT(x) x + + + +#undef URI_FUNC +#define URI_FUNC(x) uri##x##A + +#undef URI_TYPE +#define URI_TYPE(x) Uri##x##A + + + +#undef URI_STRLEN +#define URI_STRLEN strlen +#undef URI_STRCPY +#define URI_STRCPY strcpy +#undef URI_STRCMP +#define URI_STRCMP strcmp +#undef URI_STRNCMP +#define URI_STRNCMP strncmp + +/* TODO Remove on next source-compatibility break */ +#undef URI_SNPRINTF +#if (defined(__WIN32__) || defined(_WIN32) || defined(WIN32)) +# define URI_SNPRINTF _snprintf +#else +# define URI_SNPRINTF snprintf +#endif diff --git a/plugins/uriparser/UriDefsConfig.h b/plugins/uriparser/UriDefsConfig.h new file mode 100644 index 00000000..d87ccb63 --- /dev/null +++ b/plugins/uriparser/UriDefsConfig.h @@ -0,0 +1,101 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriDefsConfig.h + * Adjusts the internal configuration after processing external definitions. + */ + +#ifndef URI_DEFS_CONFIG_H +#define URI_DEFS_CONFIG_H 1 + + + +/* Deny external overriding */ +#undef URI_ENABLE_ANSI /* Internal for !URI_NO_ANSI */ +#undef URI_ENABLE_UNICODE /* Internal for !URI_NO_UNICODE */ + + + +/* Encoding */ +#ifdef URI_NO_ANSI +# ifdef URI_NO_UNICODE +/* No encoding at all */ +# error URI_NO_ANSI and URI_NO_UNICODE cannot go together. +# else +/* Unicode only */ +# define URI_ENABLE_UNICODE 1 +# endif +#else +# ifdef URI_NO_UNICODE +/* ANSI only */ +# define URI_ENABLE_ANSI 1 +# else +/* Both ANSI and Unicode */ +# define URI_ENABLE_ANSI 1 +# define URI_ENABLE_UNICODE 1 +# endif +#endif + + + +/* Function inlining, not ANSI/ISO C! */ +#if (defined(URI_DOXYGEN) || defined(URI_SIZEDOWN)) +# define URI_INLINE +#elif defined(__INTEL_COMPILER) +/* Intel C/C++ */ +/* http://predef.sourceforge.net/precomp.html#sec20 */ +/* http://www.intel.com/support/performancetools/c/windows/sb/CS-007751.htm#2 */ +# define URI_INLINE __force_inline +#elif defined(_MSC_VER) +/* Microsoft Visual C++ */ +/* http://predef.sourceforge.net/precomp.html#sec32 */ +/* http://msdn2.microsoft.com/en-us/library/ms882281.aspx */ +# define URI_INLINE __forceinline +#elif (__STDC_VERSION__ >= 199901L) +/* C99, "inline" is a keyword */ +# define URI_INLINE inline +#else +/* No inlining */ +# define URI_INLINE +#endif + + + +#endif /* URI_DEFS_CONFIG_H */ diff --git a/plugins/uriparser/UriDefsUnicode.h b/plugins/uriparser/UriDefsUnicode.h new file mode 100644 index 00000000..fa4befcd --- /dev/null +++ b/plugins/uriparser/UriDefsUnicode.h @@ -0,0 +1,82 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriDefsUnicode.h + * Holds definitions for the Unicode pass. + * NOTE: This header is included N times, not once. + */ + +/* Allow multi inclusion */ +#include "UriDefsConfig.h" + + + +#undef URI_CHAR +#define URI_CHAR wchar_t + +#undef _UT +#define _UT(x) L##x + + + +#undef URI_FUNC +#define URI_FUNC(x) uri##x##W + +#undef URI_TYPE +#define URI_TYPE(x) Uri##x##W + + + +#undef URI_STRLEN +#define URI_STRLEN wcslen +#undef URI_STRCPY +#define URI_STRCPY wcscpy +#undef URI_STRCMP +#define URI_STRCMP wcscmp +#undef URI_STRNCMP +#define URI_STRNCMP wcsncmp + +/* TODO Remove on next source-compatibility break */ +#undef URI_SNPRINTF +#if (defined(__WIN32__) || defined(_WIN32) || defined(WIN32)) +# define URI_SNPRINTF _snwprintf +#else +# define URI_SNPRINTF swprintf +#endif diff --git a/plugins/uriparser/UriEscape.c b/plugins/uriparser/UriEscape.c new file mode 100644 index 00000000..79ee3a68 --- /dev/null +++ b/plugins/uriparser/UriEscape.c @@ -0,0 +1,453 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriEscape.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriEscape.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include "UriCommon.h" +#endif + + + +URI_CHAR * URI_FUNC(Escape)(const URI_CHAR * in, URI_CHAR * out, + UriBool spaceToPlus, UriBool normalizeBreaks) { + return URI_FUNC(EscapeEx)(in, NULL, out, spaceToPlus, normalizeBreaks); +} + + + +URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst, + const URI_CHAR * inAfterLast, URI_CHAR * out, + UriBool spaceToPlus, UriBool normalizeBreaks) { + const URI_CHAR * read = inFirst; + URI_CHAR * write = out; + UriBool prevWasCr = URI_FALSE; + if ((out == NULL) || (inFirst == out)) { + return NULL; + } else if (inFirst == NULL) { + if (out != NULL) { + out[0] = _UT('\0'); + } + return out; + } + + for (;;) { + if ((inAfterLast != NULL) && (read >= inAfterLast)) { + write[0] = _UT('\0'); + return write; + } + + switch (read[0]) { + case _UT('\0'): + write[0] = _UT('\0'); + return write; + + case _UT(' '): + if (spaceToPlus) { + write[0] = _UT('+'); + write++; + } else { + write[0] = _UT('%'); + write[1] = _UT('2'); + write[2] = _UT('0'); + write += 3; + } + prevWasCr = URI_FALSE; + break; + + case _UT('a'): /* ALPHA */ + case _UT('A'): + case _UT('b'): + case _UT('B'): + case _UT('c'): + case _UT('C'): + case _UT('d'): + case _UT('D'): + case _UT('e'): + case _UT('E'): + case _UT('f'): + case _UT('F'): + case _UT('g'): + case _UT('G'): + case _UT('h'): + case _UT('H'): + case _UT('i'): + case _UT('I'): + case _UT('j'): + case _UT('J'): + case _UT('k'): + case _UT('K'): + case _UT('l'): + case _UT('L'): + case _UT('m'): + case _UT('M'): + case _UT('n'): + case _UT('N'): + case _UT('o'): + case _UT('O'): + case _UT('p'): + case _UT('P'): + case _UT('q'): + case _UT('Q'): + case _UT('r'): + case _UT('R'): + case _UT('s'): + case _UT('S'): + case _UT('t'): + case _UT('T'): + case _UT('u'): + case _UT('U'): + case _UT('v'): + case _UT('V'): + case _UT('w'): + case _UT('W'): + case _UT('x'): + case _UT('X'): + case _UT('y'): + case _UT('Y'): + case _UT('z'): + case _UT('Z'): + case _UT('0'): /* DIGIT */ + case _UT('1'): + case _UT('2'): + case _UT('3'): + case _UT('4'): + case _UT('5'): + case _UT('6'): + case _UT('7'): + case _UT('8'): + case _UT('9'): + case _UT('-'): /* "-" / "." / "_" / "~" */ + case _UT('.'): + case _UT('_'): + case _UT('~'): + /* Copy unmodified */ + write[0] = read[0]; + write++; + + prevWasCr = URI_FALSE; + break; + + case _UT('\x0a'): + if (normalizeBreaks) { + if (!prevWasCr) { + write[0] = _UT('%'); + write[1] = _UT('0'); + write[2] = _UT('D'); + write[3] = _UT('%'); + write[4] = _UT('0'); + write[5] = _UT('A'); + write += 6; + } + } else { + write[0] = _UT('%'); + write[1] = _UT('0'); + write[2] = _UT('A'); + write += 3; + } + prevWasCr = URI_FALSE; + break; + + case _UT('\x0d'): + if (normalizeBreaks) { + write[0] = _UT('%'); + write[1] = _UT('0'); + write[2] = _UT('D'); + write[3] = _UT('%'); + write[4] = _UT('0'); + write[5] = _UT('A'); + write += 6; + } else { + write[0] = _UT('%'); + write[1] = _UT('0'); + write[2] = _UT('D'); + write += 3; + } + prevWasCr = URI_TRUE; + break; + + default: + /* Percent encode */ + { + const unsigned char code = (unsigned char)read[0]; + write[0] = _UT('%'); + write[1] = URI_FUNC(HexToLetter)(code >> 4); + write[2] = URI_FUNC(HexToLetter)(code & 0x0f); + write += 3; + } + prevWasCr = URI_FALSE; + break; + } + + read++; + } +} + + + +const URI_CHAR * URI_FUNC(UnescapeInPlace)(URI_CHAR * inout) { + return URI_FUNC(UnescapeInPlaceEx)(inout, URI_FALSE, URI_BR_DONT_TOUCH); +} + + + +const URI_CHAR * URI_FUNC(UnescapeInPlaceEx)(URI_CHAR * inout, + UriBool plusToSpace, UriBreakConversion breakConversion) { + URI_CHAR * read = inout; + URI_CHAR * write = inout; + UriBool prevWasCr = URI_FALSE; + + if (inout == NULL) { + return NULL; + } + + for (;;) { + switch (read[0]) { + case _UT('\0'): + if (read > write) { + write[0] = _UT('\0'); + } + return write; + + case _UT('%'): + switch (read[1]) { + case _UT('0'): + case _UT('1'): + case _UT('2'): + case _UT('3'): + case _UT('4'): + case _UT('5'): + case _UT('6'): + case _UT('7'): + case _UT('8'): + case _UT('9'): + case _UT('a'): + case _UT('b'): + case _UT('c'): + case _UT('d'): + case _UT('e'): + case _UT('f'): + case _UT('A'): + case _UT('B'): + case _UT('C'): + case _UT('D'): + case _UT('E'): + case _UT('F'): + switch (read[2]) { + case _UT('0'): + case _UT('1'): + case _UT('2'): + case _UT('3'): + case _UT('4'): + case _UT('5'): + case _UT('6'): + case _UT('7'): + case _UT('8'): + case _UT('9'): + case _UT('a'): + case _UT('b'): + case _UT('c'): + case _UT('d'): + case _UT('e'): + case _UT('f'): + case _UT('A'): + case _UT('B'): + case _UT('C'): + case _UT('D'): + case _UT('E'): + case _UT('F'): + { + /* Percent group found */ + const unsigned char left = URI_FUNC(HexdigToInt)(read[1]); + const unsigned char right = URI_FUNC(HexdigToInt)(read[2]); + const int code = 16 * left + right; + switch (code) { + case 10: + switch (breakConversion) { + case URI_BR_TO_LF: + if (!prevWasCr) { + write[0] = (URI_CHAR)10; + write++; + } + break; + + case URI_BR_TO_CRLF: + if (!prevWasCr) { + write[0] = (URI_CHAR)13; + write[1] = (URI_CHAR)10; + write += 2; + } + break; + + case URI_BR_TO_CR: + if (!prevWasCr) { + write[0] = (URI_CHAR)13; + write++; + } + break; + + case URI_BR_DONT_TOUCH: + default: + write[0] = (URI_CHAR)10; + write++; + + } + prevWasCr = URI_FALSE; + break; + + case 13: + switch (breakConversion) { + case URI_BR_TO_LF: + write[0] = (URI_CHAR)10; + write++; + break; + + case URI_BR_TO_CRLF: + write[0] = (URI_CHAR)13; + write[1] = (URI_CHAR)10; + write += 2; + break; + + case URI_BR_TO_CR: + write[0] = (URI_CHAR)13; + write++; + break; + + case URI_BR_DONT_TOUCH: + default: + write[0] = (URI_CHAR)13; + write++; + + } + prevWasCr = URI_TRUE; + break; + + default: + write[0] = (URI_CHAR)(code); + write++; + + prevWasCr = URI_FALSE; + + } + read += 3; + } + break; + + default: + /* Copy two chars unmodified and */ + /* look at this char again */ + if (read > write) { + write[0] = read[0]; + write[1] = read[1]; + } + read += 2; + write += 2; + + prevWasCr = URI_FALSE; + } + break; + + default: + /* Copy one char unmodified and */ + /* look at this char again */ + if (read > write) { + write[0] = read[0]; + } + read++; + write++; + + prevWasCr = URI_FALSE; + } + break; + + case _UT('+'): + if (plusToSpace) { + /* Convert '+' to ' ' */ + write[0] = _UT(' '); + } else { + /* Copy one char unmodified */ + if (read > write) { + write[0] = read[0]; + } + } + read++; + write++; + + prevWasCr = URI_FALSE; + break; + + default: + /* Copy one char unmodified */ + if (read > write) { + write[0] = read[0]; + } + read++; + write++; + + prevWasCr = URI_FALSE; + } + } +} + + + +#endif diff --git a/plugins/uriparser/UriFile.c b/plugins/uriparser/UriFile.c new file mode 100644 index 00000000..9cf788fc --- /dev/null +++ b/plugins/uriparser/UriFile.c @@ -0,0 +1,226 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriFile.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriFile.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +#endif + + + +static URI_INLINE int URI_FUNC(FilenameToUriString)(const URI_CHAR * filename, + URI_CHAR * uriString, UriBool fromUnix) { + const URI_CHAR * input = filename; + const URI_CHAR * lastSep = input - 1; + UriBool firstSegment = URI_TRUE; + URI_CHAR * output = uriString; + UriBool absolute; + UriBool is_windows_network; + + if ((filename == NULL) || (uriString == NULL)) { + return URI_ERROR_NULL; + } + + is_windows_network = (filename[0] == _UT('\\')) && (filename[1] == _UT('\\')); + absolute = fromUnix + ? (filename[0] == _UT('/')) + : ((filename[0] != _UT('\0')) && (filename[1] == _UT(':')) + || is_windows_network); + + if (absolute) { + const URI_CHAR * const prefix = fromUnix + ? _UT("file://") + : is_windows_network + ? _UT("file:") + : _UT("file:///"); + const int prefixLen = URI_STRLEN(prefix); + + /* Copy prefix */ + memcpy(uriString, prefix, prefixLen * sizeof(URI_CHAR)); + output += prefixLen; + } + + /* Copy and escape on the fly */ + for (;;) { + if ((input[0] == _UT('\0')) + || (fromUnix && input[0] == _UT('/')) + || (!fromUnix && input[0] == _UT('\\'))) { + /* Copy text after last seperator */ + if (lastSep + 1 < input) { + if (!fromUnix && absolute && (firstSegment == URI_TRUE)) { + /* Quick hack to not convert "C:" to "C%3A" */ + const int charsToCopy = (int)(input - (lastSep + 1)); + memcpy(output, lastSep + 1, charsToCopy * sizeof(URI_CHAR)); + output += charsToCopy; + } else { + output = URI_FUNC(EscapeEx)(lastSep + 1, input, output, + URI_FALSE, URI_FALSE); + } + } + firstSegment = URI_FALSE; + } + + if (input[0] == _UT('\0')) { + output[0] = _UT('\0'); + break; + } else if (fromUnix && (input[0] == _UT('/'))) { + /* Copy separators unmodified */ + output[0] = _UT('/'); + output++; + lastSep = input; + } else if (!fromUnix && (input[0] == _UT('\\'))) { + /* Convert backslashes to forward slashes */ + output[0] = _UT('/'); + output++; + lastSep = input; + } + input++; + } + + return URI_SUCCESS; +} + + + +static URI_INLINE int URI_FUNC(UriStringToFilename)(const URI_CHAR * uriString, + URI_CHAR * filename, UriBool toUnix) { + if ((uriString == NULL) || (filename == NULL)) { + return URI_ERROR_NULL; + } + + { + const UriBool file_two_slashes = + URI_STRNCMP(uriString, _UT("file://"), URI_STRLEN(_UT("file://"))) == 0; + const UriBool file_three_slashes = file_two_slashes + && (URI_STRNCMP(uriString, _UT("file:///"), URI_STRLEN(_UT("file:///"))) == 0); + + const size_t charsToSkip = file_two_slashes + ? file_three_slashes + ? toUnix + /* file:///bin/bash */ + ? URI_STRLEN(_UT("file://")) + /* file:///E:/Documents%20and%20Settings */ + : URI_STRLEN(_UT("file:///")) + /* file://Server01/Letter.txt */ + : URI_STRLEN(_UT("file://")) + : 0; + const size_t charsToCopy = URI_STRLEN(uriString + charsToSkip) + 1; + + const UriBool is_windows_network_with_authority = + (toUnix == URI_FALSE) + && file_two_slashes + && ! file_three_slashes; + + URI_CHAR * const unescape_target = is_windows_network_with_authority + ? (filename + 2) + : filename; + + if (is_windows_network_with_authority) { + filename[0] = '\\'; + filename[1] = '\\'; + } + + memcpy(unescape_target, uriString + charsToSkip, charsToCopy * sizeof(URI_CHAR)); + URI_FUNC(UnescapeInPlaceEx)(filename, URI_FALSE, URI_BR_DONT_TOUCH); + } + + /* Convert forward slashes to backslashes */ + if (!toUnix) { + URI_CHAR * walker = filename; + while (walker[0] != _UT('\0')) { + if (walker[0] == _UT('/')) { + walker[0] = _UT('\\'); + } + walker++; + } + } + + return URI_SUCCESS; +} + + + +int URI_FUNC(UnixFilenameToUriString)(const URI_CHAR * filename, URI_CHAR * uriString) { + return URI_FUNC(FilenameToUriString)(filename, uriString, URI_TRUE); +} + + + +int URI_FUNC(WindowsFilenameToUriString)(const URI_CHAR * filename, URI_CHAR * uriString) { + return URI_FUNC(FilenameToUriString)(filename, uriString, URI_FALSE); +} + + + +int URI_FUNC(UriStringToUnixFilename)(const URI_CHAR * uriString, URI_CHAR * filename) { + return URI_FUNC(UriStringToFilename)(uriString, filename, URI_TRUE); +} + + + +int URI_FUNC(UriStringToWindowsFilename)(const URI_CHAR * uriString, URI_CHAR * filename) { + return URI_FUNC(UriStringToFilename)(uriString, filename, URI_FALSE); +} + + + +#endif diff --git a/plugins/uriparser/UriIp4.c b/plugins/uriparser/UriIp4.c new file mode 100644 index 00000000..c00910bb --- /dev/null +++ b/plugins/uriparser/UriIp4.c @@ -0,0 +1,329 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriIp4.c + * Holds the IPv4 parser implementation. + * NOTE: This source file includes itself twice. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriIp4.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriIp4.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include "UriIp4Base.h" +# include +#endif + + + +/* Prototypes */ +static const URI_CHAR * URI_FUNC(ParseDecOctet)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseDecOctetOne)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseDecOctetTwo)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseDecOctetThree)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseDecOctetFour)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast); + + + +/* + * [ipFourAddress]->[decOctet]<.>[decOctet]<.>[decOctet]<.>[decOctet] + */ +int URI_FUNC(ParseIpFourAddress)(unsigned char * octetOutput, + const URI_CHAR * first, const URI_CHAR * afterLast) { + const URI_CHAR * after; + UriIp4Parser parser; + + /* Essential checks */ + if ((octetOutput == NULL) || (first == NULL) + || (afterLast <= first)) { + return URI_ERROR_SYNTAX; + } + + /* Reset parser */ + parser.stackCount = 0; + + /* Octet #1 */ + after = URI_FUNC(ParseDecOctet)(&parser, first, afterLast); + if ((after == NULL) || (after >= afterLast) || (*after != _UT('.'))) { + return URI_ERROR_SYNTAX; + } + uriStackToOctet(&parser, octetOutput); + + /* Octet #2 */ + after = URI_FUNC(ParseDecOctet)(&parser, after + 1, afterLast); + if ((after == NULL) || (after >= afterLast) || (*after != _UT('.'))) { + return URI_ERROR_SYNTAX; + } + uriStackToOctet(&parser, octetOutput + 1); + + /* Octet #3 */ + after = URI_FUNC(ParseDecOctet)(&parser, after + 1, afterLast); + if ((after == NULL) || (after >= afterLast) || (*after != _UT('.'))) { + return URI_ERROR_SYNTAX; + } + uriStackToOctet(&parser, octetOutput + 2); + + /* Octet #4 */ + after = URI_FUNC(ParseDecOctet)(&parser, after + 1, afterLast); + if (after != afterLast) { + return URI_ERROR_SYNTAX; + } + uriStackToOctet(&parser, octetOutput + 3); + + return URI_SUCCESS; +} + + + +/* + * [decOctet]-><0> + * [decOctet]-><1>[decOctetOne] + * [decOctet]-><2>[decOctetTwo] + * [decOctet]-><3>[decOctetThree] + * [decOctet]-><4>[decOctetThree] + * [decOctet]-><5>[decOctetThree] + * [decOctet]-><6>[decOctetThree] + * [decOctet]-><7>[decOctetThree] + * [decOctet]-><8>[decOctetThree] + * [decOctet]-><9>[decOctetThree] + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseDecOctet)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return NULL; + } + + switch (*first) { + case _UT('0'): + uriPushToStack(parser, 0); + return first + 1; + + case _UT('1'): + uriPushToStack(parser, 1); + return (const URI_CHAR *)URI_FUNC(ParseDecOctetOne)(parser, first + 1, afterLast); + + case _UT('2'): + uriPushToStack(parser, 2); + return (const URI_CHAR *)URI_FUNC(ParseDecOctetTwo)(parser, first + 1, afterLast); + + case _UT('3'): + case _UT('4'): + case _UT('5'): + case _UT('6'): + case _UT('7'): + case _UT('8'): + case _UT('9'): + uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9'))); + return (const URI_CHAR *)URI_FUNC(ParseDecOctetThree)(parser, first + 1, afterLast); + + default: + return NULL; + } +} + + + +/* + * [decOctetOne]-> + * [decOctetOne]->[DIGIT][decOctetThree] + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseDecOctetOne)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('0'): + case _UT('1'): + case _UT('2'): + case _UT('3'): + case _UT('4'): + case _UT('5'): + case _UT('6'): + case _UT('7'): + case _UT('8'): + case _UT('9'): + uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9'))); + return (const URI_CHAR *)URI_FUNC(ParseDecOctetThree)(parser, first + 1, afterLast); + + default: + return first; + } +} + + + +/* + * [decOctetTwo]-> + * [decOctetTwo]-><0>[decOctetThree] + * [decOctetTwo]-><1>[decOctetThree] + * [decOctetTwo]-><2>[decOctetThree] + * [decOctetTwo]-><3>[decOctetThree] + * [decOctetTwo]-><4>[decOctetThree] + * [decOctetTwo]-><5>[decOctetFour] + * [decOctetTwo]-><6> + * [decOctetTwo]-><7> + * [decOctetTwo]-><8> + * [decOctetTwo]-><9> +*/ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseDecOctetTwo)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('0'): + case _UT('1'): + case _UT('2'): + case _UT('3'): + case _UT('4'): + uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9'))); + return (const URI_CHAR *)URI_FUNC(ParseDecOctetThree)(parser, first + 1, afterLast); + + case _UT('5'): + uriPushToStack(parser, 5); + return (const URI_CHAR *)URI_FUNC(ParseDecOctetFour)(parser, first + 1, afterLast); + + case _UT('6'): + case _UT('7'): + case _UT('8'): + case _UT('9'): + uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9'))); + return first + 1; + + default: + return first; + } +} + + + +/* + * [decOctetThree]-> + * [decOctetThree]->[DIGIT] + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseDecOctetThree)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('0'): + case _UT('1'): + case _UT('2'): + case _UT('3'): + case _UT('4'): + case _UT('5'): + case _UT('6'): + case _UT('7'): + case _UT('8'): + case _UT('9'): + uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9'))); + return first + 1; + + default: + return first; + } +} + + + +/* + * [decOctetFour]-> + * [decOctetFour]-><0> + * [decOctetFour]-><1> + * [decOctetFour]-><2> + * [decOctetFour]-><3> + * [decOctetFour]-><4> + * [decOctetFour]-><5> + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseDecOctetFour)(UriIp4Parser * parser, + const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('0'): + case _UT('1'): + case _UT('2'): + case _UT('3'): + case _UT('4'): + case _UT('5'): + uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9'))); + return first + 1; + + default: + return first; + } +} + + + +#endif diff --git a/plugins/uriparser/UriIp4.h b/plugins/uriparser/UriIp4.h new file mode 100644 index 00000000..885a35ca --- /dev/null +++ b/plugins/uriparser/UriIp4.h @@ -0,0 +1,92 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriIp4.h + * Holds the IPv4 parser interface. + * NOTE: This header includes itself twice. + */ + +#if (defined(URI_PASS_ANSI) && !defined(URI_IP4_TWICE_H_ANSI)) \ + || (defined(URI_PASS_UNICODE) && !defined(URI_IP4_TWICE_H_UNICODE)) \ + || (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* What encodings are enabled? */ +#include "UriDefsConfig.h" +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriIp4.h" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriIp4.h" +# undef URI_PASS_UNICODE +# endif +/* Only one pass for each encoding */ +#elif (defined(URI_PASS_ANSI) && !defined(URI_IP4_TWICE_H_ANSI) \ + && defined(URI_ENABLE_ANSI)) || (defined(URI_PASS_UNICODE) \ + && !defined(URI_IP4_TWICE_H_UNICODE) && defined(URI_ENABLE_UNICODE)) +# ifdef URI_PASS_ANSI +# define URI_IP4_TWICE_H_ANSI 1 +# include "UriDefsAnsi.h" +# else +# define URI_IP4_TWICE_H_UNICODE 1 +# include "UriDefsUnicode.h" +# include +# endif + + + +/** + * Converts a IPv4 text representation into four bytes. + * + * @param octetOutput Output destination + * @param first First character of IPv4 text to parse + * @param afterLast Position to stop parsing at + * @return Error code or 0 on success + */ +int URI_FUNC(ParseIpFourAddress)(unsigned char * octetOutput, + const URI_CHAR * first, const URI_CHAR * afterLast); + + + +#endif +#endif diff --git a/plugins/uriparser/UriIp4Base.c b/plugins/uriparser/UriIp4Base.c new file mode 100644 index 00000000..5cd298fa --- /dev/null +++ b/plugins/uriparser/UriIp4Base.c @@ -0,0 +1,96 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriIp4Base.c + * Holds code independent of the encoding pass. + */ + +#ifndef URI_DOXYGEN +# include "UriIp4Base.h" +#endif + + + +void uriStackToOctet(UriIp4Parser * parser, unsigned char * octet) { + switch (parser->stackCount) { + case 1: + *octet = parser->stackOne; + break; + + case 2: + *octet = parser->stackOne * 10 + + parser->stackTwo; + break; + + case 3: + *octet = parser->stackOne * 100 + + parser->stackTwo * 10 + + parser->stackThree; + break; + + default: + ; + } + parser->stackCount = 0; +} + + + +void uriPushToStack(UriIp4Parser * parser, unsigned char digit) { + switch (parser->stackCount) { + case 0: + parser->stackOne = digit; + parser->stackCount = 1; + break; + + case 1: + parser->stackTwo = digit; + parser->stackCount = 2; + break; + + case 2: + parser->stackThree = digit; + parser->stackCount = 3; + break; + + default: + ; + } +} diff --git a/plugins/uriparser/UriIp4Base.h b/plugins/uriparser/UriIp4Base.h new file mode 100644 index 00000000..23b838e2 --- /dev/null +++ b/plugins/uriparser/UriIp4Base.h @@ -0,0 +1,59 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef URI_IP4_BASE_H +#define URI_IP4_BASE_H 1 + + + +typedef struct UriIp4ParserStruct { + unsigned char stackCount; + unsigned char stackOne; + unsigned char stackTwo; + unsigned char stackThree; +} UriIp4Parser; + + + +void uriPushToStack(UriIp4Parser * parser, unsigned char digit); +void uriStackToOctet(UriIp4Parser * parser, unsigned char * octet); + + + +#endif /* URI_IP4_BASE_H */ diff --git a/plugins/uriparser/UriNormalize.c b/plugins/uriparser/UriNormalize.c new file mode 100644 index 00000000..49db9fff --- /dev/null +++ b/plugins/uriparser/UriNormalize.c @@ -0,0 +1,728 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriNormalize.c + * Holds the RFC 3986 %URI normalization implementation. + * NOTE: This source file includes itself twice. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriNormalize.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriNormalize.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include "UriNormalizeBase.h" +# include "UriCommon.h" +#endif + + + +static int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri, unsigned int inMask, + unsigned int * outMask); + +static UriBool URI_FUNC(MakeRangeOwner)(unsigned int * doneMask, + unsigned int maskTest, URI_TYPE(TextRange) * range); +static UriBool URI_FUNC(MakeOwner)(URI_TYPE(Uri) * uri, + unsigned int * doneMask); + +static void URI_FUNC(FixPercentEncodingInplace)(const URI_CHAR * first, + const URI_CHAR ** afterLast); +static UriBool URI_FUNC(FixPercentEncodingMalloc)(const URI_CHAR ** first, + const URI_CHAR ** afterLast); +static void URI_FUNC(FixPercentEncodingEngine)( + const URI_CHAR * inFirst, const URI_CHAR * inAfterLast, + const URI_CHAR * outFirst, const URI_CHAR ** outAfterLast); + +static UriBool URI_FUNC(ContainsUppercaseLetters)(const URI_CHAR * first, + const URI_CHAR * afterLast); +static UriBool URI_FUNC(ContainsUglyPercentEncoding)(const URI_CHAR * first, + const URI_CHAR * afterLast); + +static void URI_FUNC(LowercaseInplace)(const URI_CHAR * first, + const URI_CHAR * afterLast); +static UriBool URI_FUNC(LowercaseMalloc)(const URI_CHAR ** first, + const URI_CHAR ** afterLast); + +static void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri, + unsigned int revertMask); + + + +static URI_INLINE void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri, + unsigned int revertMask) { + if (revertMask & URI_NORMALIZE_SCHEME) { + free((URI_CHAR *)uri->scheme.first); + uri->scheme.first = NULL; + uri->scheme.afterLast = NULL; + } + + if (revertMask & URI_NORMALIZE_USER_INFO) { + free((URI_CHAR *)uri->userInfo.first); + uri->userInfo.first = NULL; + uri->userInfo.afterLast = NULL; + } + + if (revertMask & URI_NORMALIZE_HOST) { + if (uri->hostData.ipFuture.first != NULL) { + /* IPvFuture */ + free((URI_CHAR *)uri->hostData.ipFuture.first); + uri->hostData.ipFuture.first = NULL; + uri->hostData.ipFuture.afterLast = NULL; + uri->hostText.first = NULL; + uri->hostText.afterLast = NULL; + } else if ((uri->hostText.first != NULL) + && (uri->hostData.ip4 == NULL) + && (uri->hostData.ip6 == NULL)) { + /* Regname */ + free((URI_CHAR *)uri->hostText.first); + uri->hostText.first = NULL; + uri->hostText.afterLast = NULL; + } + } + + /* NOTE: Port cannot happen! */ + + if (revertMask & URI_NORMALIZE_PATH) { + URI_TYPE(PathSegment) * walker = uri->pathHead; + while (walker != NULL) { + URI_TYPE(PathSegment) * const next = walker->next; + if (walker->text.afterLast > walker->text.first) { + free((URI_CHAR *)walker->text.first); + } + free(walker); + walker = next; + } + uri->pathHead = NULL; + uri->pathTail = NULL; + } + + if (revertMask & URI_NORMALIZE_QUERY) { + free((URI_CHAR *)uri->query.first); + uri->query.first = NULL; + uri->query.afterLast = NULL; + } + + if (revertMask & URI_NORMALIZE_FRAGMENT) { + free((URI_CHAR *)uri->fragment.first); + uri->fragment.first = NULL; + uri->fragment.afterLast = NULL; + } +} + + + +static URI_INLINE UriBool URI_FUNC(ContainsUppercaseLetters)(const URI_CHAR * first, + const URI_CHAR * afterLast) { + if ((first != NULL) && (afterLast != NULL) && (afterLast > first)) { + const URI_CHAR * i = first; + for (; i < afterLast; i++) { + /* 6.2.2.1 Case Normalization: uppercase letters in scheme or host */ + if ((*i >= _UT('A')) && (*i <= _UT('Z'))) { + return URI_TRUE; + } + } + } + return URI_FALSE; +} + + + +static URI_INLINE UriBool URI_FUNC(ContainsUglyPercentEncoding)(const URI_CHAR * first, + const URI_CHAR * afterLast) { + if ((first != NULL) && (afterLast != NULL) && (afterLast > first)) { + const URI_CHAR * i = first; + for (; i + 2 < afterLast; i++) { + if (i[0] == _UT('%')) { + /* 6.2.2.1 Case Normalization: * + * lowercase percent-encodings */ + if (((i[1] >= _UT('a')) && (i[1] <= _UT('f'))) + || ((i[2] >= _UT('a')) && (i[2] <= _UT('f')))) { + return URI_TRUE; + } else { + /* 6.2.2.2 Percent-Encoding Normalization: * + * percent-encoded unreserved characters */ + const unsigned char left = URI_FUNC(HexdigToInt)(i[1]); + const unsigned char right = URI_FUNC(HexdigToInt)(i[2]); + const int code = 16 * left + right; + if (uriIsUnreserved(code)) { + return URI_TRUE; + } + } + } + } + } + return URI_FALSE; +} + + + +static URI_INLINE void URI_FUNC(LowercaseInplace)(const URI_CHAR * first, + const URI_CHAR * afterLast) { + if ((first != NULL) && (afterLast != NULL) && (afterLast > first)) { + URI_CHAR * i = (URI_CHAR *)first; + const int lowerUpperDiff = (_UT('a') - _UT('A')); + for (; i < afterLast; i++) { + if ((*i >= _UT('A')) && (*i <=_UT('Z'))) { + *i = (URI_CHAR)(*i + lowerUpperDiff); + } + } + } +} + + + +static URI_INLINE UriBool URI_FUNC(LowercaseMalloc)(const URI_CHAR ** first, + const URI_CHAR ** afterLast) { + int lenInChars; + const int lowerUpperDiff = (_UT('a') - _UT('A')); + URI_CHAR * buffer; + int i = 0; + + if ((first == NULL) || (afterLast == NULL) || (*first == NULL) + || (*afterLast == NULL)) { + return URI_FALSE; + } + + lenInChars = (int)(*afterLast - *first); + if (lenInChars == 0) { + return URI_TRUE; + } else if (lenInChars < 0) { + return URI_FALSE; + } + + buffer = malloc(lenInChars * sizeof(URI_CHAR)); + if (buffer == NULL) { + return URI_FALSE; + } + + for (; i < lenInChars; i++) { + if (((*first)[i] >= _UT('A')) && ((*first)[i] <=_UT('Z'))) { + buffer[i] = (URI_CHAR)((*first)[i] + lowerUpperDiff); + } else { + buffer[i] = (*first)[i]; + } + } + + *first = buffer; + *afterLast = buffer + lenInChars; + return URI_TRUE; +} + + + +/* NOTE: Implementation must stay inplace-compatible */ +static URI_INLINE void URI_FUNC(FixPercentEncodingEngine)( + const URI_CHAR * inFirst, const URI_CHAR * inAfterLast, + const URI_CHAR * outFirst, const URI_CHAR ** outAfterLast) { + URI_CHAR * write = (URI_CHAR *)outFirst; + const int lenInChars = (int)(inAfterLast - inFirst); + int i = 0; + + /* All but last two */ + for (; i + 2 < lenInChars; i++) { + if (inFirst[i] != _UT('%')) { + write[0] = inFirst[i]; + write++; + } else { + /* 6.2.2.2 Percent-Encoding Normalization: * + * percent-encoded unreserved characters */ + const URI_CHAR one = inFirst[i + 1]; + const URI_CHAR two = inFirst[i + 2]; + const unsigned char left = URI_FUNC(HexdigToInt)(one); + const unsigned char right = URI_FUNC(HexdigToInt)(two); + const int code = 16 * left + right; + if (uriIsUnreserved(code)) { + write[0] = (URI_CHAR)(code); + write++; + } else { + /* 6.2.2.1 Case Normalization: * + * lowercase percent-encodings */ + write[0] = _UT('%'); + write[1] = URI_FUNC(HexToLetter)(left); + write[2] = URI_FUNC(HexToLetter)(right); + write += 3; + } + + i += 2; /* For the two chars of the percent group we just ate */ + } + } + + /* Last two */ + for (; i < lenInChars; i++) { + write[0] = inFirst[i]; + write++; + } + + *outAfterLast = write; +} + + + +static URI_INLINE void URI_FUNC(FixPercentEncodingInplace)(const URI_CHAR * first, + const URI_CHAR ** afterLast) { + /* Death checks */ + if ((first == NULL) || (afterLast == NULL) || (*afterLast == NULL)) { + return; + } + + /* Fix inplace */ + URI_FUNC(FixPercentEncodingEngine)(first, *afterLast, first, afterLast); +} + + + +static URI_INLINE UriBool URI_FUNC(FixPercentEncodingMalloc)(const URI_CHAR ** first, + const URI_CHAR ** afterLast) { + int lenInChars; + URI_CHAR * buffer; + + /* Death checks */ + if ((first == NULL) || (afterLast == NULL) + || (*first == NULL) || (*afterLast == NULL)) { + return URI_FALSE; + } + + /* Old text length */ + lenInChars = (int)(*afterLast - *first); + if (lenInChars == 0) { + return URI_TRUE; + } else if (lenInChars < 0) { + return URI_FALSE; + } + + /* New buffer */ + buffer = malloc(lenInChars * sizeof(URI_CHAR)); + if (buffer == NULL) { + return URI_FALSE; + } + + /* Fix on copy */ + URI_FUNC(FixPercentEncodingEngine)(*first, *afterLast, buffer, afterLast); + *first = buffer; + return URI_TRUE; +} + + + +static URI_INLINE UriBool URI_FUNC(MakeRangeOwner)(unsigned int * doneMask, + unsigned int maskTest, URI_TYPE(TextRange) * range) { + if (((*doneMask & maskTest) == 0) + && (range->first != NULL) + && (range->afterLast != NULL) + && (range->afterLast > range->first)) { + const int lenInChars = (int)(range->afterLast - range->first); + const int lenInBytes = lenInChars * sizeof(URI_CHAR); + URI_CHAR * dup = malloc(lenInBytes); + if (dup == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + memcpy(dup, range->first, lenInBytes); + range->first = dup; + range->afterLast = dup + lenInChars; + *doneMask |= maskTest; + } + return URI_TRUE; +} + + + +static URI_INLINE UriBool URI_FUNC(MakeOwner)(URI_TYPE(Uri) * uri, + unsigned int * doneMask) { + URI_TYPE(PathSegment) * walker = uri->pathHead; + if (!URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_SCHEME, + &(uri->scheme)) + || !URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_USER_INFO, + &(uri->userInfo)) + || !URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_QUERY, + &(uri->query)) + || !URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_FRAGMENT, + &(uri->fragment))) { + return URI_FALSE; /* Raises malloc error */ + } + + /* Host */ + if ((*doneMask & URI_NORMALIZE_HOST) == 0) { + if ((uri->hostData.ip4 == NULL) + && (uri->hostData.ip6 == NULL)) { + if (uri->hostData.ipFuture.first != NULL) { + /* IPvFuture */ + if (!URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_HOST, + &(uri->hostData.ipFuture))) { + return URI_FALSE; /* Raises malloc error */ + } + uri->hostText.first = uri->hostData.ipFuture.first; + uri->hostText.afterLast = uri->hostData.ipFuture.afterLast; + } else if (uri->hostText.first != NULL) { + /* Regname */ + if (!URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_HOST, + &(uri->hostText))) { + return URI_FALSE; /* Raises malloc error */ + } + } + } + } + + /* Path */ + if ((*doneMask & URI_NORMALIZE_PATH) == 0) { + while (walker != NULL) { + if (!URI_FUNC(MakeRangeOwner)(doneMask, 0, &(walker->text))) { + /* Free allocations done so far and kill path */ + + /* Kill path to one before walker (if any) */ + URI_TYPE(PathSegment) * ranger = uri->pathHead; + while (ranger != walker) { + URI_TYPE(PathSegment) * const next = ranger->next; + if ((ranger->text.first != NULL) + && (ranger->text.afterLast != NULL) + && (ranger->text.afterLast > ranger->text.first)) { + free((URI_CHAR *)ranger->text.first); + free(ranger); + } + ranger = next; + } + + /* Kill path from walker */ + while (walker != NULL) { + URI_TYPE(PathSegment) * const next = walker->next; + free(walker); + walker = next; + } + + uri->pathHead = NULL; + uri->pathTail = NULL; + return URI_FALSE; /* Raises malloc error */ + } + walker = walker->next; + } + *doneMask |= URI_NORMALIZE_PATH; + } + + /* Port text, must come last so we don't have to undo that one if it fails. * + * Otherwise we would need and extra enum flag for it although the port * + * cannot go unnormalized... */ + if (!URI_FUNC(MakeRangeOwner)(doneMask, 0, &(uri->portText))) { + return URI_FALSE; /* Raises malloc error */ + } + + return URI_TRUE; +} + + + +unsigned int URI_FUNC(NormalizeSyntaxMaskRequired)(const URI_TYPE(Uri) * uri) { + unsigned int res; +#if defined(__GNUC__) && ((__GNUC__ > 4) \ + || ((__GNUC__ == 4) && defined(__GNUC_MINOR__) && (__GNUC_MINOR__ >= 2))) + /* Slower code that fixes a warning, not sure if this is a smart idea */ + URI_TYPE(Uri) writeableClone; + memcpy(&writeableClone, uri, 1 * sizeof(URI_TYPE(Uri))); + URI_FUNC(NormalizeSyntaxEngine)(&writeableClone, 0, &res); +#else + URI_FUNC(NormalizeSyntaxEngine)((URI_TYPE(Uri) *)uri, 0, &res); +#endif + return res; +} + + + +int URI_FUNC(NormalizeSyntaxEx)(URI_TYPE(Uri) * uri, unsigned int mask) { + return URI_FUNC(NormalizeSyntaxEngine)(uri, mask, NULL); +} + + + +int URI_FUNC(NormalizeSyntax)(URI_TYPE(Uri) * uri) { + return URI_FUNC(NormalizeSyntaxEx)(uri, (unsigned int)-1); +} + + + +static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri, unsigned int inMask, unsigned int * outMask) { + unsigned int doneMask = URI_NORMALIZED; + if (uri == NULL) { + if (outMask != NULL) { + *outMask = URI_NORMALIZED; + return URI_SUCCESS; + } else { + return URI_ERROR_NULL; + } + } + + if (outMask != NULL) { + /* Reset mask */ + *outMask = URI_NORMALIZED; + } else if (inMask == URI_NORMALIZED) { + /* Nothing to do */ + return URI_SUCCESS; + } + + /* Scheme, host */ + if (outMask != NULL) { + const UriBool normalizeScheme = URI_FUNC(ContainsUppercaseLetters)( + uri->scheme.first, uri->scheme.afterLast); + const UriBool normalizeHostCase = URI_FUNC(ContainsUppercaseLetters)( + uri->hostText.first, uri->hostText.afterLast); + if (normalizeScheme) { + *outMask |= URI_NORMALIZE_SCHEME; + } + + if (normalizeHostCase) { + *outMask |= URI_NORMALIZE_HOST; + } else { + const UriBool normalizeHostPrecent = URI_FUNC(ContainsUglyPercentEncoding)( + uri->hostText.first, uri->hostText.afterLast); + if (normalizeHostPrecent) { + *outMask |= URI_NORMALIZE_HOST; + } + } + } else { + /* Scheme */ + if ((inMask & URI_NORMALIZE_SCHEME) && (uri->scheme.first != NULL)) { + if (uri->owner) { + URI_FUNC(LowercaseInplace)(uri->scheme.first, uri->scheme.afterLast); + } else { + if (!URI_FUNC(LowercaseMalloc)(&(uri->scheme.first), &(uri->scheme.afterLast))) { + URI_FUNC(PreventLeakage)(uri, doneMask); + return URI_ERROR_MALLOC; + } + doneMask |= URI_NORMALIZE_SCHEME; + } + } + + /* Host */ + if (inMask & URI_NORMALIZE_HOST) { + if (uri->hostData.ipFuture.first != NULL) { + /* IPvFuture */ + if (uri->owner) { + URI_FUNC(LowercaseInplace)(uri->hostData.ipFuture.first, + uri->hostData.ipFuture.afterLast); + } else { + if (!URI_FUNC(LowercaseMalloc)(&(uri->hostData.ipFuture.first), + &(uri->hostData.ipFuture.afterLast))) { + URI_FUNC(PreventLeakage)(uri, doneMask); + return URI_ERROR_MALLOC; + } + doneMask |= URI_NORMALIZE_HOST; + } + uri->hostText.first = uri->hostData.ipFuture.first; + uri->hostText.afterLast = uri->hostData.ipFuture.afterLast; + } else if ((uri->hostText.first != NULL) + && (uri->hostData.ip4 == NULL) + && (uri->hostData.ip6 == NULL)) { + /* Regname */ + if (uri->owner) { + URI_FUNC(FixPercentEncodingInplace)(uri->hostText.first, + &(uri->hostText.afterLast)); + } else { + if (!URI_FUNC(FixPercentEncodingMalloc)( + &(uri->hostText.first), + &(uri->hostText.afterLast))) { + URI_FUNC(PreventLeakage)(uri, doneMask); + return URI_ERROR_MALLOC; + } + doneMask |= URI_NORMALIZE_HOST; + } + + URI_FUNC(LowercaseInplace)(uri->hostText.first, + uri->hostText.afterLast); + } + } + } + + /* User info */ + if (outMask != NULL) { + const UriBool normalizeUserInfo = URI_FUNC(ContainsUglyPercentEncoding)( + uri->userInfo.first, uri->userInfo.afterLast); + if (normalizeUserInfo) { + *outMask |= URI_NORMALIZE_USER_INFO; + } + } else { + if ((inMask & URI_NORMALIZE_USER_INFO) && (uri->userInfo.first != NULL)) { + if (uri->owner) { + URI_FUNC(FixPercentEncodingInplace)(uri->userInfo.first, &(uri->userInfo.afterLast)); + } else { + if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->userInfo.first), + &(uri->userInfo.afterLast))) { + URI_FUNC(PreventLeakage)(uri, doneMask); + return URI_ERROR_MALLOC; + } + doneMask |= URI_NORMALIZE_USER_INFO; + } + } + } + + /* Path */ + if (outMask != NULL) { + const URI_TYPE(PathSegment) * walker = uri->pathHead; + while (walker != NULL) { + const URI_CHAR * const first = walker->text.first; + const URI_CHAR * const afterLast = walker->text.afterLast; + if ((first != NULL) + && (afterLast != NULL) + && (afterLast > first) + && ( + (((afterLast - first) == 1) + && (first[0] == _UT('.'))) + || + (((afterLast - first) == 2) + && (first[0] == _UT('.')) + && (first[1] == _UT('.'))) + || + URI_FUNC(ContainsUglyPercentEncoding)(first, afterLast) + )) { + *outMask |= URI_NORMALIZE_PATH; + break; + } + walker = walker->next; + } + } else if (inMask & URI_NORMALIZE_PATH) { + URI_TYPE(PathSegment) * walker; + const UriBool relative = ((uri->scheme.first == NULL) + && !uri->absolutePath) ? URI_TRUE : URI_FALSE; + + /* Fix percent-encoding for each segment */ + walker = uri->pathHead; + if (uri->owner) { + while (walker != NULL) { + URI_FUNC(FixPercentEncodingInplace)(walker->text.first, &(walker->text.afterLast)); + walker = walker->next; + } + } else { + while (walker != NULL) { + if (!URI_FUNC(FixPercentEncodingMalloc)(&(walker->text.first), + &(walker->text.afterLast))) { + URI_FUNC(PreventLeakage)(uri, doneMask); + return URI_ERROR_MALLOC; + } + walker = walker->next; + } + doneMask |= URI_NORMALIZE_PATH; + } + + /* 6.2.2.3 Path Segment Normalization */ + if (!URI_FUNC(RemoveDotSegmentsEx)(uri, relative, + (uri->owner == URI_TRUE) + || ((doneMask & URI_NORMALIZE_PATH) != 0) + )) { + URI_FUNC(PreventLeakage)(uri, doneMask); + return URI_ERROR_MALLOC; + } + URI_FUNC(FixEmptyTrailSegment)(uri); + } + + /* Query, fragment */ + if (outMask != NULL) { + const UriBool normalizeQuery = URI_FUNC(ContainsUglyPercentEncoding)( + uri->query.first, uri->query.afterLast); + const UriBool normalizeFragment = URI_FUNC(ContainsUglyPercentEncoding)( + uri->fragment.first, uri->fragment.afterLast); + if (normalizeQuery) { + *outMask |= URI_NORMALIZE_QUERY; + } + + if (normalizeFragment) { + *outMask |= URI_NORMALIZE_FRAGMENT; + } + } else { + /* Query */ + if ((inMask & URI_NORMALIZE_QUERY) && (uri->query.first != NULL)) { + if (uri->owner) { + URI_FUNC(FixPercentEncodingInplace)(uri->query.first, &(uri->query.afterLast)); + } else { + if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->query.first), + &(uri->query.afterLast))) { + URI_FUNC(PreventLeakage)(uri, doneMask); + return URI_ERROR_MALLOC; + } + doneMask |= URI_NORMALIZE_QUERY; + } + } + + /* Fragment */ + if ((inMask & URI_NORMALIZE_FRAGMENT) && (uri->fragment.first != NULL)) { + if (uri->owner) { + URI_FUNC(FixPercentEncodingInplace)(uri->fragment.first, &(uri->fragment.afterLast)); + } else { + if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->fragment.first), + &(uri->fragment.afterLast))) { + URI_FUNC(PreventLeakage)(uri, doneMask); + return URI_ERROR_MALLOC; + } + doneMask |= URI_NORMALIZE_FRAGMENT; + } + } + } + + /* Dup all not duped yet */ + if ((outMask == NULL) && !uri->owner) { + if (!URI_FUNC(MakeOwner)(uri, &doneMask)) { + URI_FUNC(PreventLeakage)(uri, doneMask); + return URI_ERROR_MALLOC; + } + uri->owner = URI_TRUE; + } + + return URI_SUCCESS; +} + + + +#endif diff --git a/plugins/uriparser/UriNormalizeBase.c b/plugins/uriparser/UriNormalizeBase.c new file mode 100644 index 00000000..bac58886 --- /dev/null +++ b/plugins/uriparser/UriNormalizeBase.c @@ -0,0 +1,119 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef URI_DOXYGEN +# include "UriNormalizeBase.h" +#endif + + + +UriBool uriIsUnreserved(int code) { + switch (code) { + case L'a': /* ALPHA */ + case L'A': + case L'b': + case L'B': + case L'c': + case L'C': + case L'd': + case L'D': + case L'e': + case L'E': + case L'f': + case L'F': + case L'g': + case L'G': + case L'h': + case L'H': + case L'i': + case L'I': + case L'j': + case L'J': + case L'k': + case L'K': + case L'l': + case L'L': + case L'm': + case L'M': + case L'n': + case L'N': + case L'o': + case L'O': + case L'p': + case L'P': + case L'q': + case L'Q': + case L'r': + case L'R': + case L's': + case L'S': + case L't': + case L'T': + case L'u': + case L'U': + case L'v': + case L'V': + case L'w': + case L'W': + case L'x': + case L'X': + case L'y': + case L'Y': + case L'z': + case L'Z': + case L'0': /* DIGIT */ + case L'1': + case L'2': + case L'3': + case L'4': + case L'5': + case L'6': + case L'7': + case L'8': + case L'9': + case L'-': /* "-" / "." / "_" / "~" */ + case L'.': + case L'_': + case L'~': + return URI_TRUE; + + default: + return URI_FALSE; + } +} diff --git a/plugins/uriparser/UriNormalizeBase.h b/plugins/uriparser/UriNormalizeBase.h new file mode 100644 index 00000000..9f3abd55 --- /dev/null +++ b/plugins/uriparser/UriNormalizeBase.h @@ -0,0 +1,53 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef URI_NORMALIZE_BASE_H +#define URI_NORMALIZE_BASE_H 1 + + + +#include + + + +UriBool uriIsUnreserved(int code); + + + +#endif /* URI_NORMALIZE_BASE_H */ diff --git a/plugins/uriparser/UriParse.c b/plugins/uriparser/UriParse.c new file mode 100644 index 00000000..e3cdc68d --- /dev/null +++ b/plugins/uriparser/UriParse.c @@ -0,0 +1,2241 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriParse.c + * Holds the RFC 3986 %URI parsing implementation. + * NOTE: This source file includes itself twice. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriParse.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriParse.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include +# include "UriCommon.h" +# include "UriParseBase.h" +#endif + + + +#define URI_SET_DIGIT \ + _UT('0'): \ + case _UT('1'): \ + case _UT('2'): \ + case _UT('3'): \ + case _UT('4'): \ + case _UT('5'): \ + case _UT('6'): \ + case _UT('7'): \ + case _UT('8'): \ + case _UT('9') + +#define URI_SET_HEX_LETTER_UPPER \ + _UT('A'): \ + case _UT('B'): \ + case _UT('C'): \ + case _UT('D'): \ + case _UT('E'): \ + case _UT('F') + +#define URI_SET_HEX_LETTER_LOWER \ + _UT('a'): \ + case _UT('b'): \ + case _UT('c'): \ + case _UT('d'): \ + case _UT('e'): \ + case _UT('f') + +#define URI_SET_HEXDIG \ + URI_SET_DIGIT: \ + case URI_SET_HEX_LETTER_UPPER: \ + case URI_SET_HEX_LETTER_LOWER + +#define URI_SET_ALPHA \ + URI_SET_HEX_LETTER_UPPER: \ + case URI_SET_HEX_LETTER_LOWER: \ + case _UT('g'): \ + case _UT('G'): \ + case _UT('h'): \ + case _UT('H'): \ + case _UT('i'): \ + case _UT('I'): \ + case _UT('j'): \ + case _UT('J'): \ + case _UT('k'): \ + case _UT('K'): \ + case _UT('l'): \ + case _UT('L'): \ + case _UT('m'): \ + case _UT('M'): \ + case _UT('n'): \ + case _UT('N'): \ + case _UT('o'): \ + case _UT('O'): \ + case _UT('p'): \ + case _UT('P'): \ + case _UT('q'): \ + case _UT('Q'): \ + case _UT('r'): \ + case _UT('R'): \ + case _UT('s'): \ + case _UT('S'): \ + case _UT('t'): \ + case _UT('T'): \ + case _UT('u'): \ + case _UT('U'): \ + case _UT('v'): \ + case _UT('V'): \ + case _UT('w'): \ + case _UT('W'): \ + case _UT('x'): \ + case _UT('X'): \ + case _UT('y'): \ + case _UT('Y'): \ + case _UT('z'): \ + case _UT('Z') + + + +static const URI_CHAR * URI_FUNC(ParseAuthority)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseAuthorityTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseHexZero)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseHierPart)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseIpFutLoop)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseIpFutStopGo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseIpLit2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseIPv6address2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseMustBeSegmentNzNc)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseOwnHost)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseOwnHost2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseOwnHostUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseOwnHostUserInfoNz)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseOwnPortUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseOwnUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParsePartHelperTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParsePathAbsEmpty)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParsePathAbsNoLeadSlash)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParsePathRootless)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParsePchar)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParsePctEncoded)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParsePctSubUnres)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParsePort)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseQueryFrag)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseSegment)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseSegmentNz)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseSegmentNzNcOrScheme2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseUriReference)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseUriTail)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseUriTailTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); +static const URI_CHAR * URI_FUNC(ParseZeroMoreSlashSegs)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); + +static UriBool URI_FUNC(OnExitOwnHost2)(URI_TYPE(ParserState) * state, const URI_CHAR * first); +static UriBool URI_FUNC(OnExitOwnHostUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first); +static UriBool URI_FUNC(OnExitOwnPortUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first); +static UriBool URI_FUNC(OnExitSegmentNzNcOrScheme2)(URI_TYPE(ParserState) * state, const URI_CHAR * first); +static void URI_FUNC(OnExitPartHelperTwo)(URI_TYPE(ParserState) * state); + +static void URI_FUNC(ResetParserState)(URI_TYPE(ParserState) * state); + +static UriBool URI_FUNC(PushPathSegment)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast); + +static void URI_FUNC(StopSyntax)(URI_TYPE(ParserState) * state, const URI_CHAR * errorPos); +static void URI_FUNC(StopMalloc)(URI_TYPE(ParserState) * state); + + + +static URI_INLINE void URI_FUNC(StopSyntax)(URI_TYPE(ParserState) * state, + const URI_CHAR * errorPos) { + URI_FUNC(FreeUriMembers)(state->uri); + state->errorPos = errorPos; + state->errorCode = URI_ERROR_SYNTAX; +} + + + +static URI_INLINE void URI_FUNC(StopMalloc)(URI_TYPE(ParserState) * state) { + URI_FUNC(FreeUriMembers)(state->uri); + state->errorPos = NULL; + state->errorCode = URI_ERROR_MALLOC; +} + + + +/* + * [authority]-><[>[ipLit2][authorityTwo] + * [authority]->[ownHostUserInfoNz] + * [authority]-> + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseAuthority)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + /* "" regname host */ + state->uri->hostText.first = URI_FUNC(SafeToPointTo); + state->uri->hostText.afterLast = URI_FUNC(SafeToPointTo); + return afterLast; + } + + switch (*first) { + case _UT('['): + { + const URI_CHAR * const afterIpLit2 + = URI_FUNC(ParseIpLit2)(state, first + 1, afterLast); + if (afterIpLit2 == NULL) { + return NULL; + } + state->uri->hostText.first = first + 1; /* HOST BEGIN */ + return URI_FUNC(ParseAuthorityTwo)(state, afterIpLit2, afterLast); + } + + case _UT('!'): + case _UT('$'): + case _UT('%'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(':'): + case _UT(';'): + case _UT('@'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + state->uri->userInfo.first = first; /* USERINFO BEGIN */ + return URI_FUNC(ParseOwnHostUserInfoNz)(state, first, afterLast); + + default: + /* "" regname host */ + state->uri->hostText.first = URI_FUNC(SafeToPointTo); + state->uri->hostText.afterLast = URI_FUNC(SafeToPointTo); + return first; + } +} + + + +/* + * [authorityTwo]-><:>[port] + * [authorityTwo]-> + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseAuthorityTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT(':'): + { + const URI_CHAR * const afterPort = URI_FUNC(ParsePort)(state, first + 1, afterLast); + if (afterPort == NULL) { + return NULL; + } + state->uri->portText.first = first + 1; /* PORT BEGIN */ + state->uri->portText.afterLast = afterPort; /* PORT END */ + return afterPort; + } + + default: + return first; + } +} + + + +/* + * [hexZero]->[HEXDIG][hexZero] + * [hexZero]-> + */ +static const URI_CHAR * URI_FUNC(ParseHexZero)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case URI_SET_HEXDIG: + return URI_FUNC(ParseHexZero)(state, first + 1, afterLast); + + default: + return first; + } +} + + + +/* + * [hierPart]->[pathRootless] + * [hierPart]->[partHelperTwo] + * [hierPart]-> + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseHierPart)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('%'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(':'): + case _UT(';'): + case _UT('@'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + return URI_FUNC(ParsePathRootless)(state, first, afterLast); + + case _UT('/'): + return URI_FUNC(ParsePartHelperTwo)(state, first + 1, afterLast); + + default: + return first; + } +} + + + +/* + * [ipFutLoop]->[subDelims][ipFutStopGo] + * [ipFutLoop]->[unreserved][ipFutStopGo] + * [ipFutLoop]-><:>[ipFutStopGo] + */ +static const URI_CHAR * URI_FUNC(ParseIpFutLoop)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(':'): + case _UT(';'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + return URI_FUNC(ParseIpFutStopGo)(state, first + 1, afterLast); + + default: + URI_FUNC(StopSyntax)(state, first); + return NULL; + } +} + + + +/* + * [ipFutStopGo]->[ipFutLoop] + * [ipFutStopGo]-> + */ +static const URI_CHAR * URI_FUNC(ParseIpFutStopGo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(':'): + case _UT(';'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + return URI_FUNC(ParseIpFutLoop)(state, first, afterLast); + + default: + return first; + } +} + + + +/* + * [ipFuture]->[HEXDIG][hexZero]<.>[ipFutLoop] + */ +static const URI_CHAR * URI_FUNC(ParseIpFuture)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + + /* + First character has already been + checked before entering this rule. + + switch (*first) { + case _UT('v'): + */ + if (first + 1 >= afterLast) { + URI_FUNC(StopSyntax)(state, first + 1); + return NULL; + } + + switch (first[1]) { + case URI_SET_HEXDIG: + { + const URI_CHAR * afterIpFutLoop; + const URI_CHAR * const afterHexZero + = URI_FUNC(ParseHexZero)(state, first + 2, afterLast); + if (afterHexZero == NULL) { + return NULL; + } + if ((afterHexZero >= afterLast) + || (*afterHexZero != _UT('.'))) { + URI_FUNC(StopSyntax)(state, afterHexZero); + return NULL; + } + state->uri->hostText.first = first; /* HOST BEGIN */ + state->uri->hostData.ipFuture.first = first; /* IPFUTURE BEGIN */ + afterIpFutLoop = URI_FUNC(ParseIpFutLoop)(state, afterHexZero + 1, afterLast); + if (afterIpFutLoop == NULL) { + return NULL; + } + state->uri->hostText.afterLast = afterIpFutLoop; /* HOST END */ + state->uri->hostData.ipFuture.afterLast = afterIpFutLoop; /* IPFUTURE END */ + return afterIpFutLoop; + } + + default: + URI_FUNC(StopSyntax)(state, first + 1); + return NULL; + } + + /* + default: + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + */ +} + + + +/* + * [ipLit2]->[ipFuture]<]> + * [ipLit2]->[IPv6address2] + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseIpLit2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + + switch (*first) { + case _UT('v'): + { + const URI_CHAR * const afterIpFuture + = URI_FUNC(ParseIpFuture)(state, first, afterLast); + if (afterIpFuture == NULL) { + return NULL; + } + if ((afterIpFuture >= afterLast) + || (*afterIpFuture != _UT(']'))) { + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + return afterIpFuture + 1; + } + + case _UT(':'): + case _UT(']'): + case URI_SET_HEXDIG: + state->uri->hostData.ip6 = malloc(1 * sizeof(UriIp6)); /* Freed when stopping on parse error */ + if (state->uri->hostData.ip6 == NULL) { + URI_FUNC(StopMalloc)(state); + return NULL; + } + return URI_FUNC(ParseIPv6address2)(state, first, afterLast); + + default: + URI_FUNC(StopSyntax)(state, first); + return NULL; + } +} + + + +/* + * [IPv6address2]->..<]> + */ +static const URI_CHAR * URI_FUNC(ParseIPv6address2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + int zipperEver = 0; + int quadsDone = 0; + int digitCount = 0; + unsigned char digitHistory[4]; + int ip4OctetsDone = 0; + + unsigned char quadsAfterZipper[14]; + int quadsAfterZipperCount = 0; + + + for (;;) { + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + + /* Inside IPv4 part? */ + if (ip4OctetsDone > 0) { + /* Eat rest of IPv4 address */ + for (;;) { + switch (*first) { + case URI_SET_DIGIT: + if (digitCount == 4) { + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + digitHistory[digitCount++] = (unsigned char)(9 + *first - _UT('9')); + break; + + case _UT('.'): + if ((ip4OctetsDone == 4) /* NOTE! */ + || (digitCount == 0) + || (digitCount == 4)) { + /* Invalid digit or octet count */ + URI_FUNC(StopSyntax)(state, first); + return NULL; + } else if ((digitCount > 1) + && (digitHistory[0] == 0)) { + /* Leading zero */ + URI_FUNC(StopSyntax)(state, first - digitCount); + return NULL; + } else if ((digitCount > 2) + && (digitHistory[1] == 0)) { + /* Leading zero */ + URI_FUNC(StopSyntax)(state, first - digitCount + 1); + return NULL; + } else if ((digitCount == 3) + && (100 * digitHistory[0] + + 10 * digitHistory[1] + + digitHistory[2] > 255)) { + /* Octet value too large */ + if (digitHistory[0] > 2) { + URI_FUNC(StopSyntax)(state, first - 3); + } else if (digitHistory[1] > 5) { + URI_FUNC(StopSyntax)(state, first - 2); + } else { + URI_FUNC(StopSyntax)(state, first - 1); + } + return NULL; + } + + /* Copy IPv4 octet */ + state->uri->hostData.ip6->data[16 - 4 + ip4OctetsDone] = uriGetOctetValue(digitHistory, digitCount); + digitCount = 0; + ip4OctetsDone++; + break; + + case _UT(']'): + if ((ip4OctetsDone != 3) /* NOTE! */ + || (digitCount == 0) + || (digitCount == 4)) { + /* Invalid digit or octet count */ + URI_FUNC(StopSyntax)(state, first); + return NULL; + } else if ((digitCount > 1) + && (digitHistory[0] == 0)) { + /* Leading zero */ + URI_FUNC(StopSyntax)(state, first - digitCount); + return NULL; + } else if ((digitCount > 2) + && (digitHistory[1] == 0)) { + /* Leading zero */ + URI_FUNC(StopSyntax)(state, first - digitCount + 1); + return NULL; + } else if ((digitCount == 3) + && (100 * digitHistory[0] + + 10 * digitHistory[1] + + digitHistory[2] > 255)) { + /* Octet value too large */ + if (digitHistory[0] > 2) { + URI_FUNC(StopSyntax)(state, first - 3); + } else if (digitHistory[1] > 5) { + URI_FUNC(StopSyntax)(state, first - 2); + } else { + URI_FUNC(StopSyntax)(state, first - 1); + } + return NULL; + } + + state->uri->hostText.afterLast = first; /* HOST END */ + + /* Copy missing quads right before IPv4 */ + memcpy(state->uri->hostData.ip6->data + 16 - 4 - 2 * quadsAfterZipperCount, + quadsAfterZipper, 2 * quadsAfterZipperCount); + + /* Copy last IPv4 octet */ + state->uri->hostData.ip6->data[16 - 4 + 3] = uriGetOctetValue(digitHistory, digitCount); + + return first + 1; + + default: + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + first++; + } + } else { + /* Eat while no dot in sight */ + int letterAmong = 0; + int walking = 1; + do { + switch (*first) { + case URI_SET_HEX_LETTER_LOWER: + letterAmong = 1; + if (digitCount == 4) { + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + digitHistory[digitCount] = (unsigned char)(15 + *first - _UT('f')); + digitCount++; + break; + + case URI_SET_HEX_LETTER_UPPER: + letterAmong = 1; + if (digitCount == 4) { + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + digitHistory[digitCount] = (unsigned char)(15 + *first - _UT('F')); + digitCount++; + break; + + case URI_SET_DIGIT: + if (digitCount == 4) { + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + digitHistory[digitCount] = (unsigned char)(9 + *first - _UT('9')); + digitCount++; + break; + + case _UT(':'): + { + int setZipper = 0; + + if (digitCount > 0) { + if (zipperEver) { + uriWriteQuadToDoubleByte(digitHistory, digitCount, quadsAfterZipper + 2 * quadsAfterZipperCount); + quadsAfterZipperCount++; + } else { + uriWriteQuadToDoubleByte(digitHistory, digitCount, state->uri->hostData.ip6->data + 2 * quadsDone); + } + quadsDone++; + digitCount = 0; + } + letterAmong = 0; + + /* Too many quads? */ + if (quadsDone >= 8 - zipperEver) { + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + + /* "::"? */ + if (first + 1 >= afterLast) { + URI_FUNC(StopSyntax)(state, first + 1); + return NULL; + } + if (first[1] == _UT(':')) { + const int resetOffset = 2 * (quadsDone + (digitCount > 0)); + + first++; + if (zipperEver) { + URI_FUNC(StopSyntax)(state, first); + return NULL; /* "::.+::" */ + } + + /* Zero everything after zipper */ + memset(state->uri->hostData.ip6->data + resetOffset, 0, 16 - resetOffset); + setZipper = 1; + + /* ":::+"? */ + if (first + 1 >= afterLast) { + URI_FUNC(StopSyntax)(state, first + 1); + return NULL; /* No ']' yet */ + } + if (first[1] == _UT(':')) { + URI_FUNC(StopSyntax)(state, first + 1); + return NULL; /* ":::+ "*/ + } + } + + if (setZipper) { + zipperEver = 1; + } + } + break; + + case _UT('.'): + if ((quadsDone > 6) /* NOTE */ + || (!zipperEver && (quadsDone < 6)) + || letterAmong + || (digitCount == 0) + || (digitCount == 4)) { + /* Invalid octet before */ + URI_FUNC(StopSyntax)(state, first); + return NULL; + } else if ((digitCount > 1) + && (digitHistory[0] == 0)) { + /* Leading zero */ + URI_FUNC(StopSyntax)(state, first - digitCount); + return NULL; + } else if ((digitCount > 2) + && (digitHistory[1] == 0)) { + /* Leading zero */ + URI_FUNC(StopSyntax)(state, first - digitCount + 1); + return NULL; + } else if ((digitCount == 3) + && (100 * digitHistory[0] + + 10 * digitHistory[1] + + digitHistory[2] > 255)) { + /* Octet value too large */ + if (digitHistory[0] > 2) { + URI_FUNC(StopSyntax)(state, first - 3); + } else if (digitHistory[1] > 5) { + URI_FUNC(StopSyntax)(state, first - 2); + } else { + URI_FUNC(StopSyntax)(state, first - 1); + } + return NULL; + } + + /* Copy first IPv4 octet */ + state->uri->hostData.ip6->data[16 - 4] = uriGetOctetValue(digitHistory, digitCount); + digitCount = 0; + + /* Switch over to IPv4 loop */ + ip4OctetsDone = 1; + walking = 0; + break; + + case _UT(']'): + /* Too little quads? */ + if (!zipperEver && !((quadsDone == 7) && (digitCount > 0))) { + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + + if (digitCount > 0) { + if (zipperEver) { + uriWriteQuadToDoubleByte(digitHistory, digitCount, quadsAfterZipper + 2 * quadsAfterZipperCount); + quadsAfterZipperCount++; + } else { + uriWriteQuadToDoubleByte(digitHistory, digitCount, state->uri->hostData.ip6->data + 2 * quadsDone); + } + /* + quadsDone++; + digitCount = 0; + */ + } + + /* Copy missing quads to the end */ + memcpy(state->uri->hostData.ip6->data + 16 - 2 * quadsAfterZipperCount, + quadsAfterZipper, 2 * quadsAfterZipperCount); + + state->uri->hostText.afterLast = first; /* HOST END */ + return first + 1; /* Fine */ + + default: + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + first++; + + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, first); + return NULL; /* No ']' yet */ + } + } while (walking); + } + } +} + + + +/* + * [mustBeSegmentNzNc]->[pctEncoded][mustBeSegmentNzNc] + * [mustBeSegmentNzNc]->[subDelims][mustBeSegmentNzNc] + * [mustBeSegmentNzNc]->[unreserved][mustBeSegmentNzNc] + * [mustBeSegmentNzNc]->[uriTail] // can take + * [mustBeSegmentNzNc]->[segment][zeroMoreSlashSegs][uriTail] + * [mustBeSegmentNzNc]-><@>[mustBeSegmentNzNc] + */ +static const URI_CHAR * URI_FUNC(ParseMustBeSegmentNzNc)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state); + return NULL; + } + state->uri->scheme.first = NULL; /* Not a scheme, reset */ + return afterLast; + } + + switch (*first) { + case _UT('%'): + { + const URI_CHAR * const afterPctEncoded + = URI_FUNC(ParsePctEncoded)(state, first, afterLast); + if (afterPctEncoded == NULL) { + return NULL; + } + return URI_FUNC(ParseMustBeSegmentNzNc)(state, afterPctEncoded, afterLast); + } + + case _UT('@'): + case _UT('!'): + case _UT('$'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('*'): + case _UT(','): + case _UT(';'): + case _UT('\''): + case _UT('+'): + case _UT('='): + case _UT('-'): + case _UT('.'): + case _UT('_'): + case _UT('~'): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + return URI_FUNC(ParseMustBeSegmentNzNc)(state, first + 1, afterLast); + + case _UT('/'): + { + const URI_CHAR * afterZeroMoreSlashSegs; + const URI_CHAR * afterSegment; + if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state); + return NULL; + } + state->uri->scheme.first = NULL; /* Not a scheme, reset */ + afterSegment = URI_FUNC(ParseSegment)(state, first + 1, afterLast); + if (afterSegment == NULL) { + return NULL; + } + if (!URI_FUNC(PushPathSegment)(state, first + 1, afterSegment)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state); + return NULL; + } + afterZeroMoreSlashSegs + = URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegment, afterLast); + if (afterZeroMoreSlashSegs == NULL) { + return NULL; + } + return URI_FUNC(ParseUriTail)(state, afterZeroMoreSlashSegs, afterLast); + } + + default: + if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state); + return NULL; + } + state->uri->scheme.first = NULL; /* Not a scheme, reset */ + return URI_FUNC(ParseUriTail)(state, first, afterLast); + } +} + + + +/* + * [ownHost]-><[>[ipLit2][authorityTwo] + * [ownHost]->[ownHost2] // can take + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseOwnHost)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('['): + { + const URI_CHAR * const afterIpLit2 + = URI_FUNC(ParseIpLit2)(state, first + 1, afterLast); + if (afterIpLit2 == NULL) { + return NULL; + } + state->uri->hostText.first = first + 1; /* HOST BEGIN */ + return URI_FUNC(ParseAuthorityTwo)(state, afterIpLit2, afterLast); + } + + default: + return URI_FUNC(ParseOwnHost2)(state, first, afterLast); + } +} + + + +static URI_INLINE UriBool URI_FUNC(OnExitOwnHost2)(URI_TYPE(ParserState) * state, const URI_CHAR * first) { + state->uri->hostText.afterLast = first; /* HOST END */ + + /* Valid IPv4 or just a regname? */ + state->uri->hostData.ip4 = malloc(1 * sizeof(UriIp4)); /* Freed when stopping on parse error */ + if (state->uri->hostData.ip4 == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + if (URI_FUNC(ParseIpFourAddress)(state->uri->hostData.ip4->data, + state->uri->hostText.first, state->uri->hostText.afterLast)) { + /* Not IPv4 */ + free(state->uri->hostData.ip4); + state->uri->hostData.ip4 = NULL; + } + return URI_TRUE; /* Success */ +} + + + +/* + * [ownHost2]->[authorityTwo] // can take + * [ownHost2]->[pctSubUnres][ownHost2] + */ +static const URI_CHAR * URI_FUNC(ParseOwnHost2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + if (!URI_FUNC(OnExitOwnHost2)(state, first)) { + URI_FUNC(StopMalloc)(state); + return NULL; + } + return afterLast; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('%'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(';'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + { + const URI_CHAR * const afterPctSubUnres + = URI_FUNC(ParsePctSubUnres)(state, first, afterLast); + if (afterPctSubUnres == NULL) { + return NULL; + } + return URI_FUNC(ParseOwnHost2)(state, afterPctSubUnres, afterLast); + } + + default: + if (!URI_FUNC(OnExitOwnHost2)(state, first)) { + URI_FUNC(StopMalloc)(state); + return NULL; + } + return URI_FUNC(ParseAuthorityTwo)(state, first, afterLast); + } +} + + + +static URI_INLINE UriBool URI_FUNC(OnExitOwnHostUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first) { + state->uri->hostText.first = state->uri->userInfo.first; /* Host instead of userInfo, update */ + state->uri->userInfo.first = NULL; /* Not a userInfo, reset */ + state->uri->hostText.afterLast = first; /* HOST END */ + + /* Valid IPv4 or just a regname? */ + state->uri->hostData.ip4 = malloc(1 * sizeof(UriIp4)); /* Freed when stopping on parse error */ + if (state->uri->hostData.ip4 == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + if (URI_FUNC(ParseIpFourAddress)(state->uri->hostData.ip4->data, + state->uri->hostText.first, state->uri->hostText.afterLast)) { + /* Not IPv4 */ + free(state->uri->hostData.ip4); + state->uri->hostData.ip4 = NULL; + } + return URI_TRUE; /* Success */ +} + + + +/* + * [ownHostUserInfo]->[ownHostUserInfoNz] + * [ownHostUserInfo]-> + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseOwnHostUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + if (!URI_FUNC(OnExitOwnHostUserInfo)(state, first)) { + URI_FUNC(StopMalloc)(state); + return NULL; + } + return afterLast; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('%'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(':'): + case _UT(';'): + case _UT('@'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + return URI_FUNC(ParseOwnHostUserInfoNz)(state, first, afterLast); + + default: + if (!URI_FUNC(OnExitOwnHostUserInfo)(state, first)) { + URI_FUNC(StopMalloc)(state); + return NULL; + } + return first; + } +} + + + +/* + * [ownHostUserInfoNz]->[pctSubUnres][ownHostUserInfo] + * [ownHostUserInfoNz]-><:>[ownPortUserInfo] + * [ownHostUserInfoNz]-><@>[ownHost] + */ +static const URI_CHAR * URI_FUNC(ParseOwnHostUserInfoNz)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('%'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(';'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + { + const URI_CHAR * const afterPctSubUnres + = URI_FUNC(ParsePctSubUnres)(state, first, afterLast); + if (afterPctSubUnres == NULL) { + return NULL; + } + return URI_FUNC(ParseOwnHostUserInfo)(state, afterPctSubUnres, afterLast); + } + + case _UT(':'): + state->uri->hostText.afterLast = first; /* HOST END */ + state->uri->portText.first = first + 1; /* PORT BEGIN */ + return URI_FUNC(ParseOwnPortUserInfo)(state, first + 1, afterLast); + + case _UT('@'): + state->uri->userInfo.afterLast = first; /* USERINFO END */ + state->uri->hostText.first = first + 1; /* HOST BEGIN */ + return URI_FUNC(ParseOwnHost)(state, first + 1, afterLast); + + default: + URI_FUNC(StopSyntax)(state, first); + return NULL; + } +} + + + +static URI_INLINE UriBool URI_FUNC(OnExitOwnPortUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first) { + state->uri->hostText.first = state->uri->userInfo.first; /* Host instead of userInfo, update */ + state->uri->userInfo.first = NULL; /* Not a userInfo, reset */ + state->uri->portText.afterLast = first; /* PORT END */ + + /* Valid IPv4 or just a regname? */ + state->uri->hostData.ip4 = malloc(1 * sizeof(UriIp4)); /* Freed when stopping on parse error */ + if (state->uri->hostData.ip4 == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + if (URI_FUNC(ParseIpFourAddress)(state->uri->hostData.ip4->data, + state->uri->hostText.first, state->uri->hostText.afterLast)) { + /* Not IPv4 */ + free(state->uri->hostData.ip4); + state->uri->hostData.ip4 = NULL; + } + return URI_TRUE; /* Success */ +} + + + +/* + * [ownPortUserInfo]->[ALPHA][ownUserInfo] + * [ownPortUserInfo]->[DIGIT][ownPortUserInfo] + * [ownPortUserInfo]-><.>[ownUserInfo] + * [ownPortUserInfo]-><_>[ownUserInfo] + * [ownPortUserInfo]-><~>[ownUserInfo] + * [ownPortUserInfo]-><->[ownUserInfo] + * [ownPortUserInfo]->[subDelims][ownUserInfo] + * [ownPortUserInfo]->[pctEncoded][ownUserInfo] + * [ownPortUserInfo]-><:>[ownUserInfo] + * [ownPortUserInfo]-><@>[ownHost] + * [ownPortUserInfo]-> + */ +static const URI_CHAR * URI_FUNC(ParseOwnPortUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + if (!URI_FUNC(OnExitOwnPortUserInfo)(state, first)) { + URI_FUNC(StopMalloc)(state); + return NULL; + } + return afterLast; + } + + switch (*first) { + /* begin sub-delims */ + case _UT('!'): + case _UT('$'): + case _UT('&'): + case _UT('\''): + case _UT('('): + case _UT(')'): + case _UT('*'): + case _UT('+'): + case _UT(','): + case _UT(';'): + case _UT('='): + /* end sub-delims */ + /* begin unreserved (except alpha and digit) */ + case _UT('-'): + case _UT('.'): + case _UT('_'): + case _UT('~'): + /* end unreserved (except alpha and digit) */ + case _UT(':'): + case URI_SET_ALPHA: + state->uri->hostText.afterLast = NULL; /* Not a host, reset */ + state->uri->portText.first = NULL; /* Not a port, reset */ + return URI_FUNC(ParseOwnUserInfo)(state, first + 1, afterLast); + + case URI_SET_DIGIT: + return URI_FUNC(ParseOwnPortUserInfo)(state, first + 1, afterLast); + + case _UT('%'): + state->uri->portText.first = NULL; /* Not a port, reset */ + { + const URI_CHAR * const afterPct + = URI_FUNC(ParsePctEncoded)(state, first, afterLast); + if (afterPct == NULL) { + return NULL; + } + return URI_FUNC(ParseOwnUserInfo)(state, afterPct, afterLast); + } + + case _UT('@'): + state->uri->hostText.afterLast = NULL; /* Not a host, reset */ + state->uri->portText.first = NULL; /* Not a port, reset */ + state->uri->userInfo.afterLast = first; /* USERINFO END */ + state->uri->hostText.first = first + 1; /* HOST BEGIN */ + return URI_FUNC(ParseOwnHost)(state, first + 1, afterLast); + + default: + if (!URI_FUNC(OnExitOwnPortUserInfo)(state, first)) { + URI_FUNC(StopMalloc)(state); + return NULL; + } + return first; + } +} + + + +/* + * [ownUserInfo]->[pctSubUnres][ownUserInfo] + * [ownUserInfo]-><:>[ownUserInfo] + * [ownUserInfo]-><@>[ownHost] + */ +static const URI_CHAR * URI_FUNC(ParseOwnUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('%'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(';'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + { + const URI_CHAR * const afterPctSubUnres + = URI_FUNC(ParsePctSubUnres)(state, first, afterLast); + if (afterPctSubUnres == NULL) { + return NULL; + } + return URI_FUNC(ParseOwnUserInfo)(state, afterPctSubUnres, afterLast); + } + + case _UT(':'): + return URI_FUNC(ParseOwnUserInfo)(state, first + 1, afterLast); + + case _UT('@'): + /* SURE */ + state->uri->userInfo.afterLast = first; /* USERINFO END */ + state->uri->hostText.first = first + 1; /* HOST BEGIN */ + return URI_FUNC(ParseOwnHost)(state, first + 1, afterLast); + + default: + URI_FUNC(StopSyntax)(state, first); + return NULL; + } +} + + + +static URI_INLINE void URI_FUNC(OnExitPartHelperTwo)(URI_TYPE(ParserState) * state) { + state->uri->absolutePath = URI_TRUE; +} + + + +/* + * [partHelperTwo]->[pathAbsNoLeadSlash] // can take + * [partHelperTwo]->[authority][pathAbsEmpty] + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParsePartHelperTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + URI_FUNC(OnExitPartHelperTwo)(state); + return afterLast; + } + + switch (*first) { + case _UT('/'): + { + const URI_CHAR * const afterAuthority + = URI_FUNC(ParseAuthority)(state, first + 1, afterLast); + const URI_CHAR * afterPathAbsEmpty; + if (afterAuthority == NULL) { + return NULL; + } + afterPathAbsEmpty = URI_FUNC(ParsePathAbsEmpty)(state, afterAuthority, afterLast); + + URI_FUNC(FixEmptyTrailSegment)(state->uri); + + return afterPathAbsEmpty; + } + + default: + URI_FUNC(OnExitPartHelperTwo)(state); + return URI_FUNC(ParsePathAbsNoLeadSlash)(state, first, afterLast); + } +} + + + +/* + * [pathAbsEmpty]->[segment][pathAbsEmpty] + * [pathAbsEmpty]-> + */ +static const URI_CHAR * URI_FUNC(ParsePathAbsEmpty)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('/'): + { + const URI_CHAR * const afterSegment + = URI_FUNC(ParseSegment)(state, first + 1, afterLast); + if (afterSegment == NULL) { + return NULL; + } + if (!URI_FUNC(PushPathSegment)(state, first + 1, afterSegment)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state); + return NULL; + } + return URI_FUNC(ParsePathAbsEmpty)(state, afterSegment, afterLast); + } + + default: + return first; + } +} + + + +/* + * [pathAbsNoLeadSlash]->[segmentNz][zeroMoreSlashSegs] + * [pathAbsNoLeadSlash]-> + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParsePathAbsNoLeadSlash)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('%'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(':'): + case _UT(';'): + case _UT('@'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + { + const URI_CHAR * const afterSegmentNz + = URI_FUNC(ParseSegmentNz)(state, first, afterLast); + if (afterSegmentNz == NULL) { + return NULL; + } + if (!URI_FUNC(PushPathSegment)(state, first, afterSegmentNz)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state); + return NULL; + } + return URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegmentNz, afterLast); + } + + default: + return first; + } +} + + + +/* + * [pathRootless]->[segmentNz][zeroMoreSlashSegs] + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParsePathRootless)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + const URI_CHAR * const afterSegmentNz + = URI_FUNC(ParseSegmentNz)(state, first, afterLast); + if (afterSegmentNz == NULL) { + return NULL; + } else { + if (!URI_FUNC(PushPathSegment)(state, first, afterSegmentNz)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state); + return NULL; + } + } + return URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegmentNz, afterLast); +} + + + +/* + * [pchar]->[pctEncoded] + * [pchar]->[subDelims] + * [pchar]->[unreserved] + * [pchar]-><:> + * [pchar]-><@> + */ +static const URI_CHAR * URI_FUNC(ParsePchar)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + + switch (*first) { + case _UT('%'): + return URI_FUNC(ParsePctEncoded)(state, first, afterLast); + + case _UT(':'): + case _UT('@'): + case _UT('!'): + case _UT('$'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('*'): + case _UT(','): + case _UT(';'): + case _UT('\''): + case _UT('+'): + case _UT('='): + case _UT('-'): + case _UT('.'): + case _UT('_'): + case _UT('~'): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + return first + 1; + + default: + URI_FUNC(StopSyntax)(state, first); + return NULL; + } +} + + + +/* + * [pctEncoded]-><%>[HEXDIG][HEXDIG] + */ +static const URI_CHAR * URI_FUNC(ParsePctEncoded)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + + /* + First character has already been + checked before entering this rule. + + switch (*first) { + case _UT('%'): + */ + if (first + 1 >= afterLast) { + URI_FUNC(StopSyntax)(state, first + 1); + return NULL; + } + + switch (first[1]) { + case URI_SET_HEXDIG: + if (first + 2 >= afterLast) { + URI_FUNC(StopSyntax)(state, first + 2); + return NULL; + } + + switch (first[2]) { + case URI_SET_HEXDIG: + return first + 3; + + default: + URI_FUNC(StopSyntax)(state, first + 2); + return NULL; + } + + default: + URI_FUNC(StopSyntax)(state, first + 1); + return NULL; + } + + /* + default: + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + */ +} + + + +/* + * [pctSubUnres]->[pctEncoded] + * [pctSubUnres]->[subDelims] + * [pctSubUnres]->[unreserved] + */ +static const URI_CHAR * URI_FUNC(ParsePctSubUnres)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + URI_FUNC(StopSyntax)(state, first); + return NULL; + } + + switch (*first) { + case _UT('%'): + return URI_FUNC(ParsePctEncoded)(state, first, afterLast); + + case _UT('!'): + case _UT('$'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('*'): + case _UT(','): + case _UT(';'): + case _UT('\''): + case _UT('+'): + case _UT('='): + case _UT('-'): + case _UT('.'): + case _UT('_'): + case _UT('~'): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + return first + 1; + + default: + URI_FUNC(StopSyntax)(state, first); + return NULL; + } +} + + + +/* + * [port]->[DIGIT][port] + * [port]-> + */ +static const URI_CHAR * URI_FUNC(ParsePort)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case URI_SET_DIGIT: + return URI_FUNC(ParsePort)(state, first + 1, afterLast); + + default: + return first; + } +} + + + +/* + * [queryFrag]->[pchar][queryFrag] + * [queryFrag]->[queryFrag] + * [queryFrag]->[queryFrag] + * [queryFrag]-> + */ +static const URI_CHAR * URI_FUNC(ParseQueryFrag)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('%'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(':'): + case _UT(';'): + case _UT('@'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + { + const URI_CHAR * const afterPchar + = URI_FUNC(ParsePchar)(state, first, afterLast); + if (afterPchar == NULL) { + return NULL; + } + return URI_FUNC(ParseQueryFrag)(state, afterPchar, afterLast); + } + + case _UT('/'): + case _UT('?'): + return URI_FUNC(ParseQueryFrag)(state, first + 1, afterLast); + + default: + return first; + } +} + + + +/* + * [segment]->[pchar][segment] + * [segment]-> + */ +static const URI_CHAR * URI_FUNC(ParseSegment)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('!'): + case _UT('$'): + case _UT('%'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('-'): + case _UT('*'): + case _UT(','): + case _UT('.'): + case _UT(':'): + case _UT(';'): + case _UT('@'): + case _UT('\''): + case _UT('_'): + case _UT('~'): + case _UT('+'): + case _UT('='): + case URI_SET_DIGIT: + case URI_SET_ALPHA: + { + const URI_CHAR * const afterPchar + = URI_FUNC(ParsePchar)(state, first, afterLast); + if (afterPchar == NULL) { + return NULL; + } + return URI_FUNC(ParseSegment)(state, afterPchar, afterLast); + } + + default: + return first; + } +} + + + +/* + * [segmentNz]->[pchar][segment] + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseSegmentNz)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + const URI_CHAR * const afterPchar + = URI_FUNC(ParsePchar)(state, first, afterLast); + if (afterPchar == NULL) { + return NULL; + } + return URI_FUNC(ParseSegment)(state, afterPchar, afterLast); +} + + + +static URI_INLINE UriBool URI_FUNC(OnExitSegmentNzNcOrScheme2)(URI_TYPE(ParserState) * state, const URI_CHAR * first) { + if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first)) { /* SEGMENT BOTH */ + return URI_FALSE; /* Raises malloc error*/ + } + state->uri->scheme.first = NULL; /* Not a scheme, reset */ + return URI_TRUE; /* Success */ +} + + + +/* + * [segmentNzNcOrScheme2]->[ALPHA][segmentNzNcOrScheme2] + * [segmentNzNcOrScheme2]->[DIGIT][segmentNzNcOrScheme2] + * [segmentNzNcOrScheme2]->[pctEncoded][mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]->[uriTail] // can take + * [segmentNzNcOrScheme2]->[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><$>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><&>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><(>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><)>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><*>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><,>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><.>[segmentNzNcOrScheme2] + * [segmentNzNcOrScheme2]->[segment][zeroMoreSlashSegs][uriTail] + * [segmentNzNcOrScheme2]-><:>[hierPart][uriTail] + * [segmentNzNcOrScheme2]-><;>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><@>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><_>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><~>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><+>[segmentNzNcOrScheme2] + * [segmentNzNcOrScheme2]-><=>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><'>[mustBeSegmentNzNc] + * [segmentNzNcOrScheme2]-><->[segmentNzNcOrScheme2] + */ +static const URI_CHAR * URI_FUNC(ParseSegmentNzNcOrScheme2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + if (!URI_FUNC(OnExitSegmentNzNcOrScheme2)(state, first)) { + URI_FUNC(StopMalloc)(state); + return NULL; + } + return afterLast; + } + + switch (*first) { + case _UT('.'): + case _UT('+'): + case _UT('-'): + case URI_SET_ALPHA: + case URI_SET_DIGIT: + return URI_FUNC(ParseSegmentNzNcOrScheme2)(state, first + 1, afterLast); + + case _UT('%'): + { + const URI_CHAR * const afterPctEncoded + = URI_FUNC(ParsePctEncoded)(state, first, afterLast); + if (afterPctEncoded == NULL) { + return NULL; + } + return URI_FUNC(ParseMustBeSegmentNzNc)(state, afterPctEncoded, afterLast); + } + + case _UT('!'): + case _UT('$'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('*'): + case _UT(','): + case _UT(';'): + case _UT('@'): + case _UT('_'): + case _UT('~'): + case _UT('='): + case _UT('\''): + return URI_FUNC(ParseMustBeSegmentNzNc)(state, first + 1, afterLast); + + case _UT('/'): + { + const URI_CHAR * afterZeroMoreSlashSegs; + const URI_CHAR * const afterSegment + = URI_FUNC(ParseSegment)(state, first + 1, afterLast); + if (afterSegment == NULL) { + return NULL; + } + if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state); + return NULL; + } + state->uri->scheme.first = NULL; /* Not a scheme, reset */ + if (!URI_FUNC(PushPathSegment)(state, first + 1, afterSegment)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state); + return NULL; + } + afterZeroMoreSlashSegs + = URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegment, afterLast); + if (afterZeroMoreSlashSegs == NULL) { + return NULL; + } + return URI_FUNC(ParseUriTail)(state, afterZeroMoreSlashSegs, afterLast); + } + + case _UT(':'): + { + const URI_CHAR * const afterHierPart + = URI_FUNC(ParseHierPart)(state, first + 1, afterLast); + state->uri->scheme.afterLast = first; /* SCHEME END */ + if (afterHierPart == NULL) { + return NULL; + } + return URI_FUNC(ParseUriTail)(state, afterHierPart, afterLast); + } + + default: + if (!URI_FUNC(OnExitSegmentNzNcOrScheme2)(state, first)) { + URI_FUNC(StopMalloc)(state); + return NULL; + } + return URI_FUNC(ParseUriTail)(state, first, afterLast); + } +} + + + +/* + * [uriReference]->[ALPHA][segmentNzNcOrScheme2] + * [uriReference]->[DIGIT][mustBeSegmentNzNc] + * [uriReference]->[pctEncoded][mustBeSegmentNzNc] + * [uriReference]->[subDelims][mustBeSegmentNzNc] + * [uriReference]->[uriTail] // can take + * [uriReference]-><.>[mustBeSegmentNzNc] + * [uriReference]->[partHelperTwo][uriTail] + * [uriReference]-><@>[mustBeSegmentNzNc] + * [uriReference]-><_>[mustBeSegmentNzNc] + * [uriReference]-><~>[mustBeSegmentNzNc] + * [uriReference]-><->[mustBeSegmentNzNc] + */ +static const URI_CHAR * URI_FUNC(ParseUriReference)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case URI_SET_ALPHA: + state->uri->scheme.first = first; /* SCHEME BEGIN */ + return URI_FUNC(ParseSegmentNzNcOrScheme2)(state, first + 1, afterLast); + + case URI_SET_DIGIT: + case _UT('!'): + case _UT('$'): + case _UT('&'): + case _UT('('): + case _UT(')'): + case _UT('*'): + case _UT(','): + case _UT(';'): + case _UT('\''): + case _UT('+'): + case _UT('='): + case _UT('.'): + case _UT('_'): + case _UT('~'): + case _UT('-'): + case _UT('@'): + state->uri->scheme.first = first; /* SEGMENT BEGIN, ABUSE SCHEME POINTER */ + return URI_FUNC(ParseMustBeSegmentNzNc)(state, first + 1, afterLast); + + case _UT('%'): + { + const URI_CHAR * const afterPctEncoded + = URI_FUNC(ParsePctEncoded)(state, first, afterLast); + if (afterPctEncoded == NULL) { + return NULL; + } + state->uri->scheme.first = first; /* SEGMENT BEGIN, ABUSE SCHEME POINTER */ + return URI_FUNC(ParseMustBeSegmentNzNc)(state, afterPctEncoded, afterLast); + } + + case _UT('/'): + { + const URI_CHAR * const afterPartHelperTwo + = URI_FUNC(ParsePartHelperTwo)(state, first + 1, afterLast); + if (afterPartHelperTwo == NULL) { + return NULL; + } + return URI_FUNC(ParseUriTail)(state, afterPartHelperTwo, afterLast); + } + + default: + return URI_FUNC(ParseUriTail)(state, first, afterLast); + } +} + + + +/* + * [uriTail]-><#>[queryFrag] + * [uriTail]->[queryFrag][uriTailTwo] + * [uriTail]-> + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseUriTail)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('#'): + { + const URI_CHAR * const afterQueryFrag = URI_FUNC(ParseQueryFrag)(state, first + 1, afterLast); + if (afterQueryFrag == NULL) { + return NULL; + } + state->uri->fragment.first = first + 1; /* FRAGMENT BEGIN */ + state->uri->fragment.afterLast = afterQueryFrag; /* FRAGMENT END */ + return afterQueryFrag; + } + + case _UT('?'): + { + const URI_CHAR * const afterQueryFrag + = URI_FUNC(ParseQueryFrag)(state, first + 1, afterLast); + if (afterQueryFrag == NULL) { + return NULL; + } + state->uri->query.first = first + 1; /* QUERY BEGIN */ + state->uri->query.afterLast = afterQueryFrag; /* QUERY END */ + return URI_FUNC(ParseUriTailTwo)(state, afterQueryFrag, afterLast); + } + + default: + return first; + } +} + + + +/* + * [uriTailTwo]-><#>[queryFrag] + * [uriTailTwo]-> + */ +static URI_INLINE const URI_CHAR * URI_FUNC(ParseUriTailTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('#'): + { + const URI_CHAR * const afterQueryFrag = URI_FUNC(ParseQueryFrag)(state, first + 1, afterLast); + if (afterQueryFrag == NULL) { + return NULL; + } + state->uri->fragment.first = first + 1; /* FRAGMENT BEGIN */ + state->uri->fragment.afterLast = afterQueryFrag; /* FRAGMENT END */ + return afterQueryFrag; + } + + default: + return first; + } +} + + + +/* + * [zeroMoreSlashSegs]->[segment][zeroMoreSlashSegs] + * [zeroMoreSlashSegs]-> + */ +static const URI_CHAR * URI_FUNC(ParseZeroMoreSlashSegs)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + if (first >= afterLast) { + return afterLast; + } + + switch (*first) { + case _UT('/'): + { + const URI_CHAR * const afterSegment + = URI_FUNC(ParseSegment)(state, first + 1, afterLast); + if (afterSegment == NULL) { + return NULL; + } + if (!URI_FUNC(PushPathSegment)(state, first + 1, afterSegment)) { /* SEGMENT BOTH */ + URI_FUNC(StopMalloc)(state); + return NULL; + } + return URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegment, afterLast); + } + + default: + return first; + } +} + + + +static URI_INLINE void URI_FUNC(ResetParserState)(URI_TYPE(ParserState) * state) { + URI_TYPE(Uri) * const uriBackup = state->uri; + memset(state, 0, sizeof(URI_TYPE(ParserState))); + state->uri = uriBackup; +} + + + +static URI_INLINE UriBool URI_FUNC(PushPathSegment)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + URI_TYPE(PathSegment) * segment = malloc(1 * sizeof(URI_TYPE(PathSegment))); + if (segment == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + memset(segment, 0, sizeof(URI_TYPE(PathSegment))); + if (first == afterLast) { + segment->text.first = URI_FUNC(SafeToPointTo); + segment->text.afterLast = URI_FUNC(SafeToPointTo); + } else { + segment->text.first = first; + segment->text.afterLast = afterLast; + } + + /* First segment ever? */ + if (state->uri->pathHead == NULL) { + /* First segment ever, set head and tail */ + state->uri->pathHead = segment; + state->uri->pathTail = segment; + } else { + /* Append, update tail */ + state->uri->pathTail->next = segment; + state->uri->pathTail = segment; + } + + return URI_TRUE; /* Success */ +} + + + +int URI_FUNC(ParseUriEx)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) { + const URI_CHAR * afterUriReference; + URI_TYPE(Uri) * uri; + + /* Check params */ + if ((state == NULL) || (first == NULL) || (afterLast == NULL)) { + return URI_ERROR_NULL; + } + uri = state->uri; + + /* Init parser */ + URI_FUNC(ResetParserState)(state); + URI_FUNC(ResetUri)(uri); + + /* Parse */ + afterUriReference = URI_FUNC(ParseUriReference)(state, first, afterLast); + if (afterUriReference == NULL) { + return state->errorCode; + } + if (afterUriReference != afterLast) { + URI_FUNC(StopSyntax)(state, afterUriReference); + return state->errorCode; + } + return URI_SUCCESS; +} + + + +int URI_FUNC(ParseUri)(URI_TYPE(ParserState) * state, const URI_CHAR * text) { + if ((state == NULL) || (text == NULL)) { + return URI_ERROR_NULL; + } + return URI_FUNC(ParseUriEx)(state, text, text + URI_STRLEN(text)); +} + + + +void URI_FUNC(FreeUriMembers)(URI_TYPE(Uri) * uri) { + if (uri == NULL) { + return; + } + + if (uri->owner) { + /* Scheme */ + if (uri->scheme.first != NULL) { + if (uri->scheme.first != uri->scheme.afterLast) { + free((URI_CHAR *)uri->scheme.first); + } + uri->scheme.first = NULL; + uri->scheme.afterLast = NULL; + } + + /* User info */ + if (uri->userInfo.first != NULL) { + if (uri->userInfo.first != uri->userInfo.afterLast) { + free((URI_CHAR *)uri->userInfo.first); + } + uri->userInfo.first = NULL; + uri->userInfo.afterLast = NULL; + } + + /* Host data - IPvFuture */ + if (uri->hostData.ipFuture.first != NULL) { + if (uri->hostData.ipFuture.first != uri->hostData.ipFuture.afterLast) { + free((URI_CHAR *)uri->hostData.ipFuture.first); + } + uri->hostData.ipFuture.first = NULL; + uri->hostData.ipFuture.afterLast = NULL; + uri->hostText.first = NULL; + uri->hostText.afterLast = NULL; + } + + /* Host text (if regname, after IPvFuture!) */ + if ((uri->hostText.first != NULL) + && (uri->hostData.ip4 == NULL) + && (uri->hostData.ip6 == NULL)) { + /* Real regname */ + if (uri->hostText.first != uri->hostText.afterLast) { + free((URI_CHAR *)uri->hostText.first); + } + uri->hostText.first = NULL; + uri->hostText.afterLast = NULL; + } + } + + /* Host data - IPv4 */ + if (uri->hostData.ip4 != NULL) { + free(uri->hostData.ip4); + uri->hostData.ip4 = NULL; + } + + /* Host data - IPv6 */ + if (uri->hostData.ip6 != NULL) { + free(uri->hostData.ip6); + uri->hostData.ip6 = NULL; + } + + /* Port text */ + if (uri->owner && (uri->portText.first != NULL)) { + if (uri->portText.first != uri->portText.afterLast) { + free((URI_CHAR *)uri->portText.first); + } + uri->portText.first = NULL; + uri->portText.afterLast = NULL; + } + + /* Path */ + if (uri->pathHead != NULL) { + URI_TYPE(PathSegment) * segWalk = uri->pathHead; + while (segWalk != NULL) { + URI_TYPE(PathSegment) * const next = segWalk->next; + if (uri->owner && (segWalk->text.first != NULL) + && (segWalk->text.first < segWalk->text.afterLast)) { + free((URI_CHAR *)segWalk->text.first); + } + free(segWalk); + segWalk = next; + } + uri->pathHead = NULL; + uri->pathTail = NULL; + } + + if (uri->owner) { + /* Query */ + if (uri->query.first != NULL) { + if (uri->query.first != uri->query.afterLast) { + free((URI_CHAR *)uri->query.first); + } + uri->query.first = NULL; + uri->query.afterLast = NULL; + } + + /* Fragment */ + if (uri->fragment.first != NULL) { + if (uri->fragment.first != uri->fragment.afterLast) { + free((URI_CHAR *)uri->fragment.first); + } + uri->fragment.first = NULL; + uri->fragment.afterLast = NULL; + } + } +} + + + +UriBool URI_FUNC(_TESTING_ONLY_ParseIpSix)(const URI_CHAR * text) { + URI_TYPE(Uri) uri; + URI_TYPE(ParserState) parser; + const URI_CHAR * const afterIpSix = text + URI_STRLEN(text); + const URI_CHAR * res; + + URI_FUNC(ResetParserState)(&parser); + URI_FUNC(ResetUri)(&uri); + parser.uri = &uri; + parser.uri->hostData.ip6 = malloc(1 * sizeof(UriIp6)); + res = URI_FUNC(ParseIPv6address2)(&parser, text, afterIpSix); + URI_FUNC(FreeUriMembers)(&uri); + return res == afterIpSix ? URI_TRUE : URI_FALSE; +} + + + +UriBool URI_FUNC(_TESTING_ONLY_ParseIpFour)(const URI_CHAR * text) { + unsigned char octets[4]; + int res = URI_FUNC(ParseIpFourAddress)(octets, text, text + URI_STRLEN(text)); + return (res == URI_SUCCESS) ? URI_TRUE : URI_FALSE; +} + + + +#undef URI_SET_DIGIT +#undef URI_SET_HEX_LETTER_UPPER +#undef URI_SET_HEX_LETTER_LOWER +#undef URI_SET_HEXDIG +#undef URI_SET_ALPHA + + + +#endif diff --git a/plugins/uriparser/UriParseBase.c b/plugins/uriparser/UriParseBase.c new file mode 100644 index 00000000..75e47613 --- /dev/null +++ b/plugins/uriparser/UriParseBase.c @@ -0,0 +1,90 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef URI_DOXYGEN +# include "UriParseBase.h" +#endif + + + +void uriWriteQuadToDoubleByte(const unsigned char * hexDigits, int digitCount, unsigned char * output) { + switch (digitCount) { + case 1: + /* 0x___? -> \x00 \x0? */ + output[0] = 0; + output[1] = hexDigits[0]; + break; + + case 2: + /* 0x__?? -> \0xx \x?? */ + output[0] = 0; + output[1] = 16 * hexDigits[0] + hexDigits[1]; + break; + + case 3: + /* 0x_??? -> \0x? \x?? */ + output[0] = hexDigits[0]; + output[1] = 16 * hexDigits[1] + hexDigits[2]; + break; + + case 4: + /* 0x???? -> \0?? \x?? */ + output[0] = 16 * hexDigits[0] + hexDigits[1]; + output[1] = 16 * hexDigits[2] + hexDigits[3]; + break; + + } +} + + + +unsigned char uriGetOctetValue(const unsigned char * digits, int digitCount) { + switch (digitCount) { + case 1: + return digits[0]; + + case 2: + return 10 * digits[0] + digits[1]; + + case 3: + default: + return 100 * digits[0] + 10 * digits[1] + digits[2]; + + } +} diff --git a/plugins/uriparser/UriParseBase.h b/plugins/uriparser/UriParseBase.h new file mode 100644 index 00000000..dea5c81c --- /dev/null +++ b/plugins/uriparser/UriParseBase.h @@ -0,0 +1,55 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef URI_PARSE_BASE_H +#define URI_PARSE_BASE_H 1 + + + +#include + + + +void uriWriteQuadToDoubleByte(const unsigned char * hexDigits, int digitCount, + unsigned char * output); +unsigned char uriGetOctetValue(const unsigned char * digits, int digitCount); + + + +#endif /* URI_PARSE_BASE_H */ diff --git a/plugins/uriparser/UriQuery.c b/plugins/uriparser/UriQuery.c new file mode 100644 index 00000000..7adb0736 --- /dev/null +++ b/plugins/uriparser/UriQuery.c @@ -0,0 +1,460 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriQuery.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriQuery.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include "UriCommon.h" +#endif + + + +static int URI_FUNC(ComposeQueryEngine)(URI_CHAR * dest, + const URI_TYPE(QueryList) * queryList, + int maxChars, int * charsWritten, int * charsRequired, + UriBool spaceToPlus, UriBool normalizeBreaks); + +static UriBool URI_FUNC(AppendQueryItem)(URI_TYPE(QueryList) ** prevNext, + int * itemCount, const URI_CHAR * keyFirst, const URI_CHAR * keyAfter, + const URI_CHAR * valueFirst, const URI_CHAR * valueAfter, + UriBool plusToSpace, UriBreakConversion breakConversion); + + + +int URI_FUNC(ComposeQueryCharsRequired)(const URI_TYPE(QueryList) * queryList, + int * charsRequired) { + const UriBool spaceToPlus = URI_TRUE; + const UriBool normalizeBreaks = URI_TRUE; + + return URI_FUNC(ComposeQueryCharsRequiredEx)(queryList, charsRequired, + spaceToPlus, normalizeBreaks); +} + + + +int URI_FUNC(ComposeQueryCharsRequiredEx)(const URI_TYPE(QueryList) * queryList, + int * charsRequired, UriBool spaceToPlus, UriBool normalizeBreaks) { + if ((queryList == NULL) || (charsRequired == NULL)) { + return URI_ERROR_NULL; + } + + return URI_FUNC(ComposeQueryEngine)(NULL, queryList, 0, NULL, + charsRequired, spaceToPlus, normalizeBreaks); +} + + + +int URI_FUNC(ComposeQuery)(URI_CHAR * dest, + const URI_TYPE(QueryList) * queryList, int maxChars, int * charsWritten) { + const UriBool spaceToPlus = URI_TRUE; + const UriBool normalizeBreaks = URI_TRUE; + + return URI_FUNC(ComposeQueryEx)(dest, queryList, maxChars, charsWritten, + spaceToPlus, normalizeBreaks); +} + + + +int URI_FUNC(ComposeQueryEx)(URI_CHAR * dest, + const URI_TYPE(QueryList) * queryList, int maxChars, int * charsWritten, + UriBool spaceToPlus, UriBool normalizeBreaks) { + if ((dest == NULL) || (queryList == NULL)) { + return URI_ERROR_NULL; + } + + if (maxChars < 1) { + return URI_ERROR_OUTPUT_TOO_LARGE; + } + + return URI_FUNC(ComposeQueryEngine)(dest, queryList, maxChars, + charsWritten, NULL, spaceToPlus, normalizeBreaks); +} + + + +int URI_FUNC(ComposeQueryMalloc)(URI_CHAR ** dest, + const URI_TYPE(QueryList) * queryList) { + const UriBool spaceToPlus = URI_TRUE; + const UriBool normalizeBreaks = URI_TRUE; + + return URI_FUNC(ComposeQueryMallocEx)(dest, queryList, + spaceToPlus, normalizeBreaks); +} + + + +int URI_FUNC(ComposeQueryMallocEx)(URI_CHAR ** dest, + const URI_TYPE(QueryList) * queryList, + UriBool spaceToPlus, UriBool normalizeBreaks) { + int charsRequired; + int res; + URI_CHAR * queryString; + + if (dest == NULL) { + return URI_ERROR_NULL; + } + + /* Calculate space */ + res = URI_FUNC(ComposeQueryCharsRequiredEx)(queryList, &charsRequired, + spaceToPlus, normalizeBreaks); + if (res != URI_SUCCESS) { + return res; + } + charsRequired++; + + /* Allocate space */ + queryString = malloc(charsRequired * sizeof(URI_CHAR)); + if (queryString == NULL) { + return URI_ERROR_MALLOC; + } + + /* Put query in */ + res = URI_FUNC(ComposeQueryEx)(queryString, queryList, charsRequired, + NULL, spaceToPlus, normalizeBreaks); + if (res != URI_SUCCESS) { + free(queryString); + return res; + } + + *dest = queryString; + return URI_SUCCESS; +} + + + +int URI_FUNC(ComposeQueryEngine)(URI_CHAR * dest, + const URI_TYPE(QueryList) * queryList, + int maxChars, int * charsWritten, int * charsRequired, + UriBool spaceToPlus, UriBool normalizeBreaks) { + UriBool firstItem = URI_TRUE; + int ampersandLen = 0; + URI_CHAR * write = dest; + + /* Subtract terminator */ + if (dest == NULL) { + *charsRequired = 0; + } else { + maxChars--; + } + + while (queryList != NULL) { + const URI_CHAR * const key = queryList->key; + const URI_CHAR * const value = queryList->value; + const int worstCase = (normalizeBreaks == URI_TRUE ? 6 : 3); + const int keyLen = (key == NULL) ? 0 : (int)URI_STRLEN(key); + const int keyRequiredChars = worstCase * keyLen; + const int valueLen = (value == NULL) ? 0 : (int)URI_STRLEN(value); + const int valueRequiredChars = worstCase * valueLen; + + if (dest == NULL) { + if (firstItem == URI_TRUE) { + ampersandLen = 1; + firstItem = URI_FALSE; + } + + (*charsRequired) += ampersandLen + keyRequiredChars + ((value == NULL) + ? 0 + : 1 + valueRequiredChars); + } else { + URI_CHAR * afterKey; + + if ((write - dest) + ampersandLen + keyRequiredChars > maxChars) { + return URI_ERROR_OUTPUT_TOO_LARGE; + } + + /* Copy key */ + if (firstItem == URI_TRUE) { + firstItem = URI_FALSE; + } else { + write[0] = _UT('&'); + write++; + } + afterKey = URI_FUNC(EscapeEx)(key, key + keyLen, + write, spaceToPlus, normalizeBreaks); + write += (afterKey - write); + + if (value != NULL) { + URI_CHAR * afterValue; + + if ((write - dest) + 1 + valueRequiredChars > maxChars) { + return URI_ERROR_OUTPUT_TOO_LARGE; + } + + /* Copy value */ + write[0] = _UT('='); + write++; + afterValue = URI_FUNC(EscapeEx)(value, value + valueLen, + write, spaceToPlus, normalizeBreaks); + write += (afterValue - write); + } + } + + queryList = queryList->next; + } + + if (dest != NULL) { + write[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = (int)(write - dest) + 1; /* .. for terminator */ + } + } + + return URI_SUCCESS; +} + + + +UriBool URI_FUNC(AppendQueryItem)(URI_TYPE(QueryList) ** prevNext, + int * itemCount, const URI_CHAR * keyFirst, const URI_CHAR * keyAfter, + const URI_CHAR * valueFirst, const URI_CHAR * valueAfter, + UriBool plusToSpace, UriBreakConversion breakConversion) { + const int keyLen = (int)(keyAfter - keyFirst); + const int valueLen = (int)(valueAfter - valueFirst); + URI_CHAR * key; + URI_CHAR * value; + + if ((prevNext == NULL) || (itemCount == NULL) + || (keyFirst == NULL) || (keyAfter == NULL) + || (keyFirst > keyAfter) || (valueFirst > valueAfter) + || ((keyFirst == keyAfter) + && (valueFirst == NULL) && (valueAfter == NULL))) { + return URI_TRUE; + } + + /* Append new empty item */ + *prevNext = malloc(1 * sizeof(URI_TYPE(QueryList))); + if (*prevNext == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + (*prevNext)->next = NULL; + + + /* Fill key */ + key = malloc((keyLen + 1) * sizeof(URI_CHAR)); + if (key == NULL) { + free(*prevNext); + *prevNext = NULL; + return URI_FALSE; /* Raises malloc error */ + } + + key[keyLen] = _UT('\0'); + if (keyLen > 0) { + /* Copy 1:1 */ + memcpy(key, keyFirst, keyLen * sizeof(URI_CHAR)); + + /* Unescape */ + URI_FUNC(UnescapeInPlaceEx)(key, plusToSpace, breakConversion); + } + (*prevNext)->key = key; + + + /* Fill value */ + if (valueFirst != NULL) { + value = malloc((valueLen + 1) * sizeof(URI_CHAR)); + if (value == NULL) { + free(key); + free(*prevNext); + *prevNext = NULL; + return URI_FALSE; /* Raises malloc error */ + } + + value[valueLen] = _UT('\0'); + if (valueLen > 0) { + /* Copy 1:1 */ + memcpy(value, valueFirst, valueLen * sizeof(URI_CHAR)); + + /* Unescape */ + URI_FUNC(UnescapeInPlaceEx)(value, plusToSpace, breakConversion); + } + (*prevNext)->value = value; + } else { + value = NULL; + } + (*prevNext)->value = value; + + (*itemCount)++; + return URI_TRUE; +} + + + +void URI_FUNC(FreeQueryList)(URI_TYPE(QueryList) * queryList) { + while (queryList != NULL) { + URI_TYPE(QueryList) * nextBackup = queryList->next; + free((URI_CHAR *)queryList->key); /* const cast */ + free((URI_CHAR *)queryList->value); /* const cast */ + free(queryList); + queryList = nextBackup; + } +} + + + +int URI_FUNC(DissectQueryMalloc)(URI_TYPE(QueryList) ** dest, int * itemCount, + const URI_CHAR * first, const URI_CHAR * afterLast) { + const UriBool plusToSpace = URI_TRUE; + const UriBreakConversion breakConversion = URI_BR_DONT_TOUCH; + + return URI_FUNC(DissectQueryMallocEx)(dest, itemCount, first, afterLast, + plusToSpace, breakConversion); +} + + + +int URI_FUNC(DissectQueryMallocEx)(URI_TYPE(QueryList) ** dest, int * itemCount, + const URI_CHAR * first, const URI_CHAR * afterLast, + UriBool plusToSpace, UriBreakConversion breakConversion) { + const URI_CHAR * walk = first; + const URI_CHAR * keyFirst = first; + const URI_CHAR * keyAfter = NULL; + const URI_CHAR * valueFirst = NULL; + const URI_CHAR * valueAfter = NULL; + URI_TYPE(QueryList) ** prevNext = dest; + int nullCounter; + int * itemsAppended = (itemCount == NULL) ? &nullCounter : itemCount; + + if ((dest == NULL) || (first == NULL) || (afterLast == NULL)) { + return URI_ERROR_NULL; + } + + if (first > afterLast) { + return URI_ERROR_RANGE_INVALID; + } + + *dest = NULL; + *itemsAppended = 0; + + /* Parse query string */ + for (; walk < afterLast; walk++) { + switch (*walk) { + case _UT('&'): + if (valueFirst != NULL) { + valueAfter = walk; + } else { + keyAfter = walk; + } + + if (URI_FUNC(AppendQueryItem)(prevNext, itemsAppended, + keyFirst, keyAfter, valueFirst, valueAfter, + plusToSpace, breakConversion) + == URI_FALSE) { + /* Free list we built */ + *itemsAppended = 0; + URI_FUNC(FreeQueryList)(*dest); + return URI_ERROR_MALLOC; + } + + /* Make future items children of the current */ + if ((prevNext != NULL) && (*prevNext != NULL)) { + prevNext = &((*prevNext)->next); + } + + if (walk + 1 < afterLast) { + keyFirst = walk + 1; + } else { + keyFirst = NULL; + } + keyAfter = NULL; + valueFirst = NULL; + valueAfter = NULL; + break; + + case _UT('='): + /* NOTE: WE treat the first '=' as a separator, */ + /* all following go into the value part */ + if (keyAfter == NULL) { + keyAfter = walk; + if (walk + 1 <= afterLast) { + valueFirst = walk + 1; + valueAfter = walk + 1; + } + } + break; + + default: + break; + } + } + + if (valueFirst != NULL) { + /* Must be key/value pair */ + valueAfter = walk; + } else { + /* Must be key only */ + keyAfter = walk; + } + + if (URI_FUNC(AppendQueryItem)(prevNext, itemsAppended, keyFirst, keyAfter, + valueFirst, valueAfter, plusToSpace, breakConversion) + == URI_FALSE) { + /* Free list we built */ + *itemsAppended = 0; + URI_FUNC(FreeQueryList)(*dest); + return URI_ERROR_MALLOC; + } + + return URI_SUCCESS; +} + + + +#endif diff --git a/plugins/uriparser/UriRecompose.c b/plugins/uriparser/UriRecompose.c new file mode 100644 index 00000000..09daee07 --- /dev/null +++ b/plugins/uriparser/UriRecompose.c @@ -0,0 +1,577 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriRecompose.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriRecompose.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include "UriCommon.h" +#endif + + + +static int URI_FUNC(ToStringEngine)(URI_CHAR * dest, const URI_TYPE(Uri) * uri, + int maxChars, int * charsWritten, int * charsRequired); + + + +int URI_FUNC(ToStringCharsRequired)(const URI_TYPE(Uri) * uri, + int * charsRequired) { + const int MAX_CHARS = ((unsigned int)-1) >> 1; + return URI_FUNC(ToStringEngine)(NULL, uri, MAX_CHARS, NULL, charsRequired); +} + + + +int URI_FUNC(ToString)(URI_CHAR * dest, const URI_TYPE(Uri) * uri, + int maxChars, int * charsWritten) { + return URI_FUNC(ToStringEngine)(dest, uri, maxChars, charsWritten, NULL); +} + + + +static URI_INLINE int URI_FUNC(ToStringEngine)(URI_CHAR * dest, + const URI_TYPE(Uri) * uri, int maxChars, int * charsWritten, + int * charsRequired) { + int written = 0; + if ((uri == NULL) || ((dest == NULL) && (charsRequired == NULL))) { + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_NULL; + } + + if (maxChars < 1) { + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + maxChars--; /* So we don't have to substract 1 for '\0' all the time */ + + /* [01/19] result = "" */ + if (dest != NULL) { + dest[0] = _UT('\0'); + } else { + (*charsRequired) = 0; + } + /* [02/19] if defined(scheme) then */ + if (uri->scheme.first != NULL) { + /* [03/19] append scheme to result; */ + const int charsToWrite + = (int)(uri->scheme.afterLast - uri->scheme.first); + if (dest != NULL) { + if (written + charsToWrite <= maxChars) { + memcpy(dest + written, uri->scheme.first, + charsToWrite * sizeof(URI_CHAR)); + written += charsToWrite; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += charsToWrite; + } + /* [04/19] append ":" to result; */ + if (dest != NULL) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT(":"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1; + } + /* [05/19] endif; */ + } + /* [06/19] if defined(authority) then */ + if (URI_FUNC(IsHostSet)(uri)) { + /* [07/19] append "//" to result; */ + if (dest != NULL) { + if (written + 2 <= maxChars) { + memcpy(dest + written, _UT("//"), + 2 * sizeof(URI_CHAR)); + written += 2; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 2; + } + /* [08/19] append authority to result; */ + /* UserInfo */ + if (uri->userInfo.first != NULL) { + const int charsToWrite = (int)(uri->userInfo.afterLast - uri->userInfo.first); + if (dest != NULL) { + if (written + charsToWrite <= maxChars) { + memcpy(dest + written, uri->userInfo.first, + charsToWrite * sizeof(URI_CHAR)); + written += charsToWrite; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("@"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += charsToWrite + 1; + } + } + + /* Host */ + if (uri->hostData.ip4 != NULL) { + /* IPv4 */ + int i = 0; + for (; i < 4; i++) { + const unsigned char value = uri->hostData.ip4->data[i]; + const int charsToWrite = (value > 99) ? 3 : ((value > 9) ? 2 : 1); + if (dest != NULL) { + if (written + charsToWrite <= maxChars) { + URI_CHAR text[4]; + if (value > 99) { + text[0] = _UT('0') + (value / 100); + text[1] = _UT('0') + ((value % 100) / 10); + text[2] = _UT('0') + (value % 10); + } else if (value > 9) { + text[0] = _UT('0') + (value / 10); + text[1] = _UT('0') + (value % 10); + } else { + text[0] = _UT('0') + value; + } + text[charsToWrite] = _UT('\0'); + memcpy(dest + written, text, charsToWrite * sizeof(URI_CHAR)); + written += charsToWrite; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + if (i < 3) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("."), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } + } else { + (*charsRequired) += charsToWrite + 1; + } + } + } else if (uri->hostData.ip6 != NULL) { + /* IPv6 */ + int i = 0; + if (dest != NULL) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("["), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1; + } + + for (; i < 16; i++) { + const unsigned char value = uri->hostData.ip6->data[i]; + if (dest != NULL) { + if (written + 2 <= maxChars) { + URI_CHAR text[3]; + text[0] = URI_FUNC(HexToLetterEx)(value / 16, URI_FALSE); + text[1] = URI_FUNC(HexToLetterEx)(value % 16, URI_FALSE); + text[2] = _UT('\0'); + memcpy(dest + written, text, 2 * sizeof(URI_CHAR)); + written += 2; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 2; + } + if (((i & 1) == 1) && (i < 15)) { + if (dest != NULL) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT(":"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1; + } + } + } + + if (dest != NULL) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("]"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1; + } + } else if (uri->hostData.ipFuture.first != NULL) { + /* IPvFuture */ + const int charsToWrite = (int)(uri->hostData.ipFuture.afterLast + - uri->hostData.ipFuture.first); + if (dest != NULL) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("["), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + + if (written + charsToWrite <= maxChars) { + memcpy(dest + written, uri->hostData.ipFuture.first, charsToWrite * sizeof(URI_CHAR)); + written += charsToWrite; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("]"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1 + charsToWrite + 1; + } + } else if (uri->hostText.first != NULL) { + /* Regname */ + const int charsToWrite = (int)(uri->hostText.afterLast - uri->hostText.first); + if (dest != NULL) { + if (written + charsToWrite <= maxChars) { + memcpy(dest + written, uri->hostText.first, + charsToWrite * sizeof(URI_CHAR)); + written += charsToWrite; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += charsToWrite; + } + } + + /* Port */ + if (uri->portText.first != NULL) { + const int charsToWrite = (int)(uri->portText.afterLast - uri->portText.first); + if (dest != NULL) { + /* Leading ':' */ + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT(":"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + + /* Port number */ + if (written + charsToWrite <= maxChars) { + memcpy(dest + written, uri->portText.first, + charsToWrite * sizeof(URI_CHAR)); + written += charsToWrite; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1 + charsToWrite; + } + } + /* [09/19] endif; */ + } + /* [10/19] append path to result; */ + /* Slash needed here? */ + if (uri->absolutePath || ((uri->pathHead != NULL) + && URI_FUNC(IsHostSet)(uri))) { + if (dest != NULL) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("/"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1; + } + } + + if (uri->pathHead != NULL) { + URI_TYPE(PathSegment) * walker = uri->pathHead; + do { + const int charsToWrite = (int)(walker->text.afterLast - walker->text.first); + if (dest != NULL) { + if (written + charsToWrite <= maxChars) { + memcpy(dest + written, walker->text.first, + charsToWrite * sizeof(URI_CHAR)); + written += charsToWrite; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += charsToWrite; + } + + /* Not last segment -> append slash */ + if (walker->next != NULL) { + if (dest != NULL) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("/"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1; + } + } + + walker = walker->next; + } while (walker != NULL); + } + /* [11/19] if defined(query) then */ + if (uri->query.first != NULL) { + /* [12/19] append "?" to result; */ + if (dest != NULL) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("?"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1; + } + /* [13/19] append query to result; */ + { + const int charsToWrite + = (int)(uri->query.afterLast - uri->query.first); + if (dest != NULL) { + if (written + charsToWrite <= maxChars) { + memcpy(dest + written, uri->query.first, + charsToWrite * sizeof(URI_CHAR)); + written += charsToWrite; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += charsToWrite; + } + } + /* [14/19] endif; */ + } + /* [15/19] if defined(fragment) then */ + if (uri->fragment.first != NULL) { + /* [16/19] append "#" to result; */ + if (dest != NULL) { + if (written + 1 <= maxChars) { + memcpy(dest + written, _UT("#"), + 1 * sizeof(URI_CHAR)); + written += 1; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += 1; + } + /* [17/19] append fragment to result; */ + { + const int charsToWrite + = (int)(uri->fragment.afterLast - uri->fragment.first); + if (dest != NULL) { + if (written + charsToWrite <= maxChars) { + memcpy(dest + written, uri->fragment.first, + charsToWrite * sizeof(URI_CHAR)); + written += charsToWrite; + } else { + dest[0] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = 0; + } + return URI_ERROR_TOSTRING_TOO_LONG; + } + } else { + (*charsRequired) += charsToWrite; + } + } + /* [18/19] endif; */ + } + /* [19/19] return result; */ + if (dest != NULL) { + dest[written++] = _UT('\0'); + if (charsWritten != NULL) { + *charsWritten = written; + } + } + return URI_SUCCESS; +} + + + +#endif diff --git a/plugins/uriparser/UriResolve.c b/plugins/uriparser/UriResolve.c new file mode 100644 index 00000000..3660b6b2 --- /dev/null +++ b/plugins/uriparser/UriResolve.c @@ -0,0 +1,316 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriResolve.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriResolve.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include "UriCommon.h" +#endif + + + +/* Appends a relative URI to an absolute. The last path segment of + * the absolute URI is replaced. */ +static URI_INLINE UriBool URI_FUNC(MergePath)(URI_TYPE(Uri) * absWork, + const URI_TYPE(Uri) * relAppend) { + URI_TYPE(PathSegment) * sourceWalker; + URI_TYPE(PathSegment) * destPrev; + if (relAppend->pathHead == NULL) { + return URI_TRUE; + } + + /* Replace last segment ("" if trailing slash) with first of append chain */ + if (absWork->pathHead == NULL) { + URI_TYPE(PathSegment) * const dup = malloc(sizeof(URI_TYPE(PathSegment))); + if (dup == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + dup->next = NULL; + absWork->pathHead = dup; + absWork->pathTail = dup; + } + absWork->pathTail->text.first = relAppend->pathHead->text.first; + absWork->pathTail->text.afterLast = relAppend->pathHead->text.afterLast; + + /* Append all the others */ + sourceWalker = relAppend->pathHead->next; + if (sourceWalker == NULL) { + return URI_TRUE; + } + destPrev = absWork->pathTail; + + for (;;) { + URI_TYPE(PathSegment) * const dup = malloc(sizeof(URI_TYPE(PathSegment))); + if (dup == NULL) { + destPrev->next = NULL; + absWork->pathTail = destPrev; + return URI_FALSE; /* Raises malloc error */ + } + dup->text = sourceWalker->text; + destPrev->next = dup; + + if (sourceWalker->next == NULL) { + absWork->pathTail = dup; + absWork->pathTail->next = NULL; + break; + } + destPrev = dup; + sourceWalker = sourceWalker->next; + } + + return URI_TRUE; +} + + +static int URI_FUNC(ResolveAbsolutePathFlag)(URI_TYPE(Uri) * absWork) { + if (absWork == NULL) { + return URI_ERROR_NULL; + } + + if (URI_FUNC(IsHostSet)(absWork) && absWork->absolutePath) { + /* Empty segment needed, instead? */ + if (absWork->pathHead == NULL) { + URI_TYPE(PathSegment) * const segment = malloc(sizeof(URI_TYPE(PathSegment))); + if (segment == NULL) { + return URI_ERROR_MALLOC; + } + segment->text.first = URI_FUNC(SafeToPointTo); + segment->text.afterLast = URI_FUNC(SafeToPointTo); + segment->next = NULL; + + absWork->pathHead = segment; + absWork->pathTail = segment; + } + + absWork->absolutePath = URI_FALSE; + } + + return URI_SUCCESS; +} + + +static int URI_FUNC(AddBaseUriImpl)(URI_TYPE(Uri) * absDest, + const URI_TYPE(Uri) * relSource, + const URI_TYPE(Uri) * absBase, + UriResolutionOptions options) { + if (absDest == NULL) { + return URI_ERROR_NULL; + } + URI_FUNC(ResetUri)(absDest); + + if ((relSource == NULL) || (absBase == NULL)) { + return URI_ERROR_NULL; + } + + /* absBase absolute? */ + if (absBase->scheme.first == NULL) { + return URI_ERROR_ADDBASE_REL_BASE; + } + + /* [00/32] -- A non-strict parser may ignore a scheme in the reference */ + /* [00/32] -- if it is identical to the base URI's scheme. */ + /* [00/32] if ((not strict) and (R.scheme == Base.scheme)) then */ + UriBool relSourceHasScheme = (relSource->scheme.first != NULL) ? URI_TRUE : URI_FALSE; + if ((options & URI_RESOLVE_IDENTICAL_SCHEME_COMPAT) + && (absBase->scheme.first != NULL) + && (relSource->scheme.first != NULL) + && (0 == URI_FUNC(CompareRange)(&(absBase->scheme), &(relSource->scheme)))) { + /* [00/32] undefine(R.scheme); */ + relSourceHasScheme = URI_FALSE; + /* [00/32] endif; */ + } + + /* [01/32] if defined(R.scheme) then */ + if (relSourceHasScheme) { + /* [02/32] T.scheme = R.scheme; */ + absDest->scheme = relSource->scheme; + /* [03/32] T.authority = R.authority; */ + if (!URI_FUNC(CopyAuthority)(absDest, relSource)) { + return URI_ERROR_MALLOC; + } + /* [04/32] T.path = remove_dot_segments(R.path); */ + if (!URI_FUNC(CopyPath)(absDest, relSource)) { + return URI_ERROR_MALLOC; + } + if (!URI_FUNC(RemoveDotSegmentsAbsolute)(absDest)) { + return URI_ERROR_MALLOC; + } + /* [05/32] T.query = R.query; */ + absDest->query = relSource->query; + /* [06/32] else */ + } else { + /* [07/32] if defined(R.authority) then */ + if (URI_FUNC(IsHostSet)(relSource)) { + /* [08/32] T.authority = R.authority; */ + if (!URI_FUNC(CopyAuthority)(absDest, relSource)) { + return URI_ERROR_MALLOC; + } + /* [09/32] T.path = remove_dot_segments(R.path); */ + if (!URI_FUNC(CopyPath)(absDest, relSource)) { + return URI_ERROR_MALLOC; + } + if (!URI_FUNC(RemoveDotSegmentsAbsolute)(absDest)) { + return URI_ERROR_MALLOC; + } + /* [10/32] T.query = R.query; */ + absDest->query = relSource->query; + /* [11/32] else */ + } else { + /* [28/32] T.authority = Base.authority; */ + if (!URI_FUNC(CopyAuthority)(absDest, absBase)) { + return URI_ERROR_MALLOC; + } + /* [12/32] if (R.path == "") then */ + if (relSource->pathHead == NULL && !relSource->absolutePath) { + /* [13/32] T.path = Base.path; */ + if (!URI_FUNC(CopyPath)(absDest, absBase)) { + return URI_ERROR_MALLOC; + } + /* [14/32] if defined(R.query) then */ + if (relSource->query.first != NULL) { + /* [15/32] T.query = R.query; */ + absDest->query = relSource->query; + /* [16/32] else */ + } else { + /* [17/32] T.query = Base.query; */ + absDest->query = absBase->query; + /* [18/32] endif; */ + } + /* [19/32] else */ + } else { + /* [20/32] if (R.path starts-with "/") then */ + if (relSource->absolutePath) { + int res; + /* [21/32] T.path = remove_dot_segments(R.path); */ + if (!URI_FUNC(CopyPath)(absDest, relSource)) { + return URI_ERROR_MALLOC; + } + res = URI_FUNC(ResolveAbsolutePathFlag)(absDest); + if (res != URI_SUCCESS) { + return res; + } + if (!URI_FUNC(RemoveDotSegmentsAbsolute)(absDest)) { + return URI_ERROR_MALLOC; + } + /* [22/32] else */ + } else { + /* [23/32] T.path = merge(Base.path, R.path); */ + if (!URI_FUNC(CopyPath)(absDest, absBase)) { + return URI_ERROR_MALLOC; + } + if (!URI_FUNC(MergePath)(absDest, relSource)) { + return URI_ERROR_MALLOC; + } + /* [24/32] T.path = remove_dot_segments(T.path); */ + if (!URI_FUNC(RemoveDotSegmentsAbsolute)(absDest)) { + return URI_ERROR_MALLOC; + } + + if (!URI_FUNC(FixAmbiguity)(absDest)) { + return URI_ERROR_MALLOC; + } + /* [25/32] endif; */ + } + /* [26/32] T.query = R.query; */ + absDest->query = relSource->query; + /* [27/32] endif; */ + } + URI_FUNC(FixEmptyTrailSegment)(absDest); + /* [29/32] endif; */ + } + /* [30/32] T.scheme = Base.scheme; */ + absDest->scheme = absBase->scheme; + /* [31/32] endif; */ + } + /* [32/32] T.fragment = R.fragment; */ + absDest->fragment = relSource->fragment; + + return URI_SUCCESS; + +} + + + +int URI_FUNC(AddBaseUri)(URI_TYPE(Uri) * absDest, + const URI_TYPE(Uri) * relSource, const URI_TYPE(Uri) * absBase) { + const int res = URI_FUNC(AddBaseUriImpl)(absDest, relSource, absBase, URI_RESOLVE_STRICTLY); + if ((res != URI_SUCCESS) && (absDest != NULL)) { + URI_FUNC(FreeUriMembers)(absDest); + } + return res; +} + + + +int URI_FUNC(AddBaseUriEx)(URI_TYPE(Uri) * absDest, + const URI_TYPE(Uri) * relSource, const URI_TYPE(Uri) * absBase, + UriResolutionOptions options) { + const int res = URI_FUNC(AddBaseUriImpl)(absDest, relSource, absBase, options); + if ((res != URI_SUCCESS) && (absDest != NULL)) { + URI_FUNC(FreeUriMembers)(absDest); + } + return res; +} + + + +#endif diff --git a/plugins/uriparser/UriShorten.c b/plugins/uriparser/UriShorten.c new file mode 100644 index 00000000..c839ae53 --- /dev/null +++ b/plugins/uriparser/UriShorten.c @@ -0,0 +1,320 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the nor the names of its + * contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriShorten.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriShorten.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include "UriCommon.h" +#endif + + + +static URI_INLINE UriBool URI_FUNC(AppendSegment)(URI_TYPE(Uri) * uri, + const URI_CHAR * first, const URI_CHAR * afterLast) { + /* Create segment */ + URI_TYPE(PathSegment) * segment = malloc(1 * sizeof(URI_TYPE(PathSegment))); + if (segment == NULL) { + return URI_FALSE; /* Raises malloc error */ + } + segment->next = NULL; + segment->text.first = first; + segment->text.afterLast = afterLast; + + /* Put into chain */ + if (uri->pathTail == NULL) { + uri->pathHead = segment; + } else { + uri->pathTail->next = segment; + } + uri->pathTail = segment; + + return URI_TRUE; +} + + + +static URI_INLINE UriBool URI_FUNC(EqualsAuthority)(const URI_TYPE(Uri) * first, + const URI_TYPE(Uri) * second) { + /* IPv4 */ + if (first->hostData.ip4 != NULL) { + return ((second->hostData.ip4 != NULL) + && !memcmp(first->hostData.ip4->data, + second->hostData.ip4->data, 4)) ? URI_TRUE : URI_FALSE; + } + + /* IPv6 */ + if (first->hostData.ip6 != NULL) { + return ((second->hostData.ip6 != NULL) + && !memcmp(first->hostData.ip6->data, + second->hostData.ip6->data, 16)) ? URI_TRUE : URI_FALSE; + } + + /* IPvFuture */ + if (first->hostData.ipFuture.first != NULL) { + return ((second->hostData.ipFuture.first != NULL) + && !URI_STRNCMP(first->hostData.ipFuture.first, + second->hostData.ipFuture.first, + first->hostData.ipFuture.afterLast + - first->hostData.ipFuture.first)) + ? URI_TRUE : URI_FALSE; + } + + if (first->hostText.first != NULL) { + return ((second->hostText.first != NULL) + && !URI_STRNCMP(first->hostText.first, + second->hostText.first, + first->hostText.afterLast + - first->hostText.first)) ? URI_TRUE : URI_FALSE; + } + + return (second->hostText.first == NULL); +} + + + +int URI_FUNC(RemoveBaseUriImpl)(URI_TYPE(Uri) * dest, + const URI_TYPE(Uri) * absSource, + const URI_TYPE(Uri) * absBase, + UriBool domainRootMode) { + if (dest == NULL) { + return URI_ERROR_NULL; + } + URI_FUNC(ResetUri)(dest); + + if ((absSource == NULL) || (absBase == NULL)) { + return URI_ERROR_NULL; + } + + /* absBase absolute? */ + if (absBase->scheme.first == NULL) { + return URI_ERROR_REMOVEBASE_REL_BASE; + } + + /* absSource absolute? */ + if (absSource->scheme.first == NULL) { + return URI_ERROR_REMOVEBASE_REL_SOURCE; + } + + /* [01/50] if (A.scheme != Base.scheme) then */ + if (URI_STRNCMP(absSource->scheme.first, absBase->scheme.first, + absSource->scheme.afterLast - absSource->scheme.first)) { + /* [02/50] T.scheme = A.scheme; */ + dest->scheme = absSource->scheme; + /* [03/50] T.authority = A.authority; */ + if (!URI_FUNC(CopyAuthority)(dest, absSource)) { + return URI_ERROR_MALLOC; + } + /* [04/50] T.path = A.path; */ + if (!URI_FUNC(CopyPath)(dest, absSource)) { + return URI_ERROR_MALLOC; + } + /* [05/50] else */ + } else { + /* [06/50] undef(T.scheme); */ + /* NOOP */ + /* [07/50] if (A.authority != Base.authority) then */ + if (!URI_FUNC(EqualsAuthority)(absSource, absBase)) { + /* [08/50] T.authority = A.authority; */ + if (!URI_FUNC(CopyAuthority)(dest, absSource)) { + return URI_ERROR_MALLOC; + } + /* [09/50] T.path = A.path; */ + if (!URI_FUNC(CopyPath)(dest, absSource)) { + return URI_ERROR_MALLOC; + } + /* [10/50] else */ + } else { + /* [11/50] if domainRootMode then */ + if (domainRootMode == URI_TRUE) { + /* [12/50] undef(T.authority); */ + /* NOOP */ + /* [13/50] if (first(A.path) == "") then */ + /* GROUPED */ + /* [14/50] T.path = "/." + A.path; */ + /* GROUPED */ + /* [15/50] else */ + /* GROUPED */ + /* [16/50] T.path = A.path; */ + /* GROUPED */ + /* [17/50] endif; */ + if (!URI_FUNC(CopyPath)(dest, absSource)) { + return URI_ERROR_MALLOC; + } + dest->absolutePath = URI_TRUE; + + if (!URI_FUNC(FixAmbiguity)(dest)) { + return URI_ERROR_MALLOC; + } + /* [18/50] else */ + } else { + const URI_TYPE(PathSegment) * sourceSeg = absSource->pathHead; + const URI_TYPE(PathSegment) * baseSeg = absBase->pathHead; + /* [19/50] bool pathNaked = true; */ + UriBool pathNaked = URI_TRUE; + /* [20/50] undef(last(Base.path)); */ + /* NOOP */ + /* [21/50] T.path = ""; */ + dest->absolutePath = URI_FALSE; + /* [22/50] while (first(A.path) == first(Base.path)) do */ + while ((sourceSeg != NULL) && (baseSeg != NULL) + && !URI_STRNCMP(sourceSeg->text.first, baseSeg->text.first, + sourceSeg->text.afterLast - sourceSeg->text.first) + && !((sourceSeg->text.first == sourceSeg->text.afterLast) + && ((sourceSeg->next == NULL) != (baseSeg->next == NULL)))) { + /* [23/50] A.path++; */ + sourceSeg = sourceSeg->next; + /* [24/50] Base.path++; */ + baseSeg = baseSeg->next; + /* [25/50] endwhile; */ + } + /* [26/50] while defined(first(Base.path)) do */ + while ((baseSeg != NULL) && (baseSeg->next != NULL)) { + /* [27/50] Base.path++; */ + baseSeg = baseSeg->next; + /* [28/50] T.path += "../"; */ + if (!URI_FUNC(AppendSegment)(dest, URI_FUNC(ConstParent), + URI_FUNC(ConstParent) + 2)) { + return URI_ERROR_MALLOC; + } + /* [29/50] pathNaked = false; */ + pathNaked = URI_FALSE; + /* [30/50] endwhile; */ + } + /* [31/50] while defined(first(A.path)) do */ + while (sourceSeg != NULL) { + /* [32/50] if pathNaked then */ + if (pathNaked == URI_TRUE) { + /* [33/50] if (first(A.path) contains ":") then */ + UriBool containsColon = URI_FALSE; + const URI_CHAR * ch = sourceSeg->text.first; + for (; ch < sourceSeg->text.afterLast; ch++) { + if (*ch == _UT(':')) { + containsColon = URI_TRUE; + break; + } + } + + if (containsColon) { + /* [34/50] T.path += "./"; */ + if (!URI_FUNC(AppendSegment)(dest, URI_FUNC(ConstPwd), + URI_FUNC(ConstPwd) + 1)) { + return URI_ERROR_MALLOC; + } + /* [35/50] elseif (first(A.path) == "") then */ + } else if (sourceSeg->text.first == sourceSeg->text.afterLast) { + /* [36/50] T.path += "/."; */ + if (!URI_FUNC(AppendSegment)(dest, URI_FUNC(ConstPwd), + URI_FUNC(ConstPwd) + 1)) { + return URI_ERROR_MALLOC; + } + /* [37/50] endif; */ + } + /* [38/50] endif; */ + } + /* [39/50] T.path += first(A.path); */ + if (!URI_FUNC(AppendSegment)(dest, sourceSeg->text.first, + sourceSeg->text.afterLast)) { + return URI_ERROR_MALLOC; + } + /* [40/50] pathNaked = false; */ + pathNaked = URI_FALSE; + /* [41/50] A.path++; */ + sourceSeg = sourceSeg->next; + /* [42/50] if defined(first(A.path)) then */ + /* NOOP */ + /* [43/50] T.path += + "/"; */ + /* NOOP */ + /* [44/50] endif; */ + /* NOOP */ + /* [45/50] endwhile; */ + } + /* [46/50] endif; */ + } + /* [47/50] endif; */ + } + /* [48/50] endif; */ + } + /* [49/50] T.query = A.query; */ + dest->query = absSource->query; + /* [50/50] T.fragment = A.fragment; */ + dest->fragment = absSource->fragment; + + return URI_SUCCESS; +} + + + +int URI_FUNC(RemoveBaseUri)(URI_TYPE(Uri) * dest, + const URI_TYPE(Uri) * absSource, + const URI_TYPE(Uri) * absBase, + UriBool domainRootMode) { + const int res = URI_FUNC(RemoveBaseUriImpl)(dest, absSource, + absBase, domainRootMode); + if ((res != URI_SUCCESS) && (dest != NULL)) { + URI_FUNC(FreeUriMembers)(dest); + } + return res; +} + + + +#endif -- cgit v1.2.3-74-g34f1