1 files changed, 620 insertions, 0 deletions
diff --git a/plugins/picohttpparser.c b/plugins/picohttpparser.c
new file mode 100644
index 00000000..6a2d872d
--- /dev/null
+++ b/plugins/picohttpparser.c
@@ -0,0 +1,620 @@
+/*
+ * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
+ *                         Shigeo Mitsunari
+ *
+ * The software is licensed under either the MIT License (below) or the Perl
+ * license.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+#ifdef __SSE4_2__
+#ifdef _MSC_VER
+#include <nmmintrin.h>
+#else
+#include <x86intrin.h>
+#endif
+#endif
+#include "picohttpparser.h"
+/* $Id: a707070d11d499609f99d09f97535642cec910a8 $ */
+/* Branch-prediction hints; they expand to the bare expression when the GCC builtin is not used. */
+#if __GNUC__ >= 3
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#else
+#define likely(x) (x)
+#define unlikely(x) (x)
+#endif
+/* ALIGNED(n): compiler-specific spelling of an n-byte alignment specifier. */
+#ifdef _MSC_VER
+#define ALIGNED(n) _declspec(align(n))
+#else
+#define ALIGNED(n) __attribute__((aligned(n)))
+#endif
+/* True for bytes 0x20..0x7e; the unsigned subtraction wraps for bytes below 040, so one comparison checks both bounds. */
+#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
+/*
+ * The macros below expand inside the parsing functions and use the locals `buf`, `buf_end` and `ret`
+ * of the enclosing function. On failure they store a code in *ret and return NULL from that function.
+ */
+/* Fails with -2 when the cursor has reached the end of the buffer. */
+#define CHECK_EOF()                                                                                                                \
+    if (buf == buf_end) {                                                                                                          \
+        *ret = -2;                                                                                                                 \
+        return NULL;                                                                                                               \
+    }
+/* Consumes one byte without a bounds check and fails with -1 unless it equals `ch`. */
+#define EXPECT_CHAR_NO_CHECK(ch)                                                                                                   \
+    if (*buf++ != ch) {                                                                                                            \
+        *ret = -1;                                                                                                                 \
+        return NULL;                                                                                                               \
+    }
+/* Bounds-checked variant: CHECK_EOF() followed by EXPECT_CHAR_NO_CHECK(ch). */
+#define EXPECT_CHAR(ch)                                                                                                            \
+    CHECK_EOF();                                                                                                                   \
+    EXPECT_CHAR_NO_CHECK(ch);
+/*
+ * ADVANCE_TOKEN(tok, toklen): scans from `buf` to the next SP, then stores the start of the scanned
+ * run in `tok` and its length in `toklen`. `buf` is left pointing at the SP.
+ * Uses the enclosing function's `buf`, `buf_end` and `ret`. Returns NULL from that function with
+ * *ret = -2 if the buffer ends before a SP is seen, or with *ret = -1 if a byte below 040 or DEL
+ * (0177) is met first. Bytes with the high bit set are accepted.
+ * `ranges2` is padded to 16 bytes because findchar_fast() loads a full 16-byte vector from it;
+ * only the first 4 bytes (two inclusive ranges: 000-040 and 0177-0177) are significant.
+ */
+#define ADVANCE_TOKEN(tok, toklen)                                                                                                 \
+    do {                                                                                                                           \
+        const char *tok_start = buf;                                                                                               \
+        static const char ALIGNED(16) ranges2[16] = "\000\040\177\177";                                                            \
+        int found2;                                                                                                                \
+        buf = findchar_fast(buf, buf_end, ranges2, 4, &found2);                                                                    \
+        if (!found2) {                                                                                                             \
+            CHECK_EOF();                                                                                                           \
+        }                                                                                                                          \
+        while (1) {                                                                                                                \
+            if (*buf == ' ') {                                                                                                     \
+                break;                                                                                                             \
+            } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {                                                                      \
+                if ((unsigned char)*buf < '\040' || *buf == '\177') {                                                              \
+                    *ret = -1;                                                                                                     \
+                    return NULL;                                                                                                   \
+                }                                                                                                                  \
+            }                                                                                                                      \
+            ++buf;                                                                                                                 \
+            CHECK_EOF();                                                                                                           \
+        }                                                                                                                          \
+        tok = tok_start;                                                                                                           \
+        toklen = buf - tok_start;                                                                                                  \
+    } while (0)
+/*
+ * Lookup table indexed by byte value (0-255). parse_headers() rejects a header name as soon as it
+ * meets a byte whose entry is 0; each 32-entry row below covers 32 consecutive byte values.
+ */
+static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+                                    "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
+                                    "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
+                                    "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
+                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+/*
+ * Fast pre-scan for the first byte of [buf, buf_end) that lies inside one of the inclusive
+ * (low, high) byte pairs stored in `ranges`.
+ *
+ * ranges      - table of byte pairs; the SSE4.2 path loads a full 16 bytes from this address, so the
+ *               caller must make sure 16 bytes are readable there
+ * ranges_size - number of significant bytes in `ranges`
+ * found       - set to 1 if a matching byte was located, 0 otherwise
+ *
+ * Returns a pointer to the matching byte when *found is 1. Otherwise returns the first byte that
+ * was not examined: only whole 16-byte groups are scanned, so up to 15 trailing bytes are left to
+ * the caller. Without SSE4.2 nothing is scanned and `buf` is returned unchanged.
+ */
+static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
+{
+    *found = 0;
+#ifdef __SSE4_2__
+    if (likely(buf_end - buf >= 16)) {
+        __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
+        /* number of bytes covered by whole 16-byte groups */
+        size_t left = (buf_end - buf) & ~15;
+        do {
+            __m128i b16 = _mm_loadu_si128((const __m128i *)buf);
+            /* r is the index of the first byte inside any range, or 16 when there is none */
+            int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
+            if (unlikely(r != 16)) {
+                buf += r;
+                *found = 1;
+                break;
+            }
+            buf += 16;
+            left -= 16;
+        } while (likely(left != 0));
+    }
+#else
+    /* suppress unused parameter warning */
+    (void)buf_end;
+    (void)ranges;
+    (void)ranges_size;
+#endif
+    return buf;
+}
+/*
+ * Scans the rest of the current line starting at `buf`.
+ *
+ * On success stores the start of the line in *token and its length, excluding the line terminator,
+ * in *token_len, and returns a pointer just past the terminator (CR LF or a bare LF).
+ * HT, SP..0x7e and bytes with the high bit set are accepted inside the line.
+ * Returns NULL with *ret = -2 if the buffer ends before the terminator, or with *ret = -1 if another
+ * control byte (or DEL) is met, or if CR is not followed by LF.
+ */
+static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
+{
+    const char *token_start = buf;
+#ifdef __SSE4_2__
+    /* byte ranges that stop the fast scan; padded to 16 bytes because findchar_fast loads a full
+     * 16-byte vector from the table (only the first 6 bytes are significant) */
+    static const char ALIGNED(16) ranges1[16] = "\0\010"    /* 0x00-0x08; HT (0x09) is allowed */
+                                                "\012\037"  /* 0x0a-0x1f; SP up to but not including DEL is allowed */
+                                                "\177\177"; /* DEL; chars w. MSB set are allowed */
+    int found;
+    buf = findchar_fast(buf, buf_end, ranges1, 6, &found);
+    if (found)
+        goto FOUND_CTL;
+#else
+    /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
+    while (likely(buf_end - buf >= 8)) {
+#define DOIT()                                                                                                                     \
+    do {                                                                                                                           \
+        if (unlikely(!IS_PRINTABLE_ASCII(*buf)))                                                                                   \
+            goto NonPrintable;                                                                                                     \
+        ++buf;                                                                                                                     \
+    } while (0)
+        DOIT();
+        DOIT();
+        DOIT();
+        DOIT();
+        DOIT();
+        DOIT();
+        DOIT();
+        DOIT();
+#undef DOIT
+        continue;
+    NonPrintable:
+        /* HT and bytes >= 0x80 are not "printable" but are still allowed; anything else ends the scan */
+        if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
+            goto FOUND_CTL;
+        }
+        ++buf;
+    }
+#endif
+    /* byte-by-byte scan of whatever the fast path left over */
+    for (;; ++buf) {
+        CHECK_EOF();
+        if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
+            if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
+                goto FOUND_CTL;
+            }
+        }
+    }
+FOUND_CTL:
+    if (likely(*buf == '\015')) {
+        ++buf;
+        EXPECT_CHAR('\012');
+        /* buf is now past CR LF, so step back over both to get the length */
+        *token_len = buf - 2 - token_start;
+    } else if (*buf == '\012') {
+        *token_len = buf - token_start;
+        ++buf;
+    } else {
+        *ret = -1;
+        return NULL;
+    }
+    *token = token_start;
+    return buf;
+}
+/*
+ * Checks whether [buf, buf_end) contains an empty line, i.e. two consecutive line terminators
+ * (each either CR LF or a bare LF).
+ *
+ * last_len - number of leading bytes the caller has already examined; scanning restarts 3 bytes
+ *            before that point so that a terminator sequence straddling old and new data is seen.
+ *            A value larger than the buffer is clamped to the buffer length.
+ *
+ * Returns a pointer just past the empty line, or NULL with *ret = -2 if the buffer ends first, or
+ * NULL with *ret = -1 if a CR is not followed by LF.
+ */
+static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
+{
+    int ret_cnt = 0;
+    if (last_len >= 3) {
+        size_t skip = last_len - 3;
+        /* never step past buf_end: CHECK_EOF() only tests for equality */
+        if (skip > (size_t)(buf_end - buf)) {
+            skip = (size_t)(buf_end - buf);
+        }
+        buf += skip;
+    }
+    while (1) {
+        CHECK_EOF();
+        if (*buf == '\015') {
+            ++buf;
+            EXPECT_CHAR('\012'); /* performs its own end-of-buffer check */
+            ++ret_cnt;
+        } else if (*buf == '\012') {
+            ++buf;
+            ++ret_cnt;
+        } else {
+            ++buf;
+            ret_cnt = 0;
+        }
+        if (ret_cnt == 2) {
+            return buf;
+        }
+    }
+}
+/*
+ * PARSE_INT(valp_, mul_): consumes one decimal digit at `buf` and stores digit * mul_ in *valp_.
+ * Returns NULL from the enclosing function with *ret = -1 if the byte is not a digit. No bounds
+ * check is performed; the caller must already know that a byte is available.
+ */
+#define PARSE_INT(valp_, mul_)                                                                                                     \
+    if (*buf < '0' || '9' < *buf) {                                                                                                \
+        buf++;                                                                                                                     \
+        *ret = -1;                                                                                                                 \
+        return NULL;                                                                                                               \
+    }                                                                                                                              \
+    *(valp_) = (mul_) * (*buf++ - '0');
+/* PARSE_INT_3(valp_): consumes exactly three decimal digits and stores their value (0-999) in *valp_. */
+#define PARSE_INT_3(valp_)                                                                                                         \
+    do {                                                                                                                           \
+        int res_ = 0;                                                                                                              \
+        PARSE_INT(&res_, 100)                                                                                                      \
+        *valp_ = res_;                                                                                                             \
+        PARSE_INT(&res_, 10)                                                                                                       \
+        *valp_ += res_;                                                                                                            \
+        PARSE_INT(&res_, 1)                                                                                                        \
+        *valp_ += res_;                                                                                                            \
+    } while (0)
+/*
+ * Parses "HTTP/1.<digit>" at `buf` and stores the digit in *minor_version.
+ * At least 9 bytes must be available and 8 are consumed, so the returned pointer is always within
+ * [buf, buf_end), or null. On failure *ret is -2 (fewer than 9 bytes available) or -1 (mismatch).
+ */
+static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
+{
+    /* we want at least [HTTP/1.<two chars>] to try to parse */
+    if (buf_end - buf < 9) {
+        *ret = -2;
+        return NULL;
+    }
+    /* the length check above makes the unchecked reads below safe */
+    EXPECT_CHAR_NO_CHECK('H');
+    EXPECT_CHAR_NO_CHECK('T');
+    EXPECT_CHAR_NO_CHECK('T');
+    EXPECT_CHAR_NO_CHECK('P');
+    EXPECT_CHAR_NO_CHECK('/');
+    EXPECT_CHAR_NO_CHECK('1');
+    EXPECT_CHAR_NO_CHECK('.');
+    PARSE_INT(minor_version, 1);
+    return buf;
+}
+/*
+ * Parses header lines starting at `buf` until an empty line is reached.
+ *
+ * headers / num_headers - output array and running count; *num_headers is incremented per line and
+ *                         is used as the index of the slot being filled
+ * max_headers           - capacity of `headers`
+ *
+ * A line that begins with SP or HT (and is not the first line) is stored with name == NULL and
+ * name_len == 0; its value is the rest of the line.
+ * Returns a pointer just past the empty line, or NULL with *ret = -2 (buffer ended) or *ret = -1
+ * (too many headers, invalid or empty header name, or a malformed line).
+ */
+static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
+                                 size_t max_headers, int *ret)
+{
+    for (;; ++*num_headers) {
+        CHECK_EOF();
+        /* an empty line (CR LF or bare LF) terminates the header block */
+        if (*buf == '\015') {
+            ++buf;
+            EXPECT_CHAR('\012');
+            break;
+        } else if (*buf == '\012') {
+            ++buf;
+            break;
+        }
+        if (*num_headers == max_headers) {
+            *ret = -1;
+            return NULL;
+        }
+        if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
+            /* parsing name, but do not discard SP before colon, see
+             * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
+            headers[*num_headers].name = buf;
+            /* byte ranges that stop the fast scan of the name */
+            static const char ALIGNED(16) ranges1[] = "\x00 "  /* control chars and up to SP */
+                                                      "\"\""   /* 0x22 */
+                                                      "()"     /* 0x28,0x29 */
+                                                      ",,"     /* 0x2c */
+                                                      "//"     /* 0x2f */
+                                                      ":@"     /* 0x3a-0x40 */
+                                                      "[]"     /* 0x5b-0x5d */
+                                                      "{\377"; /* 0x7b-0xff */
+            int found;
+            buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
+            if (!found) {
+                CHECK_EOF();
+            }
+            /* byte-by-byte: stop at ':', reject any byte that token_char_map marks as 0 */
+            while (1) {
+                if (*buf == ':') {
+                    break;
+                } else if (!token_char_map[(unsigned char)*buf]) {
+                    *ret = -1;
+                    return NULL;
+                }
+                ++buf;
+                CHECK_EOF();
+            }
+            /* an empty header name is an error */
+            if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) {
+                *ret = -1;
+                return NULL;
+            }
+            /* step over ':' and then any SP / HT before the value */
+            ++buf;
+            for (;; ++buf) {
+                CHECK_EOF();
+                if (!(*buf == ' ' || *buf == '\t')) {
+                    break;
+                }
+            }
+        } else {
+            /* line starting with SP / HT: no name of its own */
+            headers[*num_headers].name = NULL;
+            headers[*num_headers].name_len = 0;
+        }
+        /* the value is everything up to the end of the line */
+        if ((buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value, &headers[*num_headers].value_len, ret)) == NULL) {
+            return NULL;
+        }
+    }
+    return buf;
+}
+/*
+ * Parses a request line ("<method> <path> HTTP/1.<digit>" followed by CR LF or LF) and the header
+ * block after it. One leading empty line is skipped.
+ * Returns a pointer just past the header block, or NULL with *ret = -1 (malformed) or *ret = -2
+ * (more data needed).
+ */
+static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
+                                 size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
+                                 size_t max_headers, int *ret)
+{
+    /* skip first empty line (some clients add CRLF after POST content) */
+    CHECK_EOF();
+    if (*buf == '\015') {
+        ++buf;
+        EXPECT_CHAR('\012');
+    } else if (*buf == '\012') {
+        ++buf;
+    }
+    /* parse request line */
+    ADVANCE_TOKEN(*method, *method_len);
+    ++buf; /* skip the SP that ended the method */
+    ADVANCE_TOKEN(*path, *path_len);
+    ++buf; /* skip the SP that ended the path */
+    if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
+        return NULL;
+    }
+    /* parse_http_version leaves at least one readable byte, so *buf is safe here */
+    if (*buf == '\015') {
+        ++buf;
+        EXPECT_CHAR('\012');
+    } else if (*buf == '\012') {
+        ++buf;
+    } else {
+        *ret = -1;
+        return NULL;
+    }
+    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
+}
+/*
+ * Parses an HTTP request held in buf_start[0..len).
+ *
+ * num_headers - in: capacity of `headers`; out: number of header slots filled
+ * last_len    - when non-zero, is_complete() is run first (restarting near offset last_len) and the
+ *               full parse is skipped unless an empty line is present
+ *
+ * All output parameters are reset before parsing. Returns the number of bytes consumed on success,
+ * -1 on a parse error, or -2 if the request is incomplete.
+ */
+int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
+                      size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
+{
+    const char *buf = buf_start, *buf_end = buf_start + len;
+    size_t max_headers = *num_headers;
+    int r;
+    *method = NULL;
+    *method_len = 0;
+    *path = NULL;
+    *path_len = 0;
+    *minor_version = -1;
+    *num_headers = 0;
+    /* if last_len != 0, check if the request is complete (a fast countermeasure
+       against slowloris) */
+    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
+        return r;
+    }
+    if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers,
+                             &r)) == NULL) {
+        return r;
+    }
+    return (int)(buf - buf_start);
+}
+/*
+ * Parses a status line ("HTTP/1.<digit> <3-digit status> <message>" followed by a line terminator)
+ * and the header block after it.
+ * Returns a pointer just past the header block, or NULL with *ret = -1 (malformed) or *ret = -2
+ * (more data needed).
+ */
+static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
+                                  size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
+{
+    /* parse "HTTP/1.x" */
+    if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
+        return NULL;
+    }
+    /* skip space (parse_http_version leaves at least one readable byte) */
+    if (*buf++ != ' ') {
+        *ret = -1;
+        return NULL;
+    }
+    /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
+    if (buf_end - buf < 4) {
+        *ret = -2;
+        return NULL;
+    }
+    PARSE_INT_3(status);
+    /* skip space (this is the fourth byte covered by the length check above) */
+    if (*buf++ != ' ') {
+        *ret = -1;
+        return NULL;
+    }
+    /* get message */
+    if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
+        return NULL;
+    }
+    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
+}
+/*
+ * Parses an HTTP response held in buf_start[0..len).
+ *
+ * num_headers - in: capacity of `headers`; out: number of header slots filled
+ * last_len    - when non-zero, is_complete() is run first and the full parse is skipped unless an
+ *               empty line is present
+ *
+ * All output parameters are reset before parsing. Returns the number of bytes consumed on success,
+ * -1 on a parse error, or -2 if the response is incomplete.
+ */
+int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
+                       struct phr_header *headers, size_t *num_headers, size_t last_len)
+{
+    const char *buf = buf_start, *buf_end = buf + len;
+    size_t max_headers = *num_headers;
+    int r;
+    *minor_version = -1;
+    *status = 0;
+    *msg = NULL;
+    *msg_len = 0;
+    *num_headers = 0;
+    /* if last_len != 0, check if the response is complete (a fast countermeasure
+       against slowloris) */
+    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
+        return r;
+    }
+    if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) {
+        return r;
+    }
+    return (int)(buf - buf_start);
+}
+/*
+ * Parses a bare header block (no request or status line) held in buf_start[0..len).
+ *
+ * num_headers - in: capacity of `headers`; out: number of header slots filled
+ * last_len    - when non-zero, is_complete() is run first and the full parse is skipped unless an
+ *               empty line is present
+ *
+ * Returns the number of bytes consumed on success, -1 on a parse error, or -2 if the block is
+ * incomplete.
+ */
+int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
+{
+    const char *buf = buf_start, *buf_end = buf + len;
+    size_t max_headers = *num_headers;
+    int r;
+    *num_headers = 0;
+    /* if last_len != 0, check if the header block is complete (a fast countermeasure
+       against slowloris) */
+    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
+        return r;
+    }
+    if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
+        return r;
+    }
+    return (int)(buf - buf_start);
+}
+/* States of the chunked decoder, stored in phr_chunked_decoder::_state between calls. */
+enum {
+    CHUNKED_IN_CHUNK_SIZE,          /* reading the hexadecimal chunk size */
+    CHUNKED_IN_CHUNK_EXT,           /* skipping the rest of the chunk-size line up to LF */
+    CHUNKED_IN_CHUNK_DATA,          /* copying chunk payload */
+    CHUNKED_IN_CHUNK_CRLF,          /* expecting the line terminator after the payload */
+    CHUNKED_IN_TRAILERS_LINE_HEAD,  /* at the start of a trailer line */
+    CHUNKED_IN_TRAILERS_LINE_MIDDLE /* inside a trailer line, skipping to LF */
+};
+/* Returns the value (0-15) of the hexadecimal digit `ch`, or -1 if `ch` is not a hexadecimal digit. */
+static int decode_hex(int ch)
+{
+    if (ch >= '0' && ch <= '9')
+        return ch - '0';
+    if (ch >= 'A' && ch <= 'F')
+        return 0xa + (ch - 'A');
+    if (ch >= 'a' && ch <= 'f')
+        return 0xa + (ch - 'a');
+    return -1;
+}
+/*
+ * Decodes chunked transfer-encoded data in place.
+ *
+ * decoder - decoding state carried across calls (_state, _hex_count, bytes_left_in_chunk,
+ *           consume_trailer)
+ * buf     - input bytes; chunk payload is compacted towards the front of the same buffer
+ * _bufsz  - in: number of bytes in buf; out: number of decoded payload bytes now at the front
+ *
+ * Returns -2 if more input is needed, -1 on malformed input (or a corrupt decoder state), and
+ * otherwise the number of bytes that followed the end of the chunked data. Bytes that were not
+ * consumed are moved so that they directly follow the decoded payload.
+ */
+ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
+{
+    /* dst: write offset of decoded payload; src: read offset of encoded input */
+    size_t dst = 0, src = 0, bufsz = *_bufsz;
+    ssize_t ret = -2; /* incomplete */
+    while (1) {
+        switch (decoder->_state) {
+        case CHUNKED_IN_CHUNK_SIZE:
+            for (;; ++src) {
+                int v;
+                if (src == bufsz)
+                    goto Exit;
+                if ((v = decode_hex(buf[src])) == -1) {
+                    /* a chunk-size line must start with at least one hex digit */
+                    if (decoder->_hex_count == 0) {
+                        ret = -1;
+                        goto Exit;
+                    }
+                    break;
+                }
+                /* one more digit would overflow bytes_left_in_chunk */
+                if (decoder->_hex_count == sizeof(size_t) * 2) {
+                    ret = -1;
+                    goto Exit;
+                }
+                decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
+                ++decoder->_hex_count;
+            }
+            decoder->_hex_count = 0;
+            decoder->_state = CHUNKED_IN_CHUNK_EXT;
+        /* fallthru */
+        case CHUNKED_IN_CHUNK_EXT:
+            /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
+            for (;; ++src) {
+                if (src == bufsz)
+                    goto Exit;
+                if (buf[src] == '\012')
+                    break;
+            }
+            ++src;
+            /* a zero-sized chunk ends the body */
+            if (decoder->bytes_left_in_chunk == 0) {
+                if (decoder->consume_trailer) {
+                    decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
+                    break;
+                } else {
+                    goto Complete;
+                }
+            }
+            decoder->_state = CHUNKED_IN_CHUNK_DATA;
+        /* fallthru */
+        case CHUNKED_IN_CHUNK_DATA: {
+            size_t avail = bufsz - src;
+            if (avail < decoder->bytes_left_in_chunk) {
+                /* the chunk continues beyond this buffer: take what is here and ask for more */
+                if (dst != src)
+                    memmove(buf + dst, buf + src, avail);
+                src += avail;
+                dst += avail;
+                decoder->bytes_left_in_chunk -= avail;
+                goto Exit;
+            }
+            if (dst != src)
+                memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
+            src += decoder->bytes_left_in_chunk;
+            dst += decoder->bytes_left_in_chunk;
+            decoder->bytes_left_in_chunk = 0;
+            decoder->_state = CHUNKED_IN_CHUNK_CRLF;
+        }
+        /* fallthru */
+        case CHUNKED_IN_CHUNK_CRLF:
+            /* skip any CRs, then require LF */
+            for (;; ++src) {
+                if (src == bufsz)
+                    goto Exit;
+                if (buf[src] != '\015')
+                    break;
+            }
+            if (buf[src] != '\012') {
+                ret = -1;
+                goto Exit;
+            }
+            ++src;
+            decoder->_state = CHUNKED_IN_CHUNK_SIZE;
+            break;
+        case CHUNKED_IN_TRAILERS_LINE_HEAD:
+            for (;; ++src) {
+                if (src == bufsz)
+                    goto Exit;
+                if (buf[src] != '\015')
+                    break;
+            }
+            /* an empty trailer line ends the message */
+            if (buf[src++] == '\012')
+                goto Complete;
+            decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
+        /* fallthru */
+        case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
+            for (;; ++src) {
+                if (src == bufsz)
+                    goto Exit;
+                if (buf[src] == '\012')
+                    break;
+            }
+            ++src;
+            decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
+            break;
+        default:
+            assert(!"decoder is corrupt");
+            /* with NDEBUG the assert compiles away; fail instead of looping forever */
+            ret = -1;
+            goto Exit;
+        }
+    }
+Complete:
+    ret = bufsz - src;
+Exit:
+    /* move whatever was not consumed so that it directly follows the decoded payload */
+    if (dst != src)
+        memmove(buf + dst, buf + src, bufsz - src);
+    *_bufsz = dst;
+    return ret;
+}
+/* Returns 1 if the decoder's state is CHUNKED_IN_CHUNK_DATA (stopped inside chunk payload), else 0. */
+int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
+{
+    const int in_chunk_data = (decoder->_state == CHUNKED_IN_CHUNK_DATA);
+    return in_chunk_data;
+}
+#undef CHECK_EOF
+#undef EXPECT_CHAR
+#undef ADVANCE_TOKEN

diff --git a/plugins/picohttpparser.c b/plugins/picohttpparser.c new file mode 100644 index 00000000..6a2d872d --- /dev/null +++ b/plugins/picohttpparser.c
@@ -0,0 +1,620 @@
	1	/*
	2	* Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
	3	* Shigeo Mitsunari
	4	*
	5	* The software is licensed under either the MIT License (below) or the Perl
	6	* license.
	7	*
	8	* Permission is hereby granted, free of charge, to any person obtaining a copy
	9	* of this software and associated documentation files (the "Software"), to
	10	* deal in the Software without restriction, including without limitation the
	11	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
	12	* sell copies of the Software, and to permit persons to whom the Software is
	13	* furnished to do so, subject to the following conditions:
	14	*
	15	* The above copyright notice and this permission notice shall be included in
	16	* all copies or substantial portions of the Software.
	17	*
	18	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	19	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	20	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	21	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	22	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	23	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
	24	* IN THE SOFTWARE.
	25	*/
	26
	27	#include <assert.h>
	28	#include <stddef.h>
	29	#include <string.h>
	30	#ifdef __SSE4_2__
	31	#ifdef _MSC_VER
	32	#include <nmmintrin.h>
	33	#else
	34	#include <x86intrin.h>
	35	#endif
	36	#endif
	37	#include "picohttpparser.h"
	38
	39	/* $Id: a707070d11d499609f99d09f97535642cec910a8 $ */
	40
	41	#if __GNUC__ >= 3
	42	#define likely(x) __builtin_expect(!!(x), 1)
	43	#define unlikely(x) __builtin_expect(!!(x), 0)
	44	#else
	45	#define likely(x) (x)
	46	#define unlikely(x) (x)
	47	#endif
	48
	49	#ifdef _MSC_VER
	50	#define ALIGNED(n) _declspec(align(n))
	51	#else
	52	#define ALIGNED(n) __attribute__((aligned(n)))
	53	#endif
	54
	55	#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
	56
	57	#define CHECK_EOF() \
	58	if (buf == buf_end) { \
	59	*ret = -2; \
	60	return NULL; \
	61	}
	62
	63	#define EXPECT_CHAR_NO_CHECK(ch) \
	64	if (*buf++ != ch) { \
	65	*ret = -1; \
	66	return NULL; \
	67	}
	68
	69	#define EXPECT_CHAR(ch) \
	70	CHECK_EOF(); \
	71	EXPECT_CHAR_NO_CHECK(ch);
	72
	73	#define ADVANCE_TOKEN(tok, toklen) \
	74	do { \
	75	const char *tok_start = buf; \
	76	static const char ALIGNED(16) ranges2[] = "\000\040\177\177"; \
	77	int found2; \
	78	buf = findchar_fast(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2); \
	79	if (!found2) { \
	80	CHECK_EOF(); \
	81	} \
	82	while (1) { \
	83	if (*buf == ' ') { \
	84	break; \
	85	} else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
	86	if ((unsigned char)*buf < '\040' || *buf == '\177') { \
	87	*ret = -1; \
	88	return NULL; \
	89	} \
	90	} \
	91	++buf; \
	92	CHECK_EOF(); \
	93	} \
	94	tok = tok_start; \
	95	toklen = buf - tok_start; \
	96	} while (0)
	97
	98	static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
	99	"\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
	100	"\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
	101	"\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
	102	"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
	103	"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
	104	"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
	105	"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
	106
	107	static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
	108	{
	109	*found = 0;
	110	#if __SSE4_2__
	111	if (likely(buf_end - buf >= 16)) {
	112	__m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
	113
	114	size_t left = (buf_end - buf) & ~15;
	115	do {
	116	__m128i b16 = _mm_loadu_si128((const __m128i *)buf);
	117	int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
	118	if (unlikely(r != 16)) {
	119	buf += r;
	120	*found = 1;
	121	break;
	122	}
	123	buf += 16;
	124	left -= 16;
	125	} while (likely(left != 0));
	126	}
	127	#else
	128	/* suppress unused parameter warning */
	129	(void)buf_end;
	130	(void)ranges;
	131	(void)ranges_size;
	132	#endif
	133	return buf;
	134	}
	135
	136	static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
	137	{
	138	const char *token_start = buf;
	139
	140	#ifdef __SSE4_2__
	141	static const char ranges1[] = "\0\010"
	142	/* allow HT */
	143	"\012\037"
	144	/* allow SP and up to but not including DEL */
	145	"\177\177"
	146	/* allow chars w. MSB set */
	147	;
	148	int found;
	149	buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
	150	if (found)
	151	goto FOUND_CTL;
	152	#else
	153	/* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
	154	while (likely(buf_end - buf >= 8)) {
	155	#define DOIT() \
	156	do { \
	157	if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
	158	goto NonPrintable; \
	159	++buf; \
	160	} while (0)
	161	DOIT();
	162	DOIT();
	163	DOIT();
	164	DOIT();
	165	DOIT();
	166	DOIT();
	167	DOIT();
	168	DOIT();
	169	#undef DOIT
	170	continue;
	171	NonPrintable:
	172	if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
	173	goto FOUND_CTL;
	174	}
	175	++buf;
	176	}
	177	#endif
	178	for (;; ++buf) {
	179	CHECK_EOF();
	180	if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
	181	if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
	182	goto FOUND_CTL;
	183	}
	184	}
	185	}
	186	FOUND_CTL:
	187	if (likely(*buf == '\015')) {
	188	++buf;
	189	EXPECT_CHAR('\012');
	190	*token_len = buf - 2 - token_start;
	191	} else if (*buf == '\012') {
	192	*token_len = buf - token_start;
	193	++buf;
	194	} else {
	195	*ret = -1;
	196	return NULL;
	197	}
	198	*token = token_start;
	199
	200	return buf;
	201	}
	202
	/*
	 * Checks whether the buffer already contains an empty line, i.e. two
	 * consecutive line terminators (each either CRLF or a bare LF).
	 *
	 * Scanning starts 3 bytes before offset `last_len` (or at `buf` when
	 * last_len < 3) so that a terminator sequence straddling the previously
	 * seen data is still detected.
	 *
	 * Returns the position just past the second terminator.  When the input
	 * runs out first, the CHECK_EOF / EXPECT_CHAR macros (defined earlier in
	 * this file) take over.
	 */
	static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
	{
	    int ret_cnt = 0;
	    buf = last_len < 3 ? buf : buf + last_len - 3;

	    while (1) {
	        CHECK_EOF();
	        if (*buf == '\015') {
	            ++buf;
	            CHECK_EOF();
	            EXPECT_CHAR('\012');
	            ++ret_cnt;
	        } else if (*buf == '\012') {
	            ++buf;
	            ++ret_cnt;
	        } else {
	            /* any other byte breaks the run of consecutive terminators */
	            ++buf;
	            ret_cnt = 0;
	        }
	        if (ret_cnt == 2) {
	            return buf;
	        }
	    }

	    /* NOTE(review): the loop above has no break; this tail is only a
	     * fallback should the macros ever leave the loop instead of returning */
	    *ret = -2;
	    return NULL;
	}
	230
	/*
	 * PARSE_INT(valp_, mul_): consumes one decimal digit at `buf` and stores
	 * digit * mul_ into *valp_.  If the byte is not a digit, *ret is set to -1
	 * and the enclosing function returns NULL.  Expects `buf` and `ret` to be
	 * in scope at the point of expansion.
	 */
	#define PARSE_INT(valp_, mul_)                                                                                                     \
	    do {                                                                                                                           \
	        if (*buf < '0' || '9' < *buf) {                                                                                            \
	            *ret = -1;                                                                                                             \
	            return NULL;                                                                                                           \
	        }                                                                                                                          \
	        *(valp_) = (mul_) * (*buf++ - '0');                                                                                        \
	    } while (0)

	/*
	 * PARSE_INT_3(valp_): consumes exactly three decimal digits at `buf` and
	 * stores the resulting value (000-999) into *valp_.
	 */
	#define PARSE_INT_3(valp_)                                                                                                         \
	    do {                                                                                                                           \
	        int res_ = 0;                                                                                                              \
	        PARSE_INT(&res_, 100);                                                                                                     \
	        *(valp_) = res_;                                                                                                           \
	        PARSE_INT(&res_, 10);                                                                                                      \
	        *(valp_) += res_;                                                                                                          \
	        PARSE_INT(&res_, 1);                                                                                                       \
	        *(valp_) += res_;                                                                                                          \
	    } while (0)
	249
	/*
	 * Parses the literal "HTTP/1." followed by a single minor-version digit.
	 *
	 * minor_version  receives the parsed digit
	 * ret            receives -2 when fewer than 9 bytes are available
	 *
	 * Because 9 bytes are required but only 8 are consumed, the returned
	 * pointer is always within [buf, buf_end), or null, so the caller may
	 * dereference it without a further bounds check.
	 */
	static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
	{
	    /* we want at least [HTTP/1.<two chars>] to try to parse */
	    if (buf_end - buf < 9) {
	        *ret = -2;
	        return NULL;
	    }
	    EXPECT_CHAR_NO_CHECK('H');
	    EXPECT_CHAR_NO_CHECK('T');
	    EXPECT_CHAR_NO_CHECK('T');
	    EXPECT_CHAR_NO_CHECK('P');
	    EXPECT_CHAR_NO_CHECK('/');
	    EXPECT_CHAR_NO_CHECK('1');
	    EXPECT_CHAR_NO_CHECK('.');
	    PARSE_INT(minor_version, 1);
	    return buf;
	}
	268
	269	static const char parse_headers(const char buf, const char buf_end, struct phr_header headers, size_t *num_headers,
	270	size_t max_headers, int *ret)
	271	{
	272	for (;; ++*num_headers) {
	273	CHECK_EOF();
	274	if (*buf == '\015') {
	275	++buf;
	276	EXPECT_CHAR('\012');
	277	break;
	278	} else if (*buf == '\012') {
	279	++buf;
	280	break;
	281	}
	282	if (*num_headers == max_headers) {
	283	*ret = -1;
	284	return NULL;
	285	}
	286	if (!(num_headers != 0 && (buf == ' ' \|\| *buf == '\t'))) {
	287	/* parsing name, but do not discard SP before colon, see
	288	* http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
	289	headers[*num_headers].name = buf;
	290	static const char ALIGNED(16) ranges1[] = "\x00 " /* control chars and up to SP */
	291	"\"\"" /* 0x22 */
	292	"()" /* 0x28,0x29 */
	293	",," /* 0x2c */
	294	"//" /* 0x2f */
	295	":@" /* 0x3a-0x40 */
	296	"[]" /* 0x5b-0x5d */
	297	"{\377"; /* 0x7b-0xff */
	298	int found;
	299	buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
	300	if (!found) {
	301	CHECK_EOF();
	302	}
	303	while (1) {
	304	if (*buf == ':') {
	305	break;
	306	} else if (!token_char_map[(unsigned char)*buf]) {
	307	*ret = -1;
	308	return NULL;
	309	}
	310	++buf;
	311	CHECK_EOF();
	312	}
	313	if ((headers[num_headers].name_len = buf - headers[num_headers].name) == 0) {
	314	*ret = -1;
	315	return NULL;
	316	}
	317	++buf;
	318	for (;; ++buf) {
	319	CHECK_EOF();
	320	if (!(buf == ' ' \|\| buf == '\t')) {
	321	break;
	322	}
	323	}
	324	} else {
	325	headers[*num_headers].name = NULL;
	326	headers[*num_headers].name_len = 0;
	327	}
	328	if ((buf = get_token_to_eol(buf, buf_end, &headers[num_headers].value, &headers[num_headers].value_len, ret)) == NULL) {
	329	return NULL;
	330	}
	331	}
	332	return buf;
	333	}
	334
	/*
	 * Parses a request line ("<method> <path> HTTP/1.x" plus CRLF or LF) and
	 * then the header section.
	 *
	 * method / method_len, path / path_len receive slices of the input;
	 * minor_version, headers, num_headers and max_headers are forwarded to
	 * parse_http_version() and parse_headers().
	 *
	 * Returns the position just past the header section, or NULL with the error
	 * code stored in *ret.
	 */
	static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
	                                 size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
	                                 size_t max_headers, int *ret)
	{
	    /* skip first empty line (some clients add CRLF after POST content) */
	    CHECK_EOF();
	    if (*buf == '\015') {
	        ++buf;
	        EXPECT_CHAR('\012');
	    } else if (*buf == '\012') {
	        ++buf;
	    }

	    /* parse request line */
	    ADVANCE_TOKEN(*method, *method_len);
	    ++buf;
	    ADVANCE_TOKEN(*path, *path_len);
	    ++buf;
	    if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
	        return NULL;
	    }
	    /* parse_http_version() leaves at least one readable byte at buf */
	    if (*buf == '\015') {
	        ++buf;
	        EXPECT_CHAR('\012');
	    } else if (*buf == '\012') {
	        ++buf;
	    } else {
	        *ret = -1;
	        return NULL;
	    }

	    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
	}
	368
	/*
	 * Parses an HTTP request held in buf_start[0..len).
	 *
	 * All output parameters are reset first.  *num_headers is read on entry as
	 * the capacity of `headers` and is rewritten while parsing.  A non-zero
	 * last_len enables a quick completeness check that starts shortly before
	 * that offset, before any full parse is attempted.
	 *
	 * Returns the number of bytes consumed on success, otherwise the negative
	 * code stored by the failing helper (-1 and -2 are the values assigned in
	 * the helpers visible in this file).
	 */
	int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
	                      size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
	{
	    const char *buf = buf_start;
	    const char *buf_end = buf_start + len;
	    size_t max_headers = *num_headers;
	    int r;

	    *method = NULL;
	    *method_len = 0;
	    *path = NULL;
	    *path_len = 0;
	    *minor_version = -1;
	    *num_headers = 0;

	    /* if last_len != 0, check if the request is complete (a fast countermeasure
	       against slowloris) */
	    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
	        return r;
	    }

	    if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers,
	                             &r)) == NULL) {
	        return r;
	    }

	    return (int)(buf - buf_start);
	}
	396
	/*
	 * Parses a status line ("HTTP/1.x <3-digit status> <message>" plus line
	 * terminator) and then the header section.
	 *
	 * minor_version, status and msg / msg_len receive the parsed pieces;
	 * headers, num_headers and max_headers are forwarded to parse_headers().
	 *
	 * Returns the position just past the header section, or NULL with the error
	 * code stored in *ret (-2 when too few bytes are available for the status
	 * code, -1 when a required space is missing).
	 */
	static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
	                                  size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
	{
	    /* parse "HTTP/1.x" */
	    if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
	        return NULL;
	    }
	    /* skip space */
	    if (*buf++ != ' ') {
	        *ret = -1;
	        return NULL;
	    }
	    /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
	    if (buf_end - buf < 4) {
	        *ret = -2;
	        return NULL;
	    }
	    PARSE_INT_3(status);

	    /* skip space */
	    if (*buf++ != ' ') {
	        *ret = -1;
	        return NULL;
	    }
	    /* get message */
	    if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
	        return NULL;
	    }

	    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
	}
	428
	/*
	 * Parses an HTTP response held in buf_start[0..len).
	 *
	 * All output parameters are reset first.  *num_headers is read on entry as
	 * the capacity of `headers` and is rewritten while parsing.  A non-zero
	 * last_len enables a quick completeness check that starts shortly before
	 * that offset, before any full parse is attempted.
	 *
	 * Returns the number of bytes consumed on success, otherwise the negative
	 * code stored by the failing helper (-1 and -2 are the values assigned in
	 * the helpers visible in this file).
	 */
	int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
	                       struct phr_header *headers, size_t *num_headers, size_t last_len)
	{
	    const char *buf = buf_start;
	    const char *buf_end = buf + len;
	    size_t max_headers = *num_headers;
	    int r;

	    *minor_version = -1;
	    *status = 0;
	    *msg = NULL;
	    *msg_len = 0;
	    *num_headers = 0;

	    /* if last_len != 0, check if the response is complete (a fast countermeasure
	       against slowloris) */
	    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
	        return r;
	    }

	    if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) {
	        return r;
	    }

	    return (int)(buf - buf_start);
	}
	454
	/*
	 * Parses a bare header section held in buf_start[0..len).
	 *
	 * *num_headers is read on entry as the capacity of `headers` and is
	 * rewritten while parsing.  A non-zero last_len enables a quick
	 * completeness check that starts shortly before that offset.
	 *
	 * Returns the number of bytes consumed on success, otherwise the negative
	 * code stored by the failing helper (-1 and -2 are the values assigned in
	 * the helpers visible in this file).
	 */
	int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
	{
	    const char *buf = buf_start;
	    const char *buf_end = buf + len;
	    size_t max_headers = *num_headers;
	    int r;

	    *num_headers = 0;

	    /* if last_len != 0, check if the headers are complete (a fast countermeasure
	       against slowloris) */
	    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
	        return r;
	    }

	    if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
	        return r;
	    }

	    return (int)(buf - buf_start);
	}
	475
	/* states of the chunked-transfer decoder, stored in decoder->_state */
	enum {
	    /* NOTE(review): pinned to 0; presumably a zero-cleared decoder starts here - confirm */
	    CHUNKED_IN_CHUNK_SIZE = 0,       /* reading the hexadecimal chunk size */
	    CHUNKED_IN_CHUNK_EXT,            /* skipping the rest of the chunk-size line */
	    CHUNKED_IN_CHUNK_DATA,           /* copying chunk payload */
	    CHUNKED_IN_CHUNK_CRLF,           /* expecting the line end after the payload */
	    CHUNKED_IN_TRAILERS_LINE_HEAD,   /* at the start of a trailer line */
	    CHUNKED_IN_TRAILERS_LINE_MIDDLE  /* inside a trailer line, looking for LF */
	};
	484
	/*
	 * Converts one hexadecimal digit character to its numeric value.
	 *
	 * Returns 0-15 for '0'-'9', 'A'-'F' and 'a'-'f', and -1 for any other input.
	 */
	static int decode_hex(int ch)
	{
	    if ('0' <= ch && ch <= '9') {
	        return ch - '0';
	    } else if ('A' <= ch && ch <= 'F') {
	        return ch - 'A' + 0xa;
	    } else if ('a' <= ch && ch <= 'f') {
	        return ch - 'a' + 0xa;
	    } else {
	        return -1;
	    }
	}
	497
	498	ssize_t phr_decode_chunked(struct phr_chunked_decoder decoder, char buf, size_t *_bufsz)
	499	{
	500	size_t dst = 0, src = 0, bufsz = *_bufsz;
	501	ssize_t ret = -2; /* incomplete */
	502
	503	while (1) {
	504	switch (decoder->_state) {
	505	case CHUNKED_IN_CHUNK_SIZE:
	506	for (;; ++src) {
	507	int v;
	508	if (src == bufsz)
	509	goto Exit;
	510	if ((v = decode_hex(buf[src])) == -1) {
	511	if (decoder->_hex_count == 0) {
	512	ret = -1;
	513	goto Exit;
	514	}
	515	break;
	516	}
	517	if (decoder->_hex_count == sizeof(size_t) * 2) {
	518	ret = -1;
	519	goto Exit;
	520	}
	521	decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
	522	++decoder->_hex_count;
	523	}
	524	decoder->_hex_count = 0;
	525	decoder->_state = CHUNKED_IN_CHUNK_EXT;
	526	/* fallthru */
	527	case CHUNKED_IN_CHUNK_EXT:
	528	/* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
	529	for (;; ++src) {
	530	if (src == bufsz)
	531	goto Exit;
	532	if (buf[src] == '\012')
	533	break;
	534	}
	535	++src;
	536	if (decoder->bytes_left_in_chunk == 0) {
	537	if (decoder->consume_trailer) {
	538	decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
	539	break;
	540	} else {
	541	goto Complete;
	542	}
	543	}
	544	decoder->_state = CHUNKED_IN_CHUNK_DATA;
	545	/* fallthru */
	546	case CHUNKED_IN_CHUNK_DATA: {
	547	size_t avail = bufsz - src;
	548	if (avail < decoder->bytes_left_in_chunk) {
	549	if (dst != src)
	550	memmove(buf + dst, buf + src, avail);
	551	src += avail;
	552	dst += avail;
	553	decoder->bytes_left_in_chunk -= avail;
	554	goto Exit;
	555	}
	556	if (dst != src)
	557	memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
	558	src += decoder->bytes_left_in_chunk;
	559	dst += decoder->bytes_left_in_chunk;
	560	decoder->bytes_left_in_chunk = 0;
	561	decoder->_state = CHUNKED_IN_CHUNK_CRLF;
	562	}
	563	/* fallthru */
	564	case CHUNKED_IN_CHUNK_CRLF:
	565	for (;; ++src) {
	566	if (src == bufsz)
	567	goto Exit;
	568	if (buf[src] != '\015')
	569	break;
	570	}
	571	if (buf[src] != '\012') {
	572	ret = -1;
	573	goto Exit;
	574	}
	575	++src;
	576	decoder->_state = CHUNKED_IN_CHUNK_SIZE;
	577	break;
	578	case CHUNKED_IN_TRAILERS_LINE_HEAD:
	579	for (;; ++src) {
	580	if (src == bufsz)
	581	goto Exit;
	582	if (buf[src] != '\015')
	583	break;
	584	}
	585	if (buf[src++] == '\012')
	586	goto Complete;
	587	decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
	588	/* fallthru */
	589	case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
	590	for (;; ++src) {
	591	if (src == bufsz)
	592	goto Exit;
	593	if (buf[src] == '\012')
	594	break;
	595	}
	596	++src;
	597	decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
	598	break;
	599	default:
	600	assert(!"decoder is corrupt");
	601	}
	602	}
	603
	604	Complete:
	605	ret = bufsz - src;
	606	Exit:
	607	if (dst != src)
	608	memmove(buf + dst, buf + src, bufsz - src);
	609	*_bufsz = dst;
	610	return ret;
	611	}
	612
	613	int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
	614	{
	615	return decoder->_state == CHUNKED_IN_CHUNK_DATA;
	616	}
	617
	/* drop the parser helper macros so they do not leak past this point */
	#undef CHECK_EOF
	#undef EXPECT_CHAR
	#undef ADVANCE_TOKEN