1 files changed, 645 insertions, 0 deletions
diff --git a/plugins/picohttpparser/picohttpparser.c b/plugins/picohttpparser/picohttpparser.c
new file mode 100644
index 0000000..74ccc3e
--- /dev/null
+++ b/plugins/picohttpparser/picohttpparser.c
@@ -0,0 +1,645 @@
+/*
+ * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
+ *                         Shigeo Mitsunari
+ *
+ * The software is licensed under either the MIT License (below) or the Perl
+ * license.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+#ifdef __SSE4_2__
+#ifdef _MSC_VER
+#include <nmmintrin.h>
+#else
+#include <x86intrin.h>
+#endif
+#endif
+#include "picohttpparser.h"
+/*
+ * Branch-prediction hints. With a GCC-compatible compiler (__GNUC__ >= 3) they map to __builtin_expect; elsewhere
+ * they expand to the bare expression.
+ */
+#if __GNUC__ >= 3
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#else
+#define likely(x) (x)
+#define unlikely(x) (x)
+#endif
+/* ALIGNED(n): request n-byte alignment for a declaration, using the MSVC or the GCC attribute spelling. */
+#ifdef _MSC_VER
+#define ALIGNED(n) _declspec(align(n))
+#else
+#define ALIGNED(n) __attribute__((aligned(n)))
+#endif
+/*
+ * True for bytes 0x20 (SP) through 0x7e ('~'). The subtraction is unsigned, so bytes below 0x20 wrap to a large
+ * value and a single comparison covers both bounds.
+ */
+#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
+/*
+ * Cursor helpers. They assume the enclosing function has `buf`, `buf_end` and `ret` in scope and returns a pointer:
+ * on failure they store a status in *ret and return NULL from that function.
+ *
+ *   CHECK_EOF()              - fail with -2 (input exhausted) when buf has reached buf_end
+ *   EXPECT_CHAR_NO_CHECK(ch) - consume one byte and fail with -1 unless it equals ch; the caller must already know
+ *                              that a byte is available
+ *   EXPECT_CHAR(ch)          - CHECK_EOF() followed by EXPECT_CHAR_NO_CHECK(ch)
+ *
+ * NOTE: these expand to bare `if` statements (EXPECT_CHAR to two statements), not to do { } while (0); use them only
+ * as full statements inside a braced block.
+ */
+#define CHECK_EOF()                                                                                                                \
+    if (buf == buf_end) {                                                                                                          \
+        *ret = -2;                                                                                                                 \
+        return NULL;                                                                                                               \
+    }
+#define EXPECT_CHAR_NO_CHECK(ch)                                                                                                   \
+    if (*buf++ != ch) {                                                                                                            \
+        *ret = -1;                                                                                                                 \
+        return NULL;                                                                                                               \
+    }
+#define EXPECT_CHAR(ch)                                                                                                            \
+    CHECK_EOF();                                                                                                                   \
+    EXPECT_CHAR_NO_CHECK(ch);
+/*
+ * ADVANCE_TOKEN(tok, toklen): scan a space-terminated token starting at buf.
+ *
+ * findchar_fast() first skips ahead to a byte in 0x00-0x20 or equal to 0x7f (when the SSE4.2 path is available); the
+ * byte-wise loop then finishes the scan. The loop stops at the first SP, rejects control characters (< 0x20) and DEL
+ * with *ret = -1, and accepts every other byte, including those >= 0x80. Running out of input yields *ret = -2.
+ * On success tok/toklen describe the token and buf is left pointing at the terminating SP.
+ *
+ * Like CHECK_EOF, this may return NULL from the enclosing function and needs buf, buf_end and ret in scope.
+ */
+#define ADVANCE_TOKEN(tok, toklen)                                                                                                 \
+    do {                                                                                                                           \
+        const char *tok_start = buf;                                                                                               \
+        static const char ALIGNED(16) ranges2[16] = "\000\040\177\177";                                                            \
+        int found2;                                                                                                                \
+        buf = findchar_fast(buf, buf_end, ranges2, 4, &found2);                                                                    \
+        if (!found2) {                                                                                                             \
+            CHECK_EOF();                                                                                                           \
+        }                                                                                                                          \
+        while (1) {                                                                                                                \
+            if (*buf == ' ') {                                                                                                     \
+                break;                                                                                                             \
+            } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {                                                                      \
+                if ((unsigned char)*buf < '\040' || *buf == '\177') {                                                              \
+                    *ret = -1;                                                                                                     \
+                    return NULL;                                                                                                   \
+                }                                                                                                                  \
+            }                                                                                                                      \
+            ++buf;                                                                                                                 \
+            CHECK_EOF();                                                                                                           \
+        }                                                                                                                          \
+        tok = tok_start;                                                                                                           \
+        toklen = buf - tok_start;                                                                                                  \
+    } while (0)
+/*
+ * 256-entry lookup table indexed by byte value (cast to unsigned char): a non-zero entry marks a byte that
+ * parse_headers() accepts inside a header field name. The accepted set is ALPHA, DIGIT and
+ * ! # $ % & ' * + - . ^ _ ` | ~ ; control characters, SP, the remaining punctuation, DEL and all bytes >= 0x80
+ * map to zero. Each source line below covers 32 consecutive byte values.
+ */
+static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+                                    "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
+                                    "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
+                                    "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
+                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+/*
+ * Advances buf to the first byte that falls inside one of the byte ranges listed in `ranges`, examining the input
+ * 16 bytes at a time with the SSE4.2 string-compare instruction.
+ *
+ *   ranges      - consecutive (low, high) byte pairs; a full 16 bytes are loaded from this pointer, so the array
+ *                 must be at least that large
+ *   ranges_size - number of leading bytes of `ranges` that are meaningful (two per range)
+ *   found       - set to 1 when a matching byte was located, 0 otherwise
+ *
+ * Returns the position of the match, or the position where block-wise scanning stopped; in the latter case fewer
+ * than 16 bytes remain before buf_end and they have NOT been examined. When built without SSE4.2 the function
+ * returns buf unchanged with *found == 0, so callers must always follow up with a byte-wise scan.
+ */
+static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
+{
+    *found = 0;
+#if __SSE4_2__
+    if (likely(buf_end - buf >= 16)) {
+        __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
+        /* number of bytes covered by whole 16-byte blocks */
+        size_t left = (buf_end - buf) & ~15;
+        do {
+            __m128i b16 = _mm_loadu_si128((const __m128i *)buf);
+            /* r is the index of the first byte of the block that lies in a range, or 16 when there is none */
+            int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
+            if (unlikely(r != 16)) {
+                buf += r;
+                *found = 1;
+                break;
+            }
+            buf += 16;
+            left -= 16;
+        } while (likely(left != 0));
+    }
+#else
+    /* suppress unused parameter warning */
+    (void)buf_end;
+    (void)ranges;
+    (void)ranges_size;
+#endif
+    return buf;
+}
+/*
+ * Scans from buf to the end of the current line. The token (*token, *token_len) is everything before the line
+ * terminator, which may be CRLF or a bare LF. HT and bytes >= 0x80 are permitted inside the token; any other
+ * control character, or DEL, stops the scan and is an error unless it starts the line terminator.
+ *
+ * Returns the position just past the line terminator, or NULL with *ret = -2 (input ended first) or *ret = -1
+ * (illegal byte, or CR not followed by LF). *token and *token_len are written only on success paths.
+ */
+static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
+{
+    const char *token_start = buf;
+#ifdef __SSE4_2__
+    /* the ranges list the bytes that STOP the scan; the per-line remarks name what falls in the gaps between them */
+    static const char ALIGNED(16) ranges1[16] = "\0\010"    /* allow HT */
+                                                "\012\037"  /* allow SP and up to but not including DEL */
+                                                "\177\177"; /* allow chars w. MSB set */
+    int found;
+    buf = findchar_fast(buf, buf_end, ranges1, 6, &found);
+    if (found)
+        goto FOUND_CTL;
+#else
+    /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
+    while (likely(buf_end - buf >= 8)) {
+#define DOIT()                                                                                                                     \
+    do {                                                                                                                           \
+        if (unlikely(!IS_PRINTABLE_ASCII(*buf)))                                                                                   \
+            goto NonPrintable;                                                                                                     \
+        ++buf;                                                                                                                     \
+    } while (0)
+        DOIT();
+        DOIT();
+        DOIT();
+        DOIT();
+        DOIT();
+        DOIT();
+        DOIT();
+        DOIT();
+#undef DOIT
+        continue;
+    NonPrintable:
+        /* non-printable byte: a control character other than HT, or DEL, ends the token; anything else is skipped */
+        if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
+            goto FOUND_CTL;
+        }
+        ++buf;
+    }
+#endif
+    /* byte-wise scan of whatever the fast path above left unexamined */
+    for (;; ++buf) {
+        CHECK_EOF();
+        if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
+            if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
+                goto FOUND_CTL;
+            }
+        }
+    }
+FOUND_CTL:
+    /* buf points at the byte that stopped the scan; only CR LF or a bare LF is a valid way to end the line */
+    if (likely(*buf == '\015')) {
+        ++buf;
+        EXPECT_CHAR('\012');
+        *token_len = buf - 2 - token_start;
+    } else if (*buf == '\012') {
+        *token_len = buf - token_start;
+        ++buf;
+    } else {
+        *ret = -1;
+        return NULL;
+    }
+    *token = token_start;
+    return buf;
+}
+/*
+ * Looks for the empty line that ends a header block, i.e. two consecutive line terminators (each CRLF or a bare LF).
+ * When last_len >= 3 the search starts at offset last_len - 3 instead of at the start of the buffer.
+ *
+ * Returns the position just past the second terminator, or NULL with *ret = -2 when the input ends before one is
+ * found, or *ret = -1 when a CR is not followed by LF.
+ */
+static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
+{
+    int eol_run = 0; /* number of line terminators seen back to back */
+
+    if (last_len >= 3) {
+        buf += last_len - 3;
+    }
+    for (;;) {
+        CHECK_EOF();
+        switch (*buf) {
+        case '\015':
+            ++buf;
+            EXPECT_CHAR('\012');
+            ++eol_run;
+            break;
+        case '\012':
+            ++buf;
+            ++eol_run;
+            break;
+        default:
+            /* any other byte breaks the run */
+            ++buf;
+            eol_run = 0;
+            break;
+        }
+        if (eol_run == 2) {
+            return buf;
+        }
+    }
+}
+/*
+ * PARSE_INT(valp_, mul_): consume one decimal digit at buf and store digit * mul_ in *valp_. A non-digit makes the
+ * enclosing function return NULL with *ret = -1. buf is NOT checked against buf_end; the caller must have verified
+ * that a byte is available. Expands to two bare statements, so use it only as a full statement in a braced block.
+ */
+#define PARSE_INT(valp_, mul_)                                                                                                     \
+    if (*buf < '0' || '9' < *buf) {                                                                                                \
+        buf++;                                                                                                                     \
+        *ret = -1;                                                                                                                 \
+        return NULL;                                                                                                               \
+    }                                                                                                                              \
+    *(valp_) = (mul_) * (*buf++ - '0');
+/*
+ * PARSE_INT_3(valp_): read exactly three decimal digits at buf and store their value (hundreds, tens, units) in
+ * *valp_. Same failure behaviour and same lack of bounds checking as PARSE_INT; *valp_ may hold a partial sum when
+ * the second or third character is not a digit.
+ */
+#define PARSE_INT_3(valp_)                                                                                                         \
+    do {                                                                                                                           \
+        int res_ = 0;                                                                                                              \
+        PARSE_INT(&res_, 100)                                                                                                      \
+        *valp_ = res_;                                                                                                             \
+        PARSE_INT(&res_, 10)                                                                                                       \
+        *valp_ += res_;                                                                                                            \
+        PARSE_INT(&res_, 1)                                                                                                        \
+        *valp_ += res_;                                                                                                            \
+    } while (0)
+/*
+ * Parses the "HTTP/1.x" version token at buf and stores the single minor-version digit in *minor_version.
+ *
+ * At least 9 bytes must be available (the 8-byte token plus one byte after it), otherwise the result is NULL with
+ * *ret = -2. A token that does not match yields NULL with *ret = -1. On success the returned pointer is just past
+ * the digit and is therefore always within [buf, buf_end).
+ */
+static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
+{
+    static const char prefix[] = "HTTP/1.";
+    const size_t prefix_len = sizeof(prefix) - 1;
+
+    /* we want at least [HTTP/1.<two chars>] to try to parse */
+    if (buf_end - buf < 9) {
+        *ret = -2;
+        return NULL;
+    }
+    if (memcmp(buf, prefix, prefix_len) != 0) {
+        *ret = -1;
+        return NULL;
+    }
+    buf += prefix_len;
+    PARSE_INT(minor_version, 1);
+    return buf;
+}
+/*
+ * Parses header lines starting at buf until the empty line that ends the header block, filling
+ * headers[*num_headers], headers[*num_headers + 1], ... *num_headers is incremented once per stored line and is
+ * not reset here.
+ *
+ * A line that begins with SP or HT (and is not the first stored entry) is recorded as a continuation: name == NULL,
+ * name_len == 0, and the value starts at that leading whitespace. Trailing SP/HT are trimmed from every value.
+ * All name/value pointers refer into the input buffer; nothing is copied.
+ *
+ * Returns the position just past the terminating empty line, or NULL with *ret = -2 (input incomplete) or
+ * *ret = -1 (invalid byte in a name, empty name, malformed line ending, or more lines than max_headers).
+ */
+static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
+                                 size_t max_headers, int *ret)
+{
+    for (;; ++*num_headers) {
+        CHECK_EOF();
+        /* an empty line (CRLF or bare LF) terminates the header block */
+        if (*buf == '\015') {
+            ++buf;
+            EXPECT_CHAR('\012');
+            break;
+        } else if (*buf == '\012') {
+            ++buf;
+            break;
+        }
+        if (*num_headers == max_headers) {
+            *ret = -1;
+            return NULL;
+        }
+        if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
+            /* parsing name, but do not discard SP before colon, see
+             * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
+            headers[*num_headers].name = buf;
+            /* byte ranges that stop the fast scan; the loop below then decides between ':' and an invalid byte */
+            static const char ALIGNED(16) ranges1[] = "\x00 "  /* control chars and up to SP */
+                                                      "\"\""   /* 0x22 */
+                                                      "()"     /* 0x28,0x29 */
+                                                      ",,"     /* 0x2c */
+                                                      "//"     /* 0x2f */
+                                                      ":@"     /* 0x3a-0x40 */
+                                                      "[]"     /* 0x5b-0x5d */
+                                                      "{\377"; /* 0x7b-0xff */
+            int found;
+            buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
+            if (!found) {
+                CHECK_EOF();
+            }
+            while (1) {
+                if (*buf == ':') {
+                    break;
+                } else if (!token_char_map[(unsigned char)*buf]) {
+                    *ret = -1;
+                    return NULL;
+                }
+                ++buf;
+                CHECK_EOF();
+            }
+            if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) {
+                *ret = -1;
+                return NULL;
+            }
+            /* step over the colon, then over any SP/HT in front of the value */
+            ++buf;
+            for (;; ++buf) {
+                CHECK_EOF();
+                if (!(*buf == ' ' || *buf == '\t')) {
+                    break;
+                }
+            }
+        } else {
+            /* continuation line: no name, the value is taken from the current position */
+            headers[*num_headers].name = NULL;
+            headers[*num_headers].name_len = 0;
+        }
+        const char *value;
+        size_t value_len;
+        if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) {
+            return NULL;
+        }
+        /* remove trailing SPs and HTABs */
+        const char *value_end = value + value_len;
+        for (; value_end != value; --value_end) {
+            const char c = *(value_end - 1);
+            if (!(c == ' ' || c == '\t')) {
+                break;
+            }
+        }
+        headers[*num_headers].value = value;
+        headers[*num_headers].value_len = value_end - value;
+    }
+    return buf;
+}
+/*
+ * Parses a request line ("METHOD SP request-target SP HTTP/1.x" plus CRLF or LF) followed by the header block.
+ * One empty line in front of the request line is tolerated, and runs of several spaces between the three parts of
+ * the request line are accepted.
+ *
+ * method/path (and the header entries) point into the input buffer. Returns the position just past the header
+ * block, or NULL with *ret = -2 (input incomplete) or *ret = -1 (malformed request).
+ */
+static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
+                                 size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
+                                 size_t max_headers, int *ret)
+{
+    /* skip first empty line (some clients add CRLF after POST content) */
+    CHECK_EOF();
+    if (*buf == '\015') {
+        ++buf;
+        EXPECT_CHAR('\012');
+    } else if (*buf == '\012') {
+        ++buf;
+    }
+    /* parse request line */
+    ADVANCE_TOKEN(*method, *method_len);
+    /* skip the spaces after the method; the input may end right after a space, so check before every read */
+    do {
+        ++buf;
+        CHECK_EOF();
+    } while (*buf == ' ');
+    ADVANCE_TOKEN(*path, *path_len);
+    /* same for the spaces after the request target */
+    do {
+        ++buf;
+        CHECK_EOF();
+    } while (*buf == ' ');
+    if (*method_len == 0 || *path_len == 0) {
+        *ret = -1;
+        return NULL;
+    }
+    if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
+        return NULL;
+    }
+    /* parse_http_version() guarantees that one more byte is readable here */
+    if (*buf == '\015') {
+        ++buf;
+        EXPECT_CHAR('\012');
+    } else if (*buf == '\012') {
+        ++buf;
+    } else {
+        *ret = -1;
+        return NULL;
+    }
+    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
+}
+/*
+ * Parses the HTTP request held in buf_start[0 .. len).
+ *
+ * On entry *num_headers is the capacity of `headers`; on return it holds the number of entries filled in. All
+ * outputs are reset before parsing starts (pointers to NULL, lengths and *num_headers to 0, *minor_version to -1).
+ * When last_len is non-zero, is_complete() is run first and its failure status is returned without parsing.
+ *
+ * Returns the number of bytes consumed on success, otherwise the negative status set by the parser
+ * (-1 malformed, -2 incomplete).
+ */
+int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
+                      size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
+{
+    const char *const buf_end = buf_start + len;
+    const size_t max_headers = *num_headers; /* caller-supplied capacity, captured before the reset below */
+    const char *cur;
+    int r;
+
+    *method = NULL;
+    *method_len = 0;
+    *path = NULL;
+    *path_len = 0;
+    *minor_version = -1;
+    *num_headers = 0;
+
+    /* if last_len != 0, check if the request is complete (a fast countermeasure against slowloris) */
+    if (last_len != 0) {
+        if (is_complete(buf_start, buf_end, last_len, &r) == NULL) {
+            return r;
+        }
+    }
+
+    cur = parse_request(buf_start, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers,
+                        max_headers, &r);
+    if (cur == NULL) {
+        return r;
+    }
+    return (int)(cur - buf_start);
+}
+/*
+ * Parses a status line ("HTTP/1.x SP status-code [SP reason]" plus CRLF or LF) followed by the header block.
+ * Runs of several spaces are accepted after the version and in front of the reason phrase.
+ *
+ * *status receives the three-digit status code; msg/msg_len describe the reason phrase with its leading spaces
+ * removed (msg_len may be 0). Returns the position just past the header block, or NULL with *ret = -2
+ * (input incomplete) or *ret = -1 (malformed response).
+ */
+static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
+                                  size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
+{
+    /* parse "HTTP/1.x" */
+    if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
+        return NULL;
+    }
+    /* skip space */
+    if (*buf != ' ') {
+        *ret = -1;
+        return NULL;
+    }
+    /* the input may end right after a space, so check before every read */
+    do {
+        ++buf;
+        CHECK_EOF();
+    } while (*buf == ' ');
+    /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
+    if (buf_end - buf < 4) {
+        *ret = -2;
+        return NULL;
+    }
+    PARSE_INT_3(status);
+    /* get message including preceding space */
+    if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
+        return NULL;
+    }
+    if (*msg_len == 0) {
+        /* ok */
+    } else if (**msg == ' ') {
+        /* remove preceding space; the loop ends at the latest on the line terminator that follows the message */
+        do {
+            ++*msg;
+            --*msg_len;
+        } while (**msg == ' ');
+    } else {
+        /* garbage found after status code */
+        *ret = -1;
+        return NULL;
+    }
+    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
+}
+/*
+ * Parses the HTTP response held in buf_start[0 .. len).
+ *
+ * On entry *num_headers is the capacity of `headers`; on return it holds the number of entries filled in. All
+ * outputs are reset before parsing starts (*minor_version to -1, *status to 0, *msg to NULL, *msg_len and
+ * *num_headers to 0). When last_len is non-zero, is_complete() is run first and its failure status is returned
+ * without parsing.
+ *
+ * Returns the number of bytes consumed on success, otherwise the negative status set by the parser
+ * (-1 malformed, -2 incomplete).
+ */
+int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
+                       struct phr_header *headers, size_t *num_headers, size_t last_len)
+{
+    const char *const buf_end = buf_start + len;
+    const size_t max_headers = *num_headers; /* caller-supplied capacity, captured before the reset below */
+    const char *cur;
+    int r;
+
+    *minor_version = -1;
+    *status = 0;
+    *msg = NULL;
+    *msg_len = 0;
+    *num_headers = 0;
+
+    /* if last_len != 0, check if the response is complete (a fast countermeasure against slowloris) */
+    if (last_len != 0) {
+        if (is_complete(buf_start, buf_end, last_len, &r) == NULL) {
+            return r;
+        }
+    }
+
+    cur = parse_response(buf_start, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r);
+    if (cur == NULL) {
+        return r;
+    }
+    return (int)(cur - buf_start);
+}
+/*
+ * Parses a bare header block (no request or status line) held in buf_start[0 .. len).
+ *
+ * On entry *num_headers is the capacity of `headers`; on return it holds the number of entries filled in. When
+ * last_len is non-zero, is_complete() is run first and its failure status is returned without parsing.
+ *
+ * Returns the number of bytes consumed on success, otherwise the negative status set by the parser
+ * (-1 malformed, -2 incomplete).
+ */
+int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
+{
+    const char *const buf_end = buf_start + len;
+    const size_t max_headers = *num_headers; /* caller-supplied capacity, captured before the reset below */
+    const char *cur;
+    int r;
+
+    *num_headers = 0;
+
+    /* if last_len != 0, check if the header block is complete (a fast countermeasure against slowloris) */
+    if (last_len != 0) {
+        if (is_complete(buf_start, buf_end, last_len, &r) == NULL) {
+            return r;
+        }
+    }
+
+    cur = parse_headers(buf_start, buf_end, headers, num_headers, max_headers, &r);
+    if (cur == NULL) {
+        return r;
+    }
+    return (int)(cur - buf_start);
+}
+/* States of the chunked decoder, kept in the _state field of struct phr_chunked_decoder (see phr_decode_chunked). */
+enum {
+    CHUNKED_IN_CHUNK_SIZE,          /* reading the hexadecimal chunk size */
+    CHUNKED_IN_CHUNK_EXT,           /* skipping the rest of the chunk-size line, up to and including LF */
+    CHUNKED_IN_CHUNK_DATA,          /* copying chunk payload bytes */
+    CHUNKED_IN_CHUNK_CRLF,          /* expecting the line terminator that follows the payload */
+    CHUNKED_IN_TRAILERS_LINE_HEAD,  /* at the start of a trailer line (an empty line ends the body) */
+    CHUNKED_IN_TRAILERS_LINE_MIDDLE /* inside a trailer line, skipping to its LF */
+};
+/* Returns the value (0-15) of the hexadecimal digit ch, in either letter case, or -1 if ch is not a hex digit. */
+static int decode_hex(int ch)
+{
+    if (ch >= '0' && ch <= '9') {
+        return ch - '0';
+    }
+    if (ch >= 'A' && ch <= 'F') {
+        return 0xa + (ch - 'A');
+    }
+    if (ch >= 'a' && ch <= 'f') {
+        return 0xa + (ch - 'a');
+    }
+    return -1;
+}
+/*
+ * Decodes chunked transfer-encoded data in place. On entry buf holds *_bufsz bytes of encoded input; on return the
+ * decoded payload has been compacted to the front of buf and *_bufsz holds its length. The parser position is kept
+ * in *decoder, so the function can be called again with the next piece of input.
+ *
+ * Returns -2 when more input is needed, -1 on a malformed chunk-size line or chunk terminator, and otherwise
+ * (decoding finished) the number of input bytes that followed the chunked body; those bytes are left at
+ * buf + *_bufsz. When decoder->consume_trailer is set, trailer lines up to and including the final empty line are
+ * consumed before completion is reported; otherwise decoding completes right after the zero-size chunk line.
+ */
+ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
+{
+    /* src: read offset into the encoded input; dst: write offset of the decoded output (dst <= src) */
+    size_t dst = 0, src = 0, bufsz = *_bufsz;
+    ssize_t ret = -2; /* incomplete */
+    while (1) {
+        switch (decoder->_state) {
+        case CHUNKED_IN_CHUNK_SIZE:
+            for (;; ++src) {
+                int v;
+                if (src == bufsz)
+                    goto Exit;
+                if ((v = decode_hex(buf[src])) == -1) {
+                    /* first non-hex byte ends the size; a size with no digits at all is an error */
+                    if (decoder->_hex_count == 0) {
+                        ret = -1;
+                        goto Exit;
+                    }
+                    break;
+                }
+                /* refuse more hex digits than fit in a size_t */
+                if (decoder->_hex_count == sizeof(size_t) * 2) {
+                    ret = -1;
+                    goto Exit;
+                }
+                decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
+                ++decoder->_hex_count;
+            }
+            decoder->_hex_count = 0;
+            decoder->_state = CHUNKED_IN_CHUNK_EXT;
+        /* fallthru */
+        case CHUNKED_IN_CHUNK_EXT:
+            /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
+            for (;; ++src) {
+                if (src == bufsz)
+                    goto Exit;
+                if (buf[src] == '\012')
+                    break;
+            }
+            ++src;
+            /* a chunk size of zero marks the end of the body */
+            if (decoder->bytes_left_in_chunk == 0) {
+                if (decoder->consume_trailer) {
+                    decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
+                    break;
+                } else {
+                    goto Complete;
+                }
+            }
+            decoder->_state = CHUNKED_IN_CHUNK_DATA;
+        /* fallthru */
+        case CHUNKED_IN_CHUNK_DATA: {
+            size_t avail = bufsz - src;
+            if (avail < decoder->bytes_left_in_chunk) {
+                /* the chunk continues beyond this buffer: keep what is here and wait for more input */
+                if (dst != src)
+                    memmove(buf + dst, buf + src, avail);
+                src += avail;
+                dst += avail;
+                decoder->bytes_left_in_chunk -= avail;
+                goto Exit;
+            }
+            if (dst != src)
+                memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
+            src += decoder->bytes_left_in_chunk;
+            dst += decoder->bytes_left_in_chunk;
+            decoder->bytes_left_in_chunk = 0;
+            decoder->_state = CHUNKED_IN_CHUNK_CRLF;
+        }
+        /* fallthru */
+        case CHUNKED_IN_CHUNK_CRLF:
+            /* skip any number of CRs, then require LF */
+            for (;; ++src) {
+                if (src == bufsz)
+                    goto Exit;
+                if (buf[src] != '\015')
+                    break;
+            }
+            if (buf[src] != '\012') {
+                ret = -1;
+                goto Exit;
+            }
+            ++src;
+            decoder->_state = CHUNKED_IN_CHUNK_SIZE;
+            break;
+        case CHUNKED_IN_TRAILERS_LINE_HEAD:
+            /* skip leading CRs; an LF right after them means the line is empty and the trailers are finished */
+            for (;; ++src) {
+                if (src == bufsz)
+                    goto Exit;
+                if (buf[src] != '\015')
+                    break;
+            }
+            if (buf[src++] == '\012')
+                goto Complete;
+            decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
+        /* fallthru */
+        case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
+            for (;; ++src) {
+                if (src == bufsz)
+                    goto Exit;
+                if (buf[src] == '\012')
+                    break;
+            }
+            ++src;
+            decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
+            break;
+        default:
+            assert(!"decoder is corrupt");
+        }
+    }
+Complete:
+    ret = bufsz - src;
+Exit:
+    /* move whatever input was not consumed so that it directly follows the decoded output */
+    if (dst != src)
+        memmove(buf + dst, buf + src, bufsz - src);
+    *_bufsz = dst;
+    return ret;
+}
+/* Returns non-zero when the decoder is currently positioned inside chunk payload data, zero otherwise. */
+int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
+{
+    const int in_data = (decoder->_state == CHUNKED_IN_CHUNK_DATA);
+
+    return in_data;
+}
+#undef CHECK_EOF
+#undef EXPECT_CHAR
+#undef ADVANCE_TOKEN

diff --git a/plugins/picohttpparser/picohttpparser.c b/plugins/picohttpparser/picohttpparser.c new file mode 100644 index 0000000..74ccc3e --- /dev/null +++ b/plugins/picohttpparser/picohttpparser.c
@@ -0,0 +1,645 @@
	1	/*
	2	* Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
	3	* Shigeo Mitsunari
	4	*
	5	* The software is licensed under either the MIT License (below) or the Perl
	6	* license.
	7	*
	8	* Permission is hereby granted, free of charge, to any person obtaining a copy
	9	* of this software and associated documentation files (the "Software"), to
	10	* deal in the Software without restriction, including without limitation the
	11	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
	12	* sell copies of the Software, and to permit persons to whom the Software is
	13	* furnished to do so, subject to the following conditions:
	14	*
	15	* The above copyright notice and this permission notice shall be included in
	16	* all copies or substantial portions of the Software.
	17	*
	18	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	19	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	20	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	21	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	22	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	23	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
	24	* IN THE SOFTWARE.
	25	*/
	26
	27	#include <assert.h>
	28	#include <stddef.h>
	29	#include <string.h>
	30	#ifdef __SSE4_2__
	31	#ifdef _MSC_VER
	32	#include <nmmintrin.h>
	33	#else
	34	#include <x86intrin.h>
	35	#endif
	36	#endif
	37	#include "picohttpparser.h"
	38
	39	#if __GNUC__ >= 3
	40	#define likely(x) __builtin_expect(!!(x), 1)
	41	#define unlikely(x) __builtin_expect(!!(x), 0)
	42	#else
	43	#define likely(x) (x)
	44	#define unlikely(x) (x)
	45	#endif
	46
	47	#ifdef _MSC_VER
	48	#define ALIGNED(n) _declspec(align(n))
	49	#else
	50	#define ALIGNED(n) __attribute__((aligned(n)))
	51	#endif
	52
	53	#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
	54
	55	#define CHECK_EOF() \
	56	if (buf == buf_end) { \
	57	*ret = -2; \
	58	return NULL; \
	59	}
	60
	61	#define EXPECT_CHAR_NO_CHECK(ch) \
	62	if (*buf++ != ch) { \
	63	*ret = -1; \
	64	return NULL; \
	65	}
	66
	67	#define EXPECT_CHAR(ch) \
	68	CHECK_EOF(); \
	69	EXPECT_CHAR_NO_CHECK(ch);
	70
	71	#define ADVANCE_TOKEN(tok, toklen) \
	72	do { \
	73	const char *tok_start = buf; \
	74	static const char ALIGNED(16) ranges2[16] = "\000\040\177\177"; \
	75	int found2; \
	76	buf = findchar_fast(buf, buf_end, ranges2, 4, &found2); \
	77	if (!found2) { \
	78	CHECK_EOF(); \
	79	} \
	80	while (1) { \
	81	if (*buf == ' ') { \
	82	break; \
	83	} else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
	84	if ((unsigned char)buf < '\040' \|\| buf == '\177') { \
	85	*ret = -1; \
	86	return NULL; \
	87	} \
	88	} \
	89	++buf; \
	90	CHECK_EOF(); \
	91	} \
	92	tok = tok_start; \
	93	toklen = buf - tok_start; \
	94	} while (0)
	95
	96	static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
	97	"\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
	98	"\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
	99	"\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
	100	"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
	101	"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
	102	"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
	103	"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
	104
	105	static const char findchar_fast(const char buf, const char buf_end, const char ranges, size_t ranges_size, int *found)
	106	{
	107	*found = 0;
	108	#if __SSE4_2__
	109	if (likely(buf_end - buf >= 16)) {
	110	__m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
	111
	112	size_t left = (buf_end - buf) & ~15;
	113	do {
	114	__m128i b16 = _mm_loadu_si128((const __m128i *)buf);
	115	int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT \| _SIDD_CMP_RANGES \| _SIDD_UBYTE_OPS);
	116	if (unlikely(r != 16)) {
	117	buf += r;
	118	*found = 1;
	119	break;
	120	}
	121	buf += 16;
	122	left -= 16;
	123	} while (likely(left != 0));
	124	}
	125	#else
	126	/* suppress unused parameter warning */
	127	(void)buf_end;
	128	(void)ranges;
	129	(void)ranges_size;
	130	#endif
	131	return buf;
	132	}
	133
	134	static const char get_token_to_eol(const char buf, const char buf_end, const char token, size_t token_len, int *ret)
	135	{
	136	const char *token_start = buf;
	137
	138	#ifdef __SSE4_2__
	139	static const char ALIGNED(16) ranges1[16] = "\0\010" /* allow HT */
	140	"\012\037" /* allow SP and up to but not including DEL */
	141	"\177\177"; /* allow chars w. MSB set */
	142	int found;
	143	buf = findchar_fast(buf, buf_end, ranges1, 6, &found);
	144	if (found)
	145	goto FOUND_CTL;
	146	#else
	147	/* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
	148	while (likely(buf_end - buf >= 8)) {
	149	#define DOIT() \
	150	do { \
	151	if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
	152	goto NonPrintable; \
	153	++buf; \
	154	} while (0)
	155	DOIT();
	156	DOIT();
	157	DOIT();
	158	DOIT();
	159	DOIT();
	160	DOIT();
	161	DOIT();
	162	DOIT();
	163	#undef DOIT
	164	continue;
	165	NonPrintable:
	166	if ((likely((unsigned char)buf < '\040') && likely(buf != '\011')) \|\| unlikely(*buf == '\177')) {
	167	goto FOUND_CTL;
	168	}
	169	++buf;
	170	}
	171	#endif
	172	for (;; ++buf) {
	173	CHECK_EOF();
	174	if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
	175	if ((likely((unsigned char)buf < '\040') && likely(buf != '\011')) \|\| unlikely(*buf == '\177')) {
	176	goto FOUND_CTL;
	177	}
	178	}
	179	}
	180	FOUND_CTL:
	181	if (likely(*buf == '\015')) {
	182	++buf;
	183	EXPECT_CHAR('\012');
	184	*token_len = buf - 2 - token_start;
	185	} else if (*buf == '\012') {
	186	*token_len = buf - token_start;
	187	++buf;
	188	} else {
	189	*ret = -1;
	190	return NULL;
	191	}
	192	*token = token_start;
	193
	194	return buf;
	195	}
	196
	197	static const char is_complete(const char buf, const char buf_end, size_t last_len, int ret)
	198	{
	199	int ret_cnt = 0;
	200	buf = last_len < 3 ? buf : buf + last_len - 3;
	201
	202	while (1) {
	203	CHECK_EOF();
	204	if (*buf == '\015') {
	205	++buf;
	206	CHECK_EOF();
	207	EXPECT_CHAR('\012');
	208	++ret_cnt;
	209	} else if (*buf == '\012') {
	210	++buf;
	211	++ret_cnt;
	212	} else {
	213	++buf;
	214	ret_cnt = 0;
	215	}
	216	if (ret_cnt == 2) {
	217	return buf;
	218	}
	219	}
	220
	221	*ret = -2;
	222	return NULL;
	223	}
	224
	225	#define PARSE_INT(valp_, mul_) \
	226	if (buf < '0' \|\| '9' < buf) { \
	227	buf++; \
	228	*ret = -1; \
	229	return NULL; \
	230	} \
	231	(valp_) = (mul_) (*buf++ - '0');
	232
	233	#define PARSE_INT_3(valp_) \
	234	do { \
	235	int res_ = 0; \
	236	PARSE_INT(&res_, 100) \
	237	*valp_ = res_; \
	238	PARSE_INT(&res_, 10) \
	239	*valp_ += res_; \
	240	PARSE_INT(&res_, 1) \
	241	*valp_ += res_; \
	242	} while (0)
	243
	244	/* returned pointer is always within [buf, buf_end), or null */
	245	static const char parse_http_version(const char buf, const char buf_end, int minor_version, int *ret)
	246	{
	247	/* we want at least [HTTP/1.<two chars>] to try to parse */
	248	if (buf_end - buf < 9) {
	249	*ret = -2;
	250	return NULL;
	251	}
	252	EXPECT_CHAR_NO_CHECK('H');
	253	EXPECT_CHAR_NO_CHECK('T');
	254	EXPECT_CHAR_NO_CHECK('T');
	255	EXPECT_CHAR_NO_CHECK('P');
	256	EXPECT_CHAR_NO_CHECK('/');
	257	EXPECT_CHAR_NO_CHECK('1');
	258	EXPECT_CHAR_NO_CHECK('.');
	259	PARSE_INT(minor_version, 1);
	260	return buf;
	261	}
	262
	263	static const char parse_headers(const char buf, const char buf_end, struct phr_header headers, size_t *num_headers,
	264	size_t max_headers, int *ret)
	265	{
	266	for (;; ++*num_headers) {
	267	CHECK_EOF();
	268	if (*buf == '\015') {
	269	++buf;
	270	EXPECT_CHAR('\012');
	271	break;
	272	} else if (*buf == '\012') {
	273	++buf;
	274	break;
	275	}
	276	if (*num_headers == max_headers) {
	277	*ret = -1;
	278	return NULL;
	279	}
	280	if (!(num_headers != 0 && (buf == ' ' \|\| *buf == '\t'))) {
	281	/* parsing name, but do not discard SP before colon, see
	282	* http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
	283	headers[*num_headers].name = buf;
	284	static const char ALIGNED(16) ranges1[] = "\x00 " /* control chars and up to SP */
	285	"\"\"" /* 0x22 */
	286	"()" /* 0x28,0x29 */
	287	",," /* 0x2c */
	288	"//" /* 0x2f */
	289	":@" /* 0x3a-0x40 */
	290	"[]" /* 0x5b-0x5d */
	291	"{\377"; /* 0x7b-0xff */
	292	int found;
	293	buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
	294	if (!found) {
	295	CHECK_EOF();
	296	}
	297	while (1) {
	298	if (*buf == ':') {
	299	break;
	300	} else if (!token_char_map[(unsigned char)*buf]) {
	301	*ret = -1;
	302	return NULL;
	303	}
	304	++buf;
	305	CHECK_EOF();
	306	}
	307	if ((headers[num_headers].name_len = buf - headers[num_headers].name) == 0) {
	308	*ret = -1;
	309	return NULL;
	310	}
	311	++buf;
	312	for (;; ++buf) {
	313	CHECK_EOF();
	314	if (!(buf == ' ' \|\| buf == '\t')) {
	315	break;
	316	}
	317	}
	318	} else {
	319	headers[*num_headers].name = NULL;
	320	headers[*num_headers].name_len = 0;
	321	}
	322	const char *value;
	323	size_t value_len;
	324	if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) {
	325	return NULL;
	326	}
	327	/* remove trailing SPs and HTABs */
	328	const char *value_end = value + value_len;
	329	for (; value_end != value; --value_end) {
	330	const char c = *(value_end - 1);
	331	if (!(c == ' ' \|\| c == '\t')) {
	332	break;
	333	}
	334	}
	335	headers[*num_headers].value = value;
	336	headers[*num_headers].value_len = value_end - value;
	337	}
	338	return buf;
	339	}
	340
	341	static const char parse_request(const char buf, const char buf_end, const char method, size_t method_len, const char **path,
	342	size_t path_len, int minor_version, struct phr_header headers, size_t num_headers,
	343	size_t max_headers, int *ret)
	344	{
	345	/* skip first empty line (some clients add CRLF after POST content) */
	346	CHECK_EOF();
	347	if (*buf == '\015') {
	348	++buf;
	349	EXPECT_CHAR('\012');
	350	} else if (*buf == '\012') {
	351	++buf;
	352	}
	353
	354	/* parse request line */
	355	ADVANCE_TOKEN(method, method_len);
	356	do {
	357	++buf;
	358	} while (*buf == ' ');
	359	ADVANCE_TOKEN(path, path_len);
	360	do {
	361	++buf;
	362	} while (*buf == ' ');
	363	if (method_len == 0 \|\| path_len == 0) {
	364	*ret = -1;
	365	return NULL;
	366	}
	367	if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
	368	return NULL;
	369	}
	370	if (*buf == '\015') {
	371	++buf;
	372	EXPECT_CHAR('\012');
	373	} else if (*buf == '\012') {
	374	++buf;
	375	} else {
	376	*ret = -1;
	377	return NULL;
	378	}
	379
	380	return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
	381	}
	382
	383	int phr_parse_request(const char buf_start, size_t len, const char method, size_t method_len, const char **path,
	384	size_t path_len, int minor_version, struct phr_header headers, size_t num_headers, size_t last_len)
	385	{
	386	const char buf = buf_start, buf_end = buf_start + len;
	387	size_t max_headers = *num_headers;
	388	int r;
	389
	390	*method = NULL;
	391	*method_len = 0;
	392	*path = NULL;
	393	*path_len = 0;
	394	*minor_version = -1;
	395	*num_headers = 0;
	396
	397	/* if last_len != 0, check if the request is complete (a fast countermeasure
	398	againt slowloris */
	399	if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
	400	return r;
	401	}
	402
	403	if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers,
	404	&r)) == NULL) {
	405	return r;
	406	}
	407
	408	return (int)(buf - buf_start);
	409	}
	410
	411	static const char parse_response(const char buf, const char buf_end, int minor_version, int status, const char *msg,
	412	size_t msg_len, struct phr_header headers, size_t num_headers, size_t max_headers, int ret)
	413	{
	414	/* parse "HTTP/1.x" */
	415	if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
	416	return NULL;
	417	}
	418	/* skip space */
	419	if (*buf != ' ') {
	420	*ret = -1;
	421	return NULL;
	422	}
	423	do {
	424	++buf;
	425	} while (*buf == ' ');
	426	/* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
	427	if (buf_end - buf < 4) {
	428	*ret = -2;
	429	return NULL;
	430	}
	431	PARSE_INT_3(status);
	432
	433	/* get message includig preceding space */
	434	if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
	435	return NULL;
	436	}
	437	if (*msg_len == 0) {
	438	/* ok */
	439	} else if (**msg == ' ') {
	440	/* remove preceding space */
	441	do {
	442	++*msg;
	443	--*msg_len;
	444	} while (**msg == ' ');
	445	} else {
	446	/* garbage found after status code */
	447	*ret = -1;
	448	return NULL;
	449	}
	450
	451	return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
	452	}
	453
	454	int phr_parse_response(const char buf_start, size_t len, int minor_version, int status, const char msg, size_t msg_len,
	455	struct phr_header headers, size_t num_headers, size_t last_len)
	456	{
	457	const char buf = buf_start, buf_end = buf + len;
	458	size_t max_headers = *num_headers;
	459	int r;
	460
	461	*minor_version = -1;
	462	*status = 0;
	463	*msg = NULL;
	464	*msg_len = 0;
	465	*num_headers = 0;
	466
	467	/* if last_len != 0, check if the response is complete (a fast countermeasure
	468	against slowloris */
	469	if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
	470	return r;
	471	}
	472
	473	if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) {
	474	return r;
	475	}
	476
	477	return (int)(buf - buf_start);
	478	}
	479
	480	int phr_parse_headers(const char buf_start, size_t len, struct phr_header headers, size_t *num_headers, size_t last_len)
	481	{
	482	const char buf = buf_start, buf_end = buf + len;
	483	size_t max_headers = *num_headers;
	484	int r;
	485
	486	*num_headers = 0;
	487
	488	/* if last_len != 0, check if the response is complete (a fast countermeasure
	489	against slowloris */
	490	if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
	491	return r;
	492	}
	493
	494	if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
	495	return r;
	496	}
	497
	498	return (int)(buf - buf_start);
	499	}
	500
	501	enum {
	502	CHUNKED_IN_CHUNK_SIZE,
	503	CHUNKED_IN_CHUNK_EXT,
	504	CHUNKED_IN_CHUNK_DATA,
	505	CHUNKED_IN_CHUNK_CRLF,
	506	CHUNKED_IN_TRAILERS_LINE_HEAD,
	507	CHUNKED_IN_TRAILERS_LINE_MIDDLE
	508	};
	509
	510	static int decode_hex(int ch)
	511	{
	512	if ('0' <= ch && ch <= '9') {
	513	return ch - '0';
	514	} else if ('A' <= ch && ch <= 'F') {
	515	return ch - 'A' + 0xa;
	516	} else if ('a' <= ch && ch <= 'f') {
	517	return ch - 'a' + 0xa;
	518	} else {
	519	return -1;
	520	}
	521	}
	522
	523	ssize_t phr_decode_chunked(struct phr_chunked_decoder decoder, char buf, size_t *_bufsz)
	524	{
	525	size_t dst = 0, src = 0, bufsz = *_bufsz;
	526	ssize_t ret = -2; /* incomplete */
	527
	528	while (1) {
	529	switch (decoder->_state) {
	530	case CHUNKED_IN_CHUNK_SIZE:
	531	for (;; ++src) {
	532	int v;
	533	if (src == bufsz)
	534	goto Exit;
	535	if ((v = decode_hex(buf[src])) == -1) {
	536	if (decoder->_hex_count == 0) {
	537	ret = -1;
	538	goto Exit;
	539	}
	540	break;
	541	}
	542	if (decoder->_hex_count == sizeof(size_t) * 2) {
	543	ret = -1;
	544	goto Exit;
	545	}
	546	decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
	547	++decoder->_hex_count;
	548	}
	549	decoder->_hex_count = 0;
	550	decoder->_state = CHUNKED_IN_CHUNK_EXT;
	551	/* fallthru */
	552	case CHUNKED_IN_CHUNK_EXT:
	553	/* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
	554	for (;; ++src) {
	555	if (src == bufsz)
	556	goto Exit;
	557	if (buf[src] == '\012')
	558	break;
	559	}
	560	++src;
	561	if (decoder->bytes_left_in_chunk == 0) {
	562	if (decoder->consume_trailer) {
	563	decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
	564	break;
	565	} else {
	566	goto Complete;
	567	}
	568	}
	569	decoder->_state = CHUNKED_IN_CHUNK_DATA;
	570	/* fallthru */
	571	case CHUNKED_IN_CHUNK_DATA: {
	572	size_t avail = bufsz - src;
	573	if (avail < decoder->bytes_left_in_chunk) {
	574	if (dst != src)
	575	memmove(buf + dst, buf + src, avail);
	576	src += avail;
	577	dst += avail;
	578	decoder->bytes_left_in_chunk -= avail;
	579	goto Exit;
	580	}
	581	if (dst != src)
	582	memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
	583	src += decoder->bytes_left_in_chunk;
	584	dst += decoder->bytes_left_in_chunk;
	585	decoder->bytes_left_in_chunk = 0;
	586	decoder->_state = CHUNKED_IN_CHUNK_CRLF;
	587	}
	588	/* fallthru */
	589	case CHUNKED_IN_CHUNK_CRLF:
	590	for (;; ++src) {
	591	if (src == bufsz)
	592	goto Exit;
	593	if (buf[src] != '\015')
	594	break;
	595	}
	596	if (buf[src] != '\012') {
	597	ret = -1;
	598	goto Exit;
	599	}
	600	++src;
	601	decoder->_state = CHUNKED_IN_CHUNK_SIZE;
	602	break;
	603	case CHUNKED_IN_TRAILERS_LINE_HEAD:
	604	for (;; ++src) {
	605	if (src == bufsz)
	606	goto Exit;
	607	if (buf[src] != '\015')
	608	break;
	609	}
	610	if (buf[src++] == '\012')
	611	goto Complete;
	612	decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
	613	/* fallthru */
	614	case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
	615	for (;; ++src) {
	616	if (src == bufsz)
	617	goto Exit;
	618	if (buf[src] == '\012')
	619	break;
	620	}
	621	++src;
	622	decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
	623	break;
	624	default:
	625	assert(!"decoder is corrupt");
	626	}
	627	}
	628
	629	Complete:
	630	ret = bufsz - src;
	631	Exit:
	632	if (dst != src)
	633	memmove(buf + dst, buf + src, bufsz - src);
	634	*_bufsz = dst;
	635	return ret;
	636	}
	637
	638	int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
	639	{
	640	return decoder->_state == CHUNKED_IN_CHUNK_DATA;
	641	}
	642
	643	#undef CHECK_EOF
	644	#undef EXPECT_CHAR
	645	#undef ADVANCE_TOKEN