summaryrefslogtreecommitdiffstats
path: root/plugins/picohttpparser/picohttpparser.c
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/picohttpparser/picohttpparser.c')
-rw-r--r--plugins/picohttpparser/picohttpparser.c651
1 files changed, 651 insertions, 0 deletions
diff --git a/plugins/picohttpparser/picohttpparser.c b/plugins/picohttpparser/picohttpparser.c
new file mode 100644
index 00000000..d0bfac62
--- /dev/null
+++ b/plugins/picohttpparser/picohttpparser.c
@@ -0,0 +1,651 @@
1/*
2 * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
3 * Shigeo Mitsunari
4 *
5 * The software is licensed under either the MIT License (below) or the Perl
6 * license.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to
10 * deal in the Software without restriction, including without limitation the
11 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
12 * sell copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * IN THE SOFTWARE.
25 */
26
27#include <assert.h>
28#include <stddef.h>
29#include <string.h>
30#ifdef __SSE4_2__
31#ifdef _MSC_VER
32#include <nmmintrin.h>
33#else
34#include <x86intrin.h>
35#endif
36#endif
37#include "picohttpparser.h"
38
/*
 * Branch-prediction hints. GCC-compatible compilers (major version 3 or
 * newer) get __builtin_expect; everything else sees the plain expression.
 */
#if defined(__GNUC__) && __GNUC__ >= 3
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif

/* Alignment qualifier for declarations, spelled per compiler family. */
#if defined(_MSC_VER)
#define ALIGNED(n) _declspec(align(n))
#else
#define ALIGNED(n) __attribute__((aligned(n)))
#endif
52
/*
 * True for bytes 0x20 (SP) through 0x7e ('~'). The unsigned subtraction wraps
 * for bytes below 0x20, so one comparison covers both bounds.
 */
#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c) - 0x20u < 0x5fu)
54
/*
 * Cursor helper macros. They expand in place and rely on the enclosing
 * function having `buf` (read cursor), `buf_end` (one past the last readable
 * byte) and `ret` (int * status out-parameter) in scope, and on that function
 * returning a pointer: on failure they store a code in *ret and `return NULL`.
 *
 * They are bare `if` statements (not wrapped in do/while), so use them only
 * as complete statements inside braces.
 */

/* Stop with *ret = -2 when the cursor has reached the end of the input. */
#define CHECK_EOF() \
    if (buf == buf_end) { \
        *ret = -2; \
        return NULL; \
    }

/*
 * Consume one byte and stop with *ret = -1 unless it equals `ch`.
 * Does not check for end of input; the caller must know a byte is available.
 */
#define EXPECT_CHAR_NO_CHECK(ch) \
    if (*buf++ != ch) { \
        *ret = -1; \
        return NULL; \
    }

/* CHECK_EOF followed by EXPECT_CHAR_NO_CHECK; expands to two statements. */
#define EXPECT_CHAR(ch) \
    CHECK_EOF(); \
    EXPECT_CHAR_NO_CHECK(ch);
70
/*
 * Scan a space-terminated token starting at `buf`.
 *
 * On success `tok` is set to the token start, `toklen` to its length, and
 * `buf` is left pointing at the terminating ' ' (not consumed).
 * Uses the enclosing function's `buf`, `buf_end` and `ret`, and returns NULL
 * from it on failure:
 *   *ret = -2  the input ended before a space was seen
 *   *ret = -1  a control byte (< 0x20) or DEL (0x7f) was found
 * Bytes with the high bit set are accepted as part of the token.
 */
#define ADVANCE_TOKEN(tok, toklen) \
    do { \
        const char *tok_start = buf; \
        /* stop bytes for the fast scan: 0x00-0x20 and 0x7f */ \
        static const char ALIGNED(16) ranges2[16] = "\000\040\177\177"; \
        int found2; \
        buf = findchar_fast(buf, buf_end, ranges2, 4, &found2); \
        if (!found2) { \
            CHECK_EOF(); \
        } \
        /* byte-wise scan from wherever the fast scan stopped */ \
        while (1) { \
            if (*buf == ' ') { \
                break; \
            } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
                if ((unsigned char)*buf < '\040' || *buf == '\177') { \
                    *ret = -1; \
                    return NULL; \
                } \
            } \
            ++buf; \
            CHECK_EOF(); \
        } \
        tok = tok_start; \
        toklen = buf - tok_start; \
    } while (0)
95
/*
 * 256-entry lookup table indexed by byte value: \1 marks a byte that
 * parse_headers accepts inside a header name, \0 one it rejects.
 * Each row below covers 32 byte values (0x00-0x1f, 0x20-0x3f, ...); all bytes
 * >= 0x80 are rejected.
 * NOTE(review): the accepted set looks like the RFC 7230 "tchar" set - confirm.
 */
static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
                                    "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
                                    "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
                                    "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
104
/*
 * Fast forward scan for the first byte that falls into one of the inclusive
 * byte ranges given in `ranges` (pairs of low/high bytes, `ranges_size` bytes
 * in total).
 *
 * With SSE4.2 the input is examined in whole 16-byte blocks; any tail shorter
 * than 16 bytes is left unexamined. Without SSE4.2 nothing is scanned at all.
 *
 * Returns the position where scanning stopped. *found is 1 when that position
 * holds a matching byte, 0 otherwise (the caller must continue byte-wise).
 */
static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
{
    *found = 0;
#if __SSE4_2__
    if (likely(buf_end - buf >= 16)) {
        const __m128i needles = _mm_loadu_si128((const __m128i *)ranges);
        /* number of bytes covered by whole 16-byte blocks */
        size_t remaining = (buf_end - buf) & ~15;

        for (;;) {
            const __m128i chunk = _mm_loadu_si128((const __m128i *)buf);
            const int hit =
                _mm_cmpestri(needles, ranges_size, chunk, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
            if (unlikely(hit != 16)) {
                /* index of the first matching byte within this block */
                *found = 1;
                buf += hit;
                break;
            }
            buf += 16;
            remaining -= 16;
            if (unlikely(remaining == 0)) {
                break;
            }
        }
    }
#else
    /* suppress unused parameter warning */
    (void)ranges_size;
    (void)ranges;
    (void)buf_end;
#endif
    return buf;
}
133
/*
 * Scan from `buf` to the end of the current line.
 *
 * Printable ASCII, HT and bytes with the high bit set are skipped. The scan
 * stops at the first other control byte or DEL, which must be the start of a
 * CRLF pair or a bare LF.
 *
 * On success *token / *token_len describe the line content (terminator
 * excluded) and the returned pointer is just past the terminator.
 * On failure NULL is returned with
 *   *ret = -2  the input ended before the line was terminated
 *   *ret = -1  the stop byte was not CR/LF, or CR was not followed by LF
 */
static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
{
    const char *token_start = buf;

#ifdef __SSE4_2__
    /* stop ranges for the fast scan; the gaps between them are the allowed bytes */
    static const char ALIGNED(16) ranges1[16] = "\0\010"    /* allow HT */
                                                "\012\037"  /* allow SP and up to but not including DEL */
                                                "\177\177"; /* allow chars w. MSB set */
    int found;
    buf = findchar_fast(buf, buf_end, ranges1, 6, &found);
    if (found)
        goto FOUND_CTL;
#else
    /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
    while (likely(buf_end - buf >= 8)) {
#define DOIT() \
    do { \
        if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
            goto NonPrintable; \
        ++buf; \
    } while (0)
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
#undef DOIT
        continue;
    NonPrintable:
        /* control byte other than HT, or DEL: end of the token */
        if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
            goto FOUND_CTL;
        }
        /* HT or a high-bit byte: accept it and keep scanning */
        ++buf;
    }
#endif
    /* byte-wise scan of whatever the fast paths above left over */
    for (;; ++buf) {
        CHECK_EOF();
        if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
            if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
                goto FOUND_CTL;
            }
        }
    }
FOUND_CTL:
    if (likely(*buf == '\015')) {
        ++buf;
        EXPECT_CHAR('\012');
        /* buf is now past CRLF; exclude both bytes from the token */
        *token_len = buf - 2 - token_start;
    } else if (*buf == '\012') {
        *token_len = buf - token_start;
        ++buf;
    } else {
        *ret = -1;
        return NULL;
    }
    *token = token_start;

    return buf;
}
196
/*
 * Cheap check for whether the buffer already contains the blank line that
 * ends a header block, i.e. two consecutive line endings (each either CRLF
 * or a bare LF).
 *
 * `last_len` is the number of bytes that were present on the previous call.
 * Scanning resumes three bytes before that point so that a terminator
 * straddling the old end of data is still found.
 *
 * Returns a pointer just past the second line ending, or NULL with
 *   *ret = -2  no terminator in the buffer yet (or last_len lies beyond it)
 *   *ret = -1  a CR was not followed by LF
 */
static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
{
    int ret_cnt = 0;

    if (last_len >= 3) {
        /* A last_len larger than the buffer would move the cursor past
         * buf_end, and the equality-based CHECK_EOF would then never fire. */
        if (last_len - 3 > (size_t)(buf_end - buf)) {
            *ret = -2;
            return NULL;
        }
        buf += last_len - 3;
    }

    while (1) {
        CHECK_EOF();
        if (*buf == '\015') {
            ++buf;
            EXPECT_CHAR('\012'); /* performs its own end-of-input check */
            ++ret_cnt;
        } else if (*buf == '\012') {
            ++buf;
            ++ret_cnt;
        } else {
            ++buf;
            ret_cnt = 0;
        }
        if (ret_cnt == 2) {
            return buf;
        }
    }
}
224
/*
 * Parse one decimal digit at `buf`, store digit * mul_ into *valp_ and advance
 * the cursor. A non-digit makes the enclosing function return NULL with
 * *ret = -1. No end-of-input check is done; the caller must have verified
 * that a byte is available.
 * Expands to a bare `if` plus an assignment that already ends in ';'.
 */
#define PARSE_INT(valp_, mul_) \
    if (*buf < '0' || '9' < *buf) { \
        buf++; \
        *ret = -1; \
        return NULL; \
    } \
    *(valp_) = (mul_) * (*buf++ - '0');

/*
 * Parse exactly three decimal digits into *valp_ (hundreds, tens, ones).
 * The caller must have verified that three bytes are available.
 */
#define PARSE_INT_3(valp_) \
    do { \
        int res_ = 0; \
        PARSE_INT(&res_, 100) \
        *valp_ = res_; \
        PARSE_INT(&res_, 10) \
        *valp_ += res_; \
        PARSE_INT(&res_, 1) \
        *valp_ += res_; \
    } while (0)
243
/*
 * Parses "HTTP/<digit>" and, when the major version is 1, the following
 * ".<digit>" as well; for any other major version *minor_version is set to 0.
 *
 * At least 9 bytes must be available ("HTTP/1.x" plus one following byte),
 * otherwise NULL is returned with *ret = -2. A syntax error returns NULL with
 * *ret = -1. Because of the 9-byte requirement the returned pointer is always
 * within [buf, buf_end), or null.
 */
static const char *parse_http_version(const char *buf, const char *buf_end, int *major_version, int *minor_version, int *ret)
{
    static const char prefix[] = "HTTP/";
    size_t i;

    /* we want at least [HTTP/1.<two chars>] to try to parse */
    if (buf_end - buf < 9) {
        *ret = -2;
        return NULL;
    }
    for (i = 0; i != sizeof(prefix) - 1; ++i) {
        EXPECT_CHAR_NO_CHECK(prefix[i]);
    }
    PARSE_INT(major_version, 1);
    if (*major_version != 1) {
        *minor_version = 0;
        return buf;
    }
    EXPECT_CHAR_NO_CHECK('.');
    PARSE_INT(minor_version, 1);
    return buf;
}
266
/*
 * Parses header lines starting at `buf` until the empty line that ends the
 * header block.
 *
 * Each parsed line fills headers[*num_headers] and then increments
 * *num_headers. A line that starts with SP or HT, when at least one header
 * has already been stored, is recorded as a continuation: its name is NULL,
 * name_len is 0 and the value is the line itself (trailing SP/HT trimmed,
 * leading whitespace kept).
 *
 * Returns a pointer just past the terminating empty line, or NULL with
 *   *ret = -2  the input ended before the header block was complete
 *   *ret = -1  syntax error, empty header name, or more than max_headers lines
 */
static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
                                 size_t max_headers, int *ret)
{
    for (;; ++*num_headers) {
        CHECK_EOF();
        /* an empty line (CRLF or bare LF) ends the header block */
        if (*buf == '\015') {
            ++buf;
            EXPECT_CHAR('\012');
            break;
        } else if (*buf == '\012') {
            ++buf;
            break;
        }
        /* another header follows but the caller's array is full */
        if (*num_headers == max_headers) {
            *ret = -1;
            return NULL;
        }
        if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
            /* parsing name, but do not discard SP before colon, see
             * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
            headers[*num_headers].name = buf;
            /* byte ranges at which the fast scan stops */
            static const char ALIGNED(16) ranges1[] = "\x00 "  /* control chars and up to SP */
                                                      "\"\""   /* 0x22 */
                                                      "()"     /* 0x28,0x29 */
                                                      ",,"     /* 0x2c */
                                                      "//"     /* 0x2f */
                                                      ":@"     /* 0x3a-0x40 */
                                                      "[]"     /* 0x5b-0x5d */
                                                      "{\377"; /* 0x7b-0xff */
            int found;
            buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
            if (!found) {
                CHECK_EOF();
            }
            /* byte-wise scan up to the colon; anything not in token_char_map is an error */
            while (1) {
                if (*buf == ':') {
                    break;
                } else if (!token_char_map[(unsigned char)*buf]) {
                    *ret = -1;
                    return NULL;
                }
                ++buf;
                CHECK_EOF();
            }
            /* an empty header name is rejected */
            if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) {
                *ret = -1;
                return NULL;
            }
            /* step over ':' and any SP/HT before the value */
            ++buf;
            for (;; ++buf) {
                CHECK_EOF();
                if (!(*buf == ' ' || *buf == '\t')) {
                    break;
                }
            }
        } else {
            /* continuation line: no name of its own */
            headers[*num_headers].name = NULL;
            headers[*num_headers].name_len = 0;
        }
        const char *value;
        size_t value_len;
        if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) {
            return NULL;
        }
        /* remove trailing SPs and HTABs */
        const char *value_end = value + value_len;
        for (; value_end != value; --value_end) {
            const char c = *(value_end - 1);
            if (!(c == ' ' || c == '\t')) {
                break;
            }
        }
        headers[*num_headers].value = value;
        headers[*num_headers].value_len = value_end - value;
    }
    return buf;
}
344
/*
 * Parses a request line ("<method> SP <path> SP HTTP/<version>" terminated by
 * CRLF or a bare LF) followed by the header block. One leading empty line is
 * tolerated.
 *
 * *method / *path point into the input buffer and are not NUL-terminated.
 *
 * Returns a pointer just past the end of the header block, or NULL with
 *   *ret = -2  the input is incomplete
 *   *ret = -1  the input is malformed
 */
static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
                                 size_t *path_len, int *major_version, int *minor_version, struct phr_header *headers,
                                 size_t *num_headers, size_t max_headers, int *ret)
{
    /* skip first empty line (some clients add CRLF after POST content) */
    CHECK_EOF();
    if (*buf == '\015') {
        ++buf;
        EXPECT_CHAR('\012');
    } else if (*buf == '\012') {
        ++buf;
    }

    /* parse request line */
    ADVANCE_TOKEN(*method, *method_len);
    /* Skip the separating spaces. The input may end right after a space, so
     * every advance is checked before *buf is read; otherwise the loop reads
     * past buf_end and can keep running on stray spaces beyond the buffer. */
    do {
        ++buf;
        CHECK_EOF();
    } while (*buf == ' ');
    ADVANCE_TOKEN(*path, *path_len);
    do {
        ++buf;
        CHECK_EOF();
    } while (*buf == ' ');
    if (*method_len == 0 || *path_len == 0) {
        *ret = -1;
        return NULL;
    }
    if ((buf = parse_http_version(buf, buf_end, major_version, minor_version, ret)) == NULL) {
        return NULL;
    }
    /* parse_http_version leaves at least one readable byte at buf */
    if (*buf == '\015') {
        ++buf;
        EXPECT_CHAR('\012');
    } else if (*buf == '\012') {
        ++buf;
    } else {
        *ret = -1;
        return NULL;
    }

    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}
386
/*
 * Public entry point: parses an HTTP request held in buf_start[0..len).
 *
 * On entry *num_headers is the capacity of `headers`; on return it is the
 * number of entries filled in. All output parameters are reset before parsing.
 * `last_len` is the buffer length seen on the previous call, or 0 to skip the
 * completeness pre-check.
 *
 * Returns the number of bytes consumed on success, otherwise the negative
 * code reported by the parser (-1 or -2).
 */
int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
                      size_t *path_len, int *major_version, int *minor_version, struct phr_header *headers, size_t *num_headers,
                      size_t last_len)
{
    const char *const end = buf_start + len;
    const size_t header_cap = *num_headers;
    const char *cursor;
    int rc;

    /* reset every output so callers never see stale values */
    *num_headers = 0;
    *minor_version = -1;
    *major_version = -1;
    *path_len = 0;
    *path = NULL;
    *method_len = 0;
    *method = NULL;

    /* if last_len != 0, check if the request is complete (a fast countermeasure
       against slowloris) */
    if (last_len != 0) {
        if (is_complete(buf_start, end, last_len, &rc) == NULL) {
            return rc;
        }
    }

    cursor = parse_request(buf_start, end, method, method_len, path, path_len, major_version, minor_version, headers, num_headers,
                           header_cap, &rc);
    if (cursor == NULL) {
        return rc;
    }

    return (int)(cursor - buf_start);
}
415
/*
 * Parses a status line ("HTTP/<version> SP <3-digit status> [SP <message>]"
 * terminated by CRLF or a bare LF) followed by the header block.
 *
 * *msg points into the input buffer with leading spaces removed and is not
 * NUL-terminated.
 *
 * Returns a pointer just past the end of the header block, or NULL with
 *   *ret = -2  the input is incomplete
 *   *ret = -1  the input is malformed
 */
static const char *parse_response(const char *buf, const char *buf_end, int *major_version, int *minor_version, int *status,
                                  const char **msg, size_t *msg_len, struct phr_header *headers, size_t *num_headers,
                                  size_t max_headers, int *ret)
{
    /* parse "HTTP/1.x" */
    if ((buf = parse_http_version(buf, buf_end, major_version, minor_version, ret)) == NULL) {
        return NULL;
    }
    /* skip space; parse_http_version leaves at least one readable byte at buf */
    if (*buf != ' ') {
        *ret = -1;
        return NULL;
    }
    /* The input may end right after a space, so check before each read
     * instead of dereferencing one past the buffer. */
    do {
        ++buf;
        CHECK_EOF();
    } while (*buf == ' ');
    /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
    if (buf_end - buf < 4) {
        *ret = -2;
        return NULL;
    }
    PARSE_INT_3(status);

    /* get message including preceding space */
    if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
        return NULL;
    }
    if (*msg_len == 0) {
        /* ok */
    } else if (**msg == ' ') {
        /* remove preceding space; the line terminator that follows the
         * message bounds this loop */
        do {
            ++*msg;
            --*msg_len;
        } while (**msg == ' ');
    } else {
        /* garbage found after status code */
        *ret = -1;
        return NULL;
    }

    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}
458
/*
 * Public entry point: parses an HTTP response held in buf_start[0..len).
 *
 * On entry *num_headers is the capacity of `headers`; on return it is the
 * number of entries filled in. All output parameters are reset before parsing.
 * `last_len` is the buffer length seen on the previous call, or 0 to skip the
 * completeness pre-check.
 *
 * Returns the number of bytes consumed on success, otherwise the negative
 * code reported by the parser (-1 or -2).
 */
int phr_parse_response(const char *buf_start, size_t len, int *major_version, int *minor_version, int *status, const char **msg,
                       size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t last_len)
{
    const char *const end = buf_start + len;
    const size_t header_cap = *num_headers;
    const char *cursor;
    int rc;

    /* reset every output so callers never see stale values */
    *num_headers = 0;
    *msg_len = 0;
    *msg = NULL;
    *status = 0;
    *minor_version = -1;
    *major_version = -1;

    /* if last_len != 0, check if the response is complete (a fast countermeasure
       against slowloris) */
    if (last_len != 0) {
        if (is_complete(buf_start, end, last_len, &rc) == NULL) {
            return rc;
        }
    }

    cursor = parse_response(buf_start, end, major_version, minor_version, status, msg, msg_len, headers, num_headers, header_cap,
                            &rc);
    if (cursor == NULL) {
        return rc;
    }

    return (int)(cursor - buf_start);
}
485
/*
 * Public entry point: parses a bare header block held in buf_start[0..len).
 *
 * On entry *num_headers is the capacity of `headers`; on return it is the
 * number of entries filled in. `last_len` is the buffer length seen on the
 * previous call, or 0 to skip the completeness pre-check.
 *
 * Returns the number of bytes consumed on success, otherwise the negative
 * code reported by the parser (-1 or -2).
 */
int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
{
    const char *const end = buf_start + len;
    const size_t header_cap = *num_headers;
    const char *cursor;
    int rc;

    *num_headers = 0;

    /* if last_len != 0, check if the header block is complete (a fast
       countermeasure against slowloris) */
    if (last_len != 0) {
        if (is_complete(buf_start, end, last_len, &rc) == NULL) {
            return rc;
        }
    }

    cursor = parse_headers(buf_start, end, headers, num_headers, header_cap, &rc);
    if (cursor == NULL) {
        return rc;
    }

    return (int)(cursor - buf_start);
}
506
/*
 * States of the chunked-transfer decoder, stored in decoder->_state by
 * phr_decode_chunked. CHUNKED_IN_CHUNK_SIZE has the value 0.
 */
enum {
    CHUNKED_IN_CHUNK_SIZE,          /* reading the hex digits of a chunk size */
    CHUNKED_IN_CHUNK_EXT,           /* skipping the rest of the chunk-size line up to LF */
    CHUNKED_IN_CHUNK_DATA,          /* copying chunk payload bytes */
    CHUNKED_IN_CHUNK_CRLF,          /* consuming the line ending that follows chunk data */
    CHUNKED_IN_TRAILERS_LINE_HEAD,  /* at the start of a trailer line (an empty line ends decoding) */
    CHUNKED_IN_TRAILERS_LINE_MIDDLE /* inside a trailer line, skipping up to LF */
};
515
/*
 * Converts one ASCII hexadecimal digit (either case) to its value 0..15.
 * Returns -1 for any other input.
 */
static int decode_hex(int ch)
{
    int digit = -1;

    if (ch >= '0' && ch <= '9') {
        digit = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
        digit = 0xa + (ch - 'a');
    } else if (ch >= 'A' && ch <= 'F') {
        digit = 0xa + (ch - 'A');
    }
    return digit;
}
528
529ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
530{
531 size_t dst = 0, src = 0, bufsz = *_bufsz;
532 ssize_t ret = -2; /* incomplete */
533
534 while (1) {
535 switch (decoder->_state) {
536 case CHUNKED_IN_CHUNK_SIZE:
537 for (;; ++src) {
538 int v;
539 if (src == bufsz)
540 goto Exit;
541 if ((v = decode_hex(buf[src])) == -1) {
542 if (decoder->_hex_count == 0) {
543 ret = -1;
544 goto Exit;
545 }
546 break;
547 }
548 if (decoder->_hex_count == sizeof(size_t) * 2) {
549 ret = -1;
550 goto Exit;
551 }
552 decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
553 ++decoder->_hex_count;
554 }
555 decoder->_hex_count = 0;
556 decoder->_state = CHUNKED_IN_CHUNK_EXT;
557 /* fallthru */
558 case CHUNKED_IN_CHUNK_EXT:
559 /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
560 for (;; ++src) {
561 if (src == bufsz)
562 goto Exit;
563 if (buf[src] == '\012')
564 break;
565 }
566 ++src;
567 if (decoder->bytes_left_in_chunk == 0) {
568 if (decoder->consume_trailer) {
569 decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
570 break;
571 } else {
572 goto Complete;
573 }
574 }
575 decoder->_state = CHUNKED_IN_CHUNK_DATA;
576 /* fallthru */
577 case CHUNKED_IN_CHUNK_DATA: {
578 size_t avail = bufsz - src;
579 if (avail < decoder->bytes_left_in_chunk) {
580 if (dst != src)
581 memmove(buf + dst, buf + src, avail);
582 src += avail;
583 dst += avail;
584 decoder->bytes_left_in_chunk -= avail;
585 goto Exit;
586 }
587 if (dst != src)
588 memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
589 src += decoder->bytes_left_in_chunk;
590 dst += decoder->bytes_left_in_chunk;
591 decoder->bytes_left_in_chunk = 0;
592 decoder->_state = CHUNKED_IN_CHUNK_CRLF;
593 }
594 /* fallthru */
595 case CHUNKED_IN_CHUNK_CRLF:
596 for (;; ++src) {
597 if (src == bufsz)
598 goto Exit;
599 if (buf[src] != '\015')
600 break;
601 }
602 if (buf[src] != '\012') {
603 ret = -1;
604 goto Exit;
605 }
606 ++src;
607 decoder->_state = CHUNKED_IN_CHUNK_SIZE;
608 break;
609 case CHUNKED_IN_TRAILERS_LINE_HEAD:
610 for (;; ++src) {
611 if (src == bufsz)
612 goto Exit;
613 if (buf[src] != '\015')
614 break;
615 }
616 if (buf[src++] == '\012')
617 goto Complete;
618 decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
619 /* fallthru */
620 case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
621 for (;; ++src) {
622 if (src == bufsz)
623 goto Exit;
624 if (buf[src] == '\012')
625 break;
626 }
627 ++src;
628 decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
629 break;
630 default:
631 assert(!"decoder is corrupt");
632 }
633 }
634
635Complete:
636 ret = bufsz - src;
637Exit:
638 if (dst != src)
639 memmove(buf + dst, buf + src, bufsz - src);
640 *_bufsz = dst;
641 return ret;
642}
643
644int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
645{
646 return decoder->_state == CHUNKED_IN_CHUNK_DATA;
647}
648
/* Drop the file-local parser helper macros so they cannot leak into code that follows. */
#undef CHECK_EOF
#undef EXPECT_CHAR
#undef ADVANCE_TOKEN