summaryrefslogtreecommitdiffstats
path: root/plugins/picohttpparser.c
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/picohttpparser.c')
-rw-r--r--plugins/picohttpparser.c620
1 files changed, 620 insertions, 0 deletions
diff --git a/plugins/picohttpparser.c b/plugins/picohttpparser.c
new file mode 100644
index 00000000..6a2d872d
--- /dev/null
+++ b/plugins/picohttpparser.c
@@ -0,0 +1,620 @@
1/*
2 * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
3 * Shigeo Mitsunari
4 *
5 * The software is licensed under either the MIT License (below) or the Perl
6 * license.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to
10 * deal in the Software without restriction, including without limitation the
11 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
12 * sell copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * IN THE SOFTWARE.
25 */
26
27#include <assert.h>
28#include <stddef.h>
29#include <string.h>
30#ifdef __SSE4_2__
31#ifdef _MSC_VER
32#include <nmmintrin.h>
33#else
34#include <x86intrin.h>
35#endif
36#endif
37#include "picohttpparser.h"
38
39/* $Id: a707070d11d499609f99d09f97535642cec910a8 $ */
40
41#if __GNUC__ >= 3
42#define likely(x) __builtin_expect(!!(x), 1)
43#define unlikely(x) __builtin_expect(!!(x), 0)
44#else
45#define likely(x) (x)
46#define unlikely(x) (x)
47#endif
48
49#ifdef _MSC_VER
50#define ALIGNED(n) _declspec(align(n))
51#else
52#define ALIGNED(n) __attribute__((aligned(n)))
53#endif
54
55#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
56
57#define CHECK_EOF() \
58 if (buf == buf_end) { \
59 *ret = -2; \
60 return NULL; \
61 }
62
63#define EXPECT_CHAR_NO_CHECK(ch) \
64 if (*buf++ != ch) { \
65 *ret = -1; \
66 return NULL; \
67 }
68
69#define EXPECT_CHAR(ch) \
70 CHECK_EOF(); \
71 EXPECT_CHAR_NO_CHECK(ch);
72
73#define ADVANCE_TOKEN(tok, toklen) \
74 do { \
75 const char *tok_start = buf; \
76 static const char ALIGNED(16) ranges2[] = "\000\040\177\177"; \
77 int found2; \
78 buf = findchar_fast(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2); \
79 if (!found2) { \
80 CHECK_EOF(); \
81 } \
82 while (1) { \
83 if (*buf == ' ') { \
84 break; \
85 } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
86 if ((unsigned char)*buf < '\040' || *buf == '\177') { \
87 *ret = -1; \
88 return NULL; \
89 } \
90 } \
91 ++buf; \
92 CHECK_EOF(); \
93 } \
94 tok = tok_start; \
95 toklen = buf - tok_start; \
96 } while (0)
97
98static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
99 "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
100 "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
101 "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
102 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
103 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
104 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
105 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
106
107static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
108{
109 *found = 0;
110#if __SSE4_2__
111 if (likely(buf_end - buf >= 16)) {
112 __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
113
114 size_t left = (buf_end - buf) & ~15;
115 do {
116 __m128i b16 = _mm_loadu_si128((const __m128i *)buf);
117 int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
118 if (unlikely(r != 16)) {
119 buf += r;
120 *found = 1;
121 break;
122 }
123 buf += 16;
124 left -= 16;
125 } while (likely(left != 0));
126 }
127#else
128 /* suppress unused parameter warning */
129 (void)buf_end;
130 (void)ranges;
131 (void)ranges_size;
132#endif
133 return buf;
134}
135
136static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
137{
138 const char *token_start = buf;
139
140#ifdef __SSE4_2__
141 static const char ranges1[] = "\0\010"
142 /* allow HT */
143 "\012\037"
144 /* allow SP and up to but not including DEL */
145 "\177\177"
146 /* allow chars w. MSB set */
147 ;
148 int found;
149 buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
150 if (found)
151 goto FOUND_CTL;
152#else
153 /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
154 while (likely(buf_end - buf >= 8)) {
155#define DOIT() \
156 do { \
157 if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
158 goto NonPrintable; \
159 ++buf; \
160 } while (0)
161 DOIT();
162 DOIT();
163 DOIT();
164 DOIT();
165 DOIT();
166 DOIT();
167 DOIT();
168 DOIT();
169#undef DOIT
170 continue;
171 NonPrintable:
172 if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
173 goto FOUND_CTL;
174 }
175 ++buf;
176 }
177#endif
178 for (;; ++buf) {
179 CHECK_EOF();
180 if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
181 if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
182 goto FOUND_CTL;
183 }
184 }
185 }
186FOUND_CTL:
187 if (likely(*buf == '\015')) {
188 ++buf;
189 EXPECT_CHAR('\012');
190 *token_len = buf - 2 - token_start;
191 } else if (*buf == '\012') {
192 *token_len = buf - token_start;
193 ++buf;
194 } else {
195 *ret = -1;
196 return NULL;
197 }
198 *token = token_start;
199
200 return buf;
201}
202
203static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
204{
205 int ret_cnt = 0;
206 buf = last_len < 3 ? buf : buf + last_len - 3;
207
208 while (1) {
209 CHECK_EOF();
210 if (*buf == '\015') {
211 ++buf;
212 CHECK_EOF();
213 EXPECT_CHAR('\012');
214 ++ret_cnt;
215 } else if (*buf == '\012') {
216 ++buf;
217 ++ret_cnt;
218 } else {
219 ++buf;
220 ret_cnt = 0;
221 }
222 if (ret_cnt == 2) {
223 return buf;
224 }
225 }
226
227 *ret = -2;
228 return NULL;
229}
230
231#define PARSE_INT(valp_, mul_) \
232 if (*buf < '0' || '9' < *buf) { \
233 buf++; \
234 *ret = -1; \
235 return NULL; \
236 } \
237 *(valp_) = (mul_) * (*buf++ - '0');
238
239#define PARSE_INT_3(valp_) \
240 do { \
241 int res_ = 0; \
242 PARSE_INT(&res_, 100) \
243 *valp_ = res_; \
244 PARSE_INT(&res_, 10) \
245 *valp_ += res_; \
246 PARSE_INT(&res_, 1) \
247 *valp_ += res_; \
248 } while (0)
249
250/* returned pointer is always within [buf, buf_end), or null */
251static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
252{
253 /* we want at least [HTTP/1.<two chars>] to try to parse */
254 if (buf_end - buf < 9) {
255 *ret = -2;
256 return NULL;
257 }
258 EXPECT_CHAR_NO_CHECK('H');
259 EXPECT_CHAR_NO_CHECK('T');
260 EXPECT_CHAR_NO_CHECK('T');
261 EXPECT_CHAR_NO_CHECK('P');
262 EXPECT_CHAR_NO_CHECK('/');
263 EXPECT_CHAR_NO_CHECK('1');
264 EXPECT_CHAR_NO_CHECK('.');
265 PARSE_INT(minor_version, 1);
266 return buf;
267}
268
269static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
270 size_t max_headers, int *ret)
271{
272 for (;; ++*num_headers) {
273 CHECK_EOF();
274 if (*buf == '\015') {
275 ++buf;
276 EXPECT_CHAR('\012');
277 break;
278 } else if (*buf == '\012') {
279 ++buf;
280 break;
281 }
282 if (*num_headers == max_headers) {
283 *ret = -1;
284 return NULL;
285 }
286 if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
287 /* parsing name, but do not discard SP before colon, see
288 * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
289 headers[*num_headers].name = buf;
290 static const char ALIGNED(16) ranges1[] = "\x00 " /* control chars and up to SP */
291 "\"\"" /* 0x22 */
292 "()" /* 0x28,0x29 */
293 ",," /* 0x2c */
294 "//" /* 0x2f */
295 ":@" /* 0x3a-0x40 */
296 "[]" /* 0x5b-0x5d */
297 "{\377"; /* 0x7b-0xff */
298 int found;
299 buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
300 if (!found) {
301 CHECK_EOF();
302 }
303 while (1) {
304 if (*buf == ':') {
305 break;
306 } else if (!token_char_map[(unsigned char)*buf]) {
307 *ret = -1;
308 return NULL;
309 }
310 ++buf;
311 CHECK_EOF();
312 }
313 if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) {
314 *ret = -1;
315 return NULL;
316 }
317 ++buf;
318 for (;; ++buf) {
319 CHECK_EOF();
320 if (!(*buf == ' ' || *buf == '\t')) {
321 break;
322 }
323 }
324 } else {
325 headers[*num_headers].name = NULL;
326 headers[*num_headers].name_len = 0;
327 }
328 if ((buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value, &headers[*num_headers].value_len, ret)) == NULL) {
329 return NULL;
330 }
331 }
332 return buf;
333}
334
335static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
336 size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
337 size_t max_headers, int *ret)
338{
339 /* skip first empty line (some clients add CRLF after POST content) */
340 CHECK_EOF();
341 if (*buf == '\015') {
342 ++buf;
343 EXPECT_CHAR('\012');
344 } else if (*buf == '\012') {
345 ++buf;
346 }
347
348 /* parse request line */
349 ADVANCE_TOKEN(*method, *method_len);
350 ++buf;
351 ADVANCE_TOKEN(*path, *path_len);
352 ++buf;
353 if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
354 return NULL;
355 }
356 if (*buf == '\015') {
357 ++buf;
358 EXPECT_CHAR('\012');
359 } else if (*buf == '\012') {
360 ++buf;
361 } else {
362 *ret = -1;
363 return NULL;
364 }
365
366 return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
367}
368
369int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
370 size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
371{
372 const char *buf = buf_start, *buf_end = buf_start + len;
373 size_t max_headers = *num_headers;
374 int r;
375
376 *method = NULL;
377 *method_len = 0;
378 *path = NULL;
379 *path_len = 0;
380 *minor_version = -1;
381 *num_headers = 0;
382
383 /* if last_len != 0, check if the request is complete (a fast countermeasure
384 againt slowloris */
385 if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
386 return r;
387 }
388
389 if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers,
390 &r)) == NULL) {
391 return r;
392 }
393
394 return (int)(buf - buf_start);
395}
396
397static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
398 size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
399{
400 /* parse "HTTP/1.x" */
401 if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
402 return NULL;
403 }
404 /* skip space */
405 if (*buf++ != ' ') {
406 *ret = -1;
407 return NULL;
408 }
409 /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
410 if (buf_end - buf < 4) {
411 *ret = -2;
412 return NULL;
413 }
414 PARSE_INT_3(status);
415
416 /* skip space */
417 if (*buf++ != ' ') {
418 *ret = -1;
419 return NULL;
420 }
421 /* get message */
422 if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
423 return NULL;
424 }
425
426 return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
427}
428
429int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
430 struct phr_header *headers, size_t *num_headers, size_t last_len)
431{
432 const char *buf = buf_start, *buf_end = buf + len;
433 size_t max_headers = *num_headers;
434 int r;
435
436 *minor_version = -1;
437 *status = 0;
438 *msg = NULL;
439 *msg_len = 0;
440 *num_headers = 0;
441
442 /* if last_len != 0, check if the response is complete (a fast countermeasure
443 against slowloris */
444 if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
445 return r;
446 }
447
448 if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) {
449 return r;
450 }
451
452 return (int)(buf - buf_start);
453}
454
455int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
456{
457 const char *buf = buf_start, *buf_end = buf + len;
458 size_t max_headers = *num_headers;
459 int r;
460
461 *num_headers = 0;
462
463 /* if last_len != 0, check if the response is complete (a fast countermeasure
464 against slowloris */
465 if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
466 return r;
467 }
468
469 if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
470 return r;
471 }
472
473 return (int)(buf - buf_start);
474}
475
476enum {
477 CHUNKED_IN_CHUNK_SIZE,
478 CHUNKED_IN_CHUNK_EXT,
479 CHUNKED_IN_CHUNK_DATA,
480 CHUNKED_IN_CHUNK_CRLF,
481 CHUNKED_IN_TRAILERS_LINE_HEAD,
482 CHUNKED_IN_TRAILERS_LINE_MIDDLE
483};
484
485static int decode_hex(int ch)
486{
487 if ('0' <= ch && ch <= '9') {
488 return ch - '0';
489 } else if ('A' <= ch && ch <= 'F') {
490 return ch - 'A' + 0xa;
491 } else if ('a' <= ch && ch <= 'f') {
492 return ch - 'a' + 0xa;
493 } else {
494 return -1;
495 }
496}
497
498ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
499{
500 size_t dst = 0, src = 0, bufsz = *_bufsz;
501 ssize_t ret = -2; /* incomplete */
502
503 while (1) {
504 switch (decoder->_state) {
505 case CHUNKED_IN_CHUNK_SIZE:
506 for (;; ++src) {
507 int v;
508 if (src == bufsz)
509 goto Exit;
510 if ((v = decode_hex(buf[src])) == -1) {
511 if (decoder->_hex_count == 0) {
512 ret = -1;
513 goto Exit;
514 }
515 break;
516 }
517 if (decoder->_hex_count == sizeof(size_t) * 2) {
518 ret = -1;
519 goto Exit;
520 }
521 decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
522 ++decoder->_hex_count;
523 }
524 decoder->_hex_count = 0;
525 decoder->_state = CHUNKED_IN_CHUNK_EXT;
526 /* fallthru */
527 case CHUNKED_IN_CHUNK_EXT:
528 /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
529 for (;; ++src) {
530 if (src == bufsz)
531 goto Exit;
532 if (buf[src] == '\012')
533 break;
534 }
535 ++src;
536 if (decoder->bytes_left_in_chunk == 0) {
537 if (decoder->consume_trailer) {
538 decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
539 break;
540 } else {
541 goto Complete;
542 }
543 }
544 decoder->_state = CHUNKED_IN_CHUNK_DATA;
545 /* fallthru */
546 case CHUNKED_IN_CHUNK_DATA: {
547 size_t avail = bufsz - src;
548 if (avail < decoder->bytes_left_in_chunk) {
549 if (dst != src)
550 memmove(buf + dst, buf + src, avail);
551 src += avail;
552 dst += avail;
553 decoder->bytes_left_in_chunk -= avail;
554 goto Exit;
555 }
556 if (dst != src)
557 memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
558 src += decoder->bytes_left_in_chunk;
559 dst += decoder->bytes_left_in_chunk;
560 decoder->bytes_left_in_chunk = 0;
561 decoder->_state = CHUNKED_IN_CHUNK_CRLF;
562 }
563 /* fallthru */
564 case CHUNKED_IN_CHUNK_CRLF:
565 for (;; ++src) {
566 if (src == bufsz)
567 goto Exit;
568 if (buf[src] != '\015')
569 break;
570 }
571 if (buf[src] != '\012') {
572 ret = -1;
573 goto Exit;
574 }
575 ++src;
576 decoder->_state = CHUNKED_IN_CHUNK_SIZE;
577 break;
578 case CHUNKED_IN_TRAILERS_LINE_HEAD:
579 for (;; ++src) {
580 if (src == bufsz)
581 goto Exit;
582 if (buf[src] != '\015')
583 break;
584 }
585 if (buf[src++] == '\012')
586 goto Complete;
587 decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
588 /* fallthru */
589 case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
590 for (;; ++src) {
591 if (src == bufsz)
592 goto Exit;
593 if (buf[src] == '\012')
594 break;
595 }
596 ++src;
597 decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
598 break;
599 default:
600 assert(!"decoder is corrupt");
601 }
602 }
603
604Complete:
605 ret = bufsz - src;
606Exit:
607 if (dst != src)
608 memmove(buf + dst, buf + src, bufsz - src);
609 *_bufsz = dst;
610 return ret;
611}
612
613int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
614{
615 return decoder->_state == CHUNKED_IN_CHUNK_DATA;
616}
617
618#undef CHECK_EOF
619#undef EXPECT_CHAR
620#undef ADVANCE_TOKEN