summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--plugins/picohttpparser/picohttpparser.c1101
1 files changed, 544 insertions, 557 deletions
diff --git a/plugins/picohttpparser/picohttpparser.c b/plugins/picohttpparser/picohttpparser.c
index d0bfac62..2ae92d66 100644
--- a/plugins/picohttpparser/picohttpparser.c
+++ b/plugins/picohttpparser/picohttpparser.c
@@ -28,623 +28,610 @@
28#include <stddef.h> 28#include <stddef.h>
29#include <string.h> 29#include <string.h>
30#ifdef __SSE4_2__ 30#ifdef __SSE4_2__
31#ifdef _MSC_VER 31# ifdef _MSC_VER
32#include <nmmintrin.h> 32# include <nmmintrin.h>
33#else 33# else
34#include <x86intrin.h> 34# include <x86intrin.h>
35#endif 35# endif
36#endif 36#endif
37#include "picohttpparser.h" 37#include "picohttpparser.h"
38 38
39#if __GNUC__ >= 3 39#if __GNUC__ >= 3
40#define likely(x) __builtin_expect(!!(x), 1) 40# define likely(x) __builtin_expect(!!(x), 1)
41#define unlikely(x) __builtin_expect(!!(x), 0) 41# define unlikely(x) __builtin_expect(!!(x), 0)
42#else 42#else
43#define likely(x) (x) 43# define likely(x) (x)
44#define unlikely(x) (x) 44# define unlikely(x) (x)
45#endif 45#endif
46 46
47#ifdef _MSC_VER 47#ifdef _MSC_VER
48#define ALIGNED(n) _declspec(align(n)) 48# define ALIGNED(n) _declspec(align(n))
49#else 49#else
50#define ALIGNED(n) __attribute__((aligned(n))) 50# define ALIGNED(n) __attribute__((aligned(n)))
51#endif 51#endif
52 52
53#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u) 53#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
54 54
55#define CHECK_EOF() \ 55#define CHECK_EOF() \
56 if (buf == buf_end) { \ 56 if (buf == buf_end) { \
57 *ret = -2; \ 57 *ret = -2; \
58 return NULL; \ 58 return NULL; \
59 } 59 }
60 60
61#define EXPECT_CHAR_NO_CHECK(ch) \ 61#define EXPECT_CHAR_NO_CHECK(ch) \
62 if (*buf++ != ch) { \ 62 if (*buf++ != ch) { \
63 *ret = -1; \ 63 *ret = -1; \
64 return NULL; \ 64 return NULL; \
65 } 65 }
66 66
67#define EXPECT_CHAR(ch) \ 67#define EXPECT_CHAR(ch) \
68 CHECK_EOF(); \ 68 CHECK_EOF(); \
69 EXPECT_CHAR_NO_CHECK(ch); 69 EXPECT_CHAR_NO_CHECK(ch);
70 70
71#define ADVANCE_TOKEN(tok, toklen) \ 71#define ADVANCE_TOKEN(tok, toklen) \
72 do { \ 72 do { \
73 const char *tok_start = buf; \ 73 const char *tok_start = buf; \
74 static const char ALIGNED(16) ranges2[16] = "\000\040\177\177"; \ 74 static const char ALIGNED(16) ranges2[16] = "\000\040\177\177"; \
75 int found2; \ 75 int found2; \
76 buf = findchar_fast(buf, buf_end, ranges2, 4, &found2); \ 76 buf = findchar_fast(buf, buf_end, ranges2, 4, &found2); \
77 if (!found2) { \ 77 if (!found2) { \
78 CHECK_EOF(); \ 78 CHECK_EOF(); \
79 } \ 79 } \
80 while (1) { \ 80 while (1) { \
81 if (*buf == ' ') { \ 81 if (*buf == ' ') { \
82 break; \ 82 break; \
83 } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \ 83 } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
84 if ((unsigned char)*buf < '\040' || *buf == '\177') { \ 84 if ((unsigned char)*buf < '\040' || *buf == '\177') { \
85 *ret = -1; \ 85 *ret = -1; \
86 return NULL; \ 86 return NULL; \
87 } \ 87 } \
88 } \ 88 } \
89 ++buf; \ 89 ++buf; \
90 CHECK_EOF(); \ 90 CHECK_EOF(); \
91 } \ 91 } \
92 tok = tok_start; \ 92 tok = tok_start; \
93 toklen = buf - tok_start; \ 93 toklen = buf - tok_start; \
94 } while (0) 94 } while (0)
95 95
96static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" 96static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
97 "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0" 97 "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
98 "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1" 98 "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
99 "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0" 99 "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
100 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" 100 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
101 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" 101 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
102 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" 102 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
103 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; 103 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
104 104
105static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found) 105static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found) {
106{ 106 *found = 0;
107 *found = 0;
108#if __SSE4_2__ 107#if __SSE4_2__
109 if (likely(buf_end - buf >= 16)) { 108 if (likely(buf_end - buf >= 16)) {
110 __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges); 109 __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
111 110
112 size_t left = (buf_end - buf) & ~15; 111 size_t left = (buf_end - buf) & ~15;
113 do { 112 do {
114 __m128i b16 = _mm_loadu_si128((const __m128i *)buf); 113 __m128i b16 = _mm_loadu_si128((const __m128i *)buf);
115 int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS); 114 int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
116 if (unlikely(r != 16)) { 115 if (unlikely(r != 16)) {
117 buf += r; 116 buf += r;
118 *found = 1; 117 *found = 1;
119 break; 118 break;
120 } 119 }
121 buf += 16; 120 buf += 16;
122 left -= 16; 121 left -= 16;
123 } while (likely(left != 0)); 122 } while (likely(left != 0));
124 } 123 }
125#else 124#else
126 /* suppress unused parameter warning */ 125 /* suppress unused parameter warning */
127 (void)buf_end; 126 (void)buf_end;
128 (void)ranges; 127 (void)ranges;
129 (void)ranges_size; 128 (void)ranges_size;
130#endif 129#endif
131 return buf; 130 return buf;
132} 131}
133 132
134static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret) 133static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret) {
135{ 134 const char *token_start = buf;
136 const char *token_start = buf;
137 135
138#ifdef __SSE4_2__ 136#ifdef __SSE4_2__
139 static const char ALIGNED(16) ranges1[16] = "\0\010" /* allow HT */ 137 static const char ALIGNED(16) ranges1[16] = "\0\010" /* allow HT */
140 "\012\037" /* allow SP and up to but not including DEL */ 138 "\012\037" /* allow SP and up to but not including DEL */
141 "\177\177"; /* allow chars w. MSB set */ 139 "\177\177"; /* allow chars w. MSB set */
142 int found; 140 int found;
143 buf = findchar_fast(buf, buf_end, ranges1, 6, &found); 141 buf = findchar_fast(buf, buf_end, ranges1, 6, &found);
144 if (found) 142 if (found)
145 goto FOUND_CTL; 143 goto FOUND_CTL;
146#else 144#else
147 /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */ 145 /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
148 while (likely(buf_end - buf >= 8)) { 146 while (likely(buf_end - buf >= 8)) {
149#define DOIT() \ 147# define DOIT() \
150 do { \ 148 do { \
151 if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \ 149 if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
152 goto NonPrintable; \ 150 goto NonPrintable; \
153 ++buf; \ 151 ++buf; \
154 } while (0) 152 } while (0)
155 DOIT(); 153 DOIT();
156 DOIT(); 154 DOIT();
157 DOIT(); 155 DOIT();
158 DOIT(); 156 DOIT();
159 DOIT(); 157 DOIT();
160 DOIT(); 158 DOIT();
161 DOIT(); 159 DOIT();
162 DOIT(); 160 DOIT();
163#undef DOIT 161# undef DOIT
164 continue; 162 continue;
165 NonPrintable: 163 NonPrintable:
166 if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) { 164 if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
167 goto FOUND_CTL; 165 goto FOUND_CTL;
168 } 166 }
169 ++buf; 167 ++buf;
170 } 168 }
171#endif 169#endif
172 for (;; ++buf) { 170 for (;; ++buf) {
173 CHECK_EOF(); 171 CHECK_EOF();
174 if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { 172 if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
175 if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) { 173 if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
176 goto FOUND_CTL; 174 goto FOUND_CTL;
177 } 175 }
178 } 176 }
179 } 177 }
180FOUND_CTL: 178FOUND_CTL:
181 if (likely(*buf == '\015')) { 179 if (likely(*buf == '\015')) {
182 ++buf; 180 ++buf;
183 EXPECT_CHAR('\012'); 181 EXPECT_CHAR('\012');
184 *token_len = buf - 2 - token_start; 182 *token_len = buf - 2 - token_start;
185 } else if (*buf == '\012') { 183 } else if (*buf == '\012') {
186 *token_len = buf - token_start; 184 *token_len = buf - token_start;
187 ++buf; 185 ++buf;
188 } else { 186 } else {
189 *ret = -1; 187 *ret = -1;
190 return NULL; 188 return NULL;
191 } 189 }
192 *token = token_start; 190 *token = token_start;
193 191
194 return buf; 192 return buf;
195} 193}
196 194
197static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret) 195static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret) {
198{ 196 int ret_cnt = 0;
199 int ret_cnt = 0; 197 buf = last_len < 3 ? buf : buf + last_len - 3;
200 buf = last_len < 3 ? buf : buf + last_len - 3; 198
201 199 while (1) {
202 while (1) { 200 CHECK_EOF();
203 CHECK_EOF(); 201 if (*buf == '\015') {
204 if (*buf == '\015') { 202 ++buf;
205 ++buf; 203 CHECK_EOF();
206 CHECK_EOF(); 204 EXPECT_CHAR('\012');
207 EXPECT_CHAR('\012'); 205 ++ret_cnt;
208 ++ret_cnt; 206 } else if (*buf == '\012') {
209 } else if (*buf == '\012') { 207 ++buf;
210 ++buf; 208 ++ret_cnt;
211 ++ret_cnt; 209 } else {
212 } else { 210 ++buf;
213 ++buf; 211 ret_cnt = 0;
214 ret_cnt = 0; 212 }
215 } 213 if (ret_cnt == 2) {
216 if (ret_cnt == 2) { 214 return buf;
217 return buf; 215 }
218 } 216 }
219 } 217
220 218 *ret = -2;
221 *ret = -2; 219 return NULL;
222 return NULL;
223} 220}
224 221
225#define PARSE_INT(valp_, mul_) \ 222#define PARSE_INT(valp_, mul_) \
226 if (*buf < '0' || '9' < *buf) { \ 223 if (*buf < '0' || '9' < *buf) { \
227 buf++; \ 224 buf++; \
228 *ret = -1; \ 225 *ret = -1; \
229 return NULL; \ 226 return NULL; \
230 } \ 227 } \
231 *(valp_) = (mul_) * (*buf++ - '0'); 228 *(valp_) = (mul_) * (*buf++ - '0');
232 229
233#define PARSE_INT_3(valp_) \ 230#define PARSE_INT_3(valp_) \
234 do { \ 231 do { \
235 int res_ = 0; \ 232 int res_ = 0; \
236 PARSE_INT(&res_, 100) \ 233 PARSE_INT(&res_, 100) \
237 *valp_ = res_; \ 234 *valp_ = res_; \
238 PARSE_INT(&res_, 10) \ 235 PARSE_INT(&res_, 10) \
239 *valp_ += res_; \ 236 *valp_ += res_; \
240 PARSE_INT(&res_, 1) \ 237 PARSE_INT(&res_, 1) \
241 *valp_ += res_; \ 238 *valp_ += res_; \
242 } while (0) 239 } while (0)
243 240
244/* returned pointer is always within [buf, buf_end), or null */ 241/* returned pointer is always within [buf, buf_end), or null */
245static const char *parse_http_version(const char *buf, const char *buf_end, int *major_version, int *minor_version, int *ret) 242static const char *parse_http_version(const char *buf, const char *buf_end, int *major_version, int *minor_version, int *ret) {
246{ 243 /* we want at least [HTTP/1.<two chars>] to try to parse */
247 /* we want at least [HTTP/1.<two chars>] to try to parse */ 244 if (buf_end - buf < 9) {
248 if (buf_end - buf < 9) { 245 *ret = -2;
249 *ret = -2; 246 return NULL;
250 return NULL; 247 }
251 } 248 EXPECT_CHAR_NO_CHECK('H');
252 EXPECT_CHAR_NO_CHECK('H'); 249 EXPECT_CHAR_NO_CHECK('T');
253 EXPECT_CHAR_NO_CHECK('T'); 250 EXPECT_CHAR_NO_CHECK('T');
254 EXPECT_CHAR_NO_CHECK('T'); 251 EXPECT_CHAR_NO_CHECK('P');
255 EXPECT_CHAR_NO_CHECK('P'); 252 EXPECT_CHAR_NO_CHECK('/');
256 EXPECT_CHAR_NO_CHECK('/'); 253 PARSE_INT(major_version, 1);
257 PARSE_INT(major_version, 1); 254 if (*major_version == 1) {
258 if (*major_version == 1) { 255 EXPECT_CHAR_NO_CHECK('.');
259 EXPECT_CHAR_NO_CHECK('.'); 256 PARSE_INT(minor_version, 1);
260 PARSE_INT(minor_version, 1); 257 } else {
261 } else { 258 *minor_version = 0;
262 *minor_version = 0; 259 }
263 } 260 return buf;
264 return buf;
265} 261}
266 262
267static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers, 263static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers, size_t max_headers,
268 size_t max_headers, int *ret) 264 int *ret) {
269{ 265 for (;; ++*num_headers) {
270 for (;; ++*num_headers) { 266 CHECK_EOF();
271 CHECK_EOF(); 267 if (*buf == '\015') {
272 if (*buf == '\015') { 268 ++buf;
273 ++buf; 269 EXPECT_CHAR('\012');
274 EXPECT_CHAR('\012'); 270 break;
275 break; 271 } else if (*buf == '\012') {
276 } else if (*buf == '\012') { 272 ++buf;
277 ++buf; 273 break;
278 break; 274 }
279 } 275 if (*num_headers == max_headers) {
280 if (*num_headers == max_headers) { 276 *ret = -1;
281 *ret = -1; 277 return NULL;
282 return NULL; 278 }
283 } 279 if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
284 if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) { 280 /* parsing name, but do not discard SP before colon, see
285 /* parsing name, but do not discard SP before colon, see 281 * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
286 * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */ 282 headers[*num_headers].name = buf;
287 headers[*num_headers].name = buf; 283 static const char ALIGNED(16) ranges1[] = "\x00 " /* control chars and up to SP */
288 static const char ALIGNED(16) ranges1[] = "\x00 " /* control chars and up to SP */ 284 "\"\"" /* 0x22 */
289 "\"\"" /* 0x22 */ 285 "()" /* 0x28,0x29 */
290 "()" /* 0x28,0x29 */ 286 ",," /* 0x2c */
291 ",," /* 0x2c */ 287 "//" /* 0x2f */
292 "//" /* 0x2f */ 288 ":@" /* 0x3a-0x40 */
293 ":@" /* 0x3a-0x40 */ 289 "[]" /* 0x5b-0x5d */
294 "[]" /* 0x5b-0x5d */ 290 "{\377"; /* 0x7b-0xff */
295 "{\377"; /* 0x7b-0xff */ 291 int found;
296 int found; 292 buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
297 buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found); 293 if (!found) {
298 if (!found) { 294 CHECK_EOF();
299 CHECK_EOF(); 295 }
300 } 296 while (1) {
301 while (1) { 297 if (*buf == ':') {
302 if (*buf == ':') { 298 break;
303 break; 299 } else if (!token_char_map[(unsigned char)*buf]) {
304 } else if (!token_char_map[(unsigned char)*buf]) { 300 *ret = -1;
305 *ret = -1; 301 return NULL;
306 return NULL; 302 }
307 } 303 ++buf;
308 ++buf; 304 CHECK_EOF();
309 CHECK_EOF(); 305 }
310 } 306 if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) {
311 if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) { 307 *ret = -1;
312 *ret = -1; 308 return NULL;
313 return NULL; 309 }
314 } 310 ++buf;
315 ++buf; 311 for (;; ++buf) {
316 for (;; ++buf) { 312 CHECK_EOF();
317 CHECK_EOF(); 313 if (!(*buf == ' ' || *buf == '\t')) {
318 if (!(*buf == ' ' || *buf == '\t')) { 314 break;
319 break; 315 }
320 } 316 }
321 } 317 } else {
322 } else { 318 headers[*num_headers].name = NULL;
323 headers[*num_headers].name = NULL; 319 headers[*num_headers].name_len = 0;
324 headers[*num_headers].name_len = 0; 320 }
325 } 321 const char *value;
326 const char *value; 322 size_t value_len;
327 size_t value_len; 323 if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) {
328 if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) { 324 return NULL;
329 return NULL; 325 }
330 } 326 /* remove trailing SPs and HTABs */
331 /* remove trailing SPs and HTABs */ 327 const char *value_end = value + value_len;
332 const char *value_end = value + value_len; 328 for (; value_end != value; --value_end) {
333 for (; value_end != value; --value_end) { 329 const char c = *(value_end - 1);
334 const char c = *(value_end - 1); 330 if (!(c == ' ' || c == '\t')) {
335 if (!(c == ' ' || c == '\t')) { 331 break;
336 break; 332 }
337 } 333 }
338 } 334 headers[*num_headers].value = value;
339 headers[*num_headers].value = value; 335 headers[*num_headers].value_len = value_end - value;
340 headers[*num_headers].value_len = value_end - value; 336 }
341 } 337 return buf;
342 return buf;
343} 338}
344 339
345static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path, 340static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
346 size_t *path_len, int *major_version, int *minor_version, struct phr_header *headers, size_t *num_headers, 341 size_t *path_len, int *major_version, int *minor_version, struct phr_header *headers, size_t *num_headers,
347 size_t max_headers, int *ret) 342 size_t max_headers, int *ret) {
348{ 343 /* skip first empty line (some clients add CRLF after POST content) */
349 /* skip first empty line (some clients add CRLF after POST content) */ 344 CHECK_EOF();
350 CHECK_EOF(); 345 if (*buf == '\015') {
351 if (*buf == '\015') { 346 ++buf;
352 ++buf; 347 EXPECT_CHAR('\012');
353 EXPECT_CHAR('\012'); 348 } else if (*buf == '\012') {
354 } else if (*buf == '\012') { 349 ++buf;
355 ++buf; 350 }
356 } 351
357 352 /* parse request line */
358 /* parse request line */ 353 ADVANCE_TOKEN(*method, *method_len);
359 ADVANCE_TOKEN(*method, *method_len); 354 do {
360 do { 355 ++buf;
361 ++buf; 356 } while (*buf == ' ');
362 } while (*buf == ' '); 357 ADVANCE_TOKEN(*path, *path_len);
363 ADVANCE_TOKEN(*path, *path_len); 358 do {
364 do { 359 ++buf;
365 ++buf; 360 } while (*buf == ' ');
366 } while (*buf == ' '); 361 if (*method_len == 0 || *path_len == 0) {
367 if (*method_len == 0 || *path_len == 0) { 362 *ret = -1;
368 *ret = -1; 363 return NULL;
369 return NULL; 364 }
370 } 365 if ((buf = parse_http_version(buf, buf_end, major_version, minor_version, ret)) == NULL) {
371 if ((buf = parse_http_version(buf, buf_end, major_version, minor_version, ret)) == NULL) { 366 return NULL;
372 return NULL; 367 }
373 } 368 if (*buf == '\015') {
374 if (*buf == '\015') { 369 ++buf;
375 ++buf; 370 EXPECT_CHAR('\012');
376 EXPECT_CHAR('\012'); 371 } else if (*buf == '\012') {
377 } else if (*buf == '\012') { 372 ++buf;
378 ++buf; 373 } else {
379 } else { 374 *ret = -1;
380 *ret = -1; 375 return NULL;
381 return NULL; 376 }
382 } 377
383 378 return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
384 return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
385} 379}
386 380
387int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path, 381int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path, size_t *path_len,
388 size_t *path_len, int *major_version, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len) 382 int *major_version, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len) {
389{ 383 const char *buf = buf_start, *buf_end = buf_start + len;
390 const char *buf = buf_start, *buf_end = buf_start + len; 384 size_t max_headers = *num_headers;
391 size_t max_headers = *num_headers; 385 int r;
392 int r; 386
393 387 *method = NULL;
394 *method = NULL; 388 *method_len = 0;
395 *method_len = 0; 389 *path = NULL;
396 *path = NULL; 390 *path_len = 0;
397 *path_len = 0; 391 *major_version = -1;
398 *major_version = -1; 392 *minor_version = -1;
399 *minor_version = -1; 393 *num_headers = 0;
400 *num_headers = 0; 394
401 395 /* if last_len != 0, check if the request is complete (a fast countermeasure
402 /* if last_len != 0, check if the request is complete (a fast countermeasure 396 against slowloris */
403 against slowloris */ 397 if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
404 if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { 398 return r;
405 return r; 399 }
406 } 400
407 401 if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, major_version, minor_version, headers, num_headers,
408 if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, major_version, minor_version, headers, num_headers, max_headers, 402 max_headers, &r)) == NULL) {
409 &r)) == NULL) { 403 return r;
410 return r; 404 }
411 } 405
412 406 return (int)(buf - buf_start);
413 return (int)(buf - buf_start);
414} 407}
415 408
416static const char *parse_response(const char *buf, const char *buf_end, int *major_version, int *minor_version, int *status, const char **msg, 409static const char *parse_response(const char *buf, const char *buf_end, int *major_version, int *minor_version, int *status,
417 size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret) 410 const char **msg, size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers,
418{ 411 int *ret) {
419 /* parse "HTTP/1.x" */ 412 /* parse "HTTP/1.x" */
420 if ((buf = parse_http_version(buf, buf_end, major_version, minor_version, ret)) == NULL) { 413 if ((buf = parse_http_version(buf, buf_end, major_version, minor_version, ret)) == NULL) {
421 return NULL; 414 return NULL;
422 } 415 }
423 /* skip space */ 416 /* skip space */
424 if (*buf != ' ') { 417 if (*buf != ' ') {
425 *ret = -1; 418 *ret = -1;
426 return NULL; 419 return NULL;
427 } 420 }
428 do { 421 do {
429 ++buf; 422 ++buf;
430 } while (*buf == ' '); 423 } while (*buf == ' ');
431 /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */ 424 /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
432 if (buf_end - buf < 4) { 425 if (buf_end - buf < 4) {
433 *ret = -2; 426 *ret = -2;
434 return NULL; 427 return NULL;
435 } 428 }
436 PARSE_INT_3(status); 429 PARSE_INT_3(status);
437 430
438 /* get message including preceding space */ 431 /* get message including preceding space */
439 if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) { 432 if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
440 return NULL; 433 return NULL;
441 } 434 }
442 if (*msg_len == 0) { 435 if (*msg_len == 0) {
443 /* ok */ 436 /* ok */
444 } else if (**msg == ' ') { 437 } else if (**msg == ' ') {
445 /* remove preceding space */ 438 /* remove preceding space */
446 do { 439 do {
447 ++*msg; 440 ++*msg;
448 --*msg_len; 441 --*msg_len;
449 } while (**msg == ' '); 442 } while (**msg == ' ');
450 } else { 443 } else {
451 /* garbage found after status code */ 444 /* garbage found after status code */
452 *ret = -1; 445 *ret = -1;
453 return NULL; 446 return NULL;
454 } 447 }
455 448
456 return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret); 449 return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
457} 450}
458 451
459int phr_parse_response(const char *buf_start, size_t len, int *major_version, int *minor_version, int *status, const char **msg, size_t *msg_len, 452int phr_parse_response(const char *buf_start, size_t len, int *major_version, int *minor_version, int *status, const char **msg,
460 struct phr_header *headers, size_t *num_headers, size_t last_len) 453 size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t last_len) {
461{ 454 const char *buf = buf_start, *buf_end = buf + len;
462 const char *buf = buf_start, *buf_end = buf + len; 455 size_t max_headers = *num_headers;
463 size_t max_headers = *num_headers; 456 int r;
464 int r; 457
465 458 *major_version = -1;
466 *major_version = -1; 459 *minor_version = -1;
467 *minor_version = -1; 460 *status = 0;
468 *status = 0; 461 *msg = NULL;
469 *msg = NULL; 462 *msg_len = 0;
470 *msg_len = 0; 463 *num_headers = 0;
471 *num_headers = 0; 464
472 465 /* if last_len != 0, check if the response is complete (a fast countermeasure
473 /* if last_len != 0, check if the response is complete (a fast countermeasure 466 against slowloris */
474 against slowloris */ 467 if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
475 if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { 468 return r;
476 return r; 469 }
477 } 470
478 471 if ((buf = parse_response(buf, buf_end, major_version, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) ==
479 if ((buf = parse_response(buf, buf_end, major_version, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) { 472 NULL) {
480 return r; 473 return r;
481 } 474 }
482 475
483 return (int)(buf - buf_start); 476 return (int)(buf - buf_start);
484} 477}
485 478
486int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len) 479int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len) {
487{ 480 const char *buf = buf_start, *buf_end = buf + len;
488 const char *buf = buf_start, *buf_end = buf + len; 481 size_t max_headers = *num_headers;
489 size_t max_headers = *num_headers; 482 int r;
490 int r;
491 483
492 *num_headers = 0; 484 *num_headers = 0;
493 485
494 /* if last_len != 0, check if the response is complete (a fast countermeasure 486 /* if last_len != 0, check if the response is complete (a fast countermeasure
495 against slowloris */ 487 against slowloris */
496 if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { 488 if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
497 return r; 489 return r;
498 } 490 }
499 491
500 if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) { 492 if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
501 return r; 493 return r;
502 } 494 }
503 495
504 return (int)(buf - buf_start); 496 return (int)(buf - buf_start);
505} 497}
506 498
507enum { 499enum {
508 CHUNKED_IN_CHUNK_SIZE, 500 CHUNKED_IN_CHUNK_SIZE,
509 CHUNKED_IN_CHUNK_EXT, 501 CHUNKED_IN_CHUNK_EXT,
510 CHUNKED_IN_CHUNK_DATA, 502 CHUNKED_IN_CHUNK_DATA,
511 CHUNKED_IN_CHUNK_CRLF, 503 CHUNKED_IN_CHUNK_CRLF,
512 CHUNKED_IN_TRAILERS_LINE_HEAD, 504 CHUNKED_IN_TRAILERS_LINE_HEAD,
513 CHUNKED_IN_TRAILERS_LINE_MIDDLE 505 CHUNKED_IN_TRAILERS_LINE_MIDDLE
514}; 506};
515 507
516static int decode_hex(int ch) 508static int decode_hex(int ch) {
517{ 509 if ('0' <= ch && ch <= '9') {
518 if ('0' <= ch && ch <= '9') { 510 return ch - '0';
519 return ch - '0'; 511 } else if ('A' <= ch && ch <= 'F') {
520 } else if ('A' <= ch && ch <= 'F') { 512 return ch - 'A' + 0xa;
521 return ch - 'A' + 0xa; 513 } else if ('a' <= ch && ch <= 'f') {
522 } else if ('a' <= ch && ch <= 'f') { 514 return ch - 'a' + 0xa;
523 return ch - 'a' + 0xa; 515 } else {
524 } else { 516 return -1;
525 return -1; 517 }
526 }
527} 518}
528 519
529ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz) 520ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz) {
530{ 521 size_t dst = 0, src = 0, bufsz = *_bufsz;
531 size_t dst = 0, src = 0, bufsz = *_bufsz; 522 ssize_t ret = -2; /* incomplete */
532 ssize_t ret = -2; /* incomplete */ 523
533 524 while (1) {
534 while (1) { 525 switch (decoder->_state) {
535 switch (decoder->_state) { 526 case CHUNKED_IN_CHUNK_SIZE:
536 case CHUNKED_IN_CHUNK_SIZE: 527 for (;; ++src) {
537 for (;; ++src) { 528 int v;
538 int v; 529 if (src == bufsz)
539 if (src == bufsz) 530 goto Exit;
540 goto Exit; 531 if ((v = decode_hex(buf[src])) == -1) {
541 if ((v = decode_hex(buf[src])) == -1) { 532 if (decoder->_hex_count == 0) {
542 if (decoder->_hex_count == 0) { 533 ret = -1;
543 ret = -1; 534 goto Exit;
544 goto Exit; 535 }
545 } 536 break;
546 break; 537 }
547 } 538 if (decoder->_hex_count == sizeof(size_t) * 2) {
548 if (decoder->_hex_count == sizeof(size_t) * 2) { 539 ret = -1;
549 ret = -1; 540 goto Exit;
550 goto Exit; 541 }
551 } 542 decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
552 decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v; 543 ++decoder->_hex_count;
553 ++decoder->_hex_count; 544 }
554 } 545 decoder->_hex_count = 0;
555 decoder->_hex_count = 0; 546 decoder->_state = CHUNKED_IN_CHUNK_EXT;
556 decoder->_state = CHUNKED_IN_CHUNK_EXT; 547 /* fallthru */
557 /* fallthru */ 548 case CHUNKED_IN_CHUNK_EXT:
558 case CHUNKED_IN_CHUNK_EXT: 549 /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
559 /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */ 550 for (;; ++src) {
560 for (;; ++src) { 551 if (src == bufsz)
561 if (src == bufsz) 552 goto Exit;
562 goto Exit; 553 if (buf[src] == '\012')
563 if (buf[src] == '\012') 554 break;
564 break; 555 }
565 } 556 ++src;
566 ++src; 557 if (decoder->bytes_left_in_chunk == 0) {
567 if (decoder->bytes_left_in_chunk == 0) { 558 if (decoder->consume_trailer) {
568 if (decoder->consume_trailer) { 559 decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
569 decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; 560 break;
570 break; 561 } else {
571 } else { 562 goto Complete;
572 goto Complete; 563 }
573 } 564 }
574 } 565 decoder->_state = CHUNKED_IN_CHUNK_DATA;
575 decoder->_state = CHUNKED_IN_CHUNK_DATA; 566 /* fallthru */
576 /* fallthru */ 567 case CHUNKED_IN_CHUNK_DATA: {
577 case CHUNKED_IN_CHUNK_DATA: { 568 size_t avail = bufsz - src;
578 size_t avail = bufsz - src; 569 if (avail < decoder->bytes_left_in_chunk) {
579 if (avail < decoder->bytes_left_in_chunk) { 570 if (dst != src)
580 if (dst != src) 571 memmove(buf + dst, buf + src, avail);
581 memmove(buf + dst, buf + src, avail); 572 src += avail;
582 src += avail; 573 dst += avail;
583 dst += avail; 574 decoder->bytes_left_in_chunk -= avail;
584 decoder->bytes_left_in_chunk -= avail; 575 goto Exit;
585 goto Exit; 576 }
586 } 577 if (dst != src)
587 if (dst != src) 578 memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
588 memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk); 579 src += decoder->bytes_left_in_chunk;
589 src += decoder->bytes_left_in_chunk; 580 dst += decoder->bytes_left_in_chunk;
590 dst += decoder->bytes_left_in_chunk; 581 decoder->bytes_left_in_chunk = 0;
591 decoder->bytes_left_in_chunk = 0; 582 decoder->_state = CHUNKED_IN_CHUNK_CRLF;
592 decoder->_state = CHUNKED_IN_CHUNK_CRLF; 583 }
593 } 584 /* fallthru */
594 /* fallthru */ 585 case CHUNKED_IN_CHUNK_CRLF:
595 case CHUNKED_IN_CHUNK_CRLF: 586 for (;; ++src) {
596 for (;; ++src) { 587 if (src == bufsz)
597 if (src == bufsz) 588 goto Exit;
598 goto Exit; 589 if (buf[src] != '\015')
599 if (buf[src] != '\015') 590 break;
600 break; 591 }
601 } 592 if (buf[src] != '\012') {
602 if (buf[src] != '\012') { 593 ret = -1;
603 ret = -1; 594 goto Exit;
604 goto Exit; 595 }
605 } 596 ++src;
606 ++src; 597 decoder->_state = CHUNKED_IN_CHUNK_SIZE;
607 decoder->_state = CHUNKED_IN_CHUNK_SIZE; 598 break;
608 break; 599 case CHUNKED_IN_TRAILERS_LINE_HEAD:
609 case CHUNKED_IN_TRAILERS_LINE_HEAD: 600 for (;; ++src) {
610 for (;; ++src) { 601 if (src == bufsz)
611 if (src == bufsz) 602 goto Exit;
612 goto Exit; 603 if (buf[src] != '\015')
613 if (buf[src] != '\015') 604 break;
614 break; 605 }
615 } 606 if (buf[src++] == '\012')
616 if (buf[src++] == '\012') 607 goto Complete;
617 goto Complete; 608 decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
618 decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE; 609 /* fallthru */
619 /* fallthru */ 610 case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
620 case CHUNKED_IN_TRAILERS_LINE_MIDDLE: 611 for (;; ++src) {
621 for (;; ++src) { 612 if (src == bufsz)
622 if (src == bufsz) 613 goto Exit;
623 goto Exit; 614 if (buf[src] == '\012')
624 if (buf[src] == '\012') 615 break;
625 break; 616 }
626 } 617 ++src;
627 ++src; 618 decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
628 decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; 619 break;
629 break; 620 default:
630 default: 621 assert(!"decoder is corrupt");
631 assert(!"decoder is corrupt"); 622 }
632 } 623 }
633 }
634 624
635Complete: 625Complete:
636 ret = bufsz - src; 626 ret = bufsz - src;
637Exit: 627Exit:
638 if (dst != src) 628 if (dst != src)
639 memmove(buf + dst, buf + src, bufsz - src); 629 memmove(buf + dst, buf + src, bufsz - src);
640 *_bufsz = dst; 630 *_bufsz = dst;
641 return ret; 631 return ret;
642} 632}
643 633
644int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder) 634int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder) { return decoder->_state == CHUNKED_IN_CHUNK_DATA; }
645{
646 return decoder->_state == CHUNKED_IN_CHUNK_DATA;
647}
648 635
649#undef CHECK_EOF 636#undef CHECK_EOF
650#undef EXPECT_CHAR 637#undef EXPECT_CHAR