summaryrefslogtreecommitdiffstats
path: root/plugins/uriparser/UriParse.c
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/uriparser/UriParse.c')
-rw-r--r--plugins/uriparser/UriParse.c2241
1 files changed, 2241 insertions, 0 deletions
diff --git a/plugins/uriparser/UriParse.c b/plugins/uriparser/UriParse.c
new file mode 100644
index 00000000..e3cdc68d
--- /dev/null
+++ b/plugins/uriparser/UriParse.c
@@ -0,0 +1,2241 @@
1/*
2 * uriparser - RFC 3986 URI parsing library
3 *
4 * Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
5 * Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * * Redistributions of source code must retain the above
13 * copyright notice, this list of conditions and the following
14 * disclaimer.
15 *
16 * * Redistributions in binary form must reproduce the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer in the documentation and/or other materials
19 * provided with the distribution.
20 *
21 * * Neither the name of the <ORGANIZATION> nor the names of its
22 * contributors may be used to endorse or promote products
23 * derived from this software without specific prior written
24 * permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
29 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
30 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
31 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
32 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
33 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
35 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
37 * OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
40/**
41 * @file UriParse.c
42 * Holds the RFC 3986 %URI parsing implementation.
43 * NOTE: This source file includes itself twice.
44 */
45
46/* What encodings are enabled? */
47#include <uriparser/UriDefsConfig.h>
48#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
49/* Include SELF twice */
50# ifdef URI_ENABLE_ANSI
51# define URI_PASS_ANSI 1
52# include "UriParse.c"
53# undef URI_PASS_ANSI
54# endif
55# ifdef URI_ENABLE_UNICODE
56# define URI_PASS_UNICODE 1
57# include "UriParse.c"
58# undef URI_PASS_UNICODE
59# endif
60#else
61# ifdef URI_PASS_ANSI
62# include <uriparser/UriDefsAnsi.h>
63# else
64# include <uriparser/UriDefsUnicode.h>
65# include <wchar.h>
66# endif
67
68
69
70#ifndef URI_DOXYGEN
71# include <uriparser/Uri.h>
72# include <uriparser/UriIp4.h>
73# include "UriCommon.h"
74# include "UriParseBase.h"
75#endif
76
77
78
/*
 * Character-class helpers for switch statements.
 *
 * Every macro below expands to a run of case labels in which the very
 * first "case" keyword and the very last colon are left out, so a class
 * is used like an ordinary label:
 *
 *     case URI_SET_DIGIT:
 *
 * Each literal is wrapped in _UT() so the labels match the character
 * type of the current compilation pass.
 */

/* Decimal digits '0'..'9' */
#define URI_SET_DIGIT \
         _UT('0'): case _UT('1'): case _UT('2'): case _UT('3'): \
    case _UT('4'): case _UT('5'): case _UT('6'): case _UT('7'): \
    case _UT('8'): case _UT('9')

/* Upper-case hexadecimal letters 'A'..'F' */
#define URI_SET_HEX_LETTER_UPPER \
         _UT('A'): case _UT('B'): case _UT('C'): \
    case _UT('D'): case _UT('E'): case _UT('F')

/* Lower-case hexadecimal letters 'a'..'f' */
#define URI_SET_HEX_LETTER_LOWER \
         _UT('a'): case _UT('b'): case _UT('c'): \
    case _UT('d'): case _UT('e'): case _UT('f')

/* Any hexadecimal digit: 0-9, A-F, a-f */
#define URI_SET_HEXDIG \
         URI_SET_DIGIT: \
    case URI_SET_HEX_LETTER_UPPER: \
    case URI_SET_HEX_LETTER_LOWER

/* Any ASCII letter: the hex letters plus g-z / G-Z */
#define URI_SET_ALPHA \
         URI_SET_HEX_LETTER_UPPER: \
    case URI_SET_HEX_LETTER_LOWER: \
    case _UT('g'): case _UT('G'): \
    case _UT('h'): case _UT('H'): \
    case _UT('i'): case _UT('I'): \
    case _UT('j'): case _UT('J'): \
    case _UT('k'): case _UT('K'): \
    case _UT('l'): case _UT('L'): \
    case _UT('m'): case _UT('M'): \
    case _UT('n'): case _UT('N'): \
    case _UT('o'): case _UT('O'): \
    case _UT('p'): case _UT('P'): \
    case _UT('q'): case _UT('Q'): \
    case _UT('r'): case _UT('R'): \
    case _UT('s'): case _UT('S'): \
    case _UT('t'): case _UT('T'): \
    case _UT('u'): case _UT('U'): \
    case _UT('v'): case _UT('V'): \
    case _UT('w'): case _UT('W'): \
    case _UT('x'): case _UT('X'): \
    case _UT('y'): case _UT('Y'): \
    case _UT('z'): case _UT('Z')
155
156
157
158static const URI_CHAR * URI_FUNC(ParseAuthority)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
159static const URI_CHAR * URI_FUNC(ParseAuthorityTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
160static const URI_CHAR * URI_FUNC(ParseHexZero)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
161static const URI_CHAR * URI_FUNC(ParseHierPart)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
162static const URI_CHAR * URI_FUNC(ParseIpFutLoop)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
163static const URI_CHAR * URI_FUNC(ParseIpFutStopGo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
164static const URI_CHAR * URI_FUNC(ParseIpLit2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
165static const URI_CHAR * URI_FUNC(ParseIPv6address2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
166static const URI_CHAR * URI_FUNC(ParseMustBeSegmentNzNc)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
167static const URI_CHAR * URI_FUNC(ParseOwnHost)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
168static const URI_CHAR * URI_FUNC(ParseOwnHost2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
169static const URI_CHAR * URI_FUNC(ParseOwnHostUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
170static const URI_CHAR * URI_FUNC(ParseOwnHostUserInfoNz)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
171static const URI_CHAR * URI_FUNC(ParseOwnPortUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
172static const URI_CHAR * URI_FUNC(ParseOwnUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
173static const URI_CHAR * URI_FUNC(ParsePartHelperTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
174static const URI_CHAR * URI_FUNC(ParsePathAbsEmpty)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
175static const URI_CHAR * URI_FUNC(ParsePathAbsNoLeadSlash)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
176static const URI_CHAR * URI_FUNC(ParsePathRootless)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
177static const URI_CHAR * URI_FUNC(ParsePchar)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
178static const URI_CHAR * URI_FUNC(ParsePctEncoded)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
179static const URI_CHAR * URI_FUNC(ParsePctSubUnres)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
180static const URI_CHAR * URI_FUNC(ParsePort)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
181static const URI_CHAR * URI_FUNC(ParseQueryFrag)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
182static const URI_CHAR * URI_FUNC(ParseSegment)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
183static const URI_CHAR * URI_FUNC(ParseSegmentNz)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
184static const URI_CHAR * URI_FUNC(ParseSegmentNzNcOrScheme2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
185static const URI_CHAR * URI_FUNC(ParseUriReference)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
186static const URI_CHAR * URI_FUNC(ParseUriTail)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
187static const URI_CHAR * URI_FUNC(ParseUriTailTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
188static const URI_CHAR * URI_FUNC(ParseZeroMoreSlashSegs)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
189
190static UriBool URI_FUNC(OnExitOwnHost2)(URI_TYPE(ParserState) * state, const URI_CHAR * first);
191static UriBool URI_FUNC(OnExitOwnHostUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first);
192static UriBool URI_FUNC(OnExitOwnPortUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first);
193static UriBool URI_FUNC(OnExitSegmentNzNcOrScheme2)(URI_TYPE(ParserState) * state, const URI_CHAR * first);
194static void URI_FUNC(OnExitPartHelperTwo)(URI_TYPE(ParserState) * state);
195
196static void URI_FUNC(ResetParserState)(URI_TYPE(ParserState) * state);
197
198static UriBool URI_FUNC(PushPathSegment)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
199
200static void URI_FUNC(StopSyntax)(URI_TYPE(ParserState) * state, const URI_CHAR * errorPos);
201static void URI_FUNC(StopMalloc)(URI_TYPE(ParserState) * state);
202
203
204
205static URI_INLINE void URI_FUNC(StopSyntax)(URI_TYPE(ParserState) * state,
206 const URI_CHAR * errorPos) {
207 URI_FUNC(FreeUriMembers)(state->uri);
208 state->errorPos = errorPos;
209 state->errorCode = URI_ERROR_SYNTAX;
210}
211
212
213
214static URI_INLINE void URI_FUNC(StopMalloc)(URI_TYPE(ParserState) * state) {
215 URI_FUNC(FreeUriMembers)(state->uri);
216 state->errorPos = NULL;
217 state->errorCode = URI_ERROR_MALLOC;
218}
219
220
221
222/*
223 * [authority]-><[>[ipLit2][authorityTwo]
224 * [authority]->[ownHostUserInfoNz]
225 * [authority]-><NULL>
226 */
227static URI_INLINE const URI_CHAR * URI_FUNC(ParseAuthority)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
228 if (first >= afterLast) {
229 /* "" regname host */
230 state->uri->hostText.first = URI_FUNC(SafeToPointTo);
231 state->uri->hostText.afterLast = URI_FUNC(SafeToPointTo);
232 return afterLast;
233 }
234
235 switch (*first) {
236 case _UT('['):
237 {
238 const URI_CHAR * const afterIpLit2
239 = URI_FUNC(ParseIpLit2)(state, first + 1, afterLast);
240 if (afterIpLit2 == NULL) {
241 return NULL;
242 }
243 state->uri->hostText.first = first + 1; /* HOST BEGIN */
244 return URI_FUNC(ParseAuthorityTwo)(state, afterIpLit2, afterLast);
245 }
246
247 case _UT('!'):
248 case _UT('$'):
249 case _UT('%'):
250 case _UT('&'):
251 case _UT('('):
252 case _UT(')'):
253 case _UT('-'):
254 case _UT('*'):
255 case _UT(','):
256 case _UT('.'):
257 case _UT(':'):
258 case _UT(';'):
259 case _UT('@'):
260 case _UT('\''):
261 case _UT('_'):
262 case _UT('~'):
263 case _UT('+'):
264 case _UT('='):
265 case URI_SET_DIGIT:
266 case URI_SET_ALPHA:
267 state->uri->userInfo.first = first; /* USERINFO BEGIN */
268 return URI_FUNC(ParseOwnHostUserInfoNz)(state, first, afterLast);
269
270 default:
271 /* "" regname host */
272 state->uri->hostText.first = URI_FUNC(SafeToPointTo);
273 state->uri->hostText.afterLast = URI_FUNC(SafeToPointTo);
274 return first;
275 }
276}
277
278
279
280/*
281 * [authorityTwo]-><:>[port]
282 * [authorityTwo]-><NULL>
283 */
284static URI_INLINE const URI_CHAR * URI_FUNC(ParseAuthorityTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
285 if (first >= afterLast) {
286 return afterLast;
287 }
288
289 switch (*first) {
290 case _UT(':'):
291 {
292 const URI_CHAR * const afterPort = URI_FUNC(ParsePort)(state, first + 1, afterLast);
293 if (afterPort == NULL) {
294 return NULL;
295 }
296 state->uri->portText.first = first + 1; /* PORT BEGIN */
297 state->uri->portText.afterLast = afterPort; /* PORT END */
298 return afterPort;
299 }
300
301 default:
302 return first;
303 }
304}
305
306
307
308/*
309 * [hexZero]->[HEXDIG][hexZero]
310 * [hexZero]-><NULL>
311 */
312static const URI_CHAR * URI_FUNC(ParseHexZero)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
313 if (first >= afterLast) {
314 return afterLast;
315 }
316
317 switch (*first) {
318 case URI_SET_HEXDIG:
319 return URI_FUNC(ParseHexZero)(state, first + 1, afterLast);
320
321 default:
322 return first;
323 }
324}
325
326
327
328/*
329 * [hierPart]->[pathRootless]
330 * [hierPart]-></>[partHelperTwo]
331 * [hierPart]-><NULL>
332 */
333static URI_INLINE const URI_CHAR * URI_FUNC(ParseHierPart)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
334 if (first >= afterLast) {
335 return afterLast;
336 }
337
338 switch (*first) {
339 case _UT('!'):
340 case _UT('$'):
341 case _UT('%'):
342 case _UT('&'):
343 case _UT('('):
344 case _UT(')'):
345 case _UT('-'):
346 case _UT('*'):
347 case _UT(','):
348 case _UT('.'):
349 case _UT(':'):
350 case _UT(';'):
351 case _UT('@'):
352 case _UT('\''):
353 case _UT('_'):
354 case _UT('~'):
355 case _UT('+'):
356 case _UT('='):
357 case URI_SET_DIGIT:
358 case URI_SET_ALPHA:
359 return URI_FUNC(ParsePathRootless)(state, first, afterLast);
360
361 case _UT('/'):
362 return URI_FUNC(ParsePartHelperTwo)(state, first + 1, afterLast);
363
364 default:
365 return first;
366 }
367}
368
369
370
371/*
372 * [ipFutLoop]->[subDelims][ipFutStopGo]
373 * [ipFutLoop]->[unreserved][ipFutStopGo]
374 * [ipFutLoop]-><:>[ipFutStopGo]
375 */
376static const URI_CHAR * URI_FUNC(ParseIpFutLoop)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
377 if (first >= afterLast) {
378 URI_FUNC(StopSyntax)(state, first);
379 return NULL;
380 }
381
382 switch (*first) {
383 case _UT('!'):
384 case _UT('$'):
385 case _UT('&'):
386 case _UT('('):
387 case _UT(')'):
388 case _UT('-'):
389 case _UT('*'):
390 case _UT(','):
391 case _UT('.'):
392 case _UT(':'):
393 case _UT(';'):
394 case _UT('\''):
395 case _UT('_'):
396 case _UT('~'):
397 case _UT('+'):
398 case _UT('='):
399 case URI_SET_DIGIT:
400 case URI_SET_ALPHA:
401 return URI_FUNC(ParseIpFutStopGo)(state, first + 1, afterLast);
402
403 default:
404 URI_FUNC(StopSyntax)(state, first);
405 return NULL;
406 }
407}
408
409
410
411/*
412 * [ipFutStopGo]->[ipFutLoop]
413 * [ipFutStopGo]-><NULL>
414 */
415static const URI_CHAR * URI_FUNC(ParseIpFutStopGo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
416 if (first >= afterLast) {
417 return afterLast;
418 }
419
420 switch (*first) {
421 case _UT('!'):
422 case _UT('$'):
423 case _UT('&'):
424 case _UT('('):
425 case _UT(')'):
426 case _UT('-'):
427 case _UT('*'):
428 case _UT(','):
429 case _UT('.'):
430 case _UT(':'):
431 case _UT(';'):
432 case _UT('\''):
433 case _UT('_'):
434 case _UT('~'):
435 case _UT('+'):
436 case _UT('='):
437 case URI_SET_DIGIT:
438 case URI_SET_ALPHA:
439 return URI_FUNC(ParseIpFutLoop)(state, first, afterLast);
440
441 default:
442 return first;
443 }
444}
445
446
447
448/*
449 * [ipFuture]-><v>[HEXDIG][hexZero]<.>[ipFutLoop]
450 */
451static const URI_CHAR * URI_FUNC(ParseIpFuture)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
452 if (first >= afterLast) {
453 URI_FUNC(StopSyntax)(state, first);
454 return NULL;
455 }
456
457 /*
458 First character has already been
459 checked before entering this rule.
460
461 switch (*first) {
462 case _UT('v'):
463 */
464 if (first + 1 >= afterLast) {
465 URI_FUNC(StopSyntax)(state, first + 1);
466 return NULL;
467 }
468
469 switch (first[1]) {
470 case URI_SET_HEXDIG:
471 {
472 const URI_CHAR * afterIpFutLoop;
473 const URI_CHAR * const afterHexZero
474 = URI_FUNC(ParseHexZero)(state, first + 2, afterLast);
475 if (afterHexZero == NULL) {
476 return NULL;
477 }
478 if ((afterHexZero >= afterLast)
479 || (*afterHexZero != _UT('.'))) {
480 URI_FUNC(StopSyntax)(state, afterHexZero);
481 return NULL;
482 }
483 state->uri->hostText.first = first; /* HOST BEGIN */
484 state->uri->hostData.ipFuture.first = first; /* IPFUTURE BEGIN */
485 afterIpFutLoop = URI_FUNC(ParseIpFutLoop)(state, afterHexZero + 1, afterLast);
486 if (afterIpFutLoop == NULL) {
487 return NULL;
488 }
489 state->uri->hostText.afterLast = afterIpFutLoop; /* HOST END */
490 state->uri->hostData.ipFuture.afterLast = afterIpFutLoop; /* IPFUTURE END */
491 return afterIpFutLoop;
492 }
493
494 default:
495 URI_FUNC(StopSyntax)(state, first + 1);
496 return NULL;
497 }
498
499 /*
500 default:
501 URI_FUNC(StopSyntax)(state, first);
502 return NULL;
503 }
504 */
505}
506
507
508
509/*
510 * [ipLit2]->[ipFuture]<]>
511 * [ipLit2]->[IPv6address2]
512 */
513static URI_INLINE const URI_CHAR * URI_FUNC(ParseIpLit2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
514 if (first >= afterLast) {
515 URI_FUNC(StopSyntax)(state, first);
516 return NULL;
517 }
518
519 switch (*first) {
520 case _UT('v'):
521 {
522 const URI_CHAR * const afterIpFuture
523 = URI_FUNC(ParseIpFuture)(state, first, afterLast);
524 if (afterIpFuture == NULL) {
525 return NULL;
526 }
527 if ((afterIpFuture >= afterLast)
528 || (*afterIpFuture != _UT(']'))) {
529 URI_FUNC(StopSyntax)(state, first);
530 return NULL;
531 }
532 return afterIpFuture + 1;
533 }
534
535 case _UT(':'):
536 case _UT(']'):
537 case URI_SET_HEXDIG:
538 state->uri->hostData.ip6 = malloc(1 * sizeof(UriIp6)); /* Freed when stopping on parse error */
539 if (state->uri->hostData.ip6 == NULL) {
540 URI_FUNC(StopMalloc)(state);
541 return NULL;
542 }
543 return URI_FUNC(ParseIPv6address2)(state, first, afterLast);
544
545 default:
546 URI_FUNC(StopSyntax)(state, first);
547 return NULL;
548 }
549}
550
551
552
553/*
554 * [IPv6address2]->..<]>
555 */
556static const URI_CHAR * URI_FUNC(ParseIPv6address2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
557 int zipperEver = 0;
558 int quadsDone = 0;
559 int digitCount = 0;
560 unsigned char digitHistory[4];
561 int ip4OctetsDone = 0;
562
563 unsigned char quadsAfterZipper[14];
564 int quadsAfterZipperCount = 0;
565
566
567 for (;;) {
568 if (first >= afterLast) {
569 URI_FUNC(StopSyntax)(state, first);
570 return NULL;
571 }
572
573 /* Inside IPv4 part? */
574 if (ip4OctetsDone > 0) {
575 /* Eat rest of IPv4 address */
576 for (;;) {
577 switch (*first) {
578 case URI_SET_DIGIT:
579 if (digitCount == 4) {
580 URI_FUNC(StopSyntax)(state, first);
581 return NULL;
582 }
583 digitHistory[digitCount++] = (unsigned char)(9 + *first - _UT('9'));
584 break;
585
586 case _UT('.'):
587 if ((ip4OctetsDone == 4) /* NOTE! */
588 || (digitCount == 0)
589 || (digitCount == 4)) {
590 /* Invalid digit or octet count */
591 URI_FUNC(StopSyntax)(state, first);
592 return NULL;
593 } else if ((digitCount > 1)
594 && (digitHistory[0] == 0)) {
595 /* Leading zero */
596 URI_FUNC(StopSyntax)(state, first - digitCount);
597 return NULL;
598 } else if ((digitCount > 2)
599 && (digitHistory[1] == 0)) {
600 /* Leading zero */
601 URI_FUNC(StopSyntax)(state, first - digitCount + 1);
602 return NULL;
603 } else if ((digitCount == 3)
604 && (100 * digitHistory[0]
605 + 10 * digitHistory[1]
606 + digitHistory[2] > 255)) {
607 /* Octet value too large */
608 if (digitHistory[0] > 2) {
609 URI_FUNC(StopSyntax)(state, first - 3);
610 } else if (digitHistory[1] > 5) {
611 URI_FUNC(StopSyntax)(state, first - 2);
612 } else {
613 URI_FUNC(StopSyntax)(state, first - 1);
614 }
615 return NULL;
616 }
617
618 /* Copy IPv4 octet */
619 state->uri->hostData.ip6->data[16 - 4 + ip4OctetsDone] = uriGetOctetValue(digitHistory, digitCount);
620 digitCount = 0;
621 ip4OctetsDone++;
622 break;
623
624 case _UT(']'):
625 if ((ip4OctetsDone != 3) /* NOTE! */
626 || (digitCount == 0)
627 || (digitCount == 4)) {
628 /* Invalid digit or octet count */
629 URI_FUNC(StopSyntax)(state, first);
630 return NULL;
631 } else if ((digitCount > 1)
632 && (digitHistory[0] == 0)) {
633 /* Leading zero */
634 URI_FUNC(StopSyntax)(state, first - digitCount);
635 return NULL;
636 } else if ((digitCount > 2)
637 && (digitHistory[1] == 0)) {
638 /* Leading zero */
639 URI_FUNC(StopSyntax)(state, first - digitCount + 1);
640 return NULL;
641 } else if ((digitCount == 3)
642 && (100 * digitHistory[0]
643 + 10 * digitHistory[1]
644 + digitHistory[2] > 255)) {
645 /* Octet value too large */
646 if (digitHistory[0] > 2) {
647 URI_FUNC(StopSyntax)(state, first - 3);
648 } else if (digitHistory[1] > 5) {
649 URI_FUNC(StopSyntax)(state, first - 2);
650 } else {
651 URI_FUNC(StopSyntax)(state, first - 1);
652 }
653 return NULL;
654 }
655
656 state->uri->hostText.afterLast = first; /* HOST END */
657
658 /* Copy missing quads right before IPv4 */
659 memcpy(state->uri->hostData.ip6->data + 16 - 4 - 2 * quadsAfterZipperCount,
660 quadsAfterZipper, 2 * quadsAfterZipperCount);
661
662 /* Copy last IPv4 octet */
663 state->uri->hostData.ip6->data[16 - 4 + 3] = uriGetOctetValue(digitHistory, digitCount);
664
665 return first + 1;
666
667 default:
668 URI_FUNC(StopSyntax)(state, first);
669 return NULL;
670 }
671 first++;
672 }
673 } else {
674 /* Eat while no dot in sight */
675 int letterAmong = 0;
676 int walking = 1;
677 do {
678 switch (*first) {
679 case URI_SET_HEX_LETTER_LOWER:
680 letterAmong = 1;
681 if (digitCount == 4) {
682 URI_FUNC(StopSyntax)(state, first);
683 return NULL;
684 }
685 digitHistory[digitCount] = (unsigned char)(15 + *first - _UT('f'));
686 digitCount++;
687 break;
688
689 case URI_SET_HEX_LETTER_UPPER:
690 letterAmong = 1;
691 if (digitCount == 4) {
692 URI_FUNC(StopSyntax)(state, first);
693 return NULL;
694 }
695 digitHistory[digitCount] = (unsigned char)(15 + *first - _UT('F'));
696 digitCount++;
697 break;
698
699 case URI_SET_DIGIT:
700 if (digitCount == 4) {
701 URI_FUNC(StopSyntax)(state, first);
702 return NULL;
703 }
704 digitHistory[digitCount] = (unsigned char)(9 + *first - _UT('9'));
705 digitCount++;
706 break;
707
708 case _UT(':'):
709 {
710 int setZipper = 0;
711
712 if (digitCount > 0) {
713 if (zipperEver) {
714 uriWriteQuadToDoubleByte(digitHistory, digitCount, quadsAfterZipper + 2 * quadsAfterZipperCount);
715 quadsAfterZipperCount++;
716 } else {
717 uriWriteQuadToDoubleByte(digitHistory, digitCount, state->uri->hostData.ip6->data + 2 * quadsDone);
718 }
719 quadsDone++;
720 digitCount = 0;
721 }
722 letterAmong = 0;
723
724 /* Too many quads? */
725 if (quadsDone >= 8 - zipperEver) {
726 URI_FUNC(StopSyntax)(state, first);
727 return NULL;
728 }
729
730 /* "::"? */
731 if (first + 1 >= afterLast) {
732 URI_FUNC(StopSyntax)(state, first + 1);
733 return NULL;
734 }
735 if (first[1] == _UT(':')) {
736 const int resetOffset = 2 * (quadsDone + (digitCount > 0));
737
738 first++;
739 if (zipperEver) {
740 URI_FUNC(StopSyntax)(state, first);
741 return NULL; /* "::.+::" */
742 }
743
744 /* Zero everything after zipper */
745 memset(state->uri->hostData.ip6->data + resetOffset, 0, 16 - resetOffset);
746 setZipper = 1;
747
748 /* ":::+"? */
749 if (first + 1 >= afterLast) {
750 URI_FUNC(StopSyntax)(state, first + 1);
751 return NULL; /* No ']' yet */
752 }
753 if (first[1] == _UT(':')) {
754 URI_FUNC(StopSyntax)(state, first + 1);
755 return NULL; /* ":::+ "*/
756 }
757 }
758
759 if (setZipper) {
760 zipperEver = 1;
761 }
762 }
763 break;
764
765 case _UT('.'):
766 if ((quadsDone > 6) /* NOTE */
767 || (!zipperEver && (quadsDone < 6))
768 || letterAmong
769 || (digitCount == 0)
770 || (digitCount == 4)) {
771 /* Invalid octet before */
772 URI_FUNC(StopSyntax)(state, first);
773 return NULL;
774 } else if ((digitCount > 1)
775 && (digitHistory[0] == 0)) {
776 /* Leading zero */
777 URI_FUNC(StopSyntax)(state, first - digitCount);
778 return NULL;
779 } else if ((digitCount > 2)
780 && (digitHistory[1] == 0)) {
781 /* Leading zero */
782 URI_FUNC(StopSyntax)(state, first - digitCount + 1);
783 return NULL;
784 } else if ((digitCount == 3)
785 && (100 * digitHistory[0]
786 + 10 * digitHistory[1]
787 + digitHistory[2] > 255)) {
788 /* Octet value too large */
789 if (digitHistory[0] > 2) {
790 URI_FUNC(StopSyntax)(state, first - 3);
791 } else if (digitHistory[1] > 5) {
792 URI_FUNC(StopSyntax)(state, first - 2);
793 } else {
794 URI_FUNC(StopSyntax)(state, first - 1);
795 }
796 return NULL;
797 }
798
799 /* Copy first IPv4 octet */
800 state->uri->hostData.ip6->data[16 - 4] = uriGetOctetValue(digitHistory, digitCount);
801 digitCount = 0;
802
803 /* Switch over to IPv4 loop */
804 ip4OctetsDone = 1;
805 walking = 0;
806 break;
807
808 case _UT(']'):
809 /* Too little quads? */
810 if (!zipperEver && !((quadsDone == 7) && (digitCount > 0))) {
811 URI_FUNC(StopSyntax)(state, first);
812 return NULL;
813 }
814
815 if (digitCount > 0) {
816 if (zipperEver) {
817 uriWriteQuadToDoubleByte(digitHistory, digitCount, quadsAfterZipper + 2 * quadsAfterZipperCount);
818 quadsAfterZipperCount++;
819 } else {
820 uriWriteQuadToDoubleByte(digitHistory, digitCount, state->uri->hostData.ip6->data + 2 * quadsDone);
821 }
822 /*
823 quadsDone++;
824 digitCount = 0;
825 */
826 }
827
828 /* Copy missing quads to the end */
829 memcpy(state->uri->hostData.ip6->data + 16 - 2 * quadsAfterZipperCount,
830 quadsAfterZipper, 2 * quadsAfterZipperCount);
831
832 state->uri->hostText.afterLast = first; /* HOST END */
833 return first + 1; /* Fine */
834
835 default:
836 URI_FUNC(StopSyntax)(state, first);
837 return NULL;
838 }
839 first++;
840
841 if (first >= afterLast) {
842 URI_FUNC(StopSyntax)(state, first);
843 return NULL; /* No ']' yet */
844 }
845 } while (walking);
846 }
847 }
848}
849
850
851
852/*
853 * [mustBeSegmentNzNc]->[pctEncoded][mustBeSegmentNzNc]
854 * [mustBeSegmentNzNc]->[subDelims][mustBeSegmentNzNc]
855 * [mustBeSegmentNzNc]->[unreserved][mustBeSegmentNzNc]
856 * [mustBeSegmentNzNc]->[uriTail] // can take <NULL>
857 * [mustBeSegmentNzNc]-></>[segment][zeroMoreSlashSegs][uriTail]
858 * [mustBeSegmentNzNc]-><@>[mustBeSegmentNzNc]
859 */
860static const URI_CHAR * URI_FUNC(ParseMustBeSegmentNzNc)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
861 if (first >= afterLast) {
862 if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first)) { /* SEGMENT BOTH */
863 URI_FUNC(StopMalloc)(state);
864 return NULL;
865 }
866 state->uri->scheme.first = NULL; /* Not a scheme, reset */
867 return afterLast;
868 }
869
870 switch (*first) {
871 case _UT('%'):
872 {
873 const URI_CHAR * const afterPctEncoded
874 = URI_FUNC(ParsePctEncoded)(state, first, afterLast);
875 if (afterPctEncoded == NULL) {
876 return NULL;
877 }
878 return URI_FUNC(ParseMustBeSegmentNzNc)(state, afterPctEncoded, afterLast);
879 }
880
881 case _UT('@'):
882 case _UT('!'):
883 case _UT('$'):
884 case _UT('&'):
885 case _UT('('):
886 case _UT(')'):
887 case _UT('*'):
888 case _UT(','):
889 case _UT(';'):
890 case _UT('\''):
891 case _UT('+'):
892 case _UT('='):
893 case _UT('-'):
894 case _UT('.'):
895 case _UT('_'):
896 case _UT('~'):
897 case URI_SET_DIGIT:
898 case URI_SET_ALPHA:
899 return URI_FUNC(ParseMustBeSegmentNzNc)(state, first + 1, afterLast);
900
901 case _UT('/'):
902 {
903 const URI_CHAR * afterZeroMoreSlashSegs;
904 const URI_CHAR * afterSegment;
905 if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first)) { /* SEGMENT BOTH */
906 URI_FUNC(StopMalloc)(state);
907 return NULL;
908 }
909 state->uri->scheme.first = NULL; /* Not a scheme, reset */
910 afterSegment = URI_FUNC(ParseSegment)(state, first + 1, afterLast);
911 if (afterSegment == NULL) {
912 return NULL;
913 }
914 if (!URI_FUNC(PushPathSegment)(state, first + 1, afterSegment)) { /* SEGMENT BOTH */
915 URI_FUNC(StopMalloc)(state);
916 return NULL;
917 }
918 afterZeroMoreSlashSegs
919 = URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegment, afterLast);
920 if (afterZeroMoreSlashSegs == NULL) {
921 return NULL;
922 }
923 return URI_FUNC(ParseUriTail)(state, afterZeroMoreSlashSegs, afterLast);
924 }
925
926 default:
927 if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first)) { /* SEGMENT BOTH */
928 URI_FUNC(StopMalloc)(state);
929 return NULL;
930 }
931 state->uri->scheme.first = NULL; /* Not a scheme, reset */
932 return URI_FUNC(ParseUriTail)(state, first, afterLast);
933 }
934}
935
936
937
938/*
939 * [ownHost]-><[>[ipLit2][authorityTwo]
940 * [ownHost]->[ownHost2] // can take <NULL>
941 */
942static URI_INLINE const URI_CHAR * URI_FUNC(ParseOwnHost)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
943 if (first >= afterLast) {
944 return afterLast;
945 }
946
947 switch (*first) {
948 case _UT('['):
949 {
950 const URI_CHAR * const afterIpLit2
951 = URI_FUNC(ParseIpLit2)(state, first + 1, afterLast);
952 if (afterIpLit2 == NULL) {
953 return NULL;
954 }
955 state->uri->hostText.first = first + 1; /* HOST BEGIN */
956 return URI_FUNC(ParseAuthorityTwo)(state, afterIpLit2, afterLast);
957 }
958
959 default:
960 return URI_FUNC(ParseOwnHost2)(state, first, afterLast);
961 }
962}
963
964
965
966static URI_INLINE UriBool URI_FUNC(OnExitOwnHost2)(URI_TYPE(ParserState) * state, const URI_CHAR * first) {
967 state->uri->hostText.afterLast = first; /* HOST END */
968
969 /* Valid IPv4 or just a regname? */
970 state->uri->hostData.ip4 = malloc(1 * sizeof(UriIp4)); /* Freed when stopping on parse error */
971 if (state->uri->hostData.ip4 == NULL) {
972 return URI_FALSE; /* Raises malloc error */
973 }
974 if (URI_FUNC(ParseIpFourAddress)(state->uri->hostData.ip4->data,
975 state->uri->hostText.first, state->uri->hostText.afterLast)) {
976 /* Not IPv4 */
977 free(state->uri->hostData.ip4);
978 state->uri->hostData.ip4 = NULL;
979 }
980 return URI_TRUE; /* Success */
981}
982
983
984
985/*
986 * [ownHost2]->[authorityTwo] // can take <NULL>
987 * [ownHost2]->[pctSubUnres][ownHost2]
988 */
989static const URI_CHAR * URI_FUNC(ParseOwnHost2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
990 if (first >= afterLast) {
991 if (!URI_FUNC(OnExitOwnHost2)(state, first)) {
992 URI_FUNC(StopMalloc)(state);
993 return NULL;
994 }
995 return afterLast;
996 }
997
998 switch (*first) {
999 case _UT('!'):
1000 case _UT('$'):
1001 case _UT('%'):
1002 case _UT('&'):
1003 case _UT('('):
1004 case _UT(')'):
1005 case _UT('-'):
1006 case _UT('*'):
1007 case _UT(','):
1008 case _UT('.'):
1009 case _UT(';'):
1010 case _UT('\''):
1011 case _UT('_'):
1012 case _UT('~'):
1013 case _UT('+'):
1014 case _UT('='):
1015 case URI_SET_DIGIT:
1016 case URI_SET_ALPHA:
1017 {
1018 const URI_CHAR * const afterPctSubUnres
1019 = URI_FUNC(ParsePctSubUnres)(state, first, afterLast);
1020 if (afterPctSubUnres == NULL) {
1021 return NULL;
1022 }
1023 return URI_FUNC(ParseOwnHost2)(state, afterPctSubUnres, afterLast);
1024 }
1025
1026 default:
1027 if (!URI_FUNC(OnExitOwnHost2)(state, first)) {
1028 URI_FUNC(StopMalloc)(state);
1029 return NULL;
1030 }
1031 return URI_FUNC(ParseAuthorityTwo)(state, first, afterLast);
1032 }
1033}
1034
1035
1036
1037static URI_INLINE UriBool URI_FUNC(OnExitOwnHostUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first) {
1038 state->uri->hostText.first = state->uri->userInfo.first; /* Host instead of userInfo, update */
1039 state->uri->userInfo.first = NULL; /* Not a userInfo, reset */
1040 state->uri->hostText.afterLast = first; /* HOST END */
1041
1042 /* Valid IPv4 or just a regname? */
1043 state->uri->hostData.ip4 = malloc(1 * sizeof(UriIp4)); /* Freed when stopping on parse error */
1044 if (state->uri->hostData.ip4 == NULL) {
1045 return URI_FALSE; /* Raises malloc error */
1046 }
1047 if (URI_FUNC(ParseIpFourAddress)(state->uri->hostData.ip4->data,
1048 state->uri->hostText.first, state->uri->hostText.afterLast)) {
1049 /* Not IPv4 */
1050 free(state->uri->hostData.ip4);
1051 state->uri->hostData.ip4 = NULL;
1052 }
1053 return URI_TRUE; /* Success */
1054}
1055
1056
1057
1058/*
1059 * [ownHostUserInfo]->[ownHostUserInfoNz]
1060 * [ownHostUserInfo]-><NULL>
1061 */
1062static URI_INLINE const URI_CHAR * URI_FUNC(ParseOwnHostUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1063 if (first >= afterLast) {
1064 if (!URI_FUNC(OnExitOwnHostUserInfo)(state, first)) {
1065 URI_FUNC(StopMalloc)(state);
1066 return NULL;
1067 }
1068 return afterLast;
1069 }
1070
1071 switch (*first) {
1072 case _UT('!'):
1073 case _UT('$'):
1074 case _UT('%'):
1075 case _UT('&'):
1076 case _UT('('):
1077 case _UT(')'):
1078 case _UT('-'):
1079 case _UT('*'):
1080 case _UT(','):
1081 case _UT('.'):
1082 case _UT(':'):
1083 case _UT(';'):
1084 case _UT('@'):
1085 case _UT('\''):
1086 case _UT('_'):
1087 case _UT('~'):
1088 case _UT('+'):
1089 case _UT('='):
1090 case URI_SET_DIGIT:
1091 case URI_SET_ALPHA:
1092 return URI_FUNC(ParseOwnHostUserInfoNz)(state, first, afterLast);
1093
1094 default:
1095 if (!URI_FUNC(OnExitOwnHostUserInfo)(state, first)) {
1096 URI_FUNC(StopMalloc)(state);
1097 return NULL;
1098 }
1099 return first;
1100 }
1101}
1102
1103
1104
1105/*
1106 * [ownHostUserInfoNz]->[pctSubUnres][ownHostUserInfo]
1107 * [ownHostUserInfoNz]-><:>[ownPortUserInfo]
1108 * [ownHostUserInfoNz]-><@>[ownHost]
1109 */
1110static const URI_CHAR * URI_FUNC(ParseOwnHostUserInfoNz)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1111 if (first >= afterLast) {
1112 URI_FUNC(StopSyntax)(state, first);
1113 return NULL;
1114 }
1115
1116 switch (*first) {
1117 case _UT('!'):
1118 case _UT('$'):
1119 case _UT('%'):
1120 case _UT('&'):
1121 case _UT('('):
1122 case _UT(')'):
1123 case _UT('-'):
1124 case _UT('*'):
1125 case _UT(','):
1126 case _UT('.'):
1127 case _UT(';'):
1128 case _UT('\''):
1129 case _UT('_'):
1130 case _UT('~'):
1131 case _UT('+'):
1132 case _UT('='):
1133 case URI_SET_DIGIT:
1134 case URI_SET_ALPHA:
1135 {
1136 const URI_CHAR * const afterPctSubUnres
1137 = URI_FUNC(ParsePctSubUnres)(state, first, afterLast);
1138 if (afterPctSubUnres == NULL) {
1139 return NULL;
1140 }
1141 return URI_FUNC(ParseOwnHostUserInfo)(state, afterPctSubUnres, afterLast);
1142 }
1143
1144 case _UT(':'):
1145 state->uri->hostText.afterLast = first; /* HOST END */
1146 state->uri->portText.first = first + 1; /* PORT BEGIN */
1147 return URI_FUNC(ParseOwnPortUserInfo)(state, first + 1, afterLast);
1148
1149 case _UT('@'):
1150 state->uri->userInfo.afterLast = first; /* USERINFO END */
1151 state->uri->hostText.first = first + 1; /* HOST BEGIN */
1152 return URI_FUNC(ParseOwnHost)(state, first + 1, afterLast);
1153
1154 default:
1155 URI_FUNC(StopSyntax)(state, first);
1156 return NULL;
1157 }
1158}
1159
1160
1161
1162static URI_INLINE UriBool URI_FUNC(OnExitOwnPortUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first) {
1163 state->uri->hostText.first = state->uri->userInfo.first; /* Host instead of userInfo, update */
1164 state->uri->userInfo.first = NULL; /* Not a userInfo, reset */
1165 state->uri->portText.afterLast = first; /* PORT END */
1166
1167 /* Valid IPv4 or just a regname? */
1168 state->uri->hostData.ip4 = malloc(1 * sizeof(UriIp4)); /* Freed when stopping on parse error */
1169 if (state->uri->hostData.ip4 == NULL) {
1170 return URI_FALSE; /* Raises malloc error */
1171 }
1172 if (URI_FUNC(ParseIpFourAddress)(state->uri->hostData.ip4->data,
1173 state->uri->hostText.first, state->uri->hostText.afterLast)) {
1174 /* Not IPv4 */
1175 free(state->uri->hostData.ip4);
1176 state->uri->hostData.ip4 = NULL;
1177 }
1178 return URI_TRUE; /* Success */
1179}
1180
1181
1182
1183/*
1184 * [ownPortUserInfo]->[ALPHA][ownUserInfo]
1185 * [ownPortUserInfo]->[DIGIT][ownPortUserInfo]
1186 * [ownPortUserInfo]-><.>[ownUserInfo]
1187 * [ownPortUserInfo]-><_>[ownUserInfo]
1188 * [ownPortUserInfo]-><~>[ownUserInfo]
1189 * [ownPortUserInfo]-><->[ownUserInfo]
1190 * [ownPortUserInfo]->[subDelims][ownUserInfo]
1191 * [ownPortUserInfo]->[pctEncoded][ownUserInfo]
1192 * [ownPortUserInfo]-><:>[ownUserInfo]
1193 * [ownPortUserInfo]-><@>[ownHost]
1194 * [ownPortUserInfo]-><NULL>
1195 */
1196static const URI_CHAR * URI_FUNC(ParseOwnPortUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1197 if (first >= afterLast) {
1198 if (!URI_FUNC(OnExitOwnPortUserInfo)(state, first)) {
1199 URI_FUNC(StopMalloc)(state);
1200 return NULL;
1201 }
1202 return afterLast;
1203 }
1204
1205 switch (*first) {
1206 /* begin sub-delims */
1207 case _UT('!'):
1208 case _UT('$'):
1209 case _UT('&'):
1210 case _UT('\''):
1211 case _UT('('):
1212 case _UT(')'):
1213 case _UT('*'):
1214 case _UT('+'):
1215 case _UT(','):
1216 case _UT(';'):
1217 case _UT('='):
1218 /* end sub-delims */
1219 /* begin unreserved (except alpha and digit) */
1220 case _UT('-'):
1221 case _UT('.'):
1222 case _UT('_'):
1223 case _UT('~'):
1224 /* end unreserved (except alpha and digit) */
1225 case _UT(':'):
1226 case URI_SET_ALPHA:
1227 state->uri->hostText.afterLast = NULL; /* Not a host, reset */
1228 state->uri->portText.first = NULL; /* Not a port, reset */
1229 return URI_FUNC(ParseOwnUserInfo)(state, first + 1, afterLast);
1230
1231 case URI_SET_DIGIT:
1232 return URI_FUNC(ParseOwnPortUserInfo)(state, first + 1, afterLast);
1233
1234 case _UT('%'):
1235 state->uri->portText.first = NULL; /* Not a port, reset */
1236 {
1237 const URI_CHAR * const afterPct
1238 = URI_FUNC(ParsePctEncoded)(state, first, afterLast);
1239 if (afterPct == NULL) {
1240 return NULL;
1241 }
1242 return URI_FUNC(ParseOwnUserInfo)(state, afterPct, afterLast);
1243 }
1244
1245 case _UT('@'):
1246 state->uri->hostText.afterLast = NULL; /* Not a host, reset */
1247 state->uri->portText.first = NULL; /* Not a port, reset */
1248 state->uri->userInfo.afterLast = first; /* USERINFO END */
1249 state->uri->hostText.first = first + 1; /* HOST BEGIN */
1250 return URI_FUNC(ParseOwnHost)(state, first + 1, afterLast);
1251
1252 default:
1253 if (!URI_FUNC(OnExitOwnPortUserInfo)(state, first)) {
1254 URI_FUNC(StopMalloc)(state);
1255 return NULL;
1256 }
1257 return first;
1258 }
1259}
1260
1261
1262
1263/*
1264 * [ownUserInfo]->[pctSubUnres][ownUserInfo]
1265 * [ownUserInfo]-><:>[ownUserInfo]
1266 * [ownUserInfo]-><@>[ownHost]
1267 */
1268static const URI_CHAR * URI_FUNC(ParseOwnUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1269 if (first >= afterLast) {
1270 URI_FUNC(StopSyntax)(state, first);
1271 return NULL;
1272 }
1273
1274 switch (*first) {
1275 case _UT('!'):
1276 case _UT('$'):
1277 case _UT('%'):
1278 case _UT('&'):
1279 case _UT('('):
1280 case _UT(')'):
1281 case _UT('-'):
1282 case _UT('*'):
1283 case _UT(','):
1284 case _UT('.'):
1285 case _UT(';'):
1286 case _UT('\''):
1287 case _UT('_'):
1288 case _UT('~'):
1289 case _UT('+'):
1290 case _UT('='):
1291 case URI_SET_DIGIT:
1292 case URI_SET_ALPHA:
1293 {
1294 const URI_CHAR * const afterPctSubUnres
1295 = URI_FUNC(ParsePctSubUnres)(state, first, afterLast);
1296 if (afterPctSubUnres == NULL) {
1297 return NULL;
1298 }
1299 return URI_FUNC(ParseOwnUserInfo)(state, afterPctSubUnres, afterLast);
1300 }
1301
1302 case _UT(':'):
1303 return URI_FUNC(ParseOwnUserInfo)(state, first + 1, afterLast);
1304
1305 case _UT('@'):
1306 /* SURE */
1307 state->uri->userInfo.afterLast = first; /* USERINFO END */
1308 state->uri->hostText.first = first + 1; /* HOST BEGIN */
1309 return URI_FUNC(ParseOwnHost)(state, first + 1, afterLast);
1310
1311 default:
1312 URI_FUNC(StopSyntax)(state, first);
1313 return NULL;
1314 }
1315}
1316
1317
1318
1319static URI_INLINE void URI_FUNC(OnExitPartHelperTwo)(URI_TYPE(ParserState) * state) {
1320 state->uri->absolutePath = URI_TRUE;
1321}
1322
1323
1324
1325/*
1326 * [partHelperTwo]->[pathAbsNoLeadSlash] // can take <NULL>
1327 * [partHelperTwo]-></>[authority][pathAbsEmpty]
1328 */
1329static URI_INLINE const URI_CHAR * URI_FUNC(ParsePartHelperTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1330 if (first >= afterLast) {
1331 URI_FUNC(OnExitPartHelperTwo)(state);
1332 return afterLast;
1333 }
1334
1335 switch (*first) {
1336 case _UT('/'):
1337 {
1338 const URI_CHAR * const afterAuthority
1339 = URI_FUNC(ParseAuthority)(state, first + 1, afterLast);
1340 const URI_CHAR * afterPathAbsEmpty;
1341 if (afterAuthority == NULL) {
1342 return NULL;
1343 }
1344 afterPathAbsEmpty = URI_FUNC(ParsePathAbsEmpty)(state, afterAuthority, afterLast);
1345
1346 URI_FUNC(FixEmptyTrailSegment)(state->uri);
1347
1348 return afterPathAbsEmpty;
1349 }
1350
1351 default:
1352 URI_FUNC(OnExitPartHelperTwo)(state);
1353 return URI_FUNC(ParsePathAbsNoLeadSlash)(state, first, afterLast);
1354 }
1355}
1356
1357
1358
1359/*
1360 * [pathAbsEmpty]-></>[segment][pathAbsEmpty]
1361 * [pathAbsEmpty]-><NULL>
1362 */
1363static const URI_CHAR * URI_FUNC(ParsePathAbsEmpty)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1364 if (first >= afterLast) {
1365 return afterLast;
1366 }
1367
1368 switch (*first) {
1369 case _UT('/'):
1370 {
1371 const URI_CHAR * const afterSegment
1372 = URI_FUNC(ParseSegment)(state, first + 1, afterLast);
1373 if (afterSegment == NULL) {
1374 return NULL;
1375 }
1376 if (!URI_FUNC(PushPathSegment)(state, first + 1, afterSegment)) { /* SEGMENT BOTH */
1377 URI_FUNC(StopMalloc)(state);
1378 return NULL;
1379 }
1380 return URI_FUNC(ParsePathAbsEmpty)(state, afterSegment, afterLast);
1381 }
1382
1383 default:
1384 return first;
1385 }
1386}
1387
1388
1389
1390/*
1391 * [pathAbsNoLeadSlash]->[segmentNz][zeroMoreSlashSegs]
1392 * [pathAbsNoLeadSlash]-><NULL>
1393 */
1394static URI_INLINE const URI_CHAR * URI_FUNC(ParsePathAbsNoLeadSlash)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1395 if (first >= afterLast) {
1396 return afterLast;
1397 }
1398
1399 switch (*first) {
1400 case _UT('!'):
1401 case _UT('$'):
1402 case _UT('%'):
1403 case _UT('&'):
1404 case _UT('('):
1405 case _UT(')'):
1406 case _UT('-'):
1407 case _UT('*'):
1408 case _UT(','):
1409 case _UT('.'):
1410 case _UT(':'):
1411 case _UT(';'):
1412 case _UT('@'):
1413 case _UT('\''):
1414 case _UT('_'):
1415 case _UT('~'):
1416 case _UT('+'):
1417 case _UT('='):
1418 case URI_SET_DIGIT:
1419 case URI_SET_ALPHA:
1420 {
1421 const URI_CHAR * const afterSegmentNz
1422 = URI_FUNC(ParseSegmentNz)(state, first, afterLast);
1423 if (afterSegmentNz == NULL) {
1424 return NULL;
1425 }
1426 if (!URI_FUNC(PushPathSegment)(state, first, afterSegmentNz)) { /* SEGMENT BOTH */
1427 URI_FUNC(StopMalloc)(state);
1428 return NULL;
1429 }
1430 return URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegmentNz, afterLast);
1431 }
1432
1433 default:
1434 return first;
1435 }
1436}
1437
1438
1439
1440/*
1441 * [pathRootless]->[segmentNz][zeroMoreSlashSegs]
1442 */
1443static URI_INLINE const URI_CHAR * URI_FUNC(ParsePathRootless)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1444 const URI_CHAR * const afterSegmentNz
1445 = URI_FUNC(ParseSegmentNz)(state, first, afterLast);
1446 if (afterSegmentNz == NULL) {
1447 return NULL;
1448 } else {
1449 if (!URI_FUNC(PushPathSegment)(state, first, afterSegmentNz)) { /* SEGMENT BOTH */
1450 URI_FUNC(StopMalloc)(state);
1451 return NULL;
1452 }
1453 }
1454 return URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegmentNz, afterLast);
1455}
1456
1457
1458
1459/*
1460 * [pchar]->[pctEncoded]
1461 * [pchar]->[subDelims]
1462 * [pchar]->[unreserved]
1463 * [pchar]-><:>
1464 * [pchar]-><@>
1465 */
1466static const URI_CHAR * URI_FUNC(ParsePchar)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1467 if (first >= afterLast) {
1468 URI_FUNC(StopSyntax)(state, first);
1469 return NULL;
1470 }
1471
1472 switch (*first) {
1473 case _UT('%'):
1474 return URI_FUNC(ParsePctEncoded)(state, first, afterLast);
1475
1476 case _UT(':'):
1477 case _UT('@'):
1478 case _UT('!'):
1479 case _UT('$'):
1480 case _UT('&'):
1481 case _UT('('):
1482 case _UT(')'):
1483 case _UT('*'):
1484 case _UT(','):
1485 case _UT(';'):
1486 case _UT('\''):
1487 case _UT('+'):
1488 case _UT('='):
1489 case _UT('-'):
1490 case _UT('.'):
1491 case _UT('_'):
1492 case _UT('~'):
1493 case URI_SET_DIGIT:
1494 case URI_SET_ALPHA:
1495 return first + 1;
1496
1497 default:
1498 URI_FUNC(StopSyntax)(state, first);
1499 return NULL;
1500 }
1501}
1502
1503
1504
1505/*
1506 * [pctEncoded]-><%>[HEXDIG][HEXDIG]
1507 */
1508static const URI_CHAR * URI_FUNC(ParsePctEncoded)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1509 if (first >= afterLast) {
1510 URI_FUNC(StopSyntax)(state, first);
1511 return NULL;
1512 }
1513
1514 /*
1515 First character has already been
1516 checked before entering this rule.
1517
1518 switch (*first) {
1519 case _UT('%'):
1520 */
1521 if (first + 1 >= afterLast) {
1522 URI_FUNC(StopSyntax)(state, first + 1);
1523 return NULL;
1524 }
1525
1526 switch (first[1]) {
1527 case URI_SET_HEXDIG:
1528 if (first + 2 >= afterLast) {
1529 URI_FUNC(StopSyntax)(state, first + 2);
1530 return NULL;
1531 }
1532
1533 switch (first[2]) {
1534 case URI_SET_HEXDIG:
1535 return first + 3;
1536
1537 default:
1538 URI_FUNC(StopSyntax)(state, first + 2);
1539 return NULL;
1540 }
1541
1542 default:
1543 URI_FUNC(StopSyntax)(state, first + 1);
1544 return NULL;
1545 }
1546
1547 /*
1548 default:
1549 URI_FUNC(StopSyntax)(state, first);
1550 return NULL;
1551 }
1552 */
1553}
1554
1555
1556
1557/*
1558 * [pctSubUnres]->[pctEncoded]
1559 * [pctSubUnres]->[subDelims]
1560 * [pctSubUnres]->[unreserved]
1561 */
1562static const URI_CHAR * URI_FUNC(ParsePctSubUnres)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1563 if (first >= afterLast) {
1564 URI_FUNC(StopSyntax)(state, first);
1565 return NULL;
1566 }
1567
1568 switch (*first) {
1569 case _UT('%'):
1570 return URI_FUNC(ParsePctEncoded)(state, first, afterLast);
1571
1572 case _UT('!'):
1573 case _UT('$'):
1574 case _UT('&'):
1575 case _UT('('):
1576 case _UT(')'):
1577 case _UT('*'):
1578 case _UT(','):
1579 case _UT(';'):
1580 case _UT('\''):
1581 case _UT('+'):
1582 case _UT('='):
1583 case _UT('-'):
1584 case _UT('.'):
1585 case _UT('_'):
1586 case _UT('~'):
1587 case URI_SET_DIGIT:
1588 case URI_SET_ALPHA:
1589 return first + 1;
1590
1591 default:
1592 URI_FUNC(StopSyntax)(state, first);
1593 return NULL;
1594 }
1595}
1596
1597
1598
1599/*
1600 * [port]->[DIGIT][port]
1601 * [port]-><NULL>
1602 */
1603static const URI_CHAR * URI_FUNC(ParsePort)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1604 if (first >= afterLast) {
1605 return afterLast;
1606 }
1607
1608 switch (*first) {
1609 case URI_SET_DIGIT:
1610 return URI_FUNC(ParsePort)(state, first + 1, afterLast);
1611
1612 default:
1613 return first;
1614 }
1615}
1616
1617
1618
1619/*
1620 * [queryFrag]->[pchar][queryFrag]
1621 * [queryFrag]-></>[queryFrag]
1622 * [queryFrag]-><?>[queryFrag]
1623 * [queryFrag]-><NULL>
1624 */
1625static const URI_CHAR * URI_FUNC(ParseQueryFrag)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1626 if (first >= afterLast) {
1627 return afterLast;
1628 }
1629
1630 switch (*first) {
1631 case _UT('!'):
1632 case _UT('$'):
1633 case _UT('%'):
1634 case _UT('&'):
1635 case _UT('('):
1636 case _UT(')'):
1637 case _UT('-'):
1638 case _UT('*'):
1639 case _UT(','):
1640 case _UT('.'):
1641 case _UT(':'):
1642 case _UT(';'):
1643 case _UT('@'):
1644 case _UT('\''):
1645 case _UT('_'):
1646 case _UT('~'):
1647 case _UT('+'):
1648 case _UT('='):
1649 case URI_SET_DIGIT:
1650 case URI_SET_ALPHA:
1651 {
1652 const URI_CHAR * const afterPchar
1653 = URI_FUNC(ParsePchar)(state, first, afterLast);
1654 if (afterPchar == NULL) {
1655 return NULL;
1656 }
1657 return URI_FUNC(ParseQueryFrag)(state, afterPchar, afterLast);
1658 }
1659
1660 case _UT('/'):
1661 case _UT('?'):
1662 return URI_FUNC(ParseQueryFrag)(state, first + 1, afterLast);
1663
1664 default:
1665 return first;
1666 }
1667}
1668
1669
1670
1671/*
1672 * [segment]->[pchar][segment]
1673 * [segment]-><NULL>
1674 */
1675static const URI_CHAR * URI_FUNC(ParseSegment)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1676 if (first >= afterLast) {
1677 return afterLast;
1678 }
1679
1680 switch (*first) {
1681 case _UT('!'):
1682 case _UT('$'):
1683 case _UT('%'):
1684 case _UT('&'):
1685 case _UT('('):
1686 case _UT(')'):
1687 case _UT('-'):
1688 case _UT('*'):
1689 case _UT(','):
1690 case _UT('.'):
1691 case _UT(':'):
1692 case _UT(';'):
1693 case _UT('@'):
1694 case _UT('\''):
1695 case _UT('_'):
1696 case _UT('~'):
1697 case _UT('+'):
1698 case _UT('='):
1699 case URI_SET_DIGIT:
1700 case URI_SET_ALPHA:
1701 {
1702 const URI_CHAR * const afterPchar
1703 = URI_FUNC(ParsePchar)(state, first, afterLast);
1704 if (afterPchar == NULL) {
1705 return NULL;
1706 }
1707 return URI_FUNC(ParseSegment)(state, afterPchar, afterLast);
1708 }
1709
1710 default:
1711 return first;
1712 }
1713}
1714
1715
1716
1717/*
1718 * [segmentNz]->[pchar][segment]
1719 */
1720static URI_INLINE const URI_CHAR * URI_FUNC(ParseSegmentNz)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1721 const URI_CHAR * const afterPchar
1722 = URI_FUNC(ParsePchar)(state, first, afterLast);
1723 if (afterPchar == NULL) {
1724 return NULL;
1725 }
1726 return URI_FUNC(ParseSegment)(state, afterPchar, afterLast);
1727}
1728
1729
1730
1731static URI_INLINE UriBool URI_FUNC(OnExitSegmentNzNcOrScheme2)(URI_TYPE(ParserState) * state, const URI_CHAR * first) {
1732 if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first)) { /* SEGMENT BOTH */
1733 return URI_FALSE; /* Raises malloc error*/
1734 }
1735 state->uri->scheme.first = NULL; /* Not a scheme, reset */
1736 return URI_TRUE; /* Success */
1737}
1738
1739
1740
1741/*
1742 * [segmentNzNcOrScheme2]->[ALPHA][segmentNzNcOrScheme2]
1743 * [segmentNzNcOrScheme2]->[DIGIT][segmentNzNcOrScheme2]
1744 * [segmentNzNcOrScheme2]->[pctEncoded][mustBeSegmentNzNc]
1745 * [segmentNzNcOrScheme2]->[uriTail] // can take <NULL>
1746 * [segmentNzNcOrScheme2]-><!>[mustBeSegmentNzNc]
1747 * [segmentNzNcOrScheme2]-><$>[mustBeSegmentNzNc]
1748 * [segmentNzNcOrScheme2]-><&>[mustBeSegmentNzNc]
1749 * [segmentNzNcOrScheme2]-><(>[mustBeSegmentNzNc]
1750 * [segmentNzNcOrScheme2]-><)>[mustBeSegmentNzNc]
1751 * [segmentNzNcOrScheme2]-><*>[mustBeSegmentNzNc]
1752 * [segmentNzNcOrScheme2]-><,>[mustBeSegmentNzNc]
1753 * [segmentNzNcOrScheme2]-><.>[segmentNzNcOrScheme2]
1754 * [segmentNzNcOrScheme2]-></>[segment][zeroMoreSlashSegs][uriTail]
1755 * [segmentNzNcOrScheme2]-><:>[hierPart][uriTail]
1756 * [segmentNzNcOrScheme2]-><;>[mustBeSegmentNzNc]
1757 * [segmentNzNcOrScheme2]-><@>[mustBeSegmentNzNc]
1758 * [segmentNzNcOrScheme2]-><_>[mustBeSegmentNzNc]
1759 * [segmentNzNcOrScheme2]-><~>[mustBeSegmentNzNc]
1760 * [segmentNzNcOrScheme2]-><+>[segmentNzNcOrScheme2]
1761 * [segmentNzNcOrScheme2]-><=>[mustBeSegmentNzNc]
1762 * [segmentNzNcOrScheme2]-><'>[mustBeSegmentNzNc]
1763 * [segmentNzNcOrScheme2]-><->[segmentNzNcOrScheme2]
1764 */
1765static const URI_CHAR * URI_FUNC(ParseSegmentNzNcOrScheme2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1766 if (first >= afterLast) {
1767 if (!URI_FUNC(OnExitSegmentNzNcOrScheme2)(state, first)) {
1768 URI_FUNC(StopMalloc)(state);
1769 return NULL;
1770 }
1771 return afterLast;
1772 }
1773
1774 switch (*first) {
1775 case _UT('.'):
1776 case _UT('+'):
1777 case _UT('-'):
1778 case URI_SET_ALPHA:
1779 case URI_SET_DIGIT:
1780 return URI_FUNC(ParseSegmentNzNcOrScheme2)(state, first + 1, afterLast);
1781
1782 case _UT('%'):
1783 {
1784 const URI_CHAR * const afterPctEncoded
1785 = URI_FUNC(ParsePctEncoded)(state, first, afterLast);
1786 if (afterPctEncoded == NULL) {
1787 return NULL;
1788 }
1789 return URI_FUNC(ParseMustBeSegmentNzNc)(state, afterPctEncoded, afterLast);
1790 }
1791
1792 case _UT('!'):
1793 case _UT('$'):
1794 case _UT('&'):
1795 case _UT('('):
1796 case _UT(')'):
1797 case _UT('*'):
1798 case _UT(','):
1799 case _UT(';'):
1800 case _UT('@'):
1801 case _UT('_'):
1802 case _UT('~'):
1803 case _UT('='):
1804 case _UT('\''):
1805 return URI_FUNC(ParseMustBeSegmentNzNc)(state, first + 1, afterLast);
1806
1807 case _UT('/'):
1808 {
1809 const URI_CHAR * afterZeroMoreSlashSegs;
1810 const URI_CHAR * const afterSegment
1811 = URI_FUNC(ParseSegment)(state, first + 1, afterLast);
1812 if (afterSegment == NULL) {
1813 return NULL;
1814 }
1815 if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first)) { /* SEGMENT BOTH */
1816 URI_FUNC(StopMalloc)(state);
1817 return NULL;
1818 }
1819 state->uri->scheme.first = NULL; /* Not a scheme, reset */
1820 if (!URI_FUNC(PushPathSegment)(state, first + 1, afterSegment)) { /* SEGMENT BOTH */
1821 URI_FUNC(StopMalloc)(state);
1822 return NULL;
1823 }
1824 afterZeroMoreSlashSegs
1825 = URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegment, afterLast);
1826 if (afterZeroMoreSlashSegs == NULL) {
1827 return NULL;
1828 }
1829 return URI_FUNC(ParseUriTail)(state, afterZeroMoreSlashSegs, afterLast);
1830 }
1831
1832 case _UT(':'):
1833 {
1834 const URI_CHAR * const afterHierPart
1835 = URI_FUNC(ParseHierPart)(state, first + 1, afterLast);
1836 state->uri->scheme.afterLast = first; /* SCHEME END */
1837 if (afterHierPart == NULL) {
1838 return NULL;
1839 }
1840 return URI_FUNC(ParseUriTail)(state, afterHierPart, afterLast);
1841 }
1842
1843 default:
1844 if (!URI_FUNC(OnExitSegmentNzNcOrScheme2)(state, first)) {
1845 URI_FUNC(StopMalloc)(state);
1846 return NULL;
1847 }
1848 return URI_FUNC(ParseUriTail)(state, first, afterLast);
1849 }
1850}
1851
1852
1853
1854/*
1855 * [uriReference]->[ALPHA][segmentNzNcOrScheme2]
1856 * [uriReference]->[DIGIT][mustBeSegmentNzNc]
1857 * [uriReference]->[pctEncoded][mustBeSegmentNzNc]
1858 * [uriReference]->[subDelims][mustBeSegmentNzNc]
1859 * [uriReference]->[uriTail] // can take <NULL>
1860 * [uriReference]-><.>[mustBeSegmentNzNc]
1861 * [uriReference]-></>[partHelperTwo][uriTail]
1862 * [uriReference]-><@>[mustBeSegmentNzNc]
1863 * [uriReference]-><_>[mustBeSegmentNzNc]
1864 * [uriReference]-><~>[mustBeSegmentNzNc]
1865 * [uriReference]-><->[mustBeSegmentNzNc]
1866 */
1867static const URI_CHAR * URI_FUNC(ParseUriReference)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1868 if (first >= afterLast) {
1869 return afterLast;
1870 }
1871
1872 switch (*first) {
1873 case URI_SET_ALPHA:
1874 state->uri->scheme.first = first; /* SCHEME BEGIN */
1875 return URI_FUNC(ParseSegmentNzNcOrScheme2)(state, first + 1, afterLast);
1876
1877 case URI_SET_DIGIT:
1878 case _UT('!'):
1879 case _UT('$'):
1880 case _UT('&'):
1881 case _UT('('):
1882 case _UT(')'):
1883 case _UT('*'):
1884 case _UT(','):
1885 case _UT(';'):
1886 case _UT('\''):
1887 case _UT('+'):
1888 case _UT('='):
1889 case _UT('.'):
1890 case _UT('_'):
1891 case _UT('~'):
1892 case _UT('-'):
1893 case _UT('@'):
1894 state->uri->scheme.first = first; /* SEGMENT BEGIN, ABUSE SCHEME POINTER */
1895 return URI_FUNC(ParseMustBeSegmentNzNc)(state, first + 1, afterLast);
1896
1897 case _UT('%'):
1898 {
1899 const URI_CHAR * const afterPctEncoded
1900 = URI_FUNC(ParsePctEncoded)(state, first, afterLast);
1901 if (afterPctEncoded == NULL) {
1902 return NULL;
1903 }
1904 state->uri->scheme.first = first; /* SEGMENT BEGIN, ABUSE SCHEME POINTER */
1905 return URI_FUNC(ParseMustBeSegmentNzNc)(state, afterPctEncoded, afterLast);
1906 }
1907
1908 case _UT('/'):
1909 {
1910 const URI_CHAR * const afterPartHelperTwo
1911 = URI_FUNC(ParsePartHelperTwo)(state, first + 1, afterLast);
1912 if (afterPartHelperTwo == NULL) {
1913 return NULL;
1914 }
1915 return URI_FUNC(ParseUriTail)(state, afterPartHelperTwo, afterLast);
1916 }
1917
1918 default:
1919 return URI_FUNC(ParseUriTail)(state, first, afterLast);
1920 }
1921}
1922
1923
1924
1925/*
1926 * [uriTail]-><#>[queryFrag]
1927 * [uriTail]-><?>[queryFrag][uriTailTwo]
1928 * [uriTail]-><NULL>
1929 */
1930static URI_INLINE const URI_CHAR * URI_FUNC(ParseUriTail)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1931 if (first >= afterLast) {
1932 return afterLast;
1933 }
1934
1935 switch (*first) {
1936 case _UT('#'):
1937 {
1938 const URI_CHAR * const afterQueryFrag = URI_FUNC(ParseQueryFrag)(state, first + 1, afterLast);
1939 if (afterQueryFrag == NULL) {
1940 return NULL;
1941 }
1942 state->uri->fragment.first = first + 1; /* FRAGMENT BEGIN */
1943 state->uri->fragment.afterLast = afterQueryFrag; /* FRAGMENT END */
1944 return afterQueryFrag;
1945 }
1946
1947 case _UT('?'):
1948 {
1949 const URI_CHAR * const afterQueryFrag
1950 = URI_FUNC(ParseQueryFrag)(state, first + 1, afterLast);
1951 if (afterQueryFrag == NULL) {
1952 return NULL;
1953 }
1954 state->uri->query.first = first + 1; /* QUERY BEGIN */
1955 state->uri->query.afterLast = afterQueryFrag; /* QUERY END */
1956 return URI_FUNC(ParseUriTailTwo)(state, afterQueryFrag, afterLast);
1957 }
1958
1959 default:
1960 return first;
1961 }
1962}
1963
1964
1965
1966/*
1967 * [uriTailTwo]-><#>[queryFrag]
1968 * [uriTailTwo]-><NULL>
1969 */
1970static URI_INLINE const URI_CHAR * URI_FUNC(ParseUriTailTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1971 if (first >= afterLast) {
1972 return afterLast;
1973 }
1974
1975 switch (*first) {
1976 case _UT('#'):
1977 {
1978 const URI_CHAR * const afterQueryFrag = URI_FUNC(ParseQueryFrag)(state, first + 1, afterLast);
1979 if (afterQueryFrag == NULL) {
1980 return NULL;
1981 }
1982 state->uri->fragment.first = first + 1; /* FRAGMENT BEGIN */
1983 state->uri->fragment.afterLast = afterQueryFrag; /* FRAGMENT END */
1984 return afterQueryFrag;
1985 }
1986
1987 default:
1988 return first;
1989 }
1990}
1991
1992
1993
1994/*
1995 * [zeroMoreSlashSegs]-></>[segment][zeroMoreSlashSegs]
1996 * [zeroMoreSlashSegs]-><NULL>
1997 */
1998static const URI_CHAR * URI_FUNC(ParseZeroMoreSlashSegs)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1999 if (first >= afterLast) {
2000 return afterLast;
2001 }
2002
2003 switch (*first) {
2004 case _UT('/'):
2005 {
2006 const URI_CHAR * const afterSegment
2007 = URI_FUNC(ParseSegment)(state, first + 1, afterLast);
2008 if (afterSegment == NULL) {
2009 return NULL;
2010 }
2011 if (!URI_FUNC(PushPathSegment)(state, first + 1, afterSegment)) { /* SEGMENT BOTH */
2012 URI_FUNC(StopMalloc)(state);
2013 return NULL;
2014 }
2015 return URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegment, afterLast);
2016 }
2017
2018 default:
2019 return first;
2020 }
2021}
2022
2023
2024
2025static URI_INLINE void URI_FUNC(ResetParserState)(URI_TYPE(ParserState) * state) {
2026 URI_TYPE(Uri) * const uriBackup = state->uri;
2027 memset(state, 0, sizeof(URI_TYPE(ParserState)));
2028 state->uri = uriBackup;
2029}
2030
2031
2032
2033static URI_INLINE UriBool URI_FUNC(PushPathSegment)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
2034 URI_TYPE(PathSegment) * segment = malloc(1 * sizeof(URI_TYPE(PathSegment)));
2035 if (segment == NULL) {
2036 return URI_FALSE; /* Raises malloc error */
2037 }
2038 memset(segment, 0, sizeof(URI_TYPE(PathSegment)));
2039 if (first == afterLast) {
2040 segment->text.first = URI_FUNC(SafeToPointTo);
2041 segment->text.afterLast = URI_FUNC(SafeToPointTo);
2042 } else {
2043 segment->text.first = first;
2044 segment->text.afterLast = afterLast;
2045 }
2046
2047 /* First segment ever? */
2048 if (state->uri->pathHead == NULL) {
2049 /* First segment ever, set head and tail */
2050 state->uri->pathHead = segment;
2051 state->uri->pathTail = segment;
2052 } else {
2053 /* Append, update tail */
2054 state->uri->pathTail->next = segment;
2055 state->uri->pathTail = segment;
2056 }
2057
2058 return URI_TRUE; /* Success */
2059}
2060
2061
2062
2063int URI_FUNC(ParseUriEx)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
2064 const URI_CHAR * afterUriReference;
2065 URI_TYPE(Uri) * uri;
2066
2067 /* Check params */
2068 if ((state == NULL) || (first == NULL) || (afterLast == NULL)) {
2069 return URI_ERROR_NULL;
2070 }
2071 uri = state->uri;
2072
2073 /* Init parser */
2074 URI_FUNC(ResetParserState)(state);
2075 URI_FUNC(ResetUri)(uri);
2076
2077 /* Parse */
2078 afterUriReference = URI_FUNC(ParseUriReference)(state, first, afterLast);
2079 if (afterUriReference == NULL) {
2080 return state->errorCode;
2081 }
2082 if (afterUriReference != afterLast) {
2083 URI_FUNC(StopSyntax)(state, afterUriReference);
2084 return state->errorCode;
2085 }
2086 return URI_SUCCESS;
2087}
2088
2089
2090
2091int URI_FUNC(ParseUri)(URI_TYPE(ParserState) * state, const URI_CHAR * text) {
2092 if ((state == NULL) || (text == NULL)) {
2093 return URI_ERROR_NULL;
2094 }
2095 return URI_FUNC(ParseUriEx)(state, text, text + URI_STRLEN(text));
2096}
2097
2098
2099
2100void URI_FUNC(FreeUriMembers)(URI_TYPE(Uri) * uri) {
2101 if (uri == NULL) {
2102 return;
2103 }
2104
2105 if (uri->owner) {
2106 /* Scheme */
2107 if (uri->scheme.first != NULL) {
2108 if (uri->scheme.first != uri->scheme.afterLast) {
2109 free((URI_CHAR *)uri->scheme.first);
2110 }
2111 uri->scheme.first = NULL;
2112 uri->scheme.afterLast = NULL;
2113 }
2114
2115 /* User info */
2116 if (uri->userInfo.first != NULL) {
2117 if (uri->userInfo.first != uri->userInfo.afterLast) {
2118 free((URI_CHAR *)uri->userInfo.first);
2119 }
2120 uri->userInfo.first = NULL;
2121 uri->userInfo.afterLast = NULL;
2122 }
2123
2124 /* Host data - IPvFuture */
2125 if (uri->hostData.ipFuture.first != NULL) {
2126 if (uri->hostData.ipFuture.first != uri->hostData.ipFuture.afterLast) {
2127 free((URI_CHAR *)uri->hostData.ipFuture.first);
2128 }
2129 uri->hostData.ipFuture.first = NULL;
2130 uri->hostData.ipFuture.afterLast = NULL;
2131 uri->hostText.first = NULL;
2132 uri->hostText.afterLast = NULL;
2133 }
2134
2135 /* Host text (if regname, after IPvFuture!) */
2136 if ((uri->hostText.first != NULL)
2137 && (uri->hostData.ip4 == NULL)
2138 && (uri->hostData.ip6 == NULL)) {
2139 /* Real regname */
2140 if (uri->hostText.first != uri->hostText.afterLast) {
2141 free((URI_CHAR *)uri->hostText.first);
2142 }
2143 uri->hostText.first = NULL;
2144 uri->hostText.afterLast = NULL;
2145 }
2146 }
2147
2148 /* Host data - IPv4 */
2149 if (uri->hostData.ip4 != NULL) {
2150 free(uri->hostData.ip4);
2151 uri->hostData.ip4 = NULL;
2152 }
2153
2154 /* Host data - IPv6 */
2155 if (uri->hostData.ip6 != NULL) {
2156 free(uri->hostData.ip6);
2157 uri->hostData.ip6 = NULL;
2158 }
2159
2160 /* Port text */
2161 if (uri->owner && (uri->portText.first != NULL)) {
2162 if (uri->portText.first != uri->portText.afterLast) {
2163 free((URI_CHAR *)uri->portText.first);
2164 }
2165 uri->portText.first = NULL;
2166 uri->portText.afterLast = NULL;
2167 }
2168
2169 /* Path */
2170 if (uri->pathHead != NULL) {
2171 URI_TYPE(PathSegment) * segWalk = uri->pathHead;
2172 while (segWalk != NULL) {
2173 URI_TYPE(PathSegment) * const next = segWalk->next;
2174 if (uri->owner && (segWalk->text.first != NULL)
2175 && (segWalk->text.first < segWalk->text.afterLast)) {
2176 free((URI_CHAR *)segWalk->text.first);
2177 }
2178 free(segWalk);
2179 segWalk = next;
2180 }
2181 uri->pathHead = NULL;
2182 uri->pathTail = NULL;
2183 }
2184
2185 if (uri->owner) {
2186 /* Query */
2187 if (uri->query.first != NULL) {
2188 if (uri->query.first != uri->query.afterLast) {
2189 free((URI_CHAR *)uri->query.first);
2190 }
2191 uri->query.first = NULL;
2192 uri->query.afterLast = NULL;
2193 }
2194
2195 /* Fragment */
2196 if (uri->fragment.first != NULL) {
2197 if (uri->fragment.first != uri->fragment.afterLast) {
2198 free((URI_CHAR *)uri->fragment.first);
2199 }
2200 uri->fragment.first = NULL;
2201 uri->fragment.afterLast = NULL;
2202 }
2203 }
2204}
2205
2206
2207
2208UriBool URI_FUNC(_TESTING_ONLY_ParseIpSix)(const URI_CHAR * text) {
2209 URI_TYPE(Uri) uri;
2210 URI_TYPE(ParserState) parser;
2211 const URI_CHAR * const afterIpSix = text + URI_STRLEN(text);
2212 const URI_CHAR * res;
2213
2214 URI_FUNC(ResetParserState)(&parser);
2215 URI_FUNC(ResetUri)(&uri);
2216 parser.uri = &uri;
2217 parser.uri->hostData.ip6 = malloc(1 * sizeof(UriIp6));
2218 res = URI_FUNC(ParseIPv6address2)(&parser, text, afterIpSix);
2219 URI_FUNC(FreeUriMembers)(&uri);
2220 return res == afterIpSix ? URI_TRUE : URI_FALSE;
2221}
2222
2223
2224
2225UriBool URI_FUNC(_TESTING_ONLY_ParseIpFour)(const URI_CHAR * text) {
2226 unsigned char octets[4];
2227 int res = URI_FUNC(ParseIpFourAddress)(octets, text, text + URI_STRLEN(text));
2228 return (res == URI_SUCCESS) ? URI_TRUE : URI_FALSE;
2229}
2230
2231
2232
2233#undef URI_SET_DIGIT
2234#undef URI_SET_HEX_LETTER_UPPER
2235#undef URI_SET_HEX_LETTER_LOWER
2236#undef URI_SET_HEXDIG
2237#undef URI_SET_ALPHA
2238
2239
2240
2241#endif