diff options
Diffstat (limited to 'plugins/uriparser/UriNormalize.c')
-rw-r--r-- | plugins/uriparser/UriNormalize.c | 728 |
1 files changed, 728 insertions, 0 deletions
diff --git a/plugins/uriparser/UriNormalize.c b/plugins/uriparser/UriNormalize.c new file mode 100644 index 0000000..49db9ff --- /dev/null +++ b/plugins/uriparser/UriNormalize.c | |||
@@ -0,0 +1,728 @@ | |||
1 | /* | ||
2 | * uriparser - RFC 3986 URI parsing library | ||
3 | * | ||
4 | * Copyright (C) 2007, Weijia Song <songweijia@gmail.com> | ||
5 | * Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org> | ||
6 | * All rights reserved. | ||
7 | * | ||
8 | * Redistribution and use in source and binary forms, with or without | ||
9 | * modification, are permitted provided that the following conditions | ||
10 | * are met: | ||
11 | * | ||
12 | * * Redistributions of source code must retain the above | ||
13 | * copyright notice, this list of conditions and the following | ||
14 | * disclaimer. | ||
15 | * | ||
16 | * * Redistributions in binary form must reproduce the above | ||
17 | * copyright notice, this list of conditions and the following | ||
18 | * disclaimer in the documentation and/or other materials | ||
19 | * provided with the distribution. | ||
20 | * | ||
21 | * * Neither the name of the <ORGANIZATION> nor the names of its | ||
22 | * contributors may be used to endorse or promote products | ||
23 | * derived from this software without specific prior written | ||
24 | * permission. | ||
25 | * | ||
26 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
27 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
28 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | ||
29 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | ||
30 | * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | ||
31 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
32 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
33 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
34 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
35 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
36 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
37 | * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | */ | ||
39 | |||
40 | /** | ||
41 | * @file UriNormalize.c | ||
42 | * Holds the RFC 3986 %URI normalization implementation. | ||
43 | * NOTE: This source file includes itself twice. | ||
44 | */ | ||
45 | |||
46 | /* What encodings are enabled? */ | ||
47 | #include <uriparser/UriDefsConfig.h> | ||
48 | #if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) | ||
49 | /* Include SELF twice */ | ||
50 | # ifdef URI_ENABLE_ANSI | ||
51 | # define URI_PASS_ANSI 1 | ||
52 | # include "UriNormalize.c" | ||
53 | # undef URI_PASS_ANSI | ||
54 | # endif | ||
55 | # ifdef URI_ENABLE_UNICODE | ||
56 | # define URI_PASS_UNICODE 1 | ||
57 | # include "UriNormalize.c" | ||
58 | # undef URI_PASS_UNICODE | ||
59 | # endif | ||
60 | #else | ||
61 | # ifdef URI_PASS_ANSI | ||
62 | # include <uriparser/UriDefsAnsi.h> | ||
63 | # else | ||
64 | # include <uriparser/UriDefsUnicode.h> | ||
65 | # include <wchar.h> | ||
66 | # endif | ||
67 | |||
68 | |||
69 | |||
70 | #ifndef URI_DOXYGEN | ||
71 | # include <uriparser/Uri.h> | ||
72 | # include "UriNormalizeBase.h" | ||
73 | # include "UriCommon.h" | ||
74 | #endif | ||
75 | |||
76 | |||
77 | |||
78 | static int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri, unsigned int inMask, | ||
79 | unsigned int * outMask); | ||
80 | |||
81 | static UriBool URI_FUNC(MakeRangeOwner)(unsigned int * doneMask, | ||
82 | unsigned int maskTest, URI_TYPE(TextRange) * range); | ||
83 | static UriBool URI_FUNC(MakeOwner)(URI_TYPE(Uri) * uri, | ||
84 | unsigned int * doneMask); | ||
85 | |||
86 | static void URI_FUNC(FixPercentEncodingInplace)(const URI_CHAR * first, | ||
87 | const URI_CHAR ** afterLast); | ||
88 | static UriBool URI_FUNC(FixPercentEncodingMalloc)(const URI_CHAR ** first, | ||
89 | const URI_CHAR ** afterLast); | ||
90 | static void URI_FUNC(FixPercentEncodingEngine)( | ||
91 | const URI_CHAR * inFirst, const URI_CHAR * inAfterLast, | ||
92 | const URI_CHAR * outFirst, const URI_CHAR ** outAfterLast); | ||
93 | |||
94 | static UriBool URI_FUNC(ContainsUppercaseLetters)(const URI_CHAR * first, | ||
95 | const URI_CHAR * afterLast); | ||
96 | static UriBool URI_FUNC(ContainsUglyPercentEncoding)(const URI_CHAR * first, | ||
97 | const URI_CHAR * afterLast); | ||
98 | |||
99 | static void URI_FUNC(LowercaseInplace)(const URI_CHAR * first, | ||
100 | const URI_CHAR * afterLast); | ||
101 | static UriBool URI_FUNC(LowercaseMalloc)(const URI_CHAR ** first, | ||
102 | const URI_CHAR ** afterLast); | ||
103 | |||
104 | static void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri, | ||
105 | unsigned int revertMask); | ||
106 | |||
107 | |||
108 | |||
109 | static URI_INLINE void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri, | ||
110 | unsigned int revertMask) { | ||
111 | if (revertMask & URI_NORMALIZE_SCHEME) { | ||
112 | free((URI_CHAR *)uri->scheme.first); | ||
113 | uri->scheme.first = NULL; | ||
114 | uri->scheme.afterLast = NULL; | ||
115 | } | ||
116 | |||
117 | if (revertMask & URI_NORMALIZE_USER_INFO) { | ||
118 | free((URI_CHAR *)uri->userInfo.first); | ||
119 | uri->userInfo.first = NULL; | ||
120 | uri->userInfo.afterLast = NULL; | ||
121 | } | ||
122 | |||
123 | if (revertMask & URI_NORMALIZE_HOST) { | ||
124 | if (uri->hostData.ipFuture.first != NULL) { | ||
125 | /* IPvFuture */ | ||
126 | free((URI_CHAR *)uri->hostData.ipFuture.first); | ||
127 | uri->hostData.ipFuture.first = NULL; | ||
128 | uri->hostData.ipFuture.afterLast = NULL; | ||
129 | uri->hostText.first = NULL; | ||
130 | uri->hostText.afterLast = NULL; | ||
131 | } else if ((uri->hostText.first != NULL) | ||
132 | && (uri->hostData.ip4 == NULL) | ||
133 | && (uri->hostData.ip6 == NULL)) { | ||
134 | /* Regname */ | ||
135 | free((URI_CHAR *)uri->hostText.first); | ||
136 | uri->hostText.first = NULL; | ||
137 | uri->hostText.afterLast = NULL; | ||
138 | } | ||
139 | } | ||
140 | |||
141 | /* NOTE: Port cannot happen! */ | ||
142 | |||
143 | if (revertMask & URI_NORMALIZE_PATH) { | ||
144 | URI_TYPE(PathSegment) * walker = uri->pathHead; | ||
145 | while (walker != NULL) { | ||
146 | URI_TYPE(PathSegment) * const next = walker->next; | ||
147 | if (walker->text.afterLast > walker->text.first) { | ||
148 | free((URI_CHAR *)walker->text.first); | ||
149 | } | ||
150 | free(walker); | ||
151 | walker = next; | ||
152 | } | ||
153 | uri->pathHead = NULL; | ||
154 | uri->pathTail = NULL; | ||
155 | } | ||
156 | |||
157 | if (revertMask & URI_NORMALIZE_QUERY) { | ||
158 | free((URI_CHAR *)uri->query.first); | ||
159 | uri->query.first = NULL; | ||
160 | uri->query.afterLast = NULL; | ||
161 | } | ||
162 | |||
163 | if (revertMask & URI_NORMALIZE_FRAGMENT) { | ||
164 | free((URI_CHAR *)uri->fragment.first); | ||
165 | uri->fragment.first = NULL; | ||
166 | uri->fragment.afterLast = NULL; | ||
167 | } | ||
168 | } | ||
169 | |||
170 | |||
171 | |||
172 | static URI_INLINE UriBool URI_FUNC(ContainsUppercaseLetters)(const URI_CHAR * first, | ||
173 | const URI_CHAR * afterLast) { | ||
174 | if ((first != NULL) && (afterLast != NULL) && (afterLast > first)) { | ||
175 | const URI_CHAR * i = first; | ||
176 | for (; i < afterLast; i++) { | ||
177 | /* 6.2.2.1 Case Normalization: uppercase letters in scheme or host */ | ||
178 | if ((*i >= _UT('A')) && (*i <= _UT('Z'))) { | ||
179 | return URI_TRUE; | ||
180 | } | ||
181 | } | ||
182 | } | ||
183 | return URI_FALSE; | ||
184 | } | ||
185 | |||
186 | |||
187 | |||
188 | static URI_INLINE UriBool URI_FUNC(ContainsUglyPercentEncoding)(const URI_CHAR * first, | ||
189 | const URI_CHAR * afterLast) { | ||
190 | if ((first != NULL) && (afterLast != NULL) && (afterLast > first)) { | ||
191 | const URI_CHAR * i = first; | ||
192 | for (; i + 2 < afterLast; i++) { | ||
193 | if (i[0] == _UT('%')) { | ||
194 | /* 6.2.2.1 Case Normalization: * | ||
195 | * lowercase percent-encodings */ | ||
196 | if (((i[1] >= _UT('a')) && (i[1] <= _UT('f'))) | ||
197 | || ((i[2] >= _UT('a')) && (i[2] <= _UT('f')))) { | ||
198 | return URI_TRUE; | ||
199 | } else { | ||
200 | /* 6.2.2.2 Percent-Encoding Normalization: * | ||
201 | * percent-encoded unreserved characters */ | ||
202 | const unsigned char left = URI_FUNC(HexdigToInt)(i[1]); | ||
203 | const unsigned char right = URI_FUNC(HexdigToInt)(i[2]); | ||
204 | const int code = 16 * left + right; | ||
205 | if (uriIsUnreserved(code)) { | ||
206 | return URI_TRUE; | ||
207 | } | ||
208 | } | ||
209 | } | ||
210 | } | ||
211 | } | ||
212 | return URI_FALSE; | ||
213 | } | ||
214 | |||
215 | |||
216 | |||
217 | static URI_INLINE void URI_FUNC(LowercaseInplace)(const URI_CHAR * first, | ||
218 | const URI_CHAR * afterLast) { | ||
219 | if ((first != NULL) && (afterLast != NULL) && (afterLast > first)) { | ||
220 | URI_CHAR * i = (URI_CHAR *)first; | ||
221 | const int lowerUpperDiff = (_UT('a') - _UT('A')); | ||
222 | for (; i < afterLast; i++) { | ||
223 | if ((*i >= _UT('A')) && (*i <=_UT('Z'))) { | ||
224 | *i = (URI_CHAR)(*i + lowerUpperDiff); | ||
225 | } | ||
226 | } | ||
227 | } | ||
228 | } | ||
229 | |||
230 | |||
231 | |||
232 | static URI_INLINE UriBool URI_FUNC(LowercaseMalloc)(const URI_CHAR ** first, | ||
233 | const URI_CHAR ** afterLast) { | ||
234 | int lenInChars; | ||
235 | const int lowerUpperDiff = (_UT('a') - _UT('A')); | ||
236 | URI_CHAR * buffer; | ||
237 | int i = 0; | ||
238 | |||
239 | if ((first == NULL) || (afterLast == NULL) || (*first == NULL) | ||
240 | || (*afterLast == NULL)) { | ||
241 | return URI_FALSE; | ||
242 | } | ||
243 | |||
244 | lenInChars = (int)(*afterLast - *first); | ||
245 | if (lenInChars == 0) { | ||
246 | return URI_TRUE; | ||
247 | } else if (lenInChars < 0) { | ||
248 | return URI_FALSE; | ||
249 | } | ||
250 | |||
251 | buffer = malloc(lenInChars * sizeof(URI_CHAR)); | ||
252 | if (buffer == NULL) { | ||
253 | return URI_FALSE; | ||
254 | } | ||
255 | |||
256 | for (; i < lenInChars; i++) { | ||
257 | if (((*first)[i] >= _UT('A')) && ((*first)[i] <=_UT('Z'))) { | ||
258 | buffer[i] = (URI_CHAR)((*first)[i] + lowerUpperDiff); | ||
259 | } else { | ||
260 | buffer[i] = (*first)[i]; | ||
261 | } | ||
262 | } | ||
263 | |||
264 | *first = buffer; | ||
265 | *afterLast = buffer + lenInChars; | ||
266 | return URI_TRUE; | ||
267 | } | ||
268 | |||
269 | |||
270 | |||
271 | /* NOTE: Implementation must stay inplace-compatible */ | ||
272 | static URI_INLINE void URI_FUNC(FixPercentEncodingEngine)( | ||
273 | const URI_CHAR * inFirst, const URI_CHAR * inAfterLast, | ||
274 | const URI_CHAR * outFirst, const URI_CHAR ** outAfterLast) { | ||
275 | URI_CHAR * write = (URI_CHAR *)outFirst; | ||
276 | const int lenInChars = (int)(inAfterLast - inFirst); | ||
277 | int i = 0; | ||
278 | |||
279 | /* All but last two */ | ||
280 | for (; i + 2 < lenInChars; i++) { | ||
281 | if (inFirst[i] != _UT('%')) { | ||
282 | write[0] = inFirst[i]; | ||
283 | write++; | ||
284 | } else { | ||
285 | /* 6.2.2.2 Percent-Encoding Normalization: * | ||
286 | * percent-encoded unreserved characters */ | ||
287 | const URI_CHAR one = inFirst[i + 1]; | ||
288 | const URI_CHAR two = inFirst[i + 2]; | ||
289 | const unsigned char left = URI_FUNC(HexdigToInt)(one); | ||
290 | const unsigned char right = URI_FUNC(HexdigToInt)(two); | ||
291 | const int code = 16 * left + right; | ||
292 | if (uriIsUnreserved(code)) { | ||
293 | write[0] = (URI_CHAR)(code); | ||
294 | write++; | ||
295 | } else { | ||
296 | /* 6.2.2.1 Case Normalization: * | ||
297 | * lowercase percent-encodings */ | ||
298 | write[0] = _UT('%'); | ||
299 | write[1] = URI_FUNC(HexToLetter)(left); | ||
300 | write[2] = URI_FUNC(HexToLetter)(right); | ||
301 | write += 3; | ||
302 | } | ||
303 | |||
304 | i += 2; /* For the two chars of the percent group we just ate */ | ||
305 | } | ||
306 | } | ||
307 | |||
308 | /* Last two */ | ||
309 | for (; i < lenInChars; i++) { | ||
310 | write[0] = inFirst[i]; | ||
311 | write++; | ||
312 | } | ||
313 | |||
314 | *outAfterLast = write; | ||
315 | } | ||
316 | |||
317 | |||
318 | |||
319 | static URI_INLINE void URI_FUNC(FixPercentEncodingInplace)(const URI_CHAR * first, | ||
320 | const URI_CHAR ** afterLast) { | ||
321 | /* Death checks */ | ||
322 | if ((first == NULL) || (afterLast == NULL) || (*afterLast == NULL)) { | ||
323 | return; | ||
324 | } | ||
325 | |||
326 | /* Fix inplace */ | ||
327 | URI_FUNC(FixPercentEncodingEngine)(first, *afterLast, first, afterLast); | ||
328 | } | ||
329 | |||
330 | |||
331 | |||
332 | static URI_INLINE UriBool URI_FUNC(FixPercentEncodingMalloc)(const URI_CHAR ** first, | ||
333 | const URI_CHAR ** afterLast) { | ||
334 | int lenInChars; | ||
335 | URI_CHAR * buffer; | ||
336 | |||
337 | /* Death checks */ | ||
338 | if ((first == NULL) || (afterLast == NULL) | ||
339 | || (*first == NULL) || (*afterLast == NULL)) { | ||
340 | return URI_FALSE; | ||
341 | } | ||
342 | |||
343 | /* Old text length */ | ||
344 | lenInChars = (int)(*afterLast - *first); | ||
345 | if (lenInChars == 0) { | ||
346 | return URI_TRUE; | ||
347 | } else if (lenInChars < 0) { | ||
348 | return URI_FALSE; | ||
349 | } | ||
350 | |||
351 | /* New buffer */ | ||
352 | buffer = malloc(lenInChars * sizeof(URI_CHAR)); | ||
353 | if (buffer == NULL) { | ||
354 | return URI_FALSE; | ||
355 | } | ||
356 | |||
357 | /* Fix on copy */ | ||
358 | URI_FUNC(FixPercentEncodingEngine)(*first, *afterLast, buffer, afterLast); | ||
359 | *first = buffer; | ||
360 | return URI_TRUE; | ||
361 | } | ||
362 | |||
363 | |||
364 | |||
365 | static URI_INLINE UriBool URI_FUNC(MakeRangeOwner)(unsigned int * doneMask, | ||
366 | unsigned int maskTest, URI_TYPE(TextRange) * range) { | ||
367 | if (((*doneMask & maskTest) == 0) | ||
368 | && (range->first != NULL) | ||
369 | && (range->afterLast != NULL) | ||
370 | && (range->afterLast > range->first)) { | ||
371 | const int lenInChars = (int)(range->afterLast - range->first); | ||
372 | const int lenInBytes = lenInChars * sizeof(URI_CHAR); | ||
373 | URI_CHAR * dup = malloc(lenInBytes); | ||
374 | if (dup == NULL) { | ||
375 | return URI_FALSE; /* Raises malloc error */ | ||
376 | } | ||
377 | memcpy(dup, range->first, lenInBytes); | ||
378 | range->first = dup; | ||
379 | range->afterLast = dup + lenInChars; | ||
380 | *doneMask |= maskTest; | ||
381 | } | ||
382 | return URI_TRUE; | ||
383 | } | ||
384 | |||
385 | |||
386 | |||
387 | static URI_INLINE UriBool URI_FUNC(MakeOwner)(URI_TYPE(Uri) * uri, | ||
388 | unsigned int * doneMask) { | ||
389 | URI_TYPE(PathSegment) * walker = uri->pathHead; | ||
390 | if (!URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_SCHEME, | ||
391 | &(uri->scheme)) | ||
392 | || !URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_USER_INFO, | ||
393 | &(uri->userInfo)) | ||
394 | || !URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_QUERY, | ||
395 | &(uri->query)) | ||
396 | || !URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_FRAGMENT, | ||
397 | &(uri->fragment))) { | ||
398 | return URI_FALSE; /* Raises malloc error */ | ||
399 | } | ||
400 | |||
401 | /* Host */ | ||
402 | if ((*doneMask & URI_NORMALIZE_HOST) == 0) { | ||
403 | if ((uri->hostData.ip4 == NULL) | ||
404 | && (uri->hostData.ip6 == NULL)) { | ||
405 | if (uri->hostData.ipFuture.first != NULL) { | ||
406 | /* IPvFuture */ | ||
407 | if (!URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_HOST, | ||
408 | &(uri->hostData.ipFuture))) { | ||
409 | return URI_FALSE; /* Raises malloc error */ | ||
410 | } | ||
411 | uri->hostText.first = uri->hostData.ipFuture.first; | ||
412 | uri->hostText.afterLast = uri->hostData.ipFuture.afterLast; | ||
413 | } else if (uri->hostText.first != NULL) { | ||
414 | /* Regname */ | ||
415 | if (!URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_HOST, | ||
416 | &(uri->hostText))) { | ||
417 | return URI_FALSE; /* Raises malloc error */ | ||
418 | } | ||
419 | } | ||
420 | } | ||
421 | } | ||
422 | |||
423 | /* Path */ | ||
424 | if ((*doneMask & URI_NORMALIZE_PATH) == 0) { | ||
425 | while (walker != NULL) { | ||
426 | if (!URI_FUNC(MakeRangeOwner)(doneMask, 0, &(walker->text))) { | ||
427 | /* Free allocations done so far and kill path */ | ||
428 | |||
429 | /* Kill path to one before walker (if any) */ | ||
430 | URI_TYPE(PathSegment) * ranger = uri->pathHead; | ||
431 | while (ranger != walker) { | ||
432 | URI_TYPE(PathSegment) * const next = ranger->next; | ||
433 | if ((ranger->text.first != NULL) | ||
434 | && (ranger->text.afterLast != NULL) | ||
435 | && (ranger->text.afterLast > ranger->text.first)) { | ||
436 | free((URI_CHAR *)ranger->text.first); | ||
437 | free(ranger); | ||
438 | } | ||
439 | ranger = next; | ||
440 | } | ||
441 | |||
442 | /* Kill path from walker */ | ||
443 | while (walker != NULL) { | ||
444 | URI_TYPE(PathSegment) * const next = walker->next; | ||
445 | free(walker); | ||
446 | walker = next; | ||
447 | } | ||
448 | |||
449 | uri->pathHead = NULL; | ||
450 | uri->pathTail = NULL; | ||
451 | return URI_FALSE; /* Raises malloc error */ | ||
452 | } | ||
453 | walker = walker->next; | ||
454 | } | ||
455 | *doneMask |= URI_NORMALIZE_PATH; | ||
456 | } | ||
457 | |||
458 | /* Port text, must come last so we don't have to undo that one if it fails. * | ||
459 | * Otherwise we would need and extra enum flag for it although the port * | ||
460 | * cannot go unnormalized... */ | ||
461 | if (!URI_FUNC(MakeRangeOwner)(doneMask, 0, &(uri->portText))) { | ||
462 | return URI_FALSE; /* Raises malloc error */ | ||
463 | } | ||
464 | |||
465 | return URI_TRUE; | ||
466 | } | ||
467 | |||
468 | |||
469 | |||
470 | unsigned int URI_FUNC(NormalizeSyntaxMaskRequired)(const URI_TYPE(Uri) * uri) { | ||
471 | unsigned int res; | ||
472 | #if defined(__GNUC__) && ((__GNUC__ > 4) \ | ||
473 | || ((__GNUC__ == 4) && defined(__GNUC_MINOR__) && (__GNUC_MINOR__ >= 2))) | ||
474 | /* Slower code that fixes a warning, not sure if this is a smart idea */ | ||
475 | URI_TYPE(Uri) writeableClone; | ||
476 | memcpy(&writeableClone, uri, 1 * sizeof(URI_TYPE(Uri))); | ||
477 | URI_FUNC(NormalizeSyntaxEngine)(&writeableClone, 0, &res); | ||
478 | #else | ||
479 | URI_FUNC(NormalizeSyntaxEngine)((URI_TYPE(Uri) *)uri, 0, &res); | ||
480 | #endif | ||
481 | return res; | ||
482 | } | ||
483 | |||
484 | |||
485 | |||
486 | int URI_FUNC(NormalizeSyntaxEx)(URI_TYPE(Uri) * uri, unsigned int mask) { | ||
487 | return URI_FUNC(NormalizeSyntaxEngine)(uri, mask, NULL); | ||
488 | } | ||
489 | |||
490 | |||
491 | |||
492 | int URI_FUNC(NormalizeSyntax)(URI_TYPE(Uri) * uri) { | ||
493 | return URI_FUNC(NormalizeSyntaxEx)(uri, (unsigned int)-1); | ||
494 | } | ||
495 | |||
496 | |||
497 | |||
498 | static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri, unsigned int inMask, unsigned int * outMask) { | ||
499 | unsigned int doneMask = URI_NORMALIZED; | ||
500 | if (uri == NULL) { | ||
501 | if (outMask != NULL) { | ||
502 | *outMask = URI_NORMALIZED; | ||
503 | return URI_SUCCESS; | ||
504 | } else { | ||
505 | return URI_ERROR_NULL; | ||
506 | } | ||
507 | } | ||
508 | |||
509 | if (outMask != NULL) { | ||
510 | /* Reset mask */ | ||
511 | *outMask = URI_NORMALIZED; | ||
512 | } else if (inMask == URI_NORMALIZED) { | ||
513 | /* Nothing to do */ | ||
514 | return URI_SUCCESS; | ||
515 | } | ||
516 | |||
517 | /* Scheme, host */ | ||
518 | if (outMask != NULL) { | ||
519 | const UriBool normalizeScheme = URI_FUNC(ContainsUppercaseLetters)( | ||
520 | uri->scheme.first, uri->scheme.afterLast); | ||
521 | const UriBool normalizeHostCase = URI_FUNC(ContainsUppercaseLetters)( | ||
522 | uri->hostText.first, uri->hostText.afterLast); | ||
523 | if (normalizeScheme) { | ||
524 | *outMask |= URI_NORMALIZE_SCHEME; | ||
525 | } | ||
526 | |||
527 | if (normalizeHostCase) { | ||
528 | *outMask |= URI_NORMALIZE_HOST; | ||
529 | } else { | ||
530 | const UriBool normalizeHostPrecent = URI_FUNC(ContainsUglyPercentEncoding)( | ||
531 | uri->hostText.first, uri->hostText.afterLast); | ||
532 | if (normalizeHostPrecent) { | ||
533 | *outMask |= URI_NORMALIZE_HOST; | ||
534 | } | ||
535 | } | ||
536 | } else { | ||
537 | /* Scheme */ | ||
538 | if ((inMask & URI_NORMALIZE_SCHEME) && (uri->scheme.first != NULL)) { | ||
539 | if (uri->owner) { | ||
540 | URI_FUNC(LowercaseInplace)(uri->scheme.first, uri->scheme.afterLast); | ||
541 | } else { | ||
542 | if (!URI_FUNC(LowercaseMalloc)(&(uri->scheme.first), &(uri->scheme.afterLast))) { | ||
543 | URI_FUNC(PreventLeakage)(uri, doneMask); | ||
544 | return URI_ERROR_MALLOC; | ||
545 | } | ||
546 | doneMask |= URI_NORMALIZE_SCHEME; | ||
547 | } | ||
548 | } | ||
549 | |||
550 | /* Host */ | ||
551 | if (inMask & URI_NORMALIZE_HOST) { | ||
552 | if (uri->hostData.ipFuture.first != NULL) { | ||
553 | /* IPvFuture */ | ||
554 | if (uri->owner) { | ||
555 | URI_FUNC(LowercaseInplace)(uri->hostData.ipFuture.first, | ||
556 | uri->hostData.ipFuture.afterLast); | ||
557 | } else { | ||
558 | if (!URI_FUNC(LowercaseMalloc)(&(uri->hostData.ipFuture.first), | ||
559 | &(uri->hostData.ipFuture.afterLast))) { | ||
560 | URI_FUNC(PreventLeakage)(uri, doneMask); | ||
561 | return URI_ERROR_MALLOC; | ||
562 | } | ||
563 | doneMask |= URI_NORMALIZE_HOST; | ||
564 | } | ||
565 | uri->hostText.first = uri->hostData.ipFuture.first; | ||
566 | uri->hostText.afterLast = uri->hostData.ipFuture.afterLast; | ||
567 | } else if ((uri->hostText.first != NULL) | ||
568 | && (uri->hostData.ip4 == NULL) | ||
569 | && (uri->hostData.ip6 == NULL)) { | ||
570 | /* Regname */ | ||
571 | if (uri->owner) { | ||
572 | URI_FUNC(FixPercentEncodingInplace)(uri->hostText.first, | ||
573 | &(uri->hostText.afterLast)); | ||
574 | } else { | ||
575 | if (!URI_FUNC(FixPercentEncodingMalloc)( | ||
576 | &(uri->hostText.first), | ||
577 | &(uri->hostText.afterLast))) { | ||
578 | URI_FUNC(PreventLeakage)(uri, doneMask); | ||
579 | return URI_ERROR_MALLOC; | ||
580 | } | ||
581 | doneMask |= URI_NORMALIZE_HOST; | ||
582 | } | ||
583 | |||
584 | URI_FUNC(LowercaseInplace)(uri->hostText.first, | ||
585 | uri->hostText.afterLast); | ||
586 | } | ||
587 | } | ||
588 | } | ||
589 | |||
590 | /* User info */ | ||
591 | if (outMask != NULL) { | ||
592 | const UriBool normalizeUserInfo = URI_FUNC(ContainsUglyPercentEncoding)( | ||
593 | uri->userInfo.first, uri->userInfo.afterLast); | ||
594 | if (normalizeUserInfo) { | ||
595 | *outMask |= URI_NORMALIZE_USER_INFO; | ||
596 | } | ||
597 | } else { | ||
598 | if ((inMask & URI_NORMALIZE_USER_INFO) && (uri->userInfo.first != NULL)) { | ||
599 | if (uri->owner) { | ||
600 | URI_FUNC(FixPercentEncodingInplace)(uri->userInfo.first, &(uri->userInfo.afterLast)); | ||
601 | } else { | ||
602 | if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->userInfo.first), | ||
603 | &(uri->userInfo.afterLast))) { | ||
604 | URI_FUNC(PreventLeakage)(uri, doneMask); | ||
605 | return URI_ERROR_MALLOC; | ||
606 | } | ||
607 | doneMask |= URI_NORMALIZE_USER_INFO; | ||
608 | } | ||
609 | } | ||
610 | } | ||
611 | |||
612 | /* Path */ | ||
613 | if (outMask != NULL) { | ||
614 | const URI_TYPE(PathSegment) * walker = uri->pathHead; | ||
615 | while (walker != NULL) { | ||
616 | const URI_CHAR * const first = walker->text.first; | ||
617 | const URI_CHAR * const afterLast = walker->text.afterLast; | ||
618 | if ((first != NULL) | ||
619 | && (afterLast != NULL) | ||
620 | && (afterLast > first) | ||
621 | && ( | ||
622 | (((afterLast - first) == 1) | ||
623 | && (first[0] == _UT('.'))) | ||
624 | || | ||
625 | (((afterLast - first) == 2) | ||
626 | && (first[0] == _UT('.')) | ||
627 | && (first[1] == _UT('.'))) | ||
628 | || | ||
629 | URI_FUNC(ContainsUglyPercentEncoding)(first, afterLast) | ||
630 | )) { | ||
631 | *outMask |= URI_NORMALIZE_PATH; | ||
632 | break; | ||
633 | } | ||
634 | walker = walker->next; | ||
635 | } | ||
636 | } else if (inMask & URI_NORMALIZE_PATH) { | ||
637 | URI_TYPE(PathSegment) * walker; | ||
638 | const UriBool relative = ((uri->scheme.first == NULL) | ||
639 | && !uri->absolutePath) ? URI_TRUE : URI_FALSE; | ||
640 | |||
641 | /* Fix percent-encoding for each segment */ | ||
642 | walker = uri->pathHead; | ||
643 | if (uri->owner) { | ||
644 | while (walker != NULL) { | ||
645 | URI_FUNC(FixPercentEncodingInplace)(walker->text.first, &(walker->text.afterLast)); | ||
646 | walker = walker->next; | ||
647 | } | ||
648 | } else { | ||
649 | while (walker != NULL) { | ||
650 | if (!URI_FUNC(FixPercentEncodingMalloc)(&(walker->text.first), | ||
651 | &(walker->text.afterLast))) { | ||
652 | URI_FUNC(PreventLeakage)(uri, doneMask); | ||
653 | return URI_ERROR_MALLOC; | ||
654 | } | ||
655 | walker = walker->next; | ||
656 | } | ||
657 | doneMask |= URI_NORMALIZE_PATH; | ||
658 | } | ||
659 | |||
660 | /* 6.2.2.3 Path Segment Normalization */ | ||
661 | if (!URI_FUNC(RemoveDotSegmentsEx)(uri, relative, | ||
662 | (uri->owner == URI_TRUE) | ||
663 | || ((doneMask & URI_NORMALIZE_PATH) != 0) | ||
664 | )) { | ||
665 | URI_FUNC(PreventLeakage)(uri, doneMask); | ||
666 | return URI_ERROR_MALLOC; | ||
667 | } | ||
668 | URI_FUNC(FixEmptyTrailSegment)(uri); | ||
669 | } | ||
670 | |||
671 | /* Query, fragment */ | ||
672 | if (outMask != NULL) { | ||
673 | const UriBool normalizeQuery = URI_FUNC(ContainsUglyPercentEncoding)( | ||
674 | uri->query.first, uri->query.afterLast); | ||
675 | const UriBool normalizeFragment = URI_FUNC(ContainsUglyPercentEncoding)( | ||
676 | uri->fragment.first, uri->fragment.afterLast); | ||
677 | if (normalizeQuery) { | ||
678 | *outMask |= URI_NORMALIZE_QUERY; | ||
679 | } | ||
680 | |||
681 | if (normalizeFragment) { | ||
682 | *outMask |= URI_NORMALIZE_FRAGMENT; | ||
683 | } | ||
684 | } else { | ||
685 | /* Query */ | ||
686 | if ((inMask & URI_NORMALIZE_QUERY) && (uri->query.first != NULL)) { | ||
687 | if (uri->owner) { | ||
688 | URI_FUNC(FixPercentEncodingInplace)(uri->query.first, &(uri->query.afterLast)); | ||
689 | } else { | ||
690 | if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->query.first), | ||
691 | &(uri->query.afterLast))) { | ||
692 | URI_FUNC(PreventLeakage)(uri, doneMask); | ||
693 | return URI_ERROR_MALLOC; | ||
694 | } | ||
695 | doneMask |= URI_NORMALIZE_QUERY; | ||
696 | } | ||
697 | } | ||
698 | |||
699 | /* Fragment */ | ||
700 | if ((inMask & URI_NORMALIZE_FRAGMENT) && (uri->fragment.first != NULL)) { | ||
701 | if (uri->owner) { | ||
702 | URI_FUNC(FixPercentEncodingInplace)(uri->fragment.first, &(uri->fragment.afterLast)); | ||
703 | } else { | ||
704 | if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->fragment.first), | ||
705 | &(uri->fragment.afterLast))) { | ||
706 | URI_FUNC(PreventLeakage)(uri, doneMask); | ||
707 | return URI_ERROR_MALLOC; | ||
708 | } | ||
709 | doneMask |= URI_NORMALIZE_FRAGMENT; | ||
710 | } | ||
711 | } | ||
712 | } | ||
713 | |||
714 | /* Dup all not duped yet */ | ||
715 | if ((outMask == NULL) && !uri->owner) { | ||
716 | if (!URI_FUNC(MakeOwner)(uri, &doneMask)) { | ||
717 | URI_FUNC(PreventLeakage)(uri, doneMask); | ||
718 | return URI_ERROR_MALLOC; | ||
719 | } | ||
720 | uri->owner = URI_TRUE; | ||
721 | } | ||
722 | |||
723 | return URI_SUCCESS; | ||
724 | } | ||
725 | |||
726 | |||
727 | |||
728 | #endif | ||