diff options
Diffstat (limited to 'plugins/uriparser/UriEscape.c')
-rw-r--r-- | plugins/uriparser/UriEscape.c | 453 |
1 files changed, 453 insertions, 0 deletions
diff --git a/plugins/uriparser/UriEscape.c b/plugins/uriparser/UriEscape.c new file mode 100644 index 00000000..79ee3a68 --- /dev/null +++ b/plugins/uriparser/UriEscape.c | |||
@@ -0,0 +1,453 @@ | |||
1 | /* | ||
2 | * uriparser - RFC 3986 URI parsing library | ||
3 | * | ||
4 | * Copyright (C) 2007, Weijia Song <songweijia@gmail.com> | ||
5 | * Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org> | ||
6 | * All rights reserved. | ||
7 | * | ||
8 | * Redistribution and use in source and binary forms, with or without | ||
9 | * modification, are permitted provided that the following conditions | ||
10 | * are met: | ||
11 | * | ||
12 | * * Redistributions of source code must retain the above | ||
13 | * copyright notice, this list of conditions and the following | ||
14 | * disclaimer. | ||
15 | * | ||
16 | * * Redistributions in binary form must reproduce the above | ||
17 | * copyright notice, this list of conditions and the following | ||
18 | * disclaimer in the documentation and/or other materials | ||
19 | * provided with the distribution. | ||
20 | * | ||
21 | * * Neither the name of the <ORGANIZATION> nor the names of its | ||
22 | * contributors may be used to endorse or promote products | ||
23 | * derived from this software without specific prior written | ||
24 | * permission. | ||
25 | * | ||
26 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
27 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
28 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | ||
29 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | ||
30 | * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | ||
31 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
32 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
33 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
34 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
35 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
36 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
37 | * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | */ | ||
39 | |||
40 | /* What encodings are enabled? */ | ||
41 | #include <uriparser/UriDefsConfig.h> | ||
42 | #if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) | ||
43 | /* Include SELF twice */ | ||
44 | # ifdef URI_ENABLE_ANSI | ||
45 | # define URI_PASS_ANSI 1 | ||
46 | # include "UriEscape.c" | ||
47 | # undef URI_PASS_ANSI | ||
48 | # endif | ||
49 | # ifdef URI_ENABLE_UNICODE | ||
50 | # define URI_PASS_UNICODE 1 | ||
51 | # include "UriEscape.c" | ||
52 | # undef URI_PASS_UNICODE | ||
53 | # endif | ||
54 | #else | ||
55 | # ifdef URI_PASS_ANSI | ||
56 | # include <uriparser/UriDefsAnsi.h> | ||
57 | # else | ||
58 | # include <uriparser/UriDefsUnicode.h> | ||
59 | # include <wchar.h> | ||
60 | # endif | ||
61 | |||
62 | |||
63 | |||
64 | #ifndef URI_DOXYGEN | ||
65 | # include <uriparser/Uri.h> | ||
66 | # include "UriCommon.h" | ||
67 | #endif | ||
68 | |||
69 | |||
70 | |||
71 | URI_CHAR * URI_FUNC(Escape)(const URI_CHAR * in, URI_CHAR * out, | ||
72 | UriBool spaceToPlus, UriBool normalizeBreaks) { | ||
73 | return URI_FUNC(EscapeEx)(in, NULL, out, spaceToPlus, normalizeBreaks); | ||
74 | } | ||
75 | |||
76 | |||
77 | |||
78 | URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst, | ||
79 | const URI_CHAR * inAfterLast, URI_CHAR * out, | ||
80 | UriBool spaceToPlus, UriBool normalizeBreaks) { | ||
81 | const URI_CHAR * read = inFirst; | ||
82 | URI_CHAR * write = out; | ||
83 | UriBool prevWasCr = URI_FALSE; | ||
84 | if ((out == NULL) || (inFirst == out)) { | ||
85 | return NULL; | ||
86 | } else if (inFirst == NULL) { | ||
87 | if (out != NULL) { | ||
88 | out[0] = _UT('\0'); | ||
89 | } | ||
90 | return out; | ||
91 | } | ||
92 | |||
93 | for (;;) { | ||
94 | if ((inAfterLast != NULL) && (read >= inAfterLast)) { | ||
95 | write[0] = _UT('\0'); | ||
96 | return write; | ||
97 | } | ||
98 | |||
99 | switch (read[0]) { | ||
100 | case _UT('\0'): | ||
101 | write[0] = _UT('\0'); | ||
102 | return write; | ||
103 | |||
104 | case _UT(' '): | ||
105 | if (spaceToPlus) { | ||
106 | write[0] = _UT('+'); | ||
107 | write++; | ||
108 | } else { | ||
109 | write[0] = _UT('%'); | ||
110 | write[1] = _UT('2'); | ||
111 | write[2] = _UT('0'); | ||
112 | write += 3; | ||
113 | } | ||
114 | prevWasCr = URI_FALSE; | ||
115 | break; | ||
116 | |||
117 | case _UT('a'): /* ALPHA */ | ||
118 | case _UT('A'): | ||
119 | case _UT('b'): | ||
120 | case _UT('B'): | ||
121 | case _UT('c'): | ||
122 | case _UT('C'): | ||
123 | case _UT('d'): | ||
124 | case _UT('D'): | ||
125 | case _UT('e'): | ||
126 | case _UT('E'): | ||
127 | case _UT('f'): | ||
128 | case _UT('F'): | ||
129 | case _UT('g'): | ||
130 | case _UT('G'): | ||
131 | case _UT('h'): | ||
132 | case _UT('H'): | ||
133 | case _UT('i'): | ||
134 | case _UT('I'): | ||
135 | case _UT('j'): | ||
136 | case _UT('J'): | ||
137 | case _UT('k'): | ||
138 | case _UT('K'): | ||
139 | case _UT('l'): | ||
140 | case _UT('L'): | ||
141 | case _UT('m'): | ||
142 | case _UT('M'): | ||
143 | case _UT('n'): | ||
144 | case _UT('N'): | ||
145 | case _UT('o'): | ||
146 | case _UT('O'): | ||
147 | case _UT('p'): | ||
148 | case _UT('P'): | ||
149 | case _UT('q'): | ||
150 | case _UT('Q'): | ||
151 | case _UT('r'): | ||
152 | case _UT('R'): | ||
153 | case _UT('s'): | ||
154 | case _UT('S'): | ||
155 | case _UT('t'): | ||
156 | case _UT('T'): | ||
157 | case _UT('u'): | ||
158 | case _UT('U'): | ||
159 | case _UT('v'): | ||
160 | case _UT('V'): | ||
161 | case _UT('w'): | ||
162 | case _UT('W'): | ||
163 | case _UT('x'): | ||
164 | case _UT('X'): | ||
165 | case _UT('y'): | ||
166 | case _UT('Y'): | ||
167 | case _UT('z'): | ||
168 | case _UT('Z'): | ||
169 | case _UT('0'): /* DIGIT */ | ||
170 | case _UT('1'): | ||
171 | case _UT('2'): | ||
172 | case _UT('3'): | ||
173 | case _UT('4'): | ||
174 | case _UT('5'): | ||
175 | case _UT('6'): | ||
176 | case _UT('7'): | ||
177 | case _UT('8'): | ||
178 | case _UT('9'): | ||
179 | case _UT('-'): /* "-" / "." / "_" / "~" */ | ||
180 | case _UT('.'): | ||
181 | case _UT('_'): | ||
182 | case _UT('~'): | ||
183 | /* Copy unmodified */ | ||
184 | write[0] = read[0]; | ||
185 | write++; | ||
186 | |||
187 | prevWasCr = URI_FALSE; | ||
188 | break; | ||
189 | |||
190 | case _UT('\x0a'): | ||
191 | if (normalizeBreaks) { | ||
192 | if (!prevWasCr) { | ||
193 | write[0] = _UT('%'); | ||
194 | write[1] = _UT('0'); | ||
195 | write[2] = _UT('D'); | ||
196 | write[3] = _UT('%'); | ||
197 | write[4] = _UT('0'); | ||
198 | write[5] = _UT('A'); | ||
199 | write += 6; | ||
200 | } | ||
201 | } else { | ||
202 | write[0] = _UT('%'); | ||
203 | write[1] = _UT('0'); | ||
204 | write[2] = _UT('A'); | ||
205 | write += 3; | ||
206 | } | ||
207 | prevWasCr = URI_FALSE; | ||
208 | break; | ||
209 | |||
210 | case _UT('\x0d'): | ||
211 | if (normalizeBreaks) { | ||
212 | write[0] = _UT('%'); | ||
213 | write[1] = _UT('0'); | ||
214 | write[2] = _UT('D'); | ||
215 | write[3] = _UT('%'); | ||
216 | write[4] = _UT('0'); | ||
217 | write[5] = _UT('A'); | ||
218 | write += 6; | ||
219 | } else { | ||
220 | write[0] = _UT('%'); | ||
221 | write[1] = _UT('0'); | ||
222 | write[2] = _UT('D'); | ||
223 | write += 3; | ||
224 | } | ||
225 | prevWasCr = URI_TRUE; | ||
226 | break; | ||
227 | |||
228 | default: | ||
229 | /* Percent encode */ | ||
230 | { | ||
231 | const unsigned char code = (unsigned char)read[0]; | ||
232 | write[0] = _UT('%'); | ||
233 | write[1] = URI_FUNC(HexToLetter)(code >> 4); | ||
234 | write[2] = URI_FUNC(HexToLetter)(code & 0x0f); | ||
235 | write += 3; | ||
236 | } | ||
237 | prevWasCr = URI_FALSE; | ||
238 | break; | ||
239 | } | ||
240 | |||
241 | read++; | ||
242 | } | ||
243 | } | ||
244 | |||
245 | |||
246 | |||
247 | const URI_CHAR * URI_FUNC(UnescapeInPlace)(URI_CHAR * inout) { | ||
248 | return URI_FUNC(UnescapeInPlaceEx)(inout, URI_FALSE, URI_BR_DONT_TOUCH); | ||
249 | } | ||
250 | |||
251 | |||
252 | |||
253 | const URI_CHAR * URI_FUNC(UnescapeInPlaceEx)(URI_CHAR * inout, | ||
254 | UriBool plusToSpace, UriBreakConversion breakConversion) { | ||
255 | URI_CHAR * read = inout; | ||
256 | URI_CHAR * write = inout; | ||
257 | UriBool prevWasCr = URI_FALSE; | ||
258 | |||
259 | if (inout == NULL) { | ||
260 | return NULL; | ||
261 | } | ||
262 | |||
263 | for (;;) { | ||
264 | switch (read[0]) { | ||
265 | case _UT('\0'): | ||
266 | if (read > write) { | ||
267 | write[0] = _UT('\0'); | ||
268 | } | ||
269 | return write; | ||
270 | |||
271 | case _UT('%'): | ||
272 | switch (read[1]) { | ||
273 | case _UT('0'): | ||
274 | case _UT('1'): | ||
275 | case _UT('2'): | ||
276 | case _UT('3'): | ||
277 | case _UT('4'): | ||
278 | case _UT('5'): | ||
279 | case _UT('6'): | ||
280 | case _UT('7'): | ||
281 | case _UT('8'): | ||
282 | case _UT('9'): | ||
283 | case _UT('a'): | ||
284 | case _UT('b'): | ||
285 | case _UT('c'): | ||
286 | case _UT('d'): | ||
287 | case _UT('e'): | ||
288 | case _UT('f'): | ||
289 | case _UT('A'): | ||
290 | case _UT('B'): | ||
291 | case _UT('C'): | ||
292 | case _UT('D'): | ||
293 | case _UT('E'): | ||
294 | case _UT('F'): | ||
295 | switch (read[2]) { | ||
296 | case _UT('0'): | ||
297 | case _UT('1'): | ||
298 | case _UT('2'): | ||
299 | case _UT('3'): | ||
300 | case _UT('4'): | ||
301 | case _UT('5'): | ||
302 | case _UT('6'): | ||
303 | case _UT('7'): | ||
304 | case _UT('8'): | ||
305 | case _UT('9'): | ||
306 | case _UT('a'): | ||
307 | case _UT('b'): | ||
308 | case _UT('c'): | ||
309 | case _UT('d'): | ||
310 | case _UT('e'): | ||
311 | case _UT('f'): | ||
312 | case _UT('A'): | ||
313 | case _UT('B'): | ||
314 | case _UT('C'): | ||
315 | case _UT('D'): | ||
316 | case _UT('E'): | ||
317 | case _UT('F'): | ||
318 | { | ||
319 | /* Percent group found */ | ||
320 | const unsigned char left = URI_FUNC(HexdigToInt)(read[1]); | ||
321 | const unsigned char right = URI_FUNC(HexdigToInt)(read[2]); | ||
322 | const int code = 16 * left + right; | ||
323 | switch (code) { | ||
324 | case 10: | ||
325 | switch (breakConversion) { | ||
326 | case URI_BR_TO_LF: | ||
327 | if (!prevWasCr) { | ||
328 | write[0] = (URI_CHAR)10; | ||
329 | write++; | ||
330 | } | ||
331 | break; | ||
332 | |||
333 | case URI_BR_TO_CRLF: | ||
334 | if (!prevWasCr) { | ||
335 | write[0] = (URI_CHAR)13; | ||
336 | write[1] = (URI_CHAR)10; | ||
337 | write += 2; | ||
338 | } | ||
339 | break; | ||
340 | |||
341 | case URI_BR_TO_CR: | ||
342 | if (!prevWasCr) { | ||
343 | write[0] = (URI_CHAR)13; | ||
344 | write++; | ||
345 | } | ||
346 | break; | ||
347 | |||
348 | case URI_BR_DONT_TOUCH: | ||
349 | default: | ||
350 | write[0] = (URI_CHAR)10; | ||
351 | write++; | ||
352 | |||
353 | } | ||
354 | prevWasCr = URI_FALSE; | ||
355 | break; | ||
356 | |||
357 | case 13: | ||
358 | switch (breakConversion) { | ||
359 | case URI_BR_TO_LF: | ||
360 | write[0] = (URI_CHAR)10; | ||
361 | write++; | ||
362 | break; | ||
363 | |||
364 | case URI_BR_TO_CRLF: | ||
365 | write[0] = (URI_CHAR)13; | ||
366 | write[1] = (URI_CHAR)10; | ||
367 | write += 2; | ||
368 | break; | ||
369 | |||
370 | case URI_BR_TO_CR: | ||
371 | write[0] = (URI_CHAR)13; | ||
372 | write++; | ||
373 | break; | ||
374 | |||
375 | case URI_BR_DONT_TOUCH: | ||
376 | default: | ||
377 | write[0] = (URI_CHAR)13; | ||
378 | write++; | ||
379 | |||
380 | } | ||
381 | prevWasCr = URI_TRUE; | ||
382 | break; | ||
383 | |||
384 | default: | ||
385 | write[0] = (URI_CHAR)(code); | ||
386 | write++; | ||
387 | |||
388 | prevWasCr = URI_FALSE; | ||
389 | |||
390 | } | ||
391 | read += 3; | ||
392 | } | ||
393 | break; | ||
394 | |||
395 | default: | ||
396 | /* Copy two chars unmodified and */ | ||
397 | /* look at this char again */ | ||
398 | if (read > write) { | ||
399 | write[0] = read[0]; | ||
400 | write[1] = read[1]; | ||
401 | } | ||
402 | read += 2; | ||
403 | write += 2; | ||
404 | |||
405 | prevWasCr = URI_FALSE; | ||
406 | } | ||
407 | break; | ||
408 | |||
409 | default: | ||
410 | /* Copy one char unmodified and */ | ||
411 | /* look at this char again */ | ||
412 | if (read > write) { | ||
413 | write[0] = read[0]; | ||
414 | } | ||
415 | read++; | ||
416 | write++; | ||
417 | |||
418 | prevWasCr = URI_FALSE; | ||
419 | } | ||
420 | break; | ||
421 | |||
422 | case _UT('+'): | ||
423 | if (plusToSpace) { | ||
424 | /* Convert '+' to ' ' */ | ||
425 | write[0] = _UT(' '); | ||
426 | } else { | ||
427 | /* Copy one char unmodified */ | ||
428 | if (read > write) { | ||
429 | write[0] = read[0]; | ||
430 | } | ||
431 | } | ||
432 | read++; | ||
433 | write++; | ||
434 | |||
435 | prevWasCr = URI_FALSE; | ||
436 | break; | ||
437 | |||
438 | default: | ||
439 | /* Copy one char unmodified */ | ||
440 | if (read > write) { | ||
441 | write[0] = read[0]; | ||
442 | } | ||
443 | read++; | ||
444 | write++; | ||
445 | |||
446 | prevWasCr = URI_FALSE; | ||
447 | } | ||
448 | } | ||
449 | } | ||
450 | |||
451 | |||
452 | |||
453 | #endif | ||