diff options
Diffstat (limited to 'gl/base64.c')
-rw-r--r-- | gl/base64.c | 273 |
1 files changed, 212 insertions, 61 deletions
diff --git a/gl/base64.c b/gl/base64.c index e67075d1..42ccc9c2 100644 --- a/gl/base64.c +++ b/gl/base64.c | |||
@@ -52,6 +52,8 @@ | |||
52 | /* Get UCHAR_MAX. */ | 52 | /* Get UCHAR_MAX. */ |
53 | #include <limits.h> | 53 | #include <limits.h> |
54 | 54 | ||
55 | #include <string.h> | ||
56 | |||
55 | /* C89 compliant way to cast 'char' to 'unsigned char'. */ | 57 | /* C89 compliant way to cast 'char' to 'unsigned char'. */ |
56 | static inline unsigned char | 58 | static inline unsigned char |
57 | to_uchar (char ch) | 59 | to_uchar (char ch) |
@@ -300,89 +302,237 @@ isbase64 (char ch) | |||
300 | return uchar_in_range (to_uchar (ch)) && 0 <= b64[to_uchar (ch)]; | 302 | return uchar_in_range (to_uchar (ch)) && 0 <= b64[to_uchar (ch)]; |
301 | } | 303 | } |
302 | 304 | ||
303 | /* Decode base64 encoded input array IN of length INLEN to output | 305 | /* Initialize decode-context buffer, CTX. */ |
304 | array OUT that can hold *OUTLEN bytes. Return true if decoding was | 306 | void |
305 | successful, i.e. if the input was valid base64 data, false | 307 | base64_decode_ctx_init (struct base64_decode_context *ctx) |
306 | otherwise. If *OUTLEN is too small, as many bytes as possible will | ||
307 | be written to OUT. On return, *OUTLEN holds the length of decoded | ||
308 | bytes in OUT. Note that as soon as any non-alphabet characters are | ||
309 | encountered, decoding is stopped and false is returned. This means | ||
310 | that, when applicable, you must remove any line terminators that is | ||
311 | part of the data stream before calling this function. */ | ||
312 | bool | ||
313 | base64_decode (const char *restrict in, size_t inlen, | ||
314 | char *restrict out, size_t *outlen) | ||
315 | { | 308 | { |
316 | size_t outleft = *outlen; | 309 | ctx->i = 0; |
310 | } | ||
317 | 311 | ||
318 | while (inlen >= 2) | 312 | /* If CTX->i is 0 or 4, there are four or more bytes in [*IN..IN_END), and |
319 | { | 313 | none of those four is a newline, then return *IN. Otherwise, copy up to |
320 | if (!isbase64 (in[0]) || !isbase64 (in[1])) | 314 | 4 - CTX->i non-newline bytes from that range into CTX->buf, starting at |
321 | break; | 315 | index CTX->i and setting CTX->i to reflect the number of bytes copied, |
316 | and return CTX->buf. In either case, advance *IN to point to the byte | ||
317 | after the last one processed, and set *N_NON_NEWLINE to the number of | ||
318 | verified non-newline bytes accessible through the returned pointer. */ | ||
319 | static inline char * | ||
320 | get_4 (struct base64_decode_context *ctx, | ||
321 | char const *restrict *in, char const *restrict in_end, | ||
322 | size_t *n_non_newline) | ||
323 | { | ||
324 | if (ctx->i == 4) | ||
325 | ctx->i = 0; | ||
322 | 326 | ||
323 | if (outleft) | 327 | if (ctx->i == 0) |
328 | { | ||
329 | char const *t = *in; | ||
330 | if (4 <= in_end - *in && memchr (t, '\n', 4) == NULL) | ||
324 | { | 331 | { |
325 | *out++ = ((b64[to_uchar (in[0])] << 2) | 332 | /* This is the common case: no newline. */ |
326 | | (b64[to_uchar (in[1])] >> 4)); | 333 | *in += 4; |
327 | outleft--; | 334 | *n_non_newline = 4; |
335 | return (char *) t; | ||
328 | } | 336 | } |
337 | } | ||
329 | 338 | ||
330 | if (inlen == 2) | 339 | { |
331 | break; | 340 | /* Copy non-newline bytes into BUF. */ |
341 | char const *p = *in; | ||
342 | while (p < in_end) | ||
343 | { | ||
344 | char c = *p++; | ||
345 | if (c != '\n') | ||
346 | { | ||
347 | ctx->buf[ctx->i++] = c; | ||
348 | if (ctx->i == 4) | ||
349 | break; | ||
350 | } | ||
351 | } | ||
352 | |||
353 | *in = p; | ||
354 | *n_non_newline = ctx->i; | ||
355 | return ctx->buf; | ||
356 | } | ||
357 | } | ||
358 | |||
359 | #define return_false \ | ||
360 | do \ | ||
361 | { \ | ||
362 | *outp = out; \ | ||
363 | return false; \ | ||
364 | } \ | ||
365 | while (false) | ||
366 | |||
367 | /* Decode up to four bytes of base64-encoded data, IN, of length INLEN | ||
368 | into the output buffer, *OUT, of size *OUTLEN bytes. Return true if | ||
369 | decoding is successful, false otherwise. If *OUTLEN is too small, | ||
370 | as many bytes as possible are written to *OUT. On return, advance | ||
371 | *OUT to point to the byte after the last one written, and decrement | ||
372 | *OUTLEN to reflect the number of bytes remaining in *OUT. */ | ||
373 | static inline bool | ||
374 | decode_4 (char const *restrict in, size_t inlen, | ||
375 | char *restrict *outp, size_t *outleft) | ||
376 | { | ||
377 | char *out = *outp; | ||
378 | if (inlen < 2) | ||
379 | return false; | ||
380 | |||
381 | if (!isbase64 (in[0]) || !isbase64 (in[1])) | ||
382 | return false; | ||
383 | |||
384 | if (*outleft) | ||
385 | { | ||
386 | *out++ = ((b64[to_uchar (in[0])] << 2) | ||
387 | | (b64[to_uchar (in[1])] >> 4)); | ||
388 | --*outleft; | ||
389 | } | ||
390 | |||
391 | if (inlen == 2) | ||
392 | return_false; | ||
393 | |||
394 | if (in[2] == '=') | ||
395 | { | ||
396 | if (inlen != 4) | ||
397 | return_false; | ||
398 | |||
399 | if (in[3] != '=') | ||
400 | return_false; | ||
401 | } | ||
402 | else | ||
403 | { | ||
404 | if (!isbase64 (in[2])) | ||
405 | return_false; | ||
332 | 406 | ||
333 | if (in[2] == '=') | 407 | if (*outleft) |
334 | { | 408 | { |
335 | if (inlen != 4) | 409 | *out++ = (((b64[to_uchar (in[1])] << 4) & 0xf0) |
336 | break; | 410 | | (b64[to_uchar (in[2])] >> 2)); |
411 | --*outleft; | ||
412 | } | ||
337 | 413 | ||
338 | if (in[3] != '=') | 414 | if (inlen == 3) |
339 | break; | 415 | return_false; |
340 | 416 | ||
417 | if (in[3] == '=') | ||
418 | { | ||
419 | if (inlen != 4) | ||
420 | return_false; | ||
341 | } | 421 | } |
342 | else | 422 | else |
343 | { | 423 | { |
344 | if (!isbase64 (in[2])) | 424 | if (!isbase64 (in[3])) |
345 | break; | 425 | return_false; |
346 | 426 | ||
347 | if (outleft) | 427 | if (*outleft) |
348 | { | 428 | { |
349 | *out++ = (((b64[to_uchar (in[1])] << 4) & 0xf0) | 429 | *out++ = (((b64[to_uchar (in[2])] << 6) & 0xc0) |
350 | | (b64[to_uchar (in[2])] >> 2)); | 430 | | b64[to_uchar (in[3])]); |
351 | outleft--; | 431 | --*outleft; |
352 | } | 432 | } |
433 | } | ||
434 | } | ||
353 | 435 | ||
354 | if (inlen == 3) | 436 | *outp = out; |
355 | break; | 437 | return true; |
438 | } | ||
356 | 439 | ||
357 | if (in[3] == '=') | 440 | /* Decode base64-encoded input array IN of length INLEN to output array |
358 | { | 441 | OUT that can hold *OUTLEN bytes. The input data may be interspersed |
359 | if (inlen != 4) | 442 | with newlines. Return true if decoding was successful, i.e. if the |
360 | break; | 443 | input was valid base64 data, false otherwise. If *OUTLEN is too |
361 | } | 444 | small, as many bytes as possible will be written to OUT. On return, |
362 | else | 445 | *OUTLEN holds the length of decoded bytes in OUT. Note that as soon |
446 | as any non-alphabet, non-newline character is encountered, decoding | ||
447 | is stopped and false is returned. If INLEN is zero, then process | ||
448 | only whatever data is stored in CTX. | ||
449 | |||
450 | Initially, CTX must have been initialized via base64_decode_ctx_init. | ||
451 | Subsequent calls to this function must reuse whatever state is recorded | ||
452 | in that buffer. It is necessary for when a quadruple of base64 input | ||
453 | bytes spans two input buffers. | ||
454 | |||
455 | If CTX is NULL then newlines are treated as garbage and the input | ||
456 | buffer is processed as a unit. */ | ||
457 | |||
458 | bool | ||
459 | base64_decode_ctx (struct base64_decode_context *ctx, | ||
460 | const char *restrict in, size_t inlen, | ||
461 | char *restrict out, size_t *outlen) | ||
462 | { | ||
463 | size_t outleft = *outlen; | ||
464 | bool ignore_newlines = ctx != NULL; | ||
465 | bool flush_ctx = false; | ||
466 | unsigned int ctx_i = 0; | ||
467 | |||
468 | if (ignore_newlines) | ||
469 | { | ||
470 | ctx_i = ctx->i; | ||
471 | flush_ctx = inlen == 0; | ||
472 | } | ||
473 | |||
474 | |||
475 | while (true) | ||
476 | { | ||
477 | size_t outleft_save = outleft; | ||
478 | if (ctx_i == 0 && !flush_ctx) | ||
479 | { | ||
480 | while (true) | ||
363 | { | 481 | { |
364 | if (!isbase64 (in[3])) | 482 | /* Save a copy of outleft, in case we need to re-parse this |
483 | block of four bytes. */ | ||
484 | outleft_save = outleft; | ||
485 | if (!decode_4 (in, inlen, &out, &outleft)) | ||
365 | break; | 486 | break; |
366 | 487 | ||
367 | if (outleft) | 488 | in += 4; |
368 | { | 489 | inlen -= 4; |
369 | *out++ = (((b64[to_uchar (in[2])] << 6) & 0xc0) | ||
370 | | b64[to_uchar (in[3])]); | ||
371 | outleft--; | ||
372 | } | ||
373 | } | 490 | } |
374 | } | 491 | } |
375 | 492 | ||
376 | in += 4; | 493 | if (inlen == 0 && !flush_ctx) |
377 | inlen -= 4; | 494 | break; |
495 | |||
496 | /* Handle the common case of 72-byte wrapped lines. | ||
497 | This also handles any other multiple-of-4-byte wrapping. */ | ||
498 | if (inlen && *in == '\n' && ignore_newlines) | ||
499 | { | ||
500 | ++in; | ||
501 | --inlen; | ||
502 | continue; | ||
503 | } | ||
504 | |||
505 | /* Restore OUT and OUTLEFT. */ | ||
506 | out -= outleft_save - outleft; | ||
507 | outleft = outleft_save; | ||
508 | |||
509 | { | ||
510 | char const *in_end = in + inlen; | ||
511 | char const *non_nl; | ||
512 | |||
513 | if (ignore_newlines) | ||
514 | non_nl = get_4 (ctx, &in, in_end, &inlen); | ||
515 | else | ||
516 | non_nl = in; /* Might have nl in this case. */ | ||
517 | |||
518 | /* If the input is empty or consists solely of newlines (0 non-newlines), | ||
519 | then we're done. Likewise if there are fewer than 4 bytes when not | ||
520 | flushing context and not treating newlines as garbage. */ | ||
521 | if (inlen == 0 || (inlen < 4 && !flush_ctx && ignore_newlines)) | ||
522 | { | ||
523 | inlen = 0; | ||
524 | break; | ||
525 | } | ||
526 | if (!decode_4 (non_nl, inlen, &out, &outleft)) | ||
527 | break; | ||
528 | |||
529 | inlen = in_end - in; | ||
530 | } | ||
378 | } | 531 | } |
379 | 532 | ||
380 | *outlen -= outleft; | 533 | *outlen -= outleft; |
381 | 534 | ||
382 | if (inlen != 0) | 535 | return inlen == 0; |
383 | return false; | ||
384 | |||
385 | return true; | ||
386 | } | 536 | } |
387 | 537 | ||
388 | /* Allocate an output buffer in *OUT, and decode the base64 encoded | 538 | /* Allocate an output buffer in *OUT, and decode the base64 encoded |
@@ -397,12 +547,13 @@ base64_decode (const char *restrict in, size_t inlen, | |||
397 | input was invalid, in which case *OUT is NULL and *OUTLEN is | 547 | input was invalid, in which case *OUT is NULL and *OUTLEN is |
398 | undefined. */ | 548 | undefined. */ |
399 | bool | 549 | bool |
400 | base64_decode_alloc (const char *in, size_t inlen, char **out, | 550 | base64_decode_alloc_ctx (struct base64_decode_context *ctx, |
401 | size_t *outlen) | 551 | const char *in, size_t inlen, char **out, |
552 | size_t *outlen) | ||
402 | { | 553 | { |
403 | /* This may allocate a few bytes too much, depending on input, | 554 | /* This may allocate a few bytes too many, depending on input, |
404 | but it's not worth the extra CPU time to compute the exact amount. | 555 | but it's not worth the extra CPU time to compute the exact size. |
405 | The exact amount is 3 * inlen / 4, minus 1 if the input ends | 556 | The exact size is 3 * inlen / 4, minus 1 if the input ends |
406 | with "=" and minus another 1 if the input ends with "==". | 557 | with "=" and minus another 1 if the input ends with "==". |
407 | Dividing before multiplying avoids the possibility of overflow. */ | 558 | Dividing before multiplying avoids the possibility of overflow. */ |
408 | size_t needlen = 3 * (inlen / 4) + 2; | 559 | size_t needlen = 3 * (inlen / 4) + 2; |
@@ -411,7 +562,7 @@ base64_decode_alloc (const char *in, size_t inlen, char **out, | |||
411 | if (!*out) | 562 | if (!*out) |
412 | return true; | 563 | return true; |
413 | 564 | ||
414 | if (!base64_decode (in, inlen, *out, &needlen)) | 565 | if (!base64_decode_ctx (ctx, in, inlen, *out, &needlen)) |
415 | { | 566 | { |
416 | free (*out); | 567 | free (*out); |
417 | *out = NULL; | 568 | *out = NULL; |