1 files changed, 212 insertions, 61 deletions
diff --git a/gl/base64.c b/gl/base64.c
index e67075d1..42ccc9c2 100644
--- a/gl/base64.c
+++ b/gl/base64.c
@@ -52,6 +52,8 @@
 /* Get UCHAR_MAX. */
 #include <limits.h>
+#include <string.h>
 /* C89 compliant way to cast 'char' to 'unsigned char'. */
 static inline unsigned char
 to_uchar (char ch)
@@ -300,89 +302,237 @@ isbase64 (char ch)
  return uchar_in_range (to_uchar (ch)) && 0 <= b64[to_uchar (ch)];
 }
-/* Decode base64 encoded input array IN of length INLEN to output
+/* Initialize decode-context buffer, CTX.  */
-   array OUT that can hold *OUTLEN bytes.  Return true if decoding was
+void
-   successful, i.e. if the input was valid base64 data, false
+base64_decode_ctx_init (struct base64_decode_context *ctx)
-   otherwise.  If *OUTLEN is too small, as many bytes as possible will
-   be written to OUT.  On return, *OUTLEN holds the length of decoded
-   bytes in OUT.  Note that as soon as any non-alphabet characters are
-   encountered, decoding is stopped and false is returned.  This means
-   that, when applicable, you must remove any line terminators that is
-   part of the data stream before calling this function.  */
-bool
-base64_decode (const char *restrict in, size_t inlen,
-               char *restrict out, size_t *outlen)
 {
-  size_t outleft = *outlen;
+  ctx->i = 0;
+}
-  while (inlen >= 2)
+/* If CTX->i is 0 or 4, there are four or more bytes in [*IN..IN_END), and
-    {
+   none of those four is a newline, then return *IN.  Otherwise, copy up to
-      if (!isbase64 (in[0]) || !isbase64 (in[1]))
+   4 - CTX->i non-newline bytes from that range into CTX->buf, starting at
-        break;
+   index CTX->i and setting CTX->i to reflect the number of bytes copied,
+   and return CTX->buf.  In either case, advance *IN to point to the byte
+   after the last one processed, and set *N_NON_NEWLINE to the number of
+   verified non-newline bytes accessible through the returned pointer.  */
+static inline char *
+get_4 (struct base64_decode_context *ctx,
+       char const *restrict *in, char const *restrict in_end,
+       size_t *n_non_newline)
+{
+  if (ctx->i == 4)
+    ctx->i = 0;
-      if (outleft)
+  if (ctx->i == 0)
+    {
+      char const *t = *in;
+      if (4 <= in_end - *in && memchr (t, '\n', 4) == NULL)
        {
-          *out++ = ((b64[to_uchar (in[0])] << 2)
+          /* This is the common case: no newline.  */
-                    | (b64[to_uchar (in[1])] >> 4));
+          *in += 4;
-          outleft--;
+          *n_non_newline = 4;
+          return (char *) t;
        }
+    }
-      if (inlen == 2)
+  {
-        break;
+    /* Copy non-newline bytes into BUF.  */
+    char const *p = *in;
+    while (p < in_end)
+      {
+        char c = *p++;
+        if (c != '\n')
+          {
+            ctx->buf[ctx->i++] = c;
+            if (ctx->i == 4)
+              break;
+          }
+      }
+    *in = p;
+    *n_non_newline = ctx->i;
+    return ctx->buf;
+  }
+}
+#define return_false                            \
+  do                                            \
+    {                                           \
+      *outp = out;                              \
+      return false;                             \
+    }                                           \
+  while (false)
+/* Decode up to four bytes of base64-encoded data, IN, of length INLEN
+   into the output buffer, *OUT, of size *OUTLEN bytes.  Return true if
+   decoding is successful, false otherwise.  If *OUTLEN is too small,
+   as many bytes as possible are written to *OUT.  On return, advance
+   *OUT to point to the byte after the last one written, and decrement
+   *OUTLEN to reflect the number of bytes remaining in *OUT.  */
+static inline bool
+decode_4 (char const *restrict in, size_t inlen,
+          char *restrict *outp, size_t *outleft)
+{
+  char *out = *outp;
+  if (inlen < 2)
+    return false;
+  if (!isbase64 (in[0]) || !isbase64 (in[1]))
+    return false;
+  if (*outleft)
+    {
+      *out++ = ((b64[to_uchar (in[0])] << 2)
+                | (b64[to_uchar (in[1])] >> 4));
+      --*outleft;
+    }
+  if (inlen == 2)
+    return_false;
+  if (in[2] == '=')
+    {
+      if (inlen != 4)
+        return_false;
+      if (in[3] != '=')
+        return_false;
+    }
+  else
+    {
+      if (!isbase64 (in[2]))
+        return_false;
-      if (in[2] == '=')
+      if (*outleft)
        {
-          if (inlen != 4)
+          *out++ = (((b64[to_uchar (in[1])] << 4) & 0xf0)
-            break;
+                    | (b64[to_uchar (in[2])] >> 2));
+          --*outleft;
+        }
-          if (in[3] != '=')
+      if (inlen == 3)
-            break;
+        return_false;
+      if (in[3] == '=')
+        {
+          if (inlen != 4)
+            return_false;
        }
      else
        {
-          if (!isbase64 (in[2]))
+          if (!isbase64 (in[3]))
-            break;
+            return_false;
-          if (outleft)
+          if (*outleft)
            {
-              *out++ = (((b64[to_uchar (in[1])] << 4) & 0xf0)
+              *out++ = (((b64[to_uchar (in[2])] << 6) & 0xc0)
-                        | (b64[to_uchar (in[2])] >> 2));
+                        | b64[to_uchar (in[3])]);
-              outleft--;
+              --*outleft;
            }
+        }
+    }
-          if (inlen == 3)
+  *outp = out;
-            break;
+  return true;
+}
-          if (in[3] == '=')
+/* Decode base64-encoded input array IN of length INLEN to output array
-            {
+   OUT that can hold *OUTLEN bytes.  The input data may be interspersed
-              if (inlen != 4)
+   with newlines.  Return true if decoding was successful, i.e. if the
-                break;
+   input was valid base64 data, false otherwise.  If *OUTLEN is too
-            }
+   small, as many bytes as possible will be written to OUT.  On return,
-          else
+   *OUTLEN holds the length of decoded bytes in OUT.  Note that as soon
+   as any non-alphabet, non-newline character is encountered, decoding
+   is stopped and false is returned.  If INLEN is zero, then process
+   only whatever data is stored in CTX.
+   Initially, CTX must have been initialized via base64_decode_ctx_init.
+   Subsequent calls to this function must reuse whatever state is recorded
+   in that buffer.  It is necessary for when a quadruple of base64 input
+   bytes spans two input buffers.
+   If CTX is NULL then newlines are treated as garbage and the input
+   buffer is processed as a unit.  */
+bool
+base64_decode_ctx (struct base64_decode_context *ctx,
+                   const char *restrict in, size_t inlen,
+                   char *restrict out, size_t *outlen)
+{
+  size_t outleft = *outlen;
+  bool ignore_newlines = ctx != NULL;
+  bool flush_ctx = false;
+  unsigned int ctx_i = 0;
+  if (ignore_newlines)
+    {
+      ctx_i = ctx->i;
+      flush_ctx = inlen == 0;
+    }
+  while (true)
+    {
+      size_t outleft_save = outleft;
+      if (ctx_i == 0 && !flush_ctx)
+        {
+          while (true)
            {
-              if (!isbase64 (in[3]))
+              /* Save a copy of outleft, in case we need to re-parse this
+                 block of four bytes.  */
+              outleft_save = outleft;
+              if (!decode_4 (in, inlen, &out, &outleft))
                break;
-              if (outleft)
+              in += 4;
-                {
+              inlen -= 4;
-                  *out++ = (((b64[to_uchar (in[2])] << 6) & 0xc0)
-                            | b64[to_uchar (in[3])]);
-                  outleft--;
-                }
            }
        }
-      in += 4;
+      if (inlen == 0 && !flush_ctx)
-      inlen -= 4;
+        break;
+      /* Handle the common case of 72-byte wrapped lines.
+         This also handles any other multiple-of-4-byte wrapping.  */
+      if (inlen && *in == '\n' && ignore_newlines)
+        {
+          ++in;
+          --inlen;
+          continue;
+        }
+      /* Restore OUT and OUTLEFT.  */
+      out -= outleft_save - outleft;
+      outleft = outleft_save;
+      {
+        char const *in_end = in + inlen;
+        char const *non_nl;
+        if (ignore_newlines)
+          non_nl = get_4 (ctx, &in, in_end, &inlen);
+        else
+          non_nl = in;  /* Might have nl in this case. */
+        /* If the input is empty or consists solely of newlines (0 non-newlines),
+           then we're done.  Likewise if there are fewer than 4 bytes when not
+           flushing context and not treating newlines as garbage.  */
+        if (inlen == 0 || (inlen < 4 && !flush_ctx && ignore_newlines))
+          {
+            inlen = 0;
+            break;
+          }
+        if (!decode_4 (non_nl, inlen, &out, &outleft))
+          break;
+        inlen = in_end - in;
+      }
    }
  *outlen -= outleft;
-  if (inlen != 0)
+  return inlen == 0;
-    return false;
-  return true;
 }
 /* Allocate an output buffer in *OUT, and decode the base64 encoded
@@ -397,12 +547,13 @@ base64_decode (const char *restrict in, size_t inlen,
   input was invalid, in which case *OUT is NULL and *OUTLEN is
   undefined. */
 bool
-base64_decode_alloc (const char *in, size_t inlen, char **out,
+base64_decode_alloc_ctx (struct base64_decode_context *ctx,
-                     size_t *outlen)
+                         const char *in, size_t inlen, char **out,
+                         size_t *outlen)
 {
-  /* This may allocate a few bytes too much, depending on input,
+  /* This may allocate a few bytes too many, depending on input,
-     but it's not worth the extra CPU time to compute the exact amount.
+     but it's not worth the extra CPU time to compute the exact size.
-     The exact amount is 3 * inlen / 4, minus 1 if the input ends
+     The exact size is 3 * inlen / 4, minus 1 if the input ends
     with "=" and minus another 1 if the input ends with "==".
     Dividing before multiplying avoids the possibility of overflow.  */
  size_t needlen = 3 * (inlen / 4) + 2;
@@ -411,7 +562,7 @@ base64_decode_alloc (const char *in, size_t inlen, char **out,
  if (!*out)
    return true;
-  if (!base64_decode (in, inlen, *out, &needlen))
+  if (!base64_decode_ctx (ctx, in, inlen, *out, &needlen))
    {
      free (*out);
      *out = NULL;

diff --git a/gl/base64.c b/gl/base64.c index e67075d1..42ccc9c2 100644 --- a/gl/base64.c +++ b/gl/base64.c
@@ -52,6 +52,8 @@
52	/* Get UCHAR_MAX. */	52	/* Get UCHAR_MAX. */
53	#include <limits.h>	53	#include <limits.h>
54		54
		55	#include <string.h>
		56
55	/* C89 compliant way to cast 'char' to 'unsigned char'. */	57	/* C89 compliant way to cast 'char' to 'unsigned char'. */
56	static inline unsigned char	58	static inline unsigned char
57	to_uchar (char ch)	59	to_uchar (char ch)
@@ -300,89 +302,237 @@ isbase64 (char ch)
300	return uchar_in_range (to_uchar (ch)) && 0 <= b64[to_uchar (ch)];	302	return uchar_in_range (to_uchar (ch)) && 0 <= b64[to_uchar (ch)];
301	}	303	}
302		304
303	/* Decode base64 encoded input array IN of length INLEN to output	305	/* Initialize decode-context buffer, CTX. */
304	array OUT that can hold *OUTLEN bytes. Return true if decoding was	306	void
305	successful, i.e. if the input was valid base64 data, false	307	base64_decode_ctx_init (struct base64_decode_context *ctx)
306	otherwise. If *OUTLEN is too small, as many bytes as possible will
307	be written to OUT. On return, *OUTLEN holds the length of decoded
308	bytes in OUT. Note that as soon as any non-alphabet characters are
309	encountered, decoding is stopped and false is returned. This means
310	that, when applicable, you must remove any line terminators that is
311	part of the data stream before calling this function. */
312	bool
313	base64_decode (const char *restrict in, size_t inlen,
314	char *restrict out, size_t *outlen)
315	{	308	{
316	size_t outleft = *outlen;	309	ctx->i = 0;
		310	}
317		311
318	while (inlen >= 2)	312	/* If CTX->i is 0 or 4, there are four or more bytes in [*IN..IN_END), and
319	{	313	none of those four is a newline, then return *IN. Otherwise, copy up to
320	if (!isbase64 (in[0]) || !isbase64 (in[1]))	314	4 - CTX->i non-newline bytes from that range into CTX->buf, starting at
321	break;	315	index CTX->i and setting CTX->i to reflect the number of bytes copied,
		316	and return CTX->buf. In either case, advance *IN to point to the byte
		317	after the last one processed, and set *N_NON_NEWLINE to the number of
		318	verified non-newline bytes accessible through the returned pointer. */
		319	static inline char *
		320	get_4 (struct base64_decode_context *ctx,
		321	char const *restrict *in, char const *restrict in_end,
		322	size_t *n_non_newline)
		323	{
		324	if (ctx->i == 4)
		325	ctx->i = 0;
322		326
323	if (outleft)	327	if (ctx->i == 0)
		328	{
		329	char const *t = *in;
		330	if (4 <= in_end - *in && memchr (t, '\n', 4) == NULL)
324	{	331	{
325	*out++ = ((b64[to_uchar (in[0])] << 2)	332	/* This is the common case: no newline. */
326	| (b64[to_uchar (in[1])] >> 4));	333	*in += 4;
327	outleft--;	334	*n_non_newline = 4;
		335	return (char *) t;
328	}	336	}
		337	}
329		338
330	if (inlen == 2)	339	{
331	break;	340	/* Copy non-newline bytes into BUF. */
		341	char const *p = *in;
		342	while (p < in_end)
		343	{
		344	char c = *p++;
		345	if (c != '\n')
		346	{
		347	ctx->buf[ctx->i++] = c;
		348	if (ctx->i == 4)
		349	break;
		350	}
		351	}
		352
		353	*in = p;
		354	*n_non_newline = ctx->i;
		355	return ctx->buf;
		356	}
		357	}
		358
		359	#define return_false \
		360	do \
		361	{ \
		362	*outp = out; \
		363	return false; \
		364	} \
		365	while (false)
		366
		367	/* Decode up to four bytes of base64-encoded data, IN, of length INLEN
		368	into the output buffer, *OUT, of size *OUTLEN bytes. Return true if
		369	decoding is successful, false otherwise. If *OUTLEN is too small,
		370	as many bytes as possible are written to *OUT. On return, advance
		371	*OUT to point to the byte after the last one written, and decrement
		372	*OUTLEN to reflect the number of bytes remaining in *OUT. */
		373	static inline bool
		374	decode_4 (char const *restrict in, size_t inlen,
		375	char *restrict *outp, size_t *outleft)
		376	{
		377	char *out = *outp;
		378	if (inlen < 2)
		379	return false;
		380
		381	if (!isbase64 (in[0]) || !isbase64 (in[1]))
		382	return false;
		383
		384	if (*outleft)
		385	{
		386	*out++ = ((b64[to_uchar (in[0])] << 2)
		387	| (b64[to_uchar (in[1])] >> 4));
		388	--*outleft;
		389	}
		390
		391	if (inlen == 2)
		392	return_false;
		393
		394	if (in[2] == '=')
		395	{
		396	if (inlen != 4)
		397	return_false;
		398
		399	if (in[3] != '=')
		400	return_false;
		401	}
		402	else
		403	{
		404	if (!isbase64 (in[2]))
		405	return_false;
332		406
333	if (in[2] == '=')	407	if (*outleft)
334	{	408	{
335	if (inlen != 4)	409	*out++ = (((b64[to_uchar (in[1])] << 4) & 0xf0)
336	break;	410	| (b64[to_uchar (in[2])] >> 2));
		411	--*outleft;
		412	}
337		413
338	if (in[3] != '=')	414	if (inlen == 3)
339	break;	415	return_false;
340		416
		417	if (in[3] == '=')
		418	{
		419	if (inlen != 4)
		420	return_false;
341	}	421	}
342	else	422	else
343	{	423	{
344	if (!isbase64 (in[2]))	424	if (!isbase64 (in[3]))
345	break;	425	return_false;
346		426
347	if (outleft)	427	if (*outleft)
348	{	428	{
349	*out++ = (((b64[to_uchar (in[1])] << 4) & 0xf0)	429	*out++ = (((b64[to_uchar (in[2])] << 6) & 0xc0)
350	| (b64[to_uchar (in[2])] >> 2));	430	| b64[to_uchar (in[3])]);
351	outleft--;	431	--*outleft;
352	}	432	}
		433	}
		434	}
353		435
354	if (inlen == 3)	436	*outp = out;
355	break;	437	return true;
		438	}
356		439
357	if (in[3] == '=')	440	/* Decode base64-encoded input array IN of length INLEN to output array
358	{	441	OUT that can hold *OUTLEN bytes. The input data may be interspersed
359	if (inlen != 4)	442	with newlines. Return true if decoding was successful, i.e. if the
360	break;	443	input was valid base64 data, false otherwise. If *OUTLEN is too
361	}	444	small, as many bytes as possible will be written to OUT. On return,
362	else	445	*OUTLEN holds the length of decoded bytes in OUT. Note that as soon
		446	as any non-alphabet, non-newline character is encountered, decoding
		447	is stopped and false is returned. If INLEN is zero, then process
		448	only whatever data is stored in CTX.
		449
		450	Initially, CTX must have been initialized via base64_decode_ctx_init.
		451	Subsequent calls to this function must reuse whatever state is recorded
		452	in that buffer. It is necessary for when a quadruple of base64 input
		453	bytes spans two input buffers.
		454
		455	If CTX is NULL then newlines are treated as garbage and the input
		456	buffer is processed as a unit. */
		457
		458	bool
		459	base64_decode_ctx (struct base64_decode_context *ctx,
		460	const char *restrict in, size_t inlen,
		461	char *restrict out, size_t *outlen)
		462	{
		463	size_t outleft = *outlen;
		464	bool ignore_newlines = ctx != NULL;
		465	bool flush_ctx = false;
		466	unsigned int ctx_i = 0;
		467
		468	if (ignore_newlines)
		469	{
		470	ctx_i = ctx->i;
		471	flush_ctx = inlen == 0;
		472	}
		473
		474
		475	while (true)
		476	{
		477	size_t outleft_save = outleft;
		478	if (ctx_i == 0 && !flush_ctx)
		479	{
		480	while (true)
363	{	481	{
364	if (!isbase64 (in[3]))	482	/* Save a copy of outleft, in case we need to re-parse this
		483	block of four bytes. */
		484	outleft_save = outleft;
		485	if (!decode_4 (in, inlen, &out, &outleft))
365	break;	486	break;
366		487
367	if (outleft)	488	in += 4;
368	{	489	inlen -= 4;
369	*out++ = (((b64[to_uchar (in[2])] << 6) & 0xc0)
370	| b64[to_uchar (in[3])]);
371	outleft--;
372	}
373	}	490	}
374	}	491	}
375		492
376	in += 4;	493	if (inlen == 0 && !flush_ctx)
377	inlen -= 4;	494	break;
		495
		496	/* Handle the common case of 72-byte wrapped lines.
		497	This also handles any other multiple-of-4-byte wrapping. */
		498	if (inlen && *in == '\n' && ignore_newlines)
		499	{
		500	++in;
		501	--inlen;
		502	continue;
		503	}
		504
		505	/* Restore OUT and OUTLEFT. */
		506	out -= outleft_save - outleft;
		507	outleft = outleft_save;
		508
		509	{
		510	char const *in_end = in + inlen;
		511	char const *non_nl;
		512
		513	if (ignore_newlines)
		514	non_nl = get_4 (ctx, &in, in_end, &inlen);
		515	else
		516	non_nl = in; /* Might have nl in this case. */
		517
		518	/* If the input is empty or consists solely of newlines (0 non-newlines),
		519	then we're done. Likewise if there are fewer than 4 bytes when not
		520	flushing context and not treating newlines as garbage. */
		521	if (inlen == 0 || (inlen < 4 && !flush_ctx && ignore_newlines))
		522	{
		523	inlen = 0;
		524	break;
		525	}
		526	if (!decode_4 (non_nl, inlen, &out, &outleft))
		527	break;
		528
		529	inlen = in_end - in;
		530	}
378	}	531	}
379		532
380	*outlen -= outleft;	533	*outlen -= outleft;
381		534
382	if (inlen != 0)	535	return inlen == 0;
383	return false;
384
385	return true;
386	}	536	}
387		537
388	/* Allocate an output buffer in *OUT, and decode the base64 encoded	538	/* Allocate an output buffer in *OUT, and decode the base64 encoded
@@ -397,12 +547,13 @@ base64_decode (const char *restrict in, size_t inlen,
397	input was invalid, in which case *OUT is NULL and *OUTLEN is	547	input was invalid, in which case *OUT is NULL and *OUTLEN is
398	undefined. */	548	undefined. */
399	bool	549	bool
400	base64_decode_alloc (const char *in, size_t inlen, char **out,	550	base64_decode_alloc_ctx (struct base64_decode_context *ctx,
401	size_t *outlen)	551	const char *in, size_t inlen, char **out,
		552	size_t *outlen)
402	{	553	{
403	/* This may allocate a few bytes too much, depending on input,	554	/* This may allocate a few bytes too many, depending on input,
404	but it's not worth the extra CPU time to compute the exact amount.	555	but it's not worth the extra CPU time to compute the exact size.
405	The exact amount is 3 * inlen / 4, minus 1 if the input ends	556	The exact size is 3 * inlen / 4, minus 1 if the input ends
406	with "=" and minus another 1 if the input ends with "==".	557	with "=" and minus another 1 if the input ends with "==".
407	Dividing before multiplying avoids the possibility of overflow. */	558	Dividing before multiplying avoids the possibility of overflow. */
408	size_t needlen = 3 * (inlen / 4) + 2;	559	size_t needlen = 3 * (inlen / 4) + 2;
@@ -411,7 +562,7 @@ base64_decode_alloc (const char *in, size_t inlen, char **out,
411	if (!*out)	562	if (!*out)
412	return true;	563	return true;
413		564
414	if (!base64_decode (in, inlen, *out, &needlen))	565	if (!base64_decode_ctx (ctx, in, inlen, *out, &needlen))
415	{	566	{
416	free (*out);	567	free (*out);
417	*out = NULL;	568	*out = NULL;