[ccan] [PATCH] base64: implements rfc4648, the base64 encoding

Tue Feb 24 03:41:42 AEDT 2015

On Thu, Jan 22, 2015 at 11:53:26AM +1100, Peter Barker wrote:
> Encode buffers into base64 according to rfc4648.
> Decode base64-encoded buffers according to the same standard.

Some comments below. However I took a look at RFC4648 and it actually
points to a C99 implementation, which is also GPL2. So maybe a laxer
license could help to differentiate a bit (of course that's up to you).

> Signed-off-by: <pb-ccan at barker.dropbear.id.au>
> ---
>  Makefile-ccan              |    1 +
>  ccan/base64/GPL-2          |    1 +
>  ccan/base64/LICENSE        |    1 +
>  ccan/base64/_info          |   39 +++++
>  ccan/base64/base64.c       |  273 ++++++++++++++++++++++++++++++++++
>  ccan/base64/base64.h       |  200 +++++++++++++++++++++++++
>  ccan/base64/test/moretap.h |   96 ++++++++++++
>  ccan/base64/test/run.c     |  351 ++++++++++++++++++++++++++++++++++++++++++++
>  8 files changed, 962 insertions(+)
>  create mode 120000 ccan/base64/GPL-2
>  create mode 120000 ccan/base64/LICENSE
>  create mode 100644 ccan/base64/_info
>  create mode 100644 ccan/base64/base64.c
>  create mode 100644 ccan/base64/base64.h
>  create mode 100644 ccan/base64/test/moretap.h
>  create mode 100644 ccan/base64/test/run.c
> 
> diff --git a/Makefile-ccan b/Makefile-ccan
> index acfbe19..8321ab0 100644
> --- a/Makefile-ccan
> +++ b/Makefile-ccan
> @@ -35,6 +35,7 @@ MODS_WITH_SRC := antithread \
>  	asprintf \
>  	autodata \
>  	avl \
> +	base64 \
>  	bdelta \
>  	block_pool \
>  	breakpoint \
> diff --git a/ccan/base64/GPL-2 b/ccan/base64/GPL-2
> new file mode 120000
> index 0000000..9961ca9
> --- /dev/null
> +++ b/ccan/base64/GPL-2
> @@ -0,0 +1 @@
> +../../licenses/GPL-2
> \ No newline at end of file
> diff --git a/ccan/base64/LICENSE b/ccan/base64/LICENSE
> new file mode 120000
> index 0000000..9961ca9
> --- /dev/null
> +++ b/ccan/base64/LICENSE
> @@ -0,0 +1 @@
> +../../licenses/GPL-2
> \ No newline at end of file
> diff --git a/ccan/base64/_info b/ccan/base64/_info
> new file mode 100644
> index 0000000..d69a7cb
> --- /dev/null
> +++ b/ccan/base64/_info
> @@ -0,0 +1,39 @@
> +#include "config.h"
> +
> +/**
> + * base64 - base64 encoding and decoding (rfc4648).
> + *
> + * base64 encoding is used to encode data in a 7-bit clean manner.
> + *  Commonly used for escaping data before encapsulation or transfer
> + *
> + * Example:
> + *      #include <stdio.h>
> + *      #include <string.h>
> + *      #include <ccan/base64/base64.h>
> + *      
> + *      int main(int argc, char *argv[])
> + *      {
> + *      	char *base64_encoded_string;
> + *      	int i;
> + *      
> + *      	// print the base64-encoded form of the program arguments
> + *      	for(i=1;i<argc;i++) {
> + *      		size_t space_required = base64_encoded_length(strlen(argv[i]));

I know it's just an example but maybe save the strlen instead of calling
it twice?

> + *      		base64_encoded_string = malloc(space_required);
> + *      		base64_encode(base64_encoded_string,sizeof(space_required),argv[i],strlen(argv[i]));

If I got it right, the `sizeof` here should be removed.

> + *      		printf("%s\n",base64_encoded_string);
> + *      		free(base64_encoded_string);
> + *      	}
> + *      
> + *      	return 0;
> + *      }
> + *            
> + * License: GPL (v2 or any later version)
> + */
> +int main(int argc, char *argv[])
> +{
> +	if (argc != 2)
> +		return 1;
> +
> +	return 1;
> +}
> diff --git a/ccan/base64/base64.c b/ccan/base64/base64.c
> new file mode 100644
> index 0000000..59d7c31
> --- /dev/null
> +++ b/ccan/base64/base64.c
> @@ -0,0 +1,273 @@
> +/* Licensed under LGPLv2+ - see LICENSE file for details */
> +#include "base64.h"
> +
> +#include <errno.h>
> +#include <string.h>
> +#include <assert.h>
> +
> +/**
> + * sixbit_to_b64 - maps a 6-bit value to the base64 alphabet
> + * @param alphabet A base 64 alphabet (see base64_init_alphabet)
> + * @param sixbit Six-bit value to map
> + * @return a base 64 character
> + */
> +static char sixbit_to_b64(const base64_alphabet_t *alphabet, const char sixbit)
> +{
> +	assert(sixbit >= 0);
> +	assert(sixbit <= 63);
> +
> +	return alphabet->encode_map[(unsigned char)sixbit];
> +}
> +
> +/**
> + * sixbit_from_b64 - maps a base64-alphabet character to its 6-bit value
> + * @param alphabet A base 64 alphabet (see base64_init_alphabet)
> + * @param sixbit Six-bit value to map
> + * @return a six-bit value
> + */
> +static signed char sixbit_from_b64(const base64_alphabet_t *alphabet,
> +				   const unsigned char b64letter)
> +{
> +	signed char ret;
> +
> +	ret = alphabet->decode_map[(unsigned char)b64letter];
> +	if (ret == '~') {

This assumes that `~` >= 64 right? That's valid but `0xff` would feel
less arbitrary.

> +		errno = ERANGE;
> +		return -1;
> +	}
> +
> +	return ret;
> +}
> +
> +int base64_char_in_alphabet(const base64_alphabet_t *alphabet,
> +			    const char b64char) {
> +	return (alphabet->decode_map[(unsigned char)b64char] != '~');

This ought to give a warning about casting away const (I'd just remove
the cast).

> +}
> +
> +void base64_init_alphabet(base64_alphabet_t *dest, const char src[64]) {
> +	unsigned char i;
> +
> +	memcpy(dest->encode_map,src,64);
> +	memset(dest->decode_map,'~',256);
> +	for (i=0; i<64; i++) {
> +	  dest->decode_map[(unsigned char)src[i]] = i;
> +	}
> +}
> +
> +size_t base64_encoded_length(size_t srclen)
> +{
> +	return ((srclen + 2) / 3) * 4;
> +}
> +
> +void base64_encode_triplet_using_alphabet(const base64_alphabet_t *alphabet,
> +					  char dest[4], const char src[3])
> +{
> +	char a = src[0];
> +	char b = src[1];
> +	char c = src[2];
> +
> +	dest[0] = sixbit_to_b64(alphabet, (a & 0xfc) >> 2);
> +	dest[1] = sixbit_to_b64(alphabet, ((a & 0x3) << 4) | ((b & 0xf0) >> 4));
> +	dest[2] = sixbit_to_b64(alphabet, ((c & 0xc0) >> 6) | ((b & 0xf) << 2));
> +	dest[3] = sixbit_to_b64(alphabet, c & 0x3f);
> +}
> +
> +void base64_encode_tail_using_alphabet(const base64_alphabet_t *alphabet,
> +				       char dest[4],
> +				       const char *src, const size_t srclen)
> +{
> +	char longsrc[3];
> +
> +	memcpy(longsrc, src, srclen);
> +	memset(longsrc+srclen, '\0', 3-srclen);

`3 - srclen` looks dangerous; maybe add a warning about the maximum value
of `srclen` in the doc comments. In this case, though, you can simply
zero-initialize longsrc, i.e. `char longsrc[3] = { 0 };`.

> +	base64_encode_triplet_using_alphabet(alphabet, dest, longsrc);
> +	memset(dest+1+srclen, '=', 3-srclen);
> +}
> +
> +size_t base64_encode_using_alphabet(const base64_alphabet_t *alphabet,
> +				    char *dest, const size_t destlen,
> +				    const char *src, const size_t srclen)
> +{
> +	size_t src_offset = 0;
> +	size_t dest_offset = 0;
> +
> +	if (destlen < base64_encoded_length(srclen)) {
> +		errno = EOVERFLOW;
> +		return -1;
> +	}
> +
> +	while (srclen - src_offset >= 3) {

It's easier to see there's no underflow if it's written as
    src_offset + 3 <= srclen

> +		base64_encode_triplet_using_alphabet(alphabet, &dest[dest_offset], &src[src_offset]);
> +		src_offset += 3;
> +		dest_offset += 4;
> +	}
> +
> +	if (srclen - src_offset) {

Here too: `srclen != src_offset` or better `src_offset < srclen`.

> +		base64_encode_tail_using_alphabet(alphabet, &dest[dest_offset], &src[src_offset], srclen-src_offset);
> +		dest_offset += 4;
> +	}
> +
> +	memset(&dest[dest_offset], '\0', destlen-dest_offset);
> +
> +	return dest_offset;
> +}
> +
> +size_t base64_decoded_length(size_t srclen)
> +{
> +	return ((srclen+3)/4*3);
> +}
> +
> +int base64_decode_quartet_using_alphabet(const base64_alphabet_t *alphabet,
> +				    char dest[3], const char src[4])
> +{
> +	signed char a;
> +	signed char b;
> +	signed char c;
> +	signed char d;
> +
> +	a = sixbit_from_b64(alphabet, src[0]);
> +	b = sixbit_from_b64(alphabet, src[1]);
> +	c = sixbit_from_b64(alphabet, src[2]);
> +	d = sixbit_from_b64(alphabet, src[3]);
> +
> +	if ((a == -1) || (b == -1) || (c == -1) || (d == -1)) {
> +		return -1;
> +	}
> +
> +	dest[0] = (a << 2) | (b >> 4);
> +	dest[1] = ((b & 0xf) << 4) | (c >> 2);
> +	dest[2] = ((c & 0x3) << 6) | d;
> +
> +	return 0;
> +}
> +
> +
> +int base64_decode_tail_using_alphabet(const base64_alphabet_t *alphabet, char dest[3],
> +				 const char * src, const size_t srclen)
> +{
> +	char longsrc[4];
> +	int quartet_result;
> +	size_t insize = srclen;
> +
> +	if (insize == 0) {
> +		return 0;
> +	}
> +	while (src[insize-1] == '=') { /* throw away padding symbols */

If I give srclen=1, src="=" there's an underflow here (don't think
that's valid but might be malicious).

> +		insize--;
> +	}
> +	if (insize == 1) {
> +		/* the input is malformed.... */
> +		errno = EINVAL;
> +		return -1;
> +	}
> +	memcpy(longsrc, src, insize);
> +	memset(longsrc+insize, 'A', 4-insize);
> +	quartet_result = base64_decode_quartet_using_alphabet(alphabet, dest, longsrc);
> +	if (quartet_result == -1) {
> +		return -1;
> +	}
> +
> +	return insize - 1;

Here too if I follow correctly `insize` can be 0. Not sure which int
conversions happen here but it'd be better not to need that...

> +}
> +
> +size_t base64_decode_using_alphabet(const base64_alphabet_t *alphabet,
> +			       char *dest, const size_t destlen,
> +			       const char *src, const size_t srclen)
> +{
> +	size_t dest_offset = 0;
> +	size_t i;
> +	size_t more;
> +
> +	if (destlen < base64_decoded_length(srclen)) {
> +		errno = EOVERFLOW;
> +		return -1;
> +	}
> +
> +	for(i=0; srclen - i > 4; i+=4) {

`i + 4 < srclen` would be nicer re. underflow (though it looks fine
here).

> +		if (base64_decode_quartet_using_alphabet(alphabet, &dest[dest_offset], &src[i]) == -1) {
> +			return -1;
> +		}
> +		dest_offset += 3;
> +	}
> +
> +	more = base64_decode_tail_using_alphabet(alphabet, &dest[dest_offset], &src[i], srclen - i);
> +	if (more == -1) {
> +		return -1;
> +	}
> +	dest_offset += more;
> +
> +	memset(&dest[dest_offset], '\0', destlen-dest_offset);
> +
> +	return dest_offset;
> +}
> +
> +
> +/* the rfc4648 functions: */
> +#define base64_decode_map_rfc4648					\
> +  "~~~~~" /* 0 */							\
> +  "~~~~~" /* 5 */							\
> +  "~~~~~" /* 10 */							\
> +  "~~~~~" /* 15 */							\
> +  "~~~~~" /* 20 */							\
> +  "~~~~~" /* 25 */							\
> +  "~~~~~" /* 30 */							\
> +  "~~~~~" /* 35 */							\
> +  "~~~\x3e~" /* 40 */							\
> +  "~~\x3f\x34\x35" /* 45 */						\
> +  "\x36\x37\x38\x39\x3a" /* 50 */					\
> +  "\x3b\x3c\x3d~~" /* 55 */						\
> +  "~~~~~" /* 60 */							\
> +  "\x00\x01\x02\x03\x04" /* 65 A */					\
> +  "\x05\x06\x07\x08\x09" /* 70 */					\
> +  "\x0a\x0b\x0c\x0d\x0e" /* 75 */					\
> +  "\x0f\x10\x11\x12\x13" /* 80 */					\
> +  "\x14\x15\x16\x17\x18" /* 85 */					\
> +  "\x19~~~~" /* 90 */							\
> +  "~~\x1a\x1b\x1c" /* 95 */						\
> +  "\x1d\x1e\x1f\x20\x21" /* 100 */					\
> +  "\x22\x23\x24\x25\x26" /* 105 */					\
> +  "\x27\x28\x29\x2a\x2b" /* 110 */					\
> +  "\x2c\x2d\x2e\x2f\x30" /* 115 */					\
> +  "\x31\x32\x33~~" /* 120 */						\
> +  "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" /* 125 */	\
> +  "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" /* 175 */	\
> +  "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" /* 225 */
> +
> +static const base64_alphabet_t alphabet_rfc4648_pregen = {

Why not export this one?

> +  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
> +  base64_decode_map_rfc4648
> +};
> +
> +#undef base64_decode_map_rfc4648
> +

Perhaps the following functions should be `static inline` in the header?
Otherwise in some cases the function call can't be inlined away. It also
makes it clear these are just simple wrappers.

> +int base64_decode_quartet(char dest[3], const char src[4])
> +{
> +	return base64_decode_quartet_using_alphabet(&alphabet_rfc4648_pregen,
> +						    dest, src);
> +}
> +size_t base64_decode_tail(char dest[3], const char *src, const size_t srclen)
> +{
> +	return base64_decode_tail_using_alphabet(&alphabet_rfc4648_pregen,
> +						 dest, src, srclen);
> +}
> +size_t base64_decode(char *dest, const size_t destlen, const char *src, const size_t srclen) {
> +	return base64_decode_using_alphabet(&alphabet_rfc4648_pregen,
> +					    dest, destlen, src, srclen);
> +}
> +
> +
> +
> +void base64_encode_triplet(char dest[4], const char src[3]) {
> +	base64_encode_triplet_using_alphabet(&alphabet_rfc4648_pregen,
> +					     dest, src);
> +}
> +void base64_encode_tail(char dest[4], const char *src, const size_t srclen) {
> +	base64_encode_tail_using_alphabet(&alphabet_rfc4648_pregen,
> +					  dest, src, srclen);
> +}
> +size_t base64_encode(char *dest, const size_t destlen, const char *src, const size_t srclen) {
> +	return base64_encode_using_alphabet(&alphabet_rfc4648_pregen,
> +					    dest, destlen, src, srclen);
> +}
> +
> +/* end rfc4648 functions */
> diff --git a/ccan/base64/base64.h b/ccan/base64/base64.h
> new file mode 100644
> index 0000000..929a9d4
> --- /dev/null
> +++ b/ccan/base64/base64.h
> @@ -0,0 +1,200 @@
> +/* Licensed under LGPLv2+ - see LICENSE file for details */
> +#ifndef CCAN_BASE64_H
> +#define CCAN_BASE64_H
> +
> +#include <stdio.h> /* For size_t */

If you only need `size_t`, you can include stddef.h instead.

> +
> +
> +/**
> + * base64_alphabet_rfc4648 - the base64 alphabet as defined in rfc4648
> + */
> +static const char base64_alphabet_rfc4648[] =

I'd put an explicit length here, though there's no reason to...

> +	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
> +
> +/**
> + * base64_alphabet_t - structure to hold maps for encode/decode
> + */
> +typedef struct {
> +	char encode_map[64];
> +	char decode_map[256];
> +} base64_alphabet_t;
> +
> +/**
> + * base64_encoded_length - Calculate encode buffer length
> + * @param srclen the size of the data to be encoded
> + * @note add 1 to this to get null-termination
> + * @return Buffer length required for encode
> + */
> +size_t base64_encoded_length(size_t srclen);
> +
> +/**
> + * base64_encode - Encode a buffer into base64 according to rfc4648
> + * @param dest Buffer to encode into
> + * @param destlen Length of the destination buffer
> + * @param src Buffer to encode
> + * @param srclen Length of the data to encode
> + * @return Number of encoded bytes set in dest. -1 on error (and errno set)

Maybe return `-ERRORCODE` on error, instead of -1 (errno is annoying to
use). What errors can happen here? (Also for other functions).

> + * @note dest will be nul-padded to destlen
> + *
> + * This function encodes src according to http://tools.ietf.org/html/rfc4648
> + *
> + * Example:
> + *	size_t encoded_length;
> + *	char dest[100];
> + *	const char *src = "This string gets encoded";
> + *	encoded_length = base64_encode(dest, sizeof(dest), src, strlen(src));
> + *	printf("Returned data of length %zd @%p\n", encoded_length, &dest);
> + */
> +size_t base64_encode(char *dest, const size_t destlen, const char *src, const size_t srclen);

`const size_t foo` parameter is a bit funny-looking.

Have you thought about adding a convenience function which only takes
src+srclen and returns a malloc'd buffer? This seems to be a pretty
common use case, e.g. in the example in `_info`. Also for decode.

> +
> +/**
> + * base64_decoded_length - Calculate decode buffer length
> + * @param srclen Length of the data to be decoded
> + * @note This does not return the size of the decoded data!  see base64_decode
> + * @return Minimum buffer length for safe decode
> + */
> +size_t base64_decoded_length(size_t srclen);
> +
> +/**
> + * base64_decode - decode An rfc4648 base64-encoded string
> + * @param dest Buffer to decode into
> + * @param destlen Length of the destination buffer
> + * @param src Buffer to decode
> + * @param srclen Length of the data to decode
> + * @return Number of decoded bytes set in dest. -1 on error (and errno set)
> + * @note dest will be nul-padded to destlen

Since base-64 pads with `=` in the last bits this can give the wrong
impression!
In any case why should `dest` be nul-padded? (I always thought strncpy
was bad for doing that unless you write out directory entries in a
filesystem or something).

> + *
> + * This function decodes the buffer according to
> + * http://tools.ietf.org/html/rfc4648
> + *
> + * Example:
> + *	size_t decoded_length;
> + *	char ret[100];
> + *	const char *src = "Zm9vYmFyYmF6";
> + *	decoded_length = base64_decode(ret, sizeof(ret), src, strlen(src));
> + *	printf("Returned data of length %zd @%p\n", decoded_length, &ret);
> + */
> +size_t base64_decode(char *dest, const size_t destlen,
> +		     const char *src, const size_t srclen);
> +
> +
> +/**
> + * base64_encode_triplet - encode 3 bytes into base64
> + * @param dest Buffer containing at least 4 bytes

I'd remove the "at least" here, to match the type `char dest[4]`.

> + * @param src Buffer containing 3 bytes
> + */
> +void base64_encode_triplet(char dest[4], const char src[3]);

These sets of functions with tail, triplet, quartet seem
implementation-related and shouldn't be exposed as API (unless they're
useful for something I can't think of).

> +
> +/**
> + * base64_encode_tail - encode the final bytes of a source
> + * @param dest Buffer containing at least 4 bytes
> + * @param src Buffer containing at least srclen bytes
> + * @param srclen Number of bytes to encode in src
> + */
> +void base64_encode_tail(char dest[4], const char *src, const size_t srclen);
> +
> +
> +/**
> + * base64_decode_quartet - decode the first 4 characters in src into dest
> + * @param dest Buffer containing at least 3 bytes
> + * @param src Buffer containing at least 4 characters
> + * @return Number of decoded bytes set in dest. -1 on error (and errno set)
> + */
> +int base64_decode_quartet(char dest[3], const char src[4]);
> +
> +/**
> + * @brief decode the final bytes of a base64 string from src into dest
> + * @param dest Buffer containing 3 bytes
> + * @param src Buffer containing 4 bytes - padded with '=' as required
> + * @param srclen Number of bytes to decode in src
> + * @return Number of decoded bytes set in dest. -1 on error (and errno set)
> + */
> +size_t base64_decode_tail(char dest[3], const char src[4], const size_t srclen);
> +
> +
> +/**
> + * base64_encode_triplet_using_alphabet - encode 3 bytes into base64 using a specific alphabet
> + * @param alphabet Alphabet to use for encoding
> + * @param dest Buffer containing at least 3 bytes
> + * @param src Buffer containing at least 4 characters
> + */
> +void base64_encode_triplet_using_alphabet(const base64_alphabet_t *alphabet,
> +					  char dest[4], const char src[3]);
> +
> +/**
> + * base64_encode_tail_using_alphabet - encode the final bytes of a source using a specific alphabet
> + * @param alphabet Alphabet to use for encoding
> + * @param dest Buffer containing at least 4 bytes
> + * @param src Buffer containing at least srclen bytes
> + * @param srclen Number of bytes to encode in src
> + */
> +void base64_encode_tail_using_alphabet(const base64_alphabet_t *alphabet,
> +				       char dest[4],
> +				       const char *src, const size_t srclen);
> +
> +/**
> + * base64_encode_using_alphabet - encode a buffer into base64 using a specific alphabet
> + * @param alphabet Alphabet to use for encoding
> + * @param dest Buffer to encode into
> + * @param destlen Length of dest
> + * @param src Buffer to encode
> + * @param srclen Length of the data to encode
> + * @return Number of encoded bytes set in dest. -1 on error (and errno set)
> + * @note dest will be nul-padded to destlen
> + */
> +size_t base64_encode_using_alphabet(const base64_alphabet_t *alphabet,
> +				    char *dest, const size_t destlen,
> +				    const char *src, const size_t srclen);
> +
> +/**
> + * base64_init_alphabet - populate a base64_alphabet_t based on a supplied map
> + * @param dest A base64 alphabet
> + * @param src Map to populate the alphabet from (e.g. base64_alphabet_rfc4648)
> + */
> +void base64_init_alphabet(base64_alphabet_t *dest, const char src[64]);
> +
> +/*
> + * base64_char_in_alphabet - returns true if character can be part of an encoded string
> + * @param alphabet A base 64 alphabet (see base64_init_alphabet)
> + * @param b64char Character to check
> + */
> +int base64_char_in_alphabet(const base64_alphabet_t *alphabet,
> +			    const char b64char);

I'm assuming this is for checking if an encoded string is valid? If so,
wouldn't it be more straightforward to have a `validate` function (i.e.
one that just checks if the string is OK without actually decoding it)?

Ran

> +/**
> + * base64_decode_using_alphabet - decode a base64-encoded string using a specific alphabet
> + * @param alphabet A base 64 alphabet (see base64_init_alphabet)
> + * @param dest Buffer to decode into
> + * @param destlen length of dest
> + * @param src the buffer to decode
> + * @param srclen the length of the data to decode
> + * @return Number of decoded bytes set in dest. -1 on error (and errno set)
> + * @note dest will be nul-padded to destlen
> + */
> +size_t base64_decode_using_alphabet(const base64_alphabet_t *alphabet,
> +				    char *dest, const size_t destlen,
> +				    const char *src, const size_t srclen);
> +
> +/**
> + * base64_decode_quartet_using_alphabet - decode 4 bytes from base64 using a specific alphabet
> + * @param alphabet A base 64 alphabet (see base64_init_alphabet)
> + * @param dest Buffer containing at least 3 bytes
> + * @param src Buffer containing at least 4 characters
> + * @return Number of decoded bytes set in dest. -1 on error (and errno set)
> + */
> +int base64_decode_quartet_using_alphabet(const base64_alphabet_t *alphabet,
> +					 char dest[3], const char src[4]);
> +
> +/**
> + * base64_decode_tail_using_alphabet - decode the final bytes of a base64 string using a specific alphabet
> + * @param alphabet A base 64 alphabet (see base64_init_alphabet)
> + * @param dest Buffer of at least 3 bytes
> + * @param src Buffer containing 4 bytes - padded with '=' as required
> + * @param srclen Number of bytes to decode in src
> + * @return Number of decoded bytes set in dest. -1 on error (and errno set)
> + */
> +int base64_decode_tail_using_alphabet(const base64_alphabet_t *alphabet,
> +				      char *dest,
> +				      const char *src, const size_t srclen);
> +
> +#endif /* CCAN_BASE64_H */
> diff --git a/ccan/base64/test/moretap.h b/ccan/base64/test/moretap.h
> new file mode 100644
> index 0000000..114445c
> --- /dev/null
> +++ b/ccan/base64/test/moretap.h
> @@ -0,0 +1,96 @@
> +#ifndef _BASE64_MORETAP_H
> +#define _BASE64_MORETAP_H
> +
> +#include <ccan/str/str.h>
> +
> +/**
> + * is_str - OK if strings are equal
> + * @e1: expression for the variable string
> + * @e2: expression for the expected string
> + *
> + * If the strings are equal, the test passes.
> + *
> + * Example:
> + *     is_str(give_me_a_fred(),"fred");
> + */
> +static void _is_str(char *got,const char *expected, const char *got_string, const char *expected_string, const char *func, const char *file, int line) {
> +	if (streq(expected,got)) {
> +		_gen_result(1, func, file, line,"%s eq %s",
> +			    got_string,expected_string);
> +	} else {
> +		_gen_result(0, func, file, line,"%s eq %s",
> +			    got_string,expected_string);
> +		diag("Expected: %s",expected);
> +		diag("     Got: %s",got);
> +	}
> +}
> +# define is_str(got,expected) _is_str(got,expected,#got,#expected,__func__, __FILE__, __LINE__)
> +
> +
> +/**
> + * is_int - OK if arguments are equal when cast to integers
> + * @e1: expression for the number
> + * @e2: expression for the expected number
> + *
> + * If the numbers are equal, the test passes.
> + *
> + * Example:
> + *     is_int(give_me_17(),17);
> + */
> +# define is_int(e1,e2 ...)						\
> +  (((int)e1)==((int)e2) ?						\
> +   _gen_result(1, __func__, __FILE__, __LINE__,"%s == %s",#e1,#e2) :	\
> +   (_gen_result(0, __func__, __FILE__, __LINE__,"%s == %s",#e1,#e2)) || (diag("Expected: %d",e2),diag("     Got: %d",e1),0)) /* diag is void; note commas. */
> +
> +
> +
> +/**
> + * is_mem - OK if arguments are identical up to length @e3
> + * @e1: expression for the buffer
> + * @e2: expression for the expected buffer
> + * @e2: length to compare in buffers
> + *
> + * If the buffers are equal up to @e2, the test passes.
> + *
> + * Example:
> + *     is_mem(give_me_foo(),"foo",3);
> + */
> +static void _is_mem(const char *got, const char *expected, const size_t len,
> +	      const char *got_string, const char *expected_string, const char *len_string,
> +	      const char *func, const char *file, int line) {
> +	size_t offset = 0;
> +
> +	for (offset=0; offset<len; offset++) {
> +		if (got[offset] != expected[offset]) {
> +			_gen_result(0, func, file, line,"%s eq %s",got_string,expected_string);
> +			/* diag("Expected: %s",e2); */
> +			/* diag("     Got: %s",e1); */
> +			diag("Buffers differ at offset %zd (got=0x%02x expected=0x%02x)",
> +			     offset,got[offset],expected[offset]);
> +			return;
> +		}
> +	}
> +
> +	_gen_result(1, __func__, __FILE__, __LINE__,"%s eq %s",
> +		    expected_string,got_string);
> +}
> +# define is_mem(got,expected,len) \
> +	_is_mem(got,expected,len,#got,#expected,#len,__func__, __FILE__, __LINE__)
> +
> +/**
> + * is_size_t - OK if arguments are equal when cast to size_t
> + * @e1: expression for the number
> + * @e2: expression for the expected number
> + *
> + * If the numbers are equal, the test passes.
> + *
> + * Example:
> + *     is_size_t(give_me_17(),17);
> + */
> +# define is_size_t(e1,e2 ...)						\
> +  ((size_t)(e1)==((size_t)e2) ?						\
> +   _gen_result(1, __func__, __FILE__, __LINE__,"%s == %s",#e1,#e2) :	\
> +   (_gen_result(0, __func__, __FILE__, __LINE__,			\
> +		"%s == %s",#e1,#e2)) || (diag("Expected: %zd",(size_t)e2),diag("     Got: %zd",(size_t)e1),0)) /* diag is void; note commas. */
> +
> +#endif
> diff --git a/ccan/base64/test/run.c b/ccan/base64/test/run.c
> new file mode 100644
> index 0000000..2159409
> --- /dev/null
> +++ b/ccan/base64/test/run.c
> @@ -0,0 +1,351 @@
> +/* Start of run.c test */
> +#include "config.h"
> +
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <string.h>
> +
> +#include <ccan/base64/base64.h>
> +#include <ccan/tap/tap.h>
> +
> +#include <ccan/base64/base64.c>
> +#include "moretap.h"
> +
> +static void * xmalloc(size_t size);
> +
> +/* not defined in terms of test_encode_using_alphabet so we cross
> +   appropriate paths in library */
> +#define test_encode(src,srclen,expected)			\
> +	do {							\
> +		size_t destlen;					\
> +		char * dest;					\
> +		destlen = base64_encoded_length(srclen);	\
> +		destlen++; /* null termination */		\
> +		dest = xmalloc(destlen);			\
> +		ok1(base64_encode(dest,destlen,src,srclen) != -1);	\
> +		is_str(dest,expected);					\
> +		free(dest);						\
> +	} while (0)
> +
> +#define test_encode_using_alphabet(alphastring,src,srclen,expected)	\
> +	do {								\
> +		size_t destlen;						\
> +		char * dest;						\
> +		base64_alphabet_t alphabet;				\
> +		base64_init_alphabet(&alphabet,alphastring);		\
> +		destlen = base64_encoded_length(srclen);		\
> +		destlen++; /* null termination */		\
> +		dest = xmalloc(destlen);				\
> +		ok1(base64_encode_using_alphabet(&alphabet,dest,destlen,src,srclen) != -1); \
> +		is_str(dest,expected);					\
> +		free(dest);						\
> +	} while (0)
> +
> +/* not defined in terms of test_decode_using_alphabet so we cross
> +   appropriate paths in library */
> +#define test_decode(src,srclen,expected,expectedlen)			\
> +	do {								\
> +		size_t destlen;						\
> +		size_t bytes_used;					\
> +		char * dest;						\
> +		destlen = base64_decoded_length(srclen);		\
> +		dest = xmalloc(destlen);				\
> +		ok1((bytes_used = base64_decode(dest,destlen,src,srclen)) != -1); \
> +		is_size_t(bytes_used,expectedlen);			\
> +		is_mem(dest,expected,bytes_used);			\
> +		free(dest);						\
> +	} while (0)
> +
> +#define test_decode_using_alphabet(alphastring,src,srclen,expected,expectedlen) \
> +	do {								\
> +		size_t destlen;						\
> +		size_t bytes_used;					\
> +		char * dest;						\
> +		base64_alphabet_t alphabet;				\
> +									\
> +		base64_init_alphabet(&alphabet,alphastring);		\
> +		destlen = base64_decoded_length(srclen);		\
> +		dest = xmalloc(destlen);				\
> +		ok1((bytes_used = base64_decode_using_alphabet(&alphabet,dest,destlen,src,srclen)) != -1); \
> +		is_size_t(bytes_used,expectedlen);			\
> +		is_mem(dest,expected,bytes_used);			\
> +		free(dest);						\
> +	} while (0)
> +
> +#define check_bad_range_decode(stuff_to_test,stufflen)	\
> +do {							\
> +	char dest[10];							\
> +	errno = 0;							\
> +	is_size_t(base64_decode(dest,sizeof(dest),stuff_to_test,(size_t)stufflen), \
> +		  (size_t)-1);						\
> +	is_int(errno,ERANGE);						\
> +} while (0)
> +
> +int
> +main(int argc, char *argv[])
> +{
> +	plan_tests(131);
> +
> +	is_size_t(base64_encoded_length(0),(size_t)0);
> +	is_size_t(base64_encoded_length(1),(size_t)4);
> +	is_size_t(base64_encoded_length(2),(size_t)4);
> +	is_size_t(base64_encoded_length(3),(size_t)4);
> +	is_size_t(base64_encoded_length(512),(size_t)684);
> +
> +	/* straight from page 11 of http://tools.ietf.org/html/rfc4648 */ 
> +	test_encode("",0,"");
> +	test_encode("f",1,"Zg==");
> +	test_encode("fo",2,"Zm8=");
> +
> +	test_encode("foo",3,"Zm9v");
> +	test_encode("foob",4,"Zm9vYg==");
> +	test_encode("fooba",5,"Zm9vYmE=");
> +	test_encode("foobar",6,"Zm9vYmFy");
> +
> +	/* a few more */
> +	test_encode("foobarb",7,"Zm9vYmFyYg==");
> +	test_encode("foobarba",8,"Zm9vYmFyYmE=");
> +	test_encode("foobarbaz",9,"Zm9vYmFyYmF6");
> +
> +	test_encode("foobart",7,"Zm9vYmFydA==");
> +
> +	test_encode("abcdefghijklmnopqrstuvwxyz",26,"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo=");
> +	test_encode("\x05\x05\x01\x00\x07",5,"BQUBAAc=");
> +
> +	test_encode("FOO",3,"Rk9P");
> +	test_encode("Z",1,"Wg==");
> +
> +	/* decode testing */
> +
> +	test_decode("",0,"",0);
> +	test_decode("Zg==",4,"f",1);
> +	test_decode("Zm8=",4,"fo",2);
> +	test_decode("Zm9v",4,"foo",3);
> +	test_decode("Zm9vYg==",8,"foob",4);
> +	test_decode("Zm9vYmE=",8,"fooba",5);
> +	test_decode("Zm9vYmFy",8,"foobar",6);
> +	test_decode("Zm9vYmFyYg==",12,"foobarb",7);
> +	test_decode("Zm9vYmFyYmE=",12,"foobarba",8);
> +	test_decode("Zm9vYmFyYmF6",12,"foobarbaz",9);
> +
> +	test_decode("Rk9P",4,"FOO",3);
> +
> +	test_decode("Wg==",4,"Z",1);
> +	test_decode("AA==",4,"\0",1);
> +	test_decode("AAA=",4,"\0\0",2);
> +
> +	{
> +		const char *binary = "\x01\x00\x03";
> +		const size_t binarylen = 3;
> +		
> +		char * decoded;
> +		char * encoded;
> +		size_t encoded_len;
> +		size_t decoded_len;
> +		size_t decoded_space_required;
> +
> +		size_t encoded_space_required = base64_encoded_length(binarylen);
> +		encoded_space_required++; /* null termination */
> +		encoded = xmalloc(encoded_space_required);
> +		encoded_len = base64_encode(encoded,encoded_space_required,binary,binarylen);
> +		is_mem(encoded,"AQAD",encoded_len);
> +
> +		decoded_space_required = base64_decoded_length(encoded_len);
> +		decoded = xmalloc(decoded_space_required);
> +		decoded_len = base64_decode(decoded,decoded_space_required,encoded,encoded_len);
> +		is_size_t(decoded_len,binarylen);
> +		is_mem(binary,decoded,decoded_len);
> +	}
> +
> +	/* some expected encode failures: */
> +	{
> +		size_t destlen = 1;
> +		char dest[destlen];
> +		errno = 0;
> +		is_size_t(base64_encode(dest,destlen,"A",1),(size_t)-1);
> +		is_int(errno,EOVERFLOW);
> +	}
> +
> +	/* some expected decode failures: */
> +	{
> +		base64_alphabet_t alphabet;
> +		const char * src = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
> +		base64_init_alphabet(&alphabet,src);
> +
> +		is_int(sixbit_from_b64(&alphabet,'\xfe'),(signed char)-1);
> +		is_int(errno,ERANGE);
> +	}
> +	{
> +		size_t destlen = 10;
> +		char dest[destlen];
> +		errno = 0;
> +		is_size_t(base64_decode(dest,destlen,"A",1),(size_t)-1);
> +		is_int(errno,EINVAL);
> +	}
> +	{
> +		size_t destlen = 1;
> +		char dest[destlen];
> +		errno = 0;
> +		is_size_t(base64_decode(dest,destlen,"A",1),(size_t)-1);
> +		is_int(errno,EOVERFLOW);
> +	}
> +	{
> +		/* (char)1 is not a valid base64 character: */
> +		check_bad_range_decode("A\x01",2);
> +		/* (char)255 is not a valid base64 character: (char is signed on most platforms, so this is actually < 0 */
> +		check_bad_range_decode("\xff""A",2);
> +		check_bad_range_decode("A\xff",2);
> +		check_bad_range_decode("AA\xff",3);
> +		check_bad_range_decode("A\xff""A",3);
> +		check_bad_range_decode("\xff""AA",3);
> +		check_bad_range_decode("AAA\xff",4);
> +		check_bad_range_decode("\xff\x41\x41\x41\x41",5);
> +		check_bad_range_decode("A\xff\x41\x41\x41\x41",6);
> +		check_bad_range_decode("AA\xff\x41\x41\x41\x41",7);
> +		check_bad_range_decode("AAA\xff\x41\x41\x41\x41",8);
> +	}
> +	/* trigger some failures in the sixbit-to-b64 encoder: */
> +	/* this function now aborts rather than returning -1/setting errno */
> +	/* { */
> +	/* 	is_int(sixbit_to_b64(base64_alphabet_rfc4648,'\x70'),(char)-1); */
> +	/* 	is_int(sixbit_to_b64(base64_alphabet_rfc4648,'\xff'),(char)-1); */
> +	/* } */
> +	/* following tests all of the mapping from b64 chars to 6-bit values: */
> +	test_decode("//+FwHRSRIsFU2IhAEGD+AMPhOA=",28,"\xff\xff\x85\xc0\x74\x52\x44\x8b\x05\x53\x62\x21\x00\x41\x83\xf8\x03\x0f\x84\xe0",20);
> +	test_encode("\xff\xff\x85\xc0\x74\x52\x44\x8b\x05\x53\x62\x21\x00\x41\x83\xf8\x03\x0f\x84\xe0",20,"//+FwHRSRIsFU2IhAEGD+AMPhOA=");
> +
> +
> +	/* check the null-padding stuff */
> +	{
> +		size_t destlen = 8;
> +		char dest[destlen];
> +		memset(dest,'\1',sizeof(dest));
> +		is_size_t(base64_encode(dest,destlen,"A",1),(size_t)4);
> +		is_mem(&dest[4],"\0\0\0\0",4);
> +	}
> +	{
> +		size_t destlen = 3;
> +		char dest[destlen];
> +		memset(dest,'\1',sizeof(dest));
> +		is_size_t(base64_decode(dest,destlen,"Wg==",4), 1);
> +		is_mem(&dest[1],"\0",2);
> +	}
> +
> +	/* test encoding using different alphabets */
> +	{ 
> +		char alphabet_fs_safe[64];
> +		memcpy(alphabet_fs_safe,base64_alphabet_rfc4648,sizeof(alphabet_fs_safe));
> +		alphabet_fs_safe[62] = '-';
> +		alphabet_fs_safe[63] = '_';
> +		test_encode_using_alphabet(alphabet_fs_safe,"\xff\xff\x85\xc0\x74\x52\x44\x8b\x05\x53\x62\x21\x00\x41\x83\xf8\x03\x0f\x84\xe0",20,"__-FwHRSRIsFU2IhAEGD-AMPhOA=");
> +	}
> +
> +	/* test decoding using different alphabets */
> +	{ 
> +		char alphabet_fs_safe[64];
> +		#define src "__-FwHRSRIsFU2IhAEGD-AMPhOA="
> +		#define expected "\xff\xff\x85\xc0\x74\x52\x44\x8b\x05\x53\x62\x21\x00\x41\x83\xf8\x03\x0f\x84\xe0"
> +
> +		memcpy(alphabet_fs_safe,base64_alphabet_rfc4648,sizeof(alphabet_fs_safe));
> +		alphabet_fs_safe[62] = '-';
> +		alphabet_fs_safe[63] = '_';
> +
> +		test_decode_using_alphabet(alphabet_fs_safe,src,strlen(src),expected,20);
> +		#undef src
> +		#undef expected
> +	}
> +
> +	/* explicitly test the non-alphabet encode_triplet and
> +	   encode_tail functions */
> +	{
> +		size_t destlen = 4;
> +		char dest[destlen];
> +		const char *src = "AB\04";
> +		memset(dest,'\1',sizeof(dest));
> +		base64_encode_triplet(dest,src);
> +		is_mem(dest,"QUIE",sizeof(dest));
> +	}
> +	{
> +		size_t destlen = 4;
> +		char dest[destlen];
> +		const char *src = "A";
> +		memset(dest,'\1',sizeof(dest));
> +		base64_encode_tail(dest,src,strlen(src));
> +		is_mem(dest,"QQ==",sizeof(dest));
> +	}
> +
> +	/* test the alphabet inversion */
> +	{
> +		base64_alphabet_t dest;
> +		const char expected_inverse[] =
> +			"~~~~~" /* 0 */
> +			"~~~~~" /* 5 */
> +			"~~~~~" /* 10 */
> +			"~~~~~" /* 15 */
> +			"~~~~~" /* 20 */
> +			"~~~~~" /* 25 */
> +			"~~~~~" /* 30 */
> +			"~~~~~" /* 35 */
> +			"~~~\x3e~" /* 40 */
> +			"~~\x3f" /* 45 - */
> +			"\x34\x35" /* 48 0 */
> +			"\x36\x37\x38\x39\x3a" /* 50 */
> +			"\x3b\x3c\x3d~~" /* 55 */
> +			"~~~~~" /* 60 */
> +			"\x00\x01\x02\x03\x04" /* 65 A */
> +			"\x05\x06\x07\x08\x09" /* 70 */
> +			"\x0a\x0b\x0c\x0d\x0e" /* 75 */
> +			"\x0f\x10\x11\x12\x13" /* 80 */
> +			"\x14\x15\x16\x17\x18" /* 85 */
> +			"\x19~~~~" /* 90 */
> +			"~~" /* 95 _ */
> +			"\x1a\x1b\x1c" /* 97 a */
> +			"\x1d\x1e\x1f\x20\x21" /* 100 */
> +			"\x22\x23\x24\x25\x26" /* 105 */
> +			"\x27\x28\x29\x2a\x2b" /* 110 */
> +			"\x2c\x2d\x2e\x2f\x30" /* 115 */
> +			"\x31\x32\x33~~" /* 120 */
> +			"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" /* 125 */
> +			"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" /* 175 */
> +			"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" /* 225 */
> +			;
> +		const char * src = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
> +		base64_init_alphabet(&dest, src);
> +		is_mem(dest.decode_map, expected_inverse, 256);
> +		ok1(base64_char_in_alphabet(&dest,'A'));
> +		ok1(!base64_char_in_alphabet(&dest,'\n'));
> +	}
> +
> +	/* explicitly test the non-alpha decode_tail and decode_quartet */
> +	{
> +		char dest[4];
> +		const char *src = "QQ==";
> +		const char * expected = "A";
> +		memset(dest, '%', sizeof(dest));
> +		base64_decode_tail(dest,src,4);
> +		is_mem(dest, expected, 1);
> +	}
> +	{
> +		char dest[4];
> +		const char *src = "Zm9v";
> +		const char * expected = "foo";
> +		memset(dest, '%', sizeof(dest));
> +		base64_decode_quartet(dest,src);
> +		is_mem(dest, expected, 1);
> +	}
> +
> +	exit(exit_status());
> +}
> +
> +/*
> + * xmalloc - malloc() wrapper that never returns NULL.
> + * @size: number of bytes requested.
> + *
> + * Returns a pointer to at least @size bytes of uninitialised memory.  If
> + * the allocation fails, the reason is reported on stderr via perror() and
> + * the process is terminated with abort().
> + *
> + * A zero-byte request is rounded up to one byte: malloc(0) is permitted to
> + * return NULL on success, and that must not be mistaken for an
> + * out-of-memory condition.
> + */
> +static void * xmalloc(size_t size)
> +{
> +	void * ret;
> +
> +	if (size == 0)
> +		size = 1;
> +	ret = malloc(size);
> +	if (ret == NULL) {
> +		perror("malloc");
> +		abort();
> +	}
> +	return ret;
> +}
> +
> +/* End of run.c test */
> -- 
> 1.7.10.4
> 
> _______________________________________________
> ccan mailing list
> ccan at lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/ccan