[ccan] tok_itr pull request

Sat Apr 6 09:18:38 EST 2013

Hi Rusty,
Thanks for the helpful suggestions, Ive sent another pull request with
updates to the code based on your suggestions, please let me know what you
think.

I addressed the truncation problem by refactoring some of the code to allow
for a tok_itr_val_len function which can be called before a call to
tok_itr_val such that the user of the module can allocate a correctly sized
buffer on their own. this also allows for another higher level module to be
built on top of the original one that captures some of the simplicity of
your suggestion which defined a fundamental tok_itr(...) function:

/**

 * tok_stream - pulls tokens out of a stream of
bytes
 *

 *
Example:

 *  const char *input[] =
{

 *    "A black hole is a region of
sp",

 *    "acetime from which gravity
pre",

 *    "vents anything, including
ligh",

 *    "t, from
escaping."

 *
};

 *  char
*tokens[32];

 *  int i,
tok_idx;

 *  struct tok_stream
ts;

 *

 *  tok_stream_init(&ts, '
');

 *  tok_idx =
0;

 *  for(i = 0; i < 4; i++)
{

 *    tok_stream_input(&ts,
input[i]);

 *    while(tok_stream(&ts, &tokens[tok_idx]) == 0)
{
 *      printf("token '%s'\n",
tokens[tok_idx]);

 *
tok_idx++;

 *
}

 *
}

 *
 *  if(tok_stream_free(&ts, &tokens[tok_idx])) {
 *    printf("last token '%s'\n", tokens[tok_idx]);
 *    tok_idx++;
 *  }
 *
 */

struct tok_stream
{

        struct tok_itr
itr;

        char *partial_tok;
        size_t partial_tok_len;
        char delim;
};

void tok_stream_init(struct tok_stream *ts, char delim) {
    ts->partial_tok = NULL;
    ts->partial_tok_len = 0;
    ts->delim = delim;
}

void tok_stream_input(struct tok_stream *ts, const char *input) {
        tok_itr_init(&ts->itr, input, ts->delim);
}

bool tok_stream_free(struct tok_stream *ts, char **remainder) {
    if(ts->partial_tok != NULL) {
        if(remainder == NULL)
            free(ts->partial_tok);
        else
            *remainder = ts->partial_tok;

        return true;
    }

    return false;
}

static int add_val(const struct tok_stream *ts, char **base, size_t *len) {
    size_t offset, val_len;
    char *tmp;

    /* add one for the null byte */
    val_len = tok_itr_val_len(&ts->itr) + 1;
    if(*base == NULL) {
        offset = 0;
        *base = malloc(sizeof(char) * val_len);
        if(*base == NULL)
            return -1;
    }
    else {
        /* subtract 1 from new len b.c. we will
         * overwrite the null byte that terminates base. */
        offset = *len - 1;

        tmp = realloc(*base, sizeof(char)*(offset + val_len));
        if(tmp == NULL)
            return -1;

        *base = tmp;
    }

    tok_itr_val(&ts->itr, *base + offset, val_len);
    *len = offset + val_len;
    return 0;
}

int tok_stream(struct tok_stream *ts, char **token) {
    size_t len;

    if(tok_itr_end(&ts->itr))
        return -1;

    /* if this is true then this is the last
     * token AND its a partial token (not empty),
     * so we return NULL and save this partial
     * token in ts->partial_tok */
    if(tok_itr_partial_val(&ts->itr) ) {
        if(add_val(ts, &ts->partial_tok, &ts->partial_tok_len) != 0)
            return -2;

        return -1;
    }

    if(ts->partial_tok != NULL) {
        *token = ts->partial_tok;
        len = ts->partial_tok_len;
    }
    else {
        *token = NULL;
        len = 0;
    }

    if(add_val(ts, token, &len) != 0)
        return -2;

    ts->partial_tok = NULL;
    tok_itr_next(&ts->itr);
    return 0;
} /* tok_stream */

If you are ok with the current design of tok_itr, I could also submit this
second module (or maybe integrate it into tok_itr) which does all the
allocation for the user.

P.S. you can just call me anthony, i had forgotten to change my display
name back after changing it. woops :p

On Tue, Apr 2, 2013 at 6:03 PM, Rusty Russell <rusty at rustcorp.com.au> wrote:

> anto canto <ar.cantor at gmail.com> writes:
> > Hello Rusty,
>
> Hi Anto!
>
> > I created a pull request on github for a small module which iterates over
> > token strings. If you don't want to pull it that's fine, but since I
> > haven't gotten a response from you on github I wanted to point it out
> here
> > on the mailing list.
>
> Hmm, I missed another one.  Ah, github doesn't put my email address in
> the to: header, which was defeating my filters.  Fixed, thanks.
>
> Now, as to your module.  Since it's your first contribution, I'll give
> as much advice as I can.  You don't need to follow *any* of it if you
> don't want to, though.
>
> > +++ b/ccan/tok_itr/_info
> > @@ -0,0 +1,41 @@
> > +#include <string.h>
> > +#include "config.h"
> > +
> > +/**
> > + * tok_itr - iterate over tokens in a string
> > + *
> > + * The list header contains routines for manipulating double linked
> lists.
> > + * It defines two types: struct list_head used for anchoring lists, and
> > + * struct list_node which is usually embedded in the structure which
> > is place> d
> > + * in the list.
>
> This is the wrong description :)
>
> > diff --git a/ccan/tok_itr/tok_itr.c b/ccan/tok_itr/tok_itr.c
> > new file mode 100644
> > index 0000000..63db8f6
> > --- /dev/null
> > +++ b/ccan/tok_itr/tok_itr.c
> > @@ -0,0 +1,49 @@
> > +/* Licensed under LGPL - see LICENSE file for details */
> > +#include "tok_itr.h"
> > +
> > +void tok_itr_init(struct tok_itr *itr, const char *list, char delim) {
> > +
> > +     itr->delim = delim;
> > +     itr->curr = list;
> > +}
> > +
> > +int tok_itr_end(const struct tok_itr *itr) {
> > +     return (itr->curr == NULL);
> > +}
>
> Shouldn't this return bool?
>
> > +size_t tok_itr_val(const struct tok_itr *itr, char *val, size_t size) {
> > +     char *next;
> > +     size_t amt, len;
> > +
> > +     if(size < 1)
> > +             return 0;
>
> I thought this function was supposed to return the length of the token,
> but here it's returning 0.
>
> > +     next = strchr(itr->curr, itr->delim);
> > +     if(next == NULL)
> > +             len = strlen(itr->curr);
> > +     else if(next == itr->curr)
> > +             len = 0;
> > +     else  /* next > itr->curr */
> > +             len = next - itr->curr;
>
> Note that the "if(next == itr->curr)" is redundant, as the else
> statement does the right thing.
>
> > +     /* only want to copy at most size - 1 bytes because
> > +      * we need to save the last spot for '\0' */
> > +     size--;
> > +     amt = (len > size)? size : len;
> > +
> > +     if(amt > 0)
> > +             strncpy(val, itr->curr, amt);
>
> This can be a memcpy, since you know amt is < strlen(itr->curr).
>
> > +
> > +     /* just copied bytes 0->(amt-1) for total of *amt* bytes.
> > +      * now add the null byte to pos at *amt*       */
> > +     val[amt] = '\0';
> > +
> > +     return len;
>
> I'm uncomfortable with this, because (as your examples show) it usually
> means people will truncate tokens.  I think we can do better.
>
> How about we allocate if the token doesn't fit in the buffer, and cache
> the pointer in struct tok_itr?
>
> struct tok_itr {
>         char delim;
>         const char *curr;
>         size_t alloc_len;
>         char *alloc_buf;
> };
>
> static inline void tok_itr_init(struct tok_itr *itr,
>                                 const char *list, char delim)
> {
>         itr->delim = delim;
>         itr->curr = list;
>         itr->alloc_len = 0;
>         itr->alloc_buf = NULL;
> }
>
> Now we can make tok_itr the fundamental function:
>
> /* Returns buf, or an allocated buffer if it doesn't fit.  Returns NULL
>  * on no-more-tokens or malloc failure. */
> char *tok_itr(struct tok_itr *itr, char *buf, size_t buflen)
> {
>         char *next;
>         size_t toklen;
>
>         if (!itr->curr)
>                 goto cleanup;
>
>         next = strchr(itr->curr, itr->delim);
>         if (next == NULL)
>                 toklen = strlen(itr->curr);
>         else
>                 toklen = next - itr->curr;
>
>         /* Can't fit in their buffer (incl nul terminator)? */
>         if (buflen < toklen + 1) {
>                 /* Resize/allocate our internal buffer */
>                 if (itr->alloc_len < toklen + 1) {
>                         char *a = realloc(itr->alloc, toklen + 1);
>                         if (!a)
>                                 goto cleanup;
>                         itr->alloc = a;
>                         itr->alloc_len = toklen + 1;
>                 }
>                 buf = itr->alloc;
>         }
>
>         memcpy(buf, itr->curr, toklen);
>         buf[toklen] = '\0';
>         itr->curr = next;
>         return buf;
>
> cleanup:
>         free(itr->alloc);
>         return NULL;
> }
>
> If you really want a foreach macro, it becomes:
>
> #define TOK_ITR_FOREACH(buf, buf_size, ptr, list, delim, tok_itr_ptr)   \
>         for (tok_itr_init(tok_itr_ptr, list, delim),                    \
>                 ptr = tok_itr(tok_itr_ptr, buf, buf_size);              \
>                 ptr;                                                    \
>                 ptr = tok_itr(tok_itr_ptr, buf, buf_size))
>
> There need to be more brackets in here to protect complex expressions,
> though, at minimum around (ptr) (which, admittedly, is unlikely to be a
> complex expression):
>
>         for (tok_itr_init(tok_itr_ptr, list, delim),                    \
>                 (ptr) = tok_itr(tok_itr_ptr, buf, buf_size);            \
>                 (ptr);                                                  \
>                 (ptr) = tok_itr(tok_itr_ptr, buf, buf_size))
>
> Though I generally prefer my arguments in "context first" order, with
> trivia at the end, which means tok_itr_ptr and list are probably needed
> at the front:
>
>         TOK_ITR_FOREACH(tok_itr_ptr, list, delim, ptr, buf, buf_size)
>
> Example usage:
>         TOK_ITR_FOREACH(&itr, "this is a test", ' ', p, NULL, 0) {
>
> This is still a bit messy, so maybe the buffer should be in token setup,
> not in the iteration.  We have space in the structure, so this is easy:
>
> struct tok_itr {
>         char delim;
>         bool allocated;
>         const char *curr;
>         size_t buf_len;
>         char *buf;
> };
>
> static inline void tok_itr_init(struct tok_itr *itr, char *buf, size_t
> buflen,
>                                 const char *list, char delim)
> {
>         itr->delim = delim;
>         itr->allocated = false;
>         itr->curr = list;
>         itr->buf_len = buflen;
>         itr->buf = buf;
> }
>
> This makes usage pretty simple, with no looping macro required.
>
> static void print_split_by_colon(const char *string)
> {
>         struct tok_itr itr;
>         char buf[32], *p;
>
>         tok_itr_init(&itr, buf, sizeof(buf), string, ';');
>         while (p = tok_itr(&itr))
>                 printf("%s\n", p);
> }
>
> Cheers,
> Rusty.
> PS.  Completely untested, of course!
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.ozlabs.org/pipermail/ccan/attachments/20130405/1f0a9639/attachment-0001.html>