<div class="gmail_quote"><div dir="ltr"><div><div><div>Hi Rusty,<br></div><div>Thanks for the helpful suggestions, Ive sent another pull request with updates to the code based on your suggestions, please let me know what you think. <br>
</div><br></div>
I addressed the truncation problem by refactoring some of the code to allow for a tok_itr_val_len function which can be called before a call to tok_itr_val such that the user of the module can allocate a correctly sized buffer on their own. this also allows for another higher level module to be built on top of the original one that captures some of the simplicity of your suggestion which defined a fundamental tok_itr(...) function:<br>

<br>/**                                                                                                                       <br> * tok_stream - pulls tokens out of a stream of bytes                                                                <br>

 *                                                                                                                       <br> * Example:                                                                                                                <br>

 *  const char *input[] = {                                                                                                  <br> *    "A black hole is a region of sp",                                                                                      <br>

 *    "acetime from which gravity pre",                                                                                        <br> *    "vents anything, including ligh",                                                                                        <br>

 *    "t, from escaping."                                                                                                      <br> *  };                                                                                                                           <br>

 *  char *tokens[32];                                                                                                            <br> *  int i, tok_idx;                                                                                                              <br>

 *  struct tok_stream ts;                                                                                                      <br> *                                                                                                                           <br>

 *  tok_stream_init(&ts, ' ');                                                                                           <br> *  tok_idx = 0;                                                                                                     <br>

 *  for(i = 0; i < 4; i++) {                                                                                 <br> *    tok_stream_input(&ts, input[i]);                                                                       <br>

 *    while(tok_stream(&ts, &tokens[tok_idx]) == 0) {                                                        <br> *      printf("token '%s'\n", tokens[tok_idx]);                                                             <br>

 *      tok_idx++;                                                                                           <br> *    }                                                                                                      <br>

 *  }                                                                                                        <br> *                        <br> *  if(tok_stream_free(&ts, &tokens[tok_idx])) {<br> *    printf("last token '%s'\n", tokens[tok_idx]);<br>

 *    tok_idx++;<br> *  }<br> *<br> */<br><br>struct tok_stream {                                                                                                      <br>        struct tok_itr itr;                                                                                          <br>

        char *partial_tok;<br>        size_t partial_tok_len;<br>        char delim;<br>};<br><br>void tok_stream_init(struct tok_stream *ts, char delim) {<br>    ts->partial_tok = NULL;<br>    ts->partial_tok_len = 0;<br>

    ts->delim = delim;<br>}<br><br>void tok_stream_input(struct tok_stream *ts, const char *input) {<br>        tok_itr_init(&ts->itr, input, ts->delim);<br>}<br><br>bool tok_stream_free(struct tok_stream *ts, char **remainder) {<br>

    if(ts->partial_tok != NULL) {<br>        if(remainder == NULL)<br>            free(ts->partial_tok);<br>        else<br>            *remainder = ts->partial_tok;<br><br>        return true;<br>    }<br><br>    return false;<br>

}<br><br>static int add_val(const struct tok_stream *ts, char **base, size_t *len) {<br>    size_t offset, val_len;<br>    char *tmp;<br><br>    /* add one for the null byte */<br>    val_len = tok_itr_val_len(&ts->itr) + 1;<br>

    if(*base == NULL) {<br>        offset = 0;<br>        *base = malloc(sizeof(char) * val_len);<br>        if(*base == NULL)<br>            return -1;<br>    }<br>    else {<br>        /* subtract 1 from new len b.c. we will<br>

         * overwrite the null byte that terminates base. */<br>        offset = *len - 1;<br><br>        tmp = realloc(*base, sizeof(char)*(offset + val_len));<br>        if(tmp == NULL)<br>            return -1;<br><br>
        *base = tmp;<br>
    }<br><br>    tok_itr_val(&ts->itr, *base + offset, val_len);<br>    *len = offset + val_len;<br>    return 0;<br>}<br><br>int tok_stream(struct tok_stream *ts, char **token) {<br>    size_t len;<br><br>    if(tok_itr_end(&ts->itr))<br>

        return -1;<br><br>    /* if this is true then this is the last<br>     * token AND its a partial token (not empty),<br>     * so we return NULL and save this partial<br>     * token in ts->partial_tok */<br>    if(tok_itr_partial_val(&ts->itr) ) {<br>

        if(add_val(ts, &ts->partial_tok, &ts->partial_tok_len) != 0)<br>            return -2;<br><br>        return -1;<br>    }<br><br>    if(ts->partial_tok != NULL) {<br>        *token = ts->partial_tok;<br>

        len = ts->partial_tok_len;<br>    }<br>    else {<br>        *token = NULL;<br>        len = 0;<br>    }<br><br>    if(add_val(ts, token, &len) != 0)<br>        return -2;<br><br>    ts->partial_tok = NULL;<br>

    tok_itr_next(&ts->itr);<br>    return 0;<br>} /* tok_stream */<br><br><br>If you are ok with the current design of tok_itr, I could also submit 
this second module (or maybe integrate it into tok_itr) which does all 
the allocation for the user.<br></div><br><div><div>P.S. you can just call me anthony, i had forgotten to change my display name back after changing it. woops :p<br></div></div></div><div class="HOEnZb"><div class="h5"><div class="gmail_extra">
<br><br><div class="gmail_quote">
On Tue, Apr 2, 2013 at 6:03 PM, Rusty Russell <span dir="ltr"><<a href="mailto:rusty@rustcorp.com.au" target="_blank">rusty@rustcorp.com.au</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

anto canto <<a href="mailto:ar.cantor@gmail.com" target="_blank">ar.cantor@gmail.com</a>> writes:<br>
> Hello Rusty,<br>
<br>
Hi Anto!<br>
<div><div><br>
> I created a pull request on github for a small module which iterates over<br>
> token strings. If you don't want to pull it that's fine, but since I<br>
> haven't gotten a response from you on github I wanted to point it out here<br>
> on the mailing list.<br>
<br>
</div></div>Hmm, I missed another one.  Ah, github doesn't put my email address in<br>
the to: header, which was defeating my filters.  Fixed, thanks.<br>
<br>
Now, as to your module.  Since it's your first contribution, I'll give<br>
as much advice as I can.  You don't need to follow *any* of it if you<br>
don't want to, though.<br>
<br>
> +++ b/ccan/tok_itr/_info<br>
> @@ -0,0 +1,41 @@<br>
> +#include <string.h><br>
> +#include "config.h"<br>
> +<br>
> +/**<br>
> + * tok_itr - iterate over tokens in a string<br>
> + *<br>
> + * The list header contains routines for manipulating double linked lists.<br>
> + * It defines two types: struct list_head used for anchoring lists, and<br>
> + * struct list_node which is usually embedded in the structure which<br>
> is place> d<br>
> + * in the list.<br>
<br>
This is the wrong description :)<br>
<br>
> diff --git a/ccan/tok_itr/tok_itr.c b/ccan/tok_itr/tok_itr.c<br>
> new file mode 100644<br>
> index 0000000..63db8f6<br>
> --- /dev/null<br>
> +++ b/ccan/tok_itr/tok_itr.c<br>
> @@ -0,0 +1,49 @@<br>
> +/* Licensed under LGPL - see LICENSE file for details */<br>
> +#include "tok_itr.h"<br>
> +<br>
> +void tok_itr_init(struct tok_itr *itr, const char *list, char delim) {<br>
> +<br>
> +     itr->delim = delim;<br>
> +     itr->curr = list;<br>
> +}<br>
> +<br>
> +int tok_itr_end(const struct tok_itr *itr) {<br>
> +     return (itr->curr == NULL);<br>
> +}<br>
<br>
Shouldn't this return bool?<br>
<br>
> +size_t tok_itr_val(const struct tok_itr *itr, char *val, size_t size) {<br>
> +     char *next;<br>
> +     size_t amt, len;<br>
> +<br>
> +     if(size < 1)<br>
> +             return 0;<br>
<br>
I thought this function was supposed to return the length of the token,<br>
but here it's returning 0.<br>
<br>
> +     next = strchr(itr->curr, itr->delim);<br>
> +     if(next == NULL)<br>
> +             len = strlen(itr->curr);<br>
> +     else if(next == itr->curr)<br>
> +             len = 0;<br>
> +     else  /* next > itr->curr */<br>
> +             len = next - itr->curr;<br>
<br>
Note that the "if(next == itr->curr)" is redundant, as the else<br>
statement does the right thing.<br>
<br>
> +     /* only want to copy at most size - 1 bytes because<br>
> +      * we need to save the last spot for '\0' */<br>
> +     size--;<br>
> +     amt = (len > size)? size : len;<br>
> +<br>
> +     if(amt > 0)<br>
> +             strncpy(val, itr->curr, amt);<br>
<br>
This can be a memcpy, since you know amt is < strlen(itr->curr).<br>
<br>
> +<br>
> +     /* just copied bytes 0->(amt-1) for total of *amt* bytes.<br>
> +      * now add the null byte to pos at *amt*       */<br>
> +     val[amt] = '\0';<br>
> +<br>
> +     return len;<br>
<br>
I'm uncomfortable with this, because (as your examples show) it usually<br>
means people will truncate tokens.  I think we can do better.<br>
<br>
How about we allocate if the token doesn't fit in the buffer, and cache<br>
the pointer in struct tok_itr?<br>
<br>
struct tok_itr {<br>
        char delim;<br>
        const char *curr;<br>
        size_t alloc_len;<br>
        char *alloc_buf;<br>
};<br>
<br>
static inline void tok_itr_init(struct tok_itr *itr,<br>
                                const char *list, char delim)<br>
{<br>
        itr->delim = delim;<br>
        itr->curr = list;<br>
        itr->alloc_len = 0;<br>
        itr->alloc_buf = NULL;<br>
}<br>
<br>
Now we can make tok_itr the fundamental function:<br>
<br>
/* Returns buf, or an allocated buffer if it doesn't fit.  Returns NULL<br>
 * on no-more-tokens or malloc failure. */<br>
char *tok_itr(struct tok_itr *itr, char *buf, size_t buflen)<br>
{<br>
        char *next;<br>
        size_t toklen;<br>
<br>
        if (!itr->curr)<br>
                goto cleanup;<br>
<br>
        next = strchr(itr->curr, itr->delim);<br>
        if (next == NULL)<br>
                toklen = strlen(itr->curr);<br>
        else<br>
                toklen = next - itr->curr;<br>
<br>
        /* Can't fit in their buffer (incl nul terminator)? */<br>
        if (buflen < toklen + 1) {<br>
                /* Resize/allocate our internal buffer */<br>
                if (itr->alloc_len < toklen + 1) {<br>
                        char *a = realloc(itr->alloc, toklen + 1);<br>
                        if (!a)<br>
                                goto cleanup;<br>
                        itr->alloc = a;<br>
                        itr->alloc_len = toklen + 1;<br>
                }<br>
                buf = itr->alloc;<br>
        }<br>
<br>
        memcpy(buf, itr->curr, toklen);<br>
        buf[toklen] = '\0';<br>
        itr->curr = next;<br>
        return buf;<br>
<br>
cleanup:<br>
        free(itr->alloc);<br>
        return NULL;<br>
}<br>
<br>
If you really want a foreach macro, it becomes:<br>
<br>
#define TOK_ITR_FOREACH(buf, buf_size, ptr, list, delim, tok_itr_ptr)   \<br>
        for (tok_itr_init(tok_itr_ptr, list, delim),                    \<br>
                ptr = tok_itr(tok_itr_ptr, buf, buf_size);              \<br>
                ptr;                                                    \<br>
                ptr = tok_itr(tok_itr_ptr, buf, buf_size))<br>
<br>
There need to be more brackets in here to protect complex expressions,<br>
though, at minimum around (ptr) (which, admittedly, is unlikely to be a<br>
complex expression):<br>
<br>
        for (tok_itr_init(tok_itr_ptr, list, delim),                    \<br>
                (ptr) = tok_itr(tok_itr_ptr, buf, buf_size);            \<br>
                (ptr);                                                  \<br>
                (ptr) = tok_itr(tok_itr_ptr, buf, buf_size))<br>
<br>
Though I generally prefer my arguments in "context first" order, with<br>
trivia at the end, which means tok_itr_ptr and list are probably needed<br>
at the front:<br>
<br>
        TOK_ITR_FOREACH(tok_itr_ptr, list, delim, ptr, buf, buf_size)<br>
<br>
Example usage:<br>
        TOK_ITR_FOREACH(&itr, "this is a test", ' ', p, NULL, 0) {<br>
<br>
This is still a bit messy, so maybe the buffer should be in token setup,<br>
not in the iteration.  We have space in the structure, so this is easy:<br>
<br>
struct tok_itr {<br>
        char delim;<br>
        bool allocated;<br>
        const char *curr;<br>
        size_t buf_len;<br>
        char *buf;<br>
};<br>
<br>
static inline void tok_itr_init(struct tok_itr *itr, char *buf, size_t buflen,<br>
                                const char *list, char delim)<br>
{<br>
        itr->delim = delim;<br>
        itr->allocated = false;<br>
        itr->curr = list;<br>
        itr->buf_len = buflen;<br>
        itr->buf = buf;<br>
}<br>
<br>
This makes usage pretty simple, with no looping macro required.<br>
<br>
static void print_split_by_colon(const char *string)<br>
{<br>
        struct tok_itr itr;<br>
        char buf[32], *p;<br>
<br>
        tok_itr_init(&itr, buf, sizeof(buf), string, ';');<br>
        while (p = tok_itr(&itr))<br>
                printf("%s\n", p);<br>
}<br>
<br>
Cheers,<br>
Rusty.<br>
PS.  Completely untested, of course!<br>
</blockquote></div><br></div>
</div></div></div><br>