root/maint/gnulib/lib/readtokens.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. init_tokenbuffer
  2. get_nth_bit
  3. set_nth_bit
  4. readtoken
  5. readtokens

   1 /* readtokens.c  -- Functions for reading tokens from an input stream.
   2 
   3    Copyright (C) 1990-1991, 1999-2004, 2006, 2009-2021 Free Software
   4    Foundation, Inc.
   5 
   6    This program is free software: you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3 of the License, or
   9    (at your option) any later version.
  10 
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15 
  16    You should have received a copy of the GNU General Public License
  17    along with this program.  If not, see <https://www.gnu.org/licenses/>.
  18 
  19    Written by Jim Meyering. */
  20 
  21 /* This almost supersedes xreadline stuff -- using delim="\n"
  22    gives the same functionality, except that these functions
  23    would never return empty lines. */
  24 
  25 #include <config.h>
  26 
  27 #include "readtokens.h"
  28 
  29 #include <limits.h>
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32 #include <string.h>
  33 #include <stdbool.h>
  34 
  35 #include "xalloc.h"
  36 
  37 #if USE_UNLOCKED_IO
  38 # include "unlocked-io.h"
  39 #endif
  40 
  41 /* Initialize a tokenbuffer. */
  42 
  43 void
  44 init_tokenbuffer (token_buffer *tokenbuffer)
     /* [previous][next][first][last][top][bottom][index][help] */
  45 {
  46   tokenbuffer->size = 0;
  47   tokenbuffer->buffer = NULL;
  48 }
  49 
  50 typedef size_t word;
  51 enum { bits_per_word = sizeof (word) * CHAR_BIT };
  52 
  53 static bool
  54 get_nth_bit (size_t n, word const *bitset)
     /* [previous][next][first][last][top][bottom][index][help] */
  55 {
  56   return bitset[n / bits_per_word] >> n % bits_per_word & 1;
  57 }
  58 
  59 static void
  60 set_nth_bit (size_t n, word *bitset)
     /* [previous][next][first][last][top][bottom][index][help] */
  61 {
  62   size_t one = 1;
  63   bitset[n / bits_per_word] |= one << n % bits_per_word;
  64 }
  65 
  66 /* Read a token from STREAM into TOKENBUFFER.
  67    A token is delimited by any of the N_DELIM bytes in DELIM.
  68    Upon return, the token is in tokenbuffer->buffer and
  69    has a trailing '\0' instead of any original delimiter.
  70    The function value is the length of the token not including
  71    the final '\0'.  Upon EOF (i.e. on the call after the last
  72    token is read) or error, return -1 without modifying tokenbuffer.
  73    The EOF and error conditions may be distinguished in the caller
  74    by testing ferror (STREAM).
  75 
  76    This function works properly on lines containing NUL bytes
  77    and on files that do not end with a delimiter.  */
  78 
  79 size_t
  80 readtoken (FILE *stream,
     /* [previous][next][first][last][top][bottom][index][help] */
  81            const char *delim,
  82            size_t n_delim,
  83            token_buffer *tokenbuffer)
  84 {
  85   int c;
  86   idx_t i;
  87   word isdelim[(UCHAR_MAX + bits_per_word) / bits_per_word];
  88 
  89   memset (isdelim, 0, sizeof isdelim);
  90   for (i = 0; i < n_delim; i++)
  91     {
  92       unsigned char ch = delim[i];
  93       set_nth_bit (ch, isdelim);
  94     }
  95 
  96   /* skip over any leading delimiters */
  97   for (c = getc (stream); c >= 0 && get_nth_bit (c, isdelim); c = getc (stream))
  98     {
  99       /* empty */
 100     }
 101 
 102   char *p = tokenbuffer->buffer;
 103   idx_t n = tokenbuffer->size;
 104   i = 0;
 105   for (;;)
 106     {
 107       if (c < 0 && i == 0)
 108         return -1;
 109 
 110       if (i == n)
 111         p = xpalloc (p, &n, 1, -1, sizeof *p);
 112 
 113       if (c < 0)
 114         {
 115           p[i] = 0;
 116           break;
 117         }
 118       if (get_nth_bit (c, isdelim))
 119         {
 120           p[i] = 0;
 121           break;
 122         }
 123       p[i++] = c;
 124       c = getc (stream);
 125     }
 126 
 127   tokenbuffer->buffer = p;
 128   tokenbuffer->size = n;
 129   return i;
 130 }
 131 
 132 /* Build a NULL-terminated array of pointers to tokens
 133    read from STREAM.  Return the number of tokens read.
 134    All storage is obtained through calls to xmalloc-like functions.
 135 
 136    %%% Question: is it worth it to do a single
 137    %%% realloc() of 'tokens' just before returning? */
 138 
 139 size_t
 140 readtokens (FILE *stream,
     /* [previous][next][first][last][top][bottom][index][help] */
 141             size_t projected_n_tokens,
 142             const char *delim,
 143             size_t n_delim,
 144             char ***tokens_out,
 145             size_t **token_lengths)
 146 {
 147   token_buffer tb, *token = &tb;
 148   char **tokens;
 149   size_t *lengths;
 150   idx_t sz, n_tokens;
 151 
 152   if (projected_n_tokens == 0)
 153     projected_n_tokens = 64;
 154   else
 155     projected_n_tokens++;       /* add one for trailing NULL pointer */
 156 
 157   sz = projected_n_tokens;
 158   tokens = xnmalloc (sz, sizeof *tokens);
 159   lengths = xnmalloc (sz, sizeof *lengths);
 160 
 161   n_tokens = 0;
 162   init_tokenbuffer (token);
 163   for (;;)
 164     {
 165       char *tmp;
 166       size_t token_length = readtoken (stream, delim, n_delim, token);
 167       if (n_tokens >= sz)
 168         {
 169           tokens = xpalloc (tokens, &sz, 1, -1, sizeof *tokens);
 170           lengths = xreallocarray (lengths, sz, sizeof *lengths);
 171         }
 172 
 173       if (token_length == (size_t) -1)
 174         {
 175           /* don't increment n_tokens for NULL entry */
 176           tokens[n_tokens] = NULL;
 177           lengths[n_tokens] = 0;
 178           break;
 179         }
 180       tmp = xnmalloc (token_length + 1, sizeof *tmp);
 181       lengths[n_tokens] = token_length;
 182       tokens[n_tokens] = memcpy (tmp, token->buffer, token_length + 1);
 183       n_tokens++;
 184     }
 185 
 186   free (token->buffer);
 187   *tokens_out = tokens;
 188   if (token_lengths != NULL)
 189     *token_lengths = lengths;
 190   else
 191     free (lengths);
 192   return n_tokens;
 193 }

/* [previous][next][first][last][top][bottom][index][help] */