root/maint/gnulib/lib/uniwbrk/u8-wordbreaks.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. read_file
  2. main

   1 /* Word breaks in UTF-8 strings.
   2    Copyright (C) 2009-2021 Free Software Foundation, Inc.
   3    Written by Bruno Haible <bruno@clisp.org>, 2009.
   4 
   5    This file is free software.
   6    It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
   7    You can redistribute it and/or modify it under either
   8      - the terms of the GNU Lesser General Public License as published
   9        by the Free Software Foundation; either version 3, or (at your
  10        option) any later version, or
  11      - the terms of the GNU General Public License as published by the
  12        Free Software Foundation; either version 2, or (at your option)
  13        any later version, or
  14      - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
  15 
  16    This file is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19    Lesser General Public License and the GNU General Public License
  20    for more details.
  21 
  22    You should have received a copy of the GNU Lesser General Public
  23    License and of the GNU General Public License along with this
  24    program.  If not, see <https://www.gnu.org/licenses/>.  */
  25 
  26 #include <config.h>
  27 
  28 /* Specification.  */
  29 #include "uniwbrk.h"
  30 
  31 #include <string.h>
  32 
  33 #include "unistr.h"
  34 #include "uniwbrk/wbrktable.h"
  35 
  36 #define FUNC u8_wordbreaks  /* presumably the name of the function the template below defines; main() in this file calls it */
  37 #define UNIT uint8_t  /* presumably the code unit type of the strings processed (UTF-8 bytes) -- confirm in u-wordbreaks.h */
  38 #define U_MBTOUC_UNSAFE u8_mbtouc_unsafe  /* presumably the decoder the template uses to fetch one character -- confirm in u-wordbreaks.h */
  39 #include "u-wordbreaks.h"  /* included after the three macros above are set, so it can refer to them */
  40 
  41 
  42 #ifdef TEST
  43 
  44 #include <stdio.h>
  45 #include <stdlib.h>
  46 
  47 /* Read the contents of an input stream, and return it, terminated with a NUL
  48    byte. */
  49 char *
  50 read_file (FILE *stream)
     /* [previous][next][first][last][top][bottom][index][help] */
  51 {
  52 #define BUFSIZE 4096
  53   char *buf = NULL;
  54   int alloc = 0;
  55   int size = 0;
  56   int count;
  57 
  58   while (! feof (stream))
  59     {
  60       if (size + BUFSIZE > alloc)
  61         {
  62           alloc = alloc + alloc / 2;
  63           if (alloc < size + BUFSIZE)
  64             alloc = size + BUFSIZE;
  65           buf = realloc (buf, alloc);
  66           if (buf == NULL)
  67             {
  68               fprintf (stderr, "out of memory\n");
  69               exit (1);
  70             }
  71         }
  72       count = fread (buf + size, 1, BUFSIZE, stream);
  73       if (count == 0)
  74         {
  75           if (ferror (stream))
  76             {
  77               perror ("fread");
  78               exit (1);
  79             }
  80         }
  81       else
  82         size += count;
  83     }
  84   buf = realloc (buf, size + 1);
  85   if (buf == NULL)
  86     {
  87       fprintf (stderr, "out of memory\n");
  88       exit (1);
  89     }
  90   buf[size] = '\0';
  91   return buf;
  92 #undef BUFSIZE
  93 }
  94 
  95 int
  96 main (int argc, char * argv[])
     /* [previous][next][first][last][top][bottom][index][help] */
  97 {
  98   if (argc == 1)
  99     {
 100       /* Display all the word breaks in the input string.  */
 101       char *input = read_file (stdin);
 102       int length = strlen (input);
 103       char *breaks = malloc (length);
 104       int i;
 105 
 106       u8_wordbreaks ((uint8_t *) input, length, breaks);
 107 
 108       for (i = 0; i < length; i++)
 109         {
 110           switch (breaks[i])
 111             {
 112             case 1:
 113               /* U+2027 in UTF-8 encoding */
 114               putc (0xe2, stdout); putc (0x80, stdout); putc (0xa7, stdout);
 115               break;
 116             case 0:
 117               break;
 118             default:
 119               abort ();
 120             }
 121           putc (input[i], stdout);
 122         }
 123 
 124       free (breaks);
 125 
 126       return 0;
 127     }
 128   else
 129     return 1;
 130 }
 131 
 132 #endif /* TEST */

/* [previous][next][first][last][top][bottom][index][help] */