root/maint/gnulib/lib/unilbrk/u8-width-linebreaks.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. u8_width_linebreaks
  2. read_file
  3. main

   1 /* Line breaking of UTF-8 strings.
   2    Copyright (C) 2001-2003, 2006-2021 Free Software Foundation, Inc.
   3    Written by Bruno Haible <bruno@clisp.org>, 2001.
   4 
   5    This file is free software.
   6    It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
   7    You can redistribute it and/or modify it under either
   8      - the terms of the GNU Lesser General Public License as published
   9        by the Free Software Foundation; either version 3, or (at your
  10        option) any later version, or
  11      - the terms of the GNU General Public License as published by the
  12        Free Software Foundation; either version 2, or (at your option)
  13        any later version, or
  14      - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
  15 
  16    This file is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19    Lesser General Public License and the GNU General Public License
  20    for more details.
  21 
  22    You should have received a copy of the GNU Lesser General Public
  23    License and of the GNU General Public License along with this
  24    program.  If not, see <https://www.gnu.org/licenses/>.  */
  25 
  26 #include <config.h>
  27 
  28 /* Specification.  */
  29 #include "unilbrk.h"
  30 
  31 #include "unistr.h"
  32 #include "uniwidth.h"
  33 
  34 int
  35 u8_width_linebreaks (const uint8_t *s, size_t n,
     /* [previous][next][first][last][top][bottom][index][help] */
  36                      int width, int start_column, int at_end_columns,
  37                      const char *o, const char *encoding,
  38                      char *p)
  39 {
  40   const uint8_t *s_end;
  41   char *last_p;
  42   int last_column;
  43   int piece_width;
  44 
  45   u8_possible_linebreaks (s, n, encoding, p);
  46 
  47   s_end = s + n;
  48   last_p = NULL;
  49   last_column = start_column;
  50   piece_width = 0;
  51   while (s < s_end)
  52     {
  53       ucs4_t uc;
  54       int count = u8_mbtouc_unsafe (&uc, s, s_end - s);
  55 
  56       /* Respect the override.  */
  57       if (o != NULL && *o != UC_BREAK_UNDEFINED)
  58         *p = *o;
  59 
  60       if (*p == UC_BREAK_POSSIBLE || *p == UC_BREAK_MANDATORY)
  61         {
  62           /* An atomic piece of text ends here.  */
  63           if (last_p != NULL && last_column + piece_width > width)
  64             {
  65               /* Insert a line break.  */
  66               *last_p = UC_BREAK_POSSIBLE;
  67               last_column = 0;
  68             }
  69         }
  70 
  71       if (*p == UC_BREAK_MANDATORY)
  72         {
  73           /* uc is a line break character.  */
  74           /* Start a new piece at column 0.  */
  75           last_p = NULL;
  76           last_column = 0;
  77           piece_width = 0;
  78         }
  79       else
  80         {
  81           /* uc is not a line break character.  */
  82           int w;
  83 
  84           if (*p == UC_BREAK_POSSIBLE)
  85             {
  86               /* Start a new piece.  */
  87               last_p = p;
  88               last_column += piece_width;
  89               piece_width = 0;
  90               /* No line break for the moment, may be turned into
  91                  UC_BREAK_POSSIBLE later, via last_p. */
  92             }
  93 
  94           *p = UC_BREAK_PROHIBITED;
  95 
  96           w = uc_width (uc, encoding);
  97           if (w >= 0) /* ignore control characters in the string */
  98             piece_width += w;
  99         }
 100 
 101       s += count;
 102       p += count;
 103       if (o != NULL)
 104         o += count;
 105     }
 106 
 107   /* The last atomic piece of text ends here.  */
 108   if (last_p != NULL && last_column + piece_width + at_end_columns > width)
 109     {
 110       /* Insert a line break.  */
 111       *last_p = UC_BREAK_POSSIBLE;
 112       last_column = 0;
 113     }
 114 
 115   return last_column + piece_width;
 116 }
 117 
 118 
 119 #ifdef TEST
 120 
 121 #include <stdio.h>
 122 #include <stdlib.h>
 123 #include <string.h>
 124 
 125 /* Read the contents of an input stream, and return it, terminated with a NUL
 126    byte. */
 127 char *
 128 read_file (FILE *stream)
     /* [previous][next][first][last][top][bottom][index][help] */
 129 {
 130 #define BUFSIZE 4096
 131   char *buf = NULL;
 132   int alloc = 0;
 133   int size = 0;
 134   int count;
 135 
 136   while (! feof (stream))
 137     {
 138       if (size + BUFSIZE > alloc)
 139         {
 140           alloc = alloc + alloc / 2;
 141           if (alloc < size + BUFSIZE)
 142             alloc = size + BUFSIZE;
 143           buf = realloc (buf, alloc);
 144           if (buf == NULL)
 145             {
 146               fprintf (stderr, "out of memory\n");
 147               exit (1);
 148             }
 149         }
 150       count = fread (buf + size, 1, BUFSIZE, stream);
 151       if (count == 0)
 152         {
 153           if (ferror (stream))
 154             {
 155               perror ("fread");
 156               exit (1);
 157             }
 158         }
 159       else
 160         size += count;
 161     }
 162   buf = realloc (buf, size + 1);
 163   if (buf == NULL)
 164     {
 165       fprintf (stderr, "out of memory\n");
 166       exit (1);
 167     }
 168   buf[size] = '\0';
 169   return buf;
 170 #undef BUFSIZE
 171 }
 172 
 173 int
 174 main (int argc, char * argv[])
     /* [previous][next][first][last][top][bottom][index][help] */
 175 {
 176   if (argc == 2)
 177     {
 178       /* Insert line breaks for a given width.  */
 179       int width = atoi (argv[1]);
 180       char *input = read_file (stdin);
 181       int length = strlen (input);
 182       char *breaks = malloc (length);
 183       int i;
 184 
 185       u8_width_linebreaks ((uint8_t *) input, length, width, 0, 0, NULL, "UTF-8", breaks);
 186 
 187       for (i = 0; i < length; i++)
 188         {
 189           switch (breaks[i])
 190             {
 191             case UC_BREAK_POSSIBLE:
 192               putc ('\n', stdout);
 193               break;
 194             case UC_BREAK_MANDATORY:
 195               break;
 196             case UC_BREAK_PROHIBITED:
 197               break;
 198             default:
 199               abort ();
 200             }
 201           putc (input[i], stdout);
 202         }
 203 
 204       free (breaks);
 205 
 206       return 0;
 207     }
 208   else
 209     return 1;
 210 }
 211 
 212 #endif /* TEST */

/* [previous][next][first][last][top][bottom][index][help] */