/* Word breaks in UTF-8 strings. Copyright (C) 2009-2022 Free Software Foundation, Inc. Written by Bruno Haible , 2009. This file is free software. It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". You can redistribute it and/or modify it under either - the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version, or - the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version, or - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". This file is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License and the GNU General Public License for more details. You should have received a copy of the GNU Lesser General Public License and of the GNU General Public License along with this program. If not, see . */ #include /* Specification. */ #include "uniwbrk.h" #include #include "unictype.h" #include "unistr.h" #include "uniwbrk/wbrktable.h" #define FUNC u8_wordbreaks #define UNIT uint8_t #define U_MBTOUC_UNSAFE u8_mbtouc_unsafe #include "u-wordbreaks.h" #ifdef TEST #include #include /* Read the contents of an input stream, and return it, terminated with a NUL byte. */ char * read_file (FILE *stream) { #define BUFSIZE 4096 char *buf = NULL; int alloc = 0; int size = 0; int count; while (! feof (stream)) { if (size + BUFSIZE > alloc) { alloc = alloc + alloc / 2; if (alloc < size + BUFSIZE) alloc = size + BUFSIZE; buf = realloc (buf, alloc); if (buf == NULL) { fprintf (stderr, "out of memory\n"); exit (1); } } count = fread (buf + size, 1, BUFSIZE, stream); if (count == 0) { if (ferror (stream)) { perror ("fread"); exit (1); } } else size += count; } buf = realloc (buf, size + 1); if (buf == NULL) { fprintf (stderr, "out of memory\n"); exit (1); } buf[size] = '\0'; return buf; #undef BUFSIZE } int main (int argc, char * argv[]) { if (argc == 1) { /* Display all the word breaks in the input string. */ char *input = read_file (stdin); int length = strlen (input); char *breaks = malloc (length); int i; u8_wordbreaks ((uint8_t *) input, length, breaks); for (i = 0; i < length; i++) { switch (breaks[i]) { case 1: /* U+2027 in UTF-8 encoding */ putc (0xe2, stdout); putc (0x80, stdout); putc (0xa7, stdout); break; case 0: break; default: abort (); } putc (input[i], stdout); } free (breaks); return 0; } else return 1; } #endif /* TEST */