diff options
Diffstat (limited to 'lib/unilbrk/ulc-width-linebreaks.c')
-rw-r--r-- | lib/unilbrk/ulc-width-linebreaks.c | 94 |
1 files changed, 64 insertions, 30 deletions
diff --git a/lib/unilbrk/ulc-width-linebreaks.c b/lib/unilbrk/ulc-width-linebreaks.c index fbe970d..b7e6c7c 100644 --- a/lib/unilbrk/ulc-width-linebreaks.c +++ b/lib/unilbrk/ulc-width-linebreaks.c @@ -1,28 +1,27 @@ /* Line breaking of strings. - Copyright (C) 2001-2003, 2006-2018 Free Software Foundation, Inc. + Copyright (C) 2001-2003, 2006-2022 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2001. - This program is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - This program is distributed in the hope that it will be useful, + This file is free software. + It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". + You can redistribute it and/or modify it under either + - the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version, or + - the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) + any later version, or + - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". + + This file is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. + Lesser General Public License and the GNU General Public License + for more details. - You should have received a copy of the GNU Lesser General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. */ + You should have received a copy of the GNU Lesser General Public + License and of the GNU General Public License along with this + program. If not, see <https://www.gnu.org/licenses/>. */ #include <config.h> @@ -34,6 +33,8 @@ #include "c-ctype.h" #include "uniconv.h" +#include "unilbrk/internal.h" +#include "unilbrk/lbrktables.h" #include "unilbrk/ulc-common.h" /* Line breaking of a string in an arbitrary encoding. @@ -49,16 +50,16 @@ but this is not backed by an RFC. So we use UTF-8. It supports characters up to \U7FFFFFFF and is unambiguously defined. */ -int -ulc_width_linebreaks (const char *s, size_t n, - int width, int start_column, int at_end_columns, - const char *o, const char *encoding, - char *p) +static int +ulc_width_linebreaks_internal (const char *s, size_t n, + int width, int start_column, int at_end_columns, + const char *o, const char *encoding, int cr, + char *p) { if (n > 0) { if (is_utf8_encoding (encoding)) - return u8_width_linebreaks ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, p); + return u8_width_linebreaks_internal ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, cr, p); else { /* Convert the string to UTF-8 and build a translation table @@ -95,7 +96,7 @@ ulc_width_linebreaks (const char *s, size_t n, /* Determine the line breaks of the UTF-8 string. */ res_column = - u8_width_linebreaks (t, m, width, start_column, at_end_columns, o8, encoding, q); + u8_width_linebreaks_internal (t, m, width, start_column, at_end_columns, o8, encoding, cr, q); /* Translate the result back to the original string. */ memset (p, UC_BREAK_PROHIBITED, n); @@ -117,7 +118,7 @@ ulc_width_linebreaks (const char *s, size_t n, if (is_all_ascii (s, n)) { /* ASCII is a subset of UTF-8. */ - return u8_width_linebreaks ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, p); + return u8_width_linebreaks_internal ((const uint8_t *) s, n, width, start_column, at_end_columns, o, encoding, cr, p); } #endif /* We have a non-ASCII string and cannot convert it. @@ -128,9 +129,16 @@ ulc_width_linebreaks (const char *s, size_t n, const char *s_end = s + n; while (s < s_end) { - *p = ((o != NULL && *o == UC_BREAK_MANDATORY) || *s == '\n' + *p = ((o != NULL && *o == UC_BREAK_MANDATORY) + || *s == '\n' ? UC_BREAK_MANDATORY - : UC_BREAK_PROHIBITED); + : ((o != NULL && *o == UC_BREAK_CR_BEFORE_LF) + || (cr >= 0 + && *s == '\r' + && s + 1 < s_end + && *(s + 1) == '\n') + ? UC_BREAK_CR_BEFORE_LF + : UC_BREAK_PROHIBITED)); s++; p++; if (o != NULL) @@ -143,6 +151,30 @@ ulc_width_linebreaks (const char *s, size_t n, return start_column; } +#undef ulc_width_linebreaks + +int +ulc_width_linebreaks (const char *s, size_t n, + int width, int start_column, int at_end_columns, + const char *o, const char *encoding, + char *p) +{ + return ulc_width_linebreaks_internal (s, n, + width, start_column, at_end_columns, + o, encoding, -1, p); +} + +int +ulc_width_linebreaks_v2 (const char *s, size_t n, + int width, int start_column, int at_end_columns, + const char *o, const char *encoding, + char *p) +{ + return ulc_width_linebreaks_internal (s, n, + width, start_column, at_end_columns, + o, encoding, LBP_CR, p); +} + #ifdef TEST @@ -210,7 +242,7 @@ main (int argc, char * argv[]) char *breaks = malloc (length); int i; - ulc_width_linebreaks (input, length, width, 0, 0, NULL, locale_charset (), breaks); + ulc_width_linebreaks_v2 (input, length, width, 0, 0, NULL, locale_charset (), breaks); for (i = 0; i < length; i++) { @@ -221,6 +253,8 @@ main (int argc, char * argv[]) break; case UC_BREAK_MANDATORY: break; + case UC_BREAK_CR_BEFORE_LF: + break; case UC_BREAK_PROHIBITED: break; default: |