diff options
Diffstat (limited to 'src/util/string.vala')
-rw-r--r-- | src/util/string.vala | 268 |
1 files changed, 268 insertions, 0 deletions
diff --git a/src/util/string.vala b/src/util/string.vala new file mode 100644 index 0000000..9fda007 --- /dev/null +++ b/src/util/string.vala @@ -0,0 +1,268 @@ +/* Copyright 2010-2014 Yorba Foundation + * + * This software is licensed under the GNU Lesser General Public License + * (version 2.1 or later). See the COPYING file in this distribution. + */ + +extern int64 g_ascii_strtoll(string str, out char *endptr, uint num_base); + +public const int DEFAULT_USER_TEXT_INPUT_LENGTH = 1024; + +public inline bool is_string_empty(string? s) { + return (s == null || s[0] == '\0'); +} + +// utf8 case sensitive compare +public int utf8_cs_compare(void *a, void *b) { + return ((string) a).collate((string) b); +} + +// utf8 case insensitive compare +public int utf8_ci_compare(void *a, void *b) { + return ((string) a).down().collate(((string) b).down()); +} + +// utf8 array to string +public string uchar_array_to_string(uchar[] data, int length = -1) { + if (length < 0) + length = data.length; + + StringBuilder builder = new StringBuilder(); + for (int ctr = 0; ctr < length; ctr++) { + if (data[ctr] != '\0') + builder.append_c((char) data[ctr]); + else + break; + } + + return builder.str; +} + +// string to uchar array +public uchar[] string_to_uchar_array(string str) { + uchar[] data = new uchar[0]; + for (int ctr = 0; ctr < str.length; ctr++) + data += (uchar) str[ctr]; + + return data; +} + +// Markup.escape_text() will crash if the UTF-8 text is not valid; it relies on a call to +// g_utf8_next_char(), which demands that the string be validated before use, which escape_text() +// does not do. This handles this problem by kicking back an empty string if the text is not +// valid. Text should be validated upon entry to the system as well to guard against this +// problem. +// +// Null strings are accepted; they will result in an empty string returned. +public inline string guarded_markup_escape_text(string? plain) { + return (!is_string_empty(plain) && plain.validate()) ? Markup.escape_text(plain) : ""; +} + +public long find_last_offset(string str, char c) { + long offset = str.length; + while (--offset >= 0) { + if (str[offset] == c) + return offset; + } + + return -1; +} + +// Helper function for searching an array of case-insensitive strings. The array should be +// all lowercase. +public bool is_in_ci_array(string str, string[] strings) { + string strdown = str.down(); + foreach (string str_element in strings) { + if (strdown == str_element) + return true; + } + + return false; +} + +[Flags] +public enum PrepareInputTextOptions { + EMPTY_IS_NULL, + VALIDATE, + INVALID_IS_NULL, + STRIP, + STRIP_CRLF, + NORMALIZE, + DEFAULT = EMPTY_IS_NULL | VALIDATE | INVALID_IS_NULL | STRIP_CRLF | STRIP | NORMALIZE; +} + +public string? prepare_input_text(string? text, PrepareInputTextOptions options, int dest_length) { + if (text == null) + return null; + + if ((options & PrepareInputTextOptions.VALIDATE) != 0 && !text.validate()) + return (options & PrepareInputTextOptions.INVALID_IS_NULL) != 0 ? null : ""; + + string prepped = text; + + // Using composed form rather than GLib's default (decomposed) as NFC is the preferred form in + // Linux and WWW. More importantly, Pango seems to have serious problems displaying decomposed + // forms of Korean language glyphs (and perhaps others). See: + // http://trac.yorba.org/ticket/2952 + if ((options & PrepareInputTextOptions.NORMALIZE) != 0) + prepped = prepped.normalize(-1, NormalizeMode.NFC); + + if ((options & PrepareInputTextOptions.STRIP) != 0) + prepped = prepped.strip(); + + // Ticket #3245 - Prevent carriage return mayhem + // in image titles, tag names, etc. + if ((options & PrepareInputTextOptions.STRIP_CRLF) != 0) + prepped = prepped.delimit("\n\r", ' '); + + if ((options & PrepareInputTextOptions.EMPTY_IS_NULL) != 0 && is_string_empty(prepped)) + return null; + + // Ticket #3196 - Allow calling functions to limit the length of the + // string we return to them. Passing any negative value is interpreted + // as 'do not truncate'. + if (dest_length >= 0) { + StringBuilder sb = new StringBuilder(prepped); + sb.truncate(dest_length); + return sb.str; + } + + // otherwise, return normally. + return prepped; +} + +public int64 parse_int64(string str, int num_base) { + return g_ascii_strtoll(str, null, num_base); +} + +namespace String { + +public inline bool contains_char(string haystack, unichar needle) { + return haystack.index_of_char(needle) >= 0; +} + +public inline bool contains_str(string haystack, string needle) { + return haystack.index_of(needle) >= 0; +} + +public inline string? sliced_at(string str, int index) { + return (index >= 0) ? str[index:str.length] : null; +} + +public inline string? sliced_at_first_str(string haystack, string needle, int start_index = 0) { + return sliced_at(haystack, haystack.index_of(needle, start_index)); +} + +public inline string? sliced_at_last_str(string haystack, string needle, int start_index = 0) { + return sliced_at(haystack, haystack.last_index_of(needle, start_index)); +} + +public inline string? sliced_at_first_char(string haystack, unichar ch, int start_index = 0) { + return sliced_at(haystack, haystack.index_of_char(ch, start_index)); +} + +public inline string? sliced_at_last_char(string haystack, unichar ch, int start_index = 0) { + return sliced_at(haystack, haystack.last_index_of_char(ch, start_index)); +} + +// Note that this method currently turns a word of all zeros into empty space ("000" -> "") +public string strip_leading_zeroes(string str) { + StringBuilder stripped = new StringBuilder(); + bool prev_is_space = true; + for (unowned string iter = str; iter.get_char() != 0; iter = iter.next_char()) { + unichar ch = iter.get_char(); + + if (!prev_is_space || ch != '0') { + stripped.append_unichar(ch); + prev_is_space = ch.isspace(); + } + } + + return stripped.str; +} + +public string remove_diacritics(string istring) { + var builder = new StringBuilder (); + unichar ch; + int i = 0; + while(istring.normalize().get_next_char(ref i, out ch)) { + switch(ch.type()) { + case UnicodeType.CONTROL: + case UnicodeType.FORMAT: + case UnicodeType.UNASSIGNED: + case UnicodeType.NON_SPACING_MARK: + case UnicodeType.COMBINING_MARK: + case UnicodeType.ENCLOSING_MARK: + // Ignore those + continue; + } + builder.append_unichar(ch); + } + return builder.str; +} + +public string to_hex_string(string str) { + StringBuilder builder = new StringBuilder(); + + uint8 *data = (uint8 *) str; + while (*data != 0) + builder.append_printf("%02Xh%s", *data++, (*data != 0) ? " " : ""); + + return builder.str; +} + +// A note on the collated_* and precollated_* methods: +// +// A bug report (http://trac.yorba.org/ticket/3152) indicated that two different Hirigana characters +// as Tag names would trigger an assertion. Investigation showed that the characters' collation +// keys computed as equal when the locale was set to anything but the default locale (C) or +// Japanese. A related bug was that another hash table was using str_equal, which does not use +// collation, meaning that in one table the strings were seen as the same and in another as +// different. +// +// The solution we arrived at is to use collation whenever possible, but if two strings have the +// same collation, then fall back on strcmp(), which looks for byte-for-byte comparisons. Note +// that this technique requires that both strings have been properly composed (use +// prepare_input_text() for that task) so that equal UTF-8 strings are byte-for-byte equal as +// well. + +// See note above. +public uint collated_hash(void *ptr) { + string str = (string) ptr; + + return str_hash(str.collate_key()); +} + +// See note above. +public uint precollated_hash(void *ptr) { + return str_hash((string) ptr); +} + +// See note above. +public int collated_compare(void *a, void *b) { + string astr = (string) a; + string bstr = (string) b; + + int result = astr.collate(bstr); + + return (result != 0) ? result : strcmp(astr, bstr); +} + +// See note above. +public int precollated_compare(string astr, string akey, string bstr, string bkey) { + int result = strcmp(akey, bkey); + + return (result != 0) ? result : strcmp(astr, bstr); +} + +// See note above. +public bool collated_equals(void *a, void *b) { + return collated_compare(a, b) == 0; +} + +// See note above. +public bool precollated_equals(string astr, string akey, string bstr, string bkey) { + return precollated_compare(astr, akey, bstr, bkey) == 0; +} + +} |