summaryrefslogtreecommitdiff
path: root/app/wlib/mswlib/utf8conv.c
diff options
context:
space:
mode:
Diffstat (limited to 'app/wlib/mswlib/utf8conv.c')
-rw-r--r--app/wlib/mswlib/utf8conv.c210
1 files changed, 210 insertions, 0 deletions
diff --git a/app/wlib/mswlib/utf8conv.c b/app/wlib/mswlib/utf8conv.c
new file mode 100644
index 0000000..62ada76
--- /dev/null
+++ b/app/wlib/mswlib/utf8conv.c
@@ -0,0 +1,210 @@
+/**
+ * \file utf8conv.c
+ *
+ * UTF-8 conversion functions
+ */
+
+/* XTrkCad - Model Railroad CAD
+ * Copyright (C) 2020 Martin Fischer
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <malloc.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include <Windows.h>
+
+#include <wlib.h>
+
+/**
+ * Convert system codepage to UTF 8
+ *
+ * \param inString The input string.
+ * \param [in,out] outString The output string buffer.
+ * \param outStringLength Length of the output buffer
+ *
+ * \returns FALSE if it fails.
+ */
+
+bool
+wSystemToUTF8(const char *inString, char *outString, unsigned outStringLength)
+{
+ unsigned int cnt = 2 * (strlen(inString) + 1);
+ char *tempBuffer = malloc(cnt);
+
+ // convert to wide character (UTF16)
+ MultiByteToWideChar(CP_ACP,
+ 0,
+ inString,
+ -1,
+ (LPWSTR)tempBuffer,
+ cnt);
+
+ // convert from wide char to UTF-8
+ WideCharToMultiByte(CP_UTF8,
+ 0,
+ (LPCWCH)tempBuffer,
+ -1,
+ (LPSTR)outString,
+ outStringLength,
+ NULL,
+ NULL);
+
+ free(tempBuffer);
+ return true;
+}
+
+/**
+ * Convert from UTF-8 to system codepage
+ *
+ * \param inString The input string.
+ * \param [in,out] outString the output string.
+ * \param outStringLength Length of the output buffer.
+ *
+ * \returns True if it succeeds, false if it fails.
+ */
+
+bool
+wUTF8ToSystem(const char *inString, char *outString, unsigned outStringLength)
+{
+ unsigned int cnt = 2 * (strlen(inString) + 1);
+ char *tempBuffer = malloc(cnt);
+
+ // convert to wide character (UTF16)
+ MultiByteToWideChar(CP_UTF8,
+ 0,
+ inString,
+ -1,
+ (LPWSTR)tempBuffer,
+ cnt);
+
+
+ cnt = WideCharToMultiByte(CP_ACP,
+ 0,
+ (LPCWCH)tempBuffer,
+ -1,
+ (LPSTR)outString,
+ 0L,
+ NULL,
+ NULL);
+
+ if (outStringLength <= cnt) {
+ return (false);
+ }
+
+ // convert from wide char to system codepage
+ WideCharToMultiByte(CP_ACP,
+ 0,
+ (LPCWCH)tempBuffer,
+ -1,
+ (LPSTR)outString,
+ outStringLength,
+ NULL,
+ NULL);
+
+ free(tempBuffer);
+ return true;
+}
+
/**
 * Test whether a byte lies inside an inclusive range.
 *
 * \param value The byte to test.
 * \param low Lowest accepted value.
 * \param high Highest accepted value.
 *
 * \returns True if low <= value <= high.
 */

static bool
utf8ByteInRange(unsigned char value, unsigned char low, unsigned char high)
{
    return low <= value && value <= high;
}

/**
 * Is passed string in correct UTF-8 format?
 * Based on https://stackoverflow.com/questions/1031645/how-to-detect-utf-8-in-plain-c
 *
 * Single byte characters are limited to tab, line feed, carriage return and
 * the printable range 0x20..0x7E; any other ASCII control character makes
 * the check fail. Overlong encodings, UTF-16 surrogates and code points
 * above U+10FFFF are rejected.
 *
 * \param string The string to check.
 *
 * \returns True if UTF 8, false if not or if string is NULL.
 */

bool wIsUTF8(const char * string)
{
    const unsigned char *bytes = (const unsigned char *)string;

    if (!string) {
        return false;
    }

    while (*bytes) {
        unsigned char lead = bytes[0];
        // accepted range for the first trailing byte; narrowed below for
        // those lead bytes where the full range would admit invalid values
        unsigned char low = 0x80;
        unsigned char high = 0xBF;
        int trailing;
        int i;

        if (lead == 0x09 || lead == 0x0A || lead == 0x0D ||
                utf8ByteInRange(lead, 0x20, 0x7E)) {
            // ASCII
            trailing = 0;
        } else if (utf8ByteInRange(lead, 0xC2, 0xDF)) {
            // non-overlong 2-byte
            trailing = 1;
        } else if (utf8ByteInRange(lead, 0xE0, 0xEF)) {
            trailing = 2;
            if (lead == 0xE0) {
                low = 0xA0;     // excluding overlongs
            } else if (lead == 0xED) {
                high = 0x9F;    // excluding surrogates
            }
        } else if (utf8ByteInRange(lead, 0xF0, 0xF4)) {
            trailing = 3;
            if (lead == 0xF0) {
                low = 0x90;     // planes 1-3, excluding overlongs
            } else if (lead == 0xF4) {
                high = 0x8F;    // plane 16, nothing above U+10FFFF
            }
        } else {
            return false;
        }

        // A NUL byte is outside every accepted range, so the scan stops at
        // the terminator and never reads beyond the end of the string.
        for (i = 1; i <= trailing; i++) {
            if (!utf8ByteInRange(bytes[i], low, high)) {
                return false;
            }
            // only the first trailing byte has a restricted range
            low = 0x80;
            high = 0xBF;
        }

        bytes += trailing + 1;
    }

    return true;
}