#pragma once #include "EHS.h" #include "Str.h" namespace lwe { enum class CharEncoding { UTF_32, UTF_16, UTF_8 }; /// A helper class for converting between UTF8, 16 and 32. class UTF { public: /// Converts the given UTF16 C-style string into UTF32. /// @tparam N The number data type to use. /// @param [in] from The given C-style UTF16 string. /// @param [in] size The size of the given C-style UTF16 string. /// @returns The result. template static Str To_32(const Char_16* const from, const N size = 0) { Str result((size) ? size : Str::Len(from)); N index = 0; for (N i = 0; i < result.Size(); ++i) { if (i != result.Size() - 1) { if ((from[i] & 0xDC00) == 0xDC00 && (from[i + 1] & 0xD800) == 0xD800) { result[index++] = (((from[i] - 0xD800) * 0x400) | (from[i] - 0xDC00)) + 0x10000; continue; } } result[index++] = (Char_32)from[i]; } result.Resize(index); return result; } /// Converts the given UTF16 string object into UTF32. /// @tparam N The number data type to use. /// @param [in] from The given UTF16 string. /// @returns The result. template static Str To_32(const Str& from) { Str result(from.Size()); N index = 0; for (N i = 0; i < from.Size(); ++i) { if (i != from.Size() - 1) { if ((from[i] & 0xDC00) == 0xDC00 && (from[i + 1] & 0xD800) == 0xD800) { result[index++] = (((from[i] - 0xD800) * 0x400) | (from[i] - 0xDC00)) + 0x10000; continue; } } result[index++] = (Char_32)from[i]; } result.Resize(index); return result; } /// Converts the given UTF8 C-style string into UTF32. /// @tparam N The number data type to use. /// @param [in] from The given C-style UTF8 string. /// @param [in] size The size of the given C-style UTF8 string. /// @returns The result. template static Str To_32(const Char_8* from, const N size = 0) { N rSize = size ? size : Str::Len(from); Str r(rSize); N c = 0; for (N i = 0; i < rSize; ++i) { if (from[i] <= 0b11110111 && i + 3 < rSize && from[i + 1] <= 0b10111111 && from[i + 2] <= 0b10111111 && from[i + 3] <= 0b10111111) r[c++] = (Char_32)(from[i++] & 0b00000111) << 18 | (Char_32)(from[i++] & 0b00111111) << 12 | (Char_32)(from[i++] & 0b00111111) << 6 | (Char_32)(from[i] & 0b00111111); else if (from[i] <= 0b11101111 && i + 2 < rSize && from[i + 1] <= 0b10111111 && from[i + 2] <= 0b10111111) r[c++] = (Char_32)(from[i++] & 0b00001111) << 12 | (Char_32)(from[i++] & 0b00111111) << 6 | ((Char_32)from[i] & 0b00111111); else if (from[i] <= 0b11011111 && i + 1 < rSize && from[i + 1] <= 0b10111111) r[c++] = (Char_32)(from[i++] & 0b00011111) << 6 | (Char_32)(from[i] & 0b00111111); else r[c++] = (Char_32)from[i]; } r.Resize(c); return r; } /// Converts the given UTF8 string object into UTF32. /// @tparam N The number data type to use. /// @param [in] from The given UTF8 string. /// @returns The result. template static Str To_32(const Str& from) { Str r(from.Size()); N c = 0; for (N i = 0; i < from.Size(); ++i) { if (from[i] <= 0b11110111 && i + 3 < from.Size() && from[i + 1] <= 0b10111111 && from[i + 2] <= 0b10111111 && from[i + 3] <= 0b10111111) r[c++] = (Char_32)(from[i++] & 0b00000111) << 18 | (Char_32)(from[i++] & 0b00111111) << 12 | (Char_32)(from[i++] & 0b00111111) << 6 | (Char_32)(from[i] & 0b00111111); else if (from[i] <= 0b11101111 && i + 2 < from.Size() && from[i + 1] <= 0b10111111 && from[i + 2] <= 0b10111111) r[c++] = (Char_32)(from[i++] & 0b00001111) << 12 | (Char_32)(from[i++] & 0b00111111) << 6 | ((Char_32)from[i] & 0b00111111); else if (from[i] <= 0b11011111 && i + 1 < from.Size() && from[i + 1] <= 0b10111111) r[c++] = (Char_32)(from[i++] & 0b00011111) << 6 | (Char_32)(from[i] & 0b00111111); else r[c++] = (Char_32)from[i]; } r.Resize(c); return r; } /// Converts the given UTF32 C-style string object into UTF16. /// @tparam N The number data type to use. /// @param [in] from The given UTF32 string. /// @param [in] size The size of the give C-style UTF32 string. /// @returns The result. template static Str To_16(const Char_32* const from, const N size = 0) { N rSize = size ? size : Str::Len(from); Str result(rSize * sizeof(Char_16)); N index = 0; for (N i = 0; i < rSize; ++i) { if (from[i] <= 0xFFFF) { result[index++] = (Char_16)from[i]; } else { Char_32 t = from[i] - 0x10000; result[index++] |= (t >> 10) + 0xD800; result[index++] |= t + 0xDC00; } } result.Resize(index); return result; } /// Converts the given UTF32 string object into UTF16. /// @tparam N The number data type to use. /// @param [in] from The given UTF32 string. /// @returns The result. template static Str To_16(const Str& from) { Str result(from.Size() * sizeof(Char_16)); N index = 0; for (N i = 0; i < from.Size(); ++i) { if (from[i] <= 0xFFFF) { result[index++] = (Char_16)from[i]; } else { Char_32 t = from[i] - 0x10000; result[index++] |= (t >> 10) + 0xD800; result[index++] |= t + 0xDC00; } } result.Resize(index); return result; } /// Converts the given UTF8 C-style string into UTF16. /// @tparam N The number data type to use. /// @param [in] from The given UTF8 C-style string. /// @param [in] size The size of the given C-style UTF8 string. /// @returns The result. template static Str To_16(const Char_8* const from, const N size = 0) { N rSize = size ? size : Str::Len(from); const Byte* const data = (const Byte* const)from; Str r(rSize); N c = 0; for (N i = 0; i < rSize; ++i) { if (data[i] >= 0b11110000 && i + 3 < rSize && data[i + 1] <= 0b10111111 && data[i + 2] <= 0b10111111 && data[i + 3] <= 0b10111111) r[c++] = (0b00000011111111110000001111111111 & ((Char_16)(data[i++] & 0b00000111) << 23) | ((Char_16)(data[i++] & 0b00111111) << 18) | ((Char_16)(data[i++] & 0b00111111) << 12) | ((Char_16)(data[i++] & 0b00111111) << 6) | (Char_16)(data[i] & 0b00111111)) | 0b11011000000000001101110000000000; else if (data[i] >= 0b11100000 && i + 2 < rSize && data[i + 1] <= 0b10111111 && data[i + 2] <= 0b10111111) r[c++] = ((Char_16)(data[i++] & 0b00001111) << 12) | ((Char_16)(data[i++] & 0b00111111) << 6) | (Char_16)(data[i] & 0b00111111); else if (data[i] >= 0b11000000 && i + 1 < rSize && data[i + 1] <= 0b10111111) r[c++] = (Char_16)(data[i++] & 0b00011111) << 6 | (Char_16 )(data[i] & 0b00111111); else r[c++] = (Char_16 )data[i]; } r.Resize(c); return r; } /// Converts the given UTF8 string object into UTF16. /// @tparam N The number data type to use. /// @param [in] from The given UTF8 string. /// @returns The result. template static Str To_16(const Str& from) { const Byte* const data = from.ToBytes(); Str r(from.Size()); N c = 0; for (N i = 0; i < from.Size(); ++i) { if (data[i] >= 0b11110000 && i + 3 < from.Size() && data[i + 1] <= 0b10111111 && data[i + 2] <= 0b10111111 && data[i + 3] <= 0b10111111) r[c++] = (0b00000011111111110000001111111111 & ((Char_16)(data[i++] & 0b00000111) << 23) | ((Char_16)(data[i++] & 0b00111111) << 18) | ((Char_16)(data[i++] & 0b00111111) << 12) | ((Char_16)(data[i++] & 0b00111111) << 6) | (Char_16)(data[i] & 0b00111111)) | 0b11011000000000001101110000000000; else if (data[i] >= 0b11100000 && i + 2 < from.Size() && data[i + 1] <= 0b10111111 && data[i + 2] <= 0b10111111) r[c++] = ((Char_16)(data[i++] & 0b00001111) << 12) | ((Char_16)(data[i++] & 0b00111111) << 6) | (Char_16)(data[i] & 0b00111111); else if (data[i] >= 0b11000000 && i + 1 < from.Size() && data[i + 1] <= 0b10111111) r[c++] = (Char_16)(data[i++] & 0b00011111) << 6 | (Char_16 )(data[i] & 0b00111111); else r[c++] = (Char_16 )data[i]; } r.Resize(c); return r; } /// Converts the given UTF16 C-style string into UTF8. /// @tparam N The number data type to use. /// @param [in] from The given UTF16 string. /// @param [in] size The size of the given C-style UTF8 string. /// @returns The result. template static Str To_8(const Char_16* const from, const N size = 0) { N rSize = size ? size : Str::Len(from); Str r(rSize * sizeof(Char_16)); N c = 0; for (N i = 0; i < rSize; ++i) { if (from[i] & 0b1101100000000000 && i + 1 < rSize && from[i] & 0b1101110000000000) { r[c++] = ((Byte*)&from[i])[1] & 00000111 | 0b11110000; r[c++] = ((Byte*)&from[i])[0] >> 2 & 0b00111111 | 0b10000000; r[c++] = ((Byte*)&from[i])[0] << 4 | (((Byte*)&from[i + 1])[1] & 0b00000011) << 2 | ((Byte*)&from[i + 1])[0] >> 6 & 0b00111111 | 0b10000000; r[c++] = ((Byte*)&from[++i])[0] & 0b00111111 | 0b10000000; } else if (from[i] <= 0b11111111) { r[c++] = (Byte)from[i]; } else if (from[i] > 0b11111111 && from[i] <= 0b0000011111111111) { r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 | 0b11000000; r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000; } else if (from[i] > 0b0000011111111111) { r[c++] = ((Byte*)&from[i])[1] >> 4 | 0b11100000; r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 & 0b00111111 | 0b10000000; r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000; } } r.Resize(c); return r; } /// Converts the given UTF16 string object into UTF8. /// @tparam N The number data type to use. /// @param [in] from The given UTF16 string. /// @returns The result. template static Str To_8(const Str& from) { Str r(from.Size(true) * sizeof(Char_16)); N c = 0; for (N i = 0; i < from.Size(); ++i) { if (from[i] & 0b1101100000000000 && i + 1 < from.Size() && from[i] & 0b1101110000000000) { r[c++] = ((Byte*)&from[i])[1] & 00000111 | 0b11110000; r[c++] = ((Byte*)&from[i])[0] >> 2 & 0b00111111 | 0b10000000; r[c++] = ((Byte*)&from[i])[0] << 4 | (((Byte*)&from[i + 1])[1] & 0b00000011) << 2 | ((Byte*)&from[i + 1])[0] >> 6 & 0b00111111 | 0b10000000; r[c++] = ((Byte*)&from[++i])[0] & 0b00111111 | 0b10000000; } else if (from[i] <= 0b11111111) { r[c++] = (Byte)from[i]; } else if (from[i] > 0b11111111 && from[i] <= 0b0000011111111111) { r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 | 0b11000000; r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000; } else if (from[i] > 0b0000011111111111) { r[c++] = ((Byte*)&from[i])[1] >> 4 | 0b11100000; r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 & 0b00111111 | 0b10000000; r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000; } } r.Resize(c); return r; } /// Converts the given UTF32 C-style string into UTF8. /// @tparam N The number data type to use. /// @param [in] from The given UTF32 string. /// @param [in] size The size of the give C-style UTF32 string. /// @returns The result. template static Str To_8(const Char_32* const from, const N size = 0) { N rSize = size ? size : Str::Len(from); Str r(rSize * sizeof(Char_32)); N c = 0; for (N i = 0; i < rSize; ++i) { if (from[i] <= 0b11111111) { r[c++] = (Char_8)from[i]; } else if (from[i] > 0b11111111 && from[i] <= 0b0000011111111111) { r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 | 0b11000000; r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000; } else if (from[i] > 0b0000011111111111 && from[i] <= 0b1111111111111111) { r[c++] = ((Byte*)&from[i])[2] << 2 | ((Byte*)&from[i])[1] >> 6 | 0b11100000; r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 & 0b00111111 | 0b10000000; r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000; } else if (from[i] > 0b1111111111111111) { r[c++] = ((Byte*)&from[i])[3] << 2 | ((Byte*)&from[i])[3] >> 2 & 0b00000111 | 0b11110000; r[c++] = ((Byte*)&from[i])[2] << 2 | ((Byte*)&from[i])[2] >> 6 & 0b00111111 | 0b11100000; r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[1] >> 6 & 0b00111111 | 0b10000000; r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000; } } r.Resize(c); return r; } /// Converts the given UTF32 string object into UTF8. /// @tparam N The number data type to use. /// @param [in] from The given UTF32 string. /// @returns The result. template static Str To_8(const Str& from) { Str r(from.Size() * sizeof(Char_32)); N c = 0; for (N i = 0; i < from.Size(); ++i) { if (from[i] <= 0b11111111) { r[c++] = (Char_8)from[i]; } else if (from[i] > 0b11111111 && from[i] <= 0b0000011111111111) { r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 | 0b11000000; r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000; } else if (from[i] > 0b0000011111111111 && from[i] <= 0b1111111111111111) { r[c++] = ((Byte*)&from[i])[2] << 2 | ((Byte*)&from[i])[1] >> 6 | 0b11100000; r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 & 0b00111111 | 0b10000000; r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000; } else if (from[i] > 0b1111111111111111) { r[c++] = ((Byte*)&from[i])[3] << 2 | ((Byte*)&from[i])[3] >> 2 & 0b00000111 | 0b11110000; r[c++] = ((Byte*)&from[i])[2] << 2 | ((Byte*)&from[i])[2] >> 6 & 0b00111111 | 0b11100000; r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[1] >> 6 & 0b00111111 | 0b10000000; r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000; } } r.Resize(c); return r; } }; }