455 lines
17 KiB
C++
455 lines
17 KiB
C++
#pragma once
|
|
|
|
#include "EHS.h"
|
|
#include "Str.h"
|
|
|
|
namespace ehs
|
|
{
|
|
/// Enumerates the character encodings named UTF_32, UTF_16 and UTF_8.
/// The enumerator values are spelled out; they match the implicit numbering.
enum class CharEncoding
{
    UTF_32 = 0,
    UTF_16 = 1,
    UTF_8 = 2
};
|
|
|
|
/// A helper class for converting between UTF8, 16 and 32.
|
|
class UTF
|
|
{
|
|
public:
|
|
/// Converts the given UTF16 C-style string into UTF32.
|
|
/// @tparam N The number data type to use.
|
|
/// @param [in] from The given C-style UTF16 string.
|
|
/// @param [in] size The size of the given C-style UTF16 string.
|
|
/// @returns The result.
|
|
template<typename N = UInt_64>
|
|
static Str<Char_32, N> To_32(const Char_16* const from, const N size = 0)
|
|
{
|
|
Str<Char_32, N> result((size) ? size : Str<Char_16, N>::Len(from));
|
|
|
|
N index = 0;
|
|
|
|
for (N i = 0; i < result.Size(); ++i)
|
|
{
|
|
if (i != result.Size() - 1)
|
|
{
|
|
if ((from[i] & 0xDC00) == 0xDC00 && (from[i + 1] & 0xD800) == 0xD800)
|
|
{
|
|
result[index++] = (((from[i] - 0xD800) * 0x400) | (from[i] - 0xDC00)) + 0x10000;
|
|
|
|
continue;
|
|
}
|
|
}
|
|
|
|
result[index++] = (Char_32)from[i];
|
|
}
|
|
|
|
result.Resize(index);
|
|
|
|
return result;
|
|
}
|
|
|
|
/// Converts the given UTF16 string object into UTF32.
|
|
/// @tparam N The number data type to use.
|
|
/// @param [in] from The given UTF16 string.
|
|
/// @returns The result.
|
|
template<typename N = UInt_64>
|
|
static Str<Char_32, N> To_32(const Str<Char_16, N>& from)
|
|
{
|
|
Str<Char_32, N> result(from.Size());
|
|
|
|
N index = 0;
|
|
|
|
for (N i = 0; i < from.Size(); ++i)
|
|
{
|
|
if (i != from.Size() - 1)
|
|
{
|
|
if ((from[i] & 0xDC00) == 0xDC00 && (from[i + 1] & 0xD800) == 0xD800)
|
|
{
|
|
result[index++] = (((from[i] - 0xD800) * 0x400) | (from[i] - 0xDC00)) + 0x10000;
|
|
|
|
continue;
|
|
}
|
|
}
|
|
|
|
result[index++] = (Char_32)from[i];
|
|
}
|
|
|
|
result.Resize(index);
|
|
|
|
return result;
|
|
}
|
|
|
|
/// Converts the given UTF8 C-style string into UTF32.
|
|
/// @tparam N The number data type to use.
|
|
/// @param [in] from The given C-style UTF8 string.
|
|
/// @param [in] size The size of the given C-style UTF8 string.
|
|
/// @returns The result.
|
|
template<typename N = UInt_64>
|
|
static Str<Char_32, N> To_32(const Char_8* from, const N size = 0)
|
|
{
|
|
N rSize = size ? size : Str<Char_8, N>::Len(from);
|
|
|
|
Str<Char_32, N> r(rSize);
|
|
|
|
N c = 0;
|
|
|
|
for (N i = 0; i < rSize; ++i)
|
|
{
|
|
if (from[i] <= 0b11110111 && i + 3 < rSize && from[i + 1] <= 0b10111111 && from[i + 2] <= 0b10111111 && from[i + 3] <= 0b10111111)
|
|
r[c++] = (Char_32)(from[i++] & 0b00000111) << 18 | (Char_32)(from[i++] & 0b00111111) << 12 | (Char_32)(from[i++] & 0b00111111) << 6 | (Char_32)(from[i] & 0b00111111);
|
|
else if (from[i] <= 0b11101111 && i + 2 < rSize && from[i + 1] <= 0b10111111 && from[i + 2] <= 0b10111111)
|
|
r[c++] = (Char_32)(from[i++] & 0b00001111) << 12 | (Char_32)(from[i++] & 0b00111111) << 6 | ((Char_32)from[i] & 0b00111111);
|
|
else if (from[i] <= 0b11011111 && i + 1 < rSize && from[i + 1] <= 0b10111111)
|
|
r[c++] = (Char_32)(from[i++] & 0b00011111) << 6 | (Char_32)(from[i] & 0b00111111);
|
|
else
|
|
r[c++] = (Char_32)from[i];
|
|
}
|
|
|
|
r.Resize(c);
|
|
|
|
return r;
|
|
}
|
|
|
|
/// Converts the given UTF8 string object into UTF32.
|
|
/// @tparam N The number data type to use.
|
|
/// @param [in] from The given UTF8 string.
|
|
/// @returns The result.
|
|
template<typename N = UInt_64>
|
|
static Str<Char_32, N> To_32(const Str<Char_8, N>& from)
|
|
{
|
|
Str<Char_32, N> r(from.Size());
|
|
|
|
N c = 0;
|
|
|
|
for (N i = 0; i < from.Size(); ++i)
|
|
{
|
|
if (from[i] <= 0b11110111 && i + 3 < from.Size() && from[i + 1] <= 0b10111111 && from[i + 2] <= 0b10111111 && from[i + 3] <= 0b10111111)
|
|
r[c++] = (Char_32)(from[i++] & 0b00000111) << 18 | (Char_32)(from[i++] & 0b00111111) << 12 | (Char_32)(from[i++] & 0b00111111) << 6 | (Char_32)(from[i] & 0b00111111);
|
|
else if (from[i] <= 0b11101111 && i + 2 < from.Size() && from[i + 1] <= 0b10111111 && from[i + 2] <= 0b10111111)
|
|
r[c++] = (Char_32)(from[i++] & 0b00001111) << 12 | (Char_32)(from[i++] & 0b00111111) << 6 | ((Char_32)from[i] & 0b00111111);
|
|
else if (from[i] <= 0b11011111 && i + 1 < from.Size() && from[i + 1] <= 0b10111111)
|
|
r[c++] = (Char_32)(from[i++] & 0b00011111) << 6 | (Char_32)(from[i] & 0b00111111);
|
|
else
|
|
r[c++] = (Char_32)from[i];
|
|
}
|
|
|
|
r.Resize(c);
|
|
|
|
return r;
|
|
}
|
|
|
|
/// Converts the given UTF32 C-style string object into UTF16.
|
|
/// @tparam N The number data type to use.
|
|
/// @param [in] from The given UTF32 string.
|
|
/// @param [in] size The size of the give C-style UTF32 string.
|
|
/// @returns The result.
|
|
template<typename N = UInt_64>
|
|
static Str<Char_16, N> To_16(const Char_32* const from, const N size = 0)
|
|
{
|
|
N rSize = size ? size : Str<Char_32, N>::Len(from);
|
|
|
|
Str<Char_16, N> result(rSize * sizeof(Char_16));
|
|
|
|
N index = 0;
|
|
|
|
for (N i = 0; i < rSize; ++i)
|
|
{
|
|
if (from[i] <= 0xFFFF)
|
|
{
|
|
result[index++] = (Char_16)from[i];
|
|
}
|
|
else
|
|
{
|
|
Char_32 t = from[i] - 0x10000;
|
|
|
|
result[index++] |= (t >> 10) + 0xD800;
|
|
result[index++] |= t + 0xDC00;
|
|
}
|
|
}
|
|
|
|
result.Resize(index);
|
|
|
|
return result;
|
|
}
|
|
|
|
/// Converts the given UTF32 string object into UTF16.
|
|
/// @tparam N The number data type to use.
|
|
/// @param [in] from The given UTF32 string.
|
|
/// @returns The result.
|
|
template<typename N = UInt_64>
|
|
static Str<Char_16, N> To_16(const Str<Char_32, N>& from)
|
|
{
|
|
Str<Char_16, N> result(from.Size() * sizeof(Char_16));
|
|
|
|
N index = 0;
|
|
|
|
for (N i = 0; i < from.Size(); ++i)
|
|
{
|
|
if (from[i] <= 0xFFFF)
|
|
{
|
|
result[index++] = (Char_16)from[i];
|
|
}
|
|
else
|
|
{
|
|
Char_32 t = from[i] - 0x10000;
|
|
|
|
result[index++] |= (t >> 10) + 0xD800;
|
|
result[index++] |= t + 0xDC00;
|
|
}
|
|
}
|
|
|
|
result.Resize(index);
|
|
|
|
return result;
|
|
}
|
|
|
|
/// Converts the given UTF8 C-style string into UTF16.
|
|
/// @tparam N The number data type to use.
|
|
/// @param [in] from The given UTF8 C-style string.
|
|
/// @param [in] size The size of the given C-style UTF8 string.
|
|
/// @returns The result.
|
|
template<typename N = UInt_64>
|
|
static Str<Char_16, N> To_16(const Char_8* const from, const N size = 0)
|
|
{
|
|
N rSize = size ? size : Str<Char_8, N>::Len(from);
|
|
|
|
const Byte* const data = (const Byte* const)from;
|
|
|
|
Str<Char_16, N> r(rSize);
|
|
|
|
N c = 0;
|
|
|
|
for (N i = 0; i < rSize; ++i)
|
|
{
|
|
if (data[i] >= 0b11110000 && i + 3 < rSize && data[i + 1] <= 0b10111111 && data[i + 2] <= 0b10111111 && data[i + 3] <= 0b10111111)
|
|
r[c++] = (0b00000011111111110000001111111111 &
|
|
((Char_16)(data[i++] & 0b00000111) << 23) |
|
|
((Char_16)(data[i++] & 0b00111111) << 18) |
|
|
((Char_16)(data[i++] & 0b00111111) << 12) |
|
|
((Char_16)(data[i++] & 0b00111111) << 6) |
|
|
(Char_16)(data[i] & 0b00111111)) |
|
|
0b11011000000000001101110000000000;
|
|
else if (data[i] >= 0b11100000 && i + 2 < rSize && data[i + 1] <= 0b10111111 && data[i + 2] <= 0b10111111)
|
|
r[c++] = ((Char_16)(data[i++] & 0b00001111) << 12) | ((Char_16)(data[i++] & 0b00111111) << 6) | (Char_16)(data[i] & 0b00111111);
|
|
else if (data[i] >= 0b11000000 && i + 1 < rSize && data[i + 1] <= 0b10111111)
|
|
r[c++] = (Char_16)(data[i++] & 0b00011111) << 6 | (Char_16 )(data[i] & 0b00111111);
|
|
else
|
|
r[c++] = (Char_16 )data[i];
|
|
}
|
|
|
|
r.Resize(c);
|
|
|
|
return r;
|
|
}
|
|
|
|
/// Converts the given UTF8 string object into UTF16.
|
|
/// @tparam N The number data type to use.
|
|
/// @param [in] from The given UTF8 string.
|
|
/// @returns The result.
|
|
template<typename N = UInt_64>
|
|
static Str<Char_16, N> To_16(const Str<Char_8, N>& from)
|
|
{
|
|
const Byte* const data = from.ToBytes();
|
|
|
|
Str<Char_16, N> r(from.Size());
|
|
|
|
N c = 0;
|
|
|
|
for (N i = 0; i < from.Size(); ++i)
|
|
{
|
|
if (data[i] >= 0b11110000 && i + 3 < from.Size() && data[i + 1] <= 0b10111111 && data[i + 2] <= 0b10111111 && data[i + 3] <= 0b10111111)
|
|
r[c++] = (0b00000011111111110000001111111111 &
|
|
((Char_16)(data[i++] & 0b00000111) << 23) |
|
|
((Char_16)(data[i++] & 0b00111111) << 18) |
|
|
((Char_16)(data[i++] & 0b00111111) << 12) |
|
|
((Char_16)(data[i++] & 0b00111111) << 6) |
|
|
(Char_16)(data[i] & 0b00111111)) |
|
|
0b11011000000000001101110000000000;
|
|
else if (data[i] >= 0b11100000 && i + 2 < from.Size() && data[i + 1] <= 0b10111111 && data[i + 2] <= 0b10111111)
|
|
r[c++] = ((Char_16)(data[i++] & 0b00001111) << 12) | ((Char_16)(data[i++] & 0b00111111) << 6) | (Char_16)(data[i] & 0b00111111);
|
|
else if (data[i] >= 0b11000000 && i + 1 < from.Size() && data[i + 1] <= 0b10111111)
|
|
r[c++] = (Char_16)(data[i++] & 0b00011111) << 6 | (Char_16 )(data[i] & 0b00111111);
|
|
else
|
|
r[c++] = (Char_16 )data[i];
|
|
}
|
|
|
|
r.Resize(c);
|
|
|
|
return r;
|
|
}
|
|
|
|
/// Converts the given UTF16 C-style string into UTF8.
|
|
/// @tparam N The number data type to use.
|
|
/// @param [in] from The given UTF16 string.
|
|
/// @param [in] size The size of the given C-style UTF8 string.
|
|
/// @returns The result.
|
|
template<typename N = UInt_64>
|
|
static Str<Char_8, N> To_8(const Char_16* const from, const N size = 0)
|
|
{
|
|
N rSize = size ? size : Str<Char_16, N>::Len(from);
|
|
|
|
Str<Char_8, N> r(rSize * sizeof(Char_16));
|
|
|
|
N c = 0;
|
|
|
|
for (N i = 0; i < rSize; ++i)
|
|
{
|
|
if (from[i] & 0b1101100000000000 && i + 1 < rSize && from[i] & 0b1101110000000000)
|
|
{
|
|
r[c++] = ((Byte*)&from[i])[1] & 00000111 | 0b11110000;
|
|
r[c++] = ((Byte*)&from[i])[0] >> 2 & 0b00111111 | 0b10000000;
|
|
r[c++] = ((Byte*)&from[i])[0] << 4 | (((Byte*)&from[i + 1])[1] & 0b00000011) << 2 | ((Byte*)&from[i + 1])[0] >> 6 & 0b00111111 | 0b10000000;
|
|
r[c++] = ((Byte*)&from[++i])[0] & 0b00111111 | 0b10000000;
|
|
}
|
|
else if (from[i] <= 0b11111111)
|
|
{
|
|
r[c++] = (Byte)from[i];
|
|
}
|
|
else if (from[i] > 0b11111111 && from[i] <= 0b0000011111111111)
|
|
{
|
|
r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 | 0b11000000;
|
|
r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
|
|
}
|
|
else if (from[i] > 0b0000011111111111)
|
|
{
|
|
r[c++] = ((Byte*)&from[i])[1] >> 4 | 0b11100000;
|
|
r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 & 0b00111111 | 0b10000000;
|
|
r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
|
|
}
|
|
}
|
|
|
|
r.Resize(c);
|
|
|
|
return r;
|
|
}
|
|
|
|
/// Converts the given UTF16 string object into UTF8.
|
|
/// @tparam N The number data type to use.
|
|
/// @param [in] from The given UTF16 string.
|
|
/// @returns The result.
|
|
template<typename N = UInt_64>
|
|
static Str<Char_8, N> To_8(const Str<Char_16, N>& from)
|
|
{
|
|
Str<Char_8, N> r(from.Size(true) * sizeof(Char_16));
|
|
|
|
N c = 0;
|
|
|
|
for (N i = 0; i < from.Size(); ++i)
|
|
{
|
|
if (from[i] & 0b1101100000000000 && i + 1 < from.Size() && from[i] & 0b1101110000000000)
|
|
{
|
|
r[c++] = ((Byte*)&from[i])[1] & 00000111 | 0b11110000;
|
|
r[c++] = ((Byte*)&from[i])[0] >> 2 & 0b00111111 | 0b10000000;
|
|
r[c++] = ((Byte*)&from[i])[0] << 4 | (((Byte*)&from[i + 1])[1] & 0b00000011) << 2 | ((Byte*)&from[i + 1])[0] >> 6 & 0b00111111 | 0b10000000;
|
|
r[c++] = ((Byte*)&from[++i])[0] & 0b00111111 | 0b10000000;
|
|
}
|
|
else if (from[i] <= 0b11111111)
|
|
{
|
|
r[c++] = (Byte)from[i];
|
|
}
|
|
else if (from[i] > 0b11111111 && from[i] <= 0b0000011111111111)
|
|
{
|
|
r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 | 0b11000000;
|
|
r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
|
|
}
|
|
else if (from[i] > 0b0000011111111111)
|
|
{
|
|
r[c++] = ((Byte*)&from[i])[1] >> 4 | 0b11100000;
|
|
r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 & 0b00111111 | 0b10000000;
|
|
r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
|
|
}
|
|
}
|
|
|
|
r.Resize(c);
|
|
|
|
return r;
|
|
}
|
|
|
|
/// Converts the given UTF32 C-style string into UTF8.
|
|
/// @tparam N The number data type to use.
|
|
/// @param [in] from The given UTF32 string.
|
|
/// @param [in] size The size of the give C-style UTF32 string.
|
|
/// @returns The result.
|
|
template<typename N = UInt_64>
|
|
static Str<Char_8, N> To_8(const Char_32* const from, const N size = 0)
|
|
{
|
|
N rSize = size ? size : Str<Char_32, N>::Len(from);
|
|
|
|
Str<Char_8, N> r(rSize * sizeof(Char_32));
|
|
|
|
N c = 0;
|
|
|
|
for (N i = 0; i < rSize; ++i)
|
|
{
|
|
if (from[i] <= 0b11111111)
|
|
{
|
|
r[c++] = (Char_8)from[i];
|
|
}
|
|
else if (from[i] > 0b11111111 && from[i] <= 0b0000011111111111)
|
|
{
|
|
r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 | 0b11000000;
|
|
r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
|
|
}
|
|
else if (from[i] > 0b0000011111111111 && from[i] <= 0b1111111111111111)
|
|
{
|
|
r[c++] = ((Byte*)&from[i])[2] << 2 | ((Byte*)&from[i])[1] >> 6 | 0b11100000;
|
|
r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 & 0b00111111 | 0b10000000;
|
|
r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
|
|
}
|
|
else if (from[i] > 0b1111111111111111)
|
|
{
|
|
r[c++] = ((Byte*)&from[i])[3] << 2 | ((Byte*)&from[i])[3] >> 2 & 0b00000111 | 0b11110000;
|
|
r[c++] = ((Byte*)&from[i])[2] << 2 | ((Byte*)&from[i])[2] >> 6 & 0b00111111 | 0b11100000;
|
|
r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[1] >> 6 & 0b00111111 | 0b10000000;
|
|
r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
|
|
}
|
|
}
|
|
|
|
r.Resize(c);
|
|
|
|
return r;
|
|
}
|
|
|
|
/// Converts the given UTF32 string object into UTF8.
|
|
/// @tparam N The number data type to use.
|
|
/// @param [in] from The given UTF32 string.
|
|
/// @returns The result.
|
|
template<typename N = UInt_64>
|
|
static Str<Char_8, N> To_8(const Str<Char_32, N>& from)
|
|
{
|
|
Str<Char_8, N> r(from.Size() * sizeof(Char_32));
|
|
|
|
N c = 0;
|
|
|
|
for (N i = 0; i < from.Size(); ++i)
|
|
{
|
|
if (from[i] <= 0b11111111)
|
|
{
|
|
r[c++] = (Char_8)from[i];
|
|
}
|
|
else if (from[i] > 0b11111111 && from[i] <= 0b0000011111111111)
|
|
{
|
|
r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 | 0b11000000;
|
|
r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
|
|
}
|
|
else if (from[i] > 0b0000011111111111 && from[i] <= 0b1111111111111111)
|
|
{
|
|
r[c++] = ((Byte*)&from[i])[2] << 2 | ((Byte*)&from[i])[1] >> 6 | 0b11100000;
|
|
r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 & 0b00111111 | 0b10000000;
|
|
r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
|
|
}
|
|
else if (from[i] > 0b1111111111111111)
|
|
{
|
|
r[c++] = ((Byte*)&from[i])[3] << 2 | ((Byte*)&from[i])[3] >> 2 & 0b00000111 | 0b11110000;
|
|
r[c++] = ((Byte*)&from[i])[2] << 2 | ((Byte*)&from[i])[2] >> 6 & 0b00111111 | 0b11100000;
|
|
r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[1] >> 6 & 0b00111111 | 0b10000000;
|
|
r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
|
|
}
|
|
}
|
|
|
|
r.Resize(c);
|
|
|
|
return r;
|
|
}
|
|
};
|
|
} |