EHS
Loading...
Searching...
No Matches
UTF.h
Go to the documentation of this file.
1#pragma once
2
3#include "EHS.h"
4#include "Str.h"
5
6namespace ehs
7{
/// Identifies a Unicode transformation format.
enum class CharEncoding
{
    UTF_32,
    UTF_16,
    UTF_8
};
14
16 class EHS_LIB_IO UTF
17 {
18 public:
24 template<typename N = UInt_64>
25 static Str<Char_32, N> To_32(const Char_16* const from, const N size = 0)
26 {
27 Str<Char_32, N> result((size) ? size : Str<Char_16, N>::Len(from));
28
29 N index = 0;
30
31 for (N i = 0; i < result.Size(); ++i)
32 {
33 if (i != result.Size() - 1)
34 {
35 if ((from[i] & 0xDC00) == 0xDC00 && (from[i + 1] & 0xD800) == 0xD800)
36 {
37 result[index++] = (((from[i] - 0xD800) * 0x400) | (from[i] - 0xDC00)) + 0x10000;
38
39 continue;
40 }
41 }
42
43 result[index++] = (Char_32)from[i];
44 }
45
46 result.Resize(index);
47
48 return result;
49 }
50
55 template<typename N = UInt_64>
57 {
58 Str<Char_32, N> result(from.Size());
59
60 N index = 0;
61
62 for (N i = 0; i < from.Size(); ++i)
63 {
64 if (i != from.Size() - 1)
65 {
66 if ((from[i] & 0xDC00) == 0xDC00 && (from[i + 1] & 0xD800) == 0xD800)
67 {
68 result[index++] = (((from[i] - 0xD800) * 0x400) | (from[i] - 0xDC00)) + 0x10000;
69
70 continue;
71 }
72 }
73
74 result[index++] = (Char_32)from[i];
75 }
76
77 result.Resize(index);
78
79 return result;
80 }
81
87 template<typename N = UInt_64>
88 static Str<Char_32, N> To_32(const Char_8* from, const N size = 0)
89 {
90 N rSize = size ? size : Str<Char_8, N>::Len(from);
91
92 Str<Char_32, N> r(rSize);
93
94 N c = 0;
95
96 for (N i = 0; i < rSize; ++i)
97 {
98 if (from[i] <= 0b11110111 && i + 3 < rSize && from[i + 1] <= 0b10111111 && from[i + 2] <= 0b10111111 && from[i + 3] <= 0b10111111)
99 r[c++] = (Char_32)(from[i++] & 0b00000111) << 18 | (Char_32)(from[i++] & 0b00111111) << 12 | (Char_32)(from[i++] & 0b00111111) << 6 | (Char_32)(from[i] & 0b00111111);
100 else if (from[i] <= 0b11101111 && i + 2 < rSize && from[i + 1] <= 0b10111111 && from[i + 2] <= 0b10111111)
101 r[c++] = (Char_32)(from[i++] & 0b00001111) << 12 | (Char_32)(from[i++] & 0b00111111) << 6 | ((Char_32)from[i] & 0b00111111);
102 else if (from[i] <= 0b11011111 && i + 1 < rSize && from[i + 1] <= 0b10111111)
103 r[c++] = (Char_32)(from[i++] & 0b00011111) << 6 | (Char_32)(from[i] & 0b00111111);
104 else
105 r[c++] = (Char_32)from[i];
106 }
107
108 r.Resize(c);
109
110 return r;
111 }
112
117 template<typename N = UInt_64>
119 {
120 Str<Char_32, N> r(from.Size());
121
122 N c = 0;
123
124 for (N i = 0; i < from.Size(); ++i)
125 {
126 if (from[i] <= 0b11110111 && i + 3 < from.Size() && from[i + 1] <= 0b10111111 && from[i + 2] <= 0b10111111 && from[i + 3] <= 0b10111111)
127 r[c++] = (Char_32)(from[i++] & 0b00000111) << 18 | (Char_32)(from[i++] & 0b00111111) << 12 | (Char_32)(from[i++] & 0b00111111) << 6 | (Char_32)(from[i] & 0b00111111);
128 else if (from[i] <= 0b11101111 && i + 2 < from.Size() && from[i + 1] <= 0b10111111 && from[i + 2] <= 0b10111111)
129 r[c++] = (Char_32)(from[i++] & 0b00001111) << 12 | (Char_32)(from[i++] & 0b00111111) << 6 | ((Char_32)from[i] & 0b00111111);
130 else if (from[i] <= 0b11011111 && i + 1 < from.Size() && from[i + 1] <= 0b10111111)
131 r[c++] = (Char_32)(from[i++] & 0b00011111) << 6 | (Char_32)(from[i] & 0b00111111);
132 else
133 r[c++] = (Char_32)from[i];
134 }
135
136 r.Resize(c);
137
138 return r;
139 }
140
146 template<typename N = UInt_64>
147 static Str<Char_16, N> To_16(const Char_32* const from, const N size = 0)
148 {
149 N rSize = size ? size : Str<Char_32, N>::Len(from);
150
151 Str<Char_16, N> result(rSize * sizeof(Char_16));
152
153 N index = 0;
154
155 for (N i = 0; i < rSize; ++i)
156 {
157 if (from[i] <= 0xFFFF)
158 {
159 result[index++] = (Char_16)from[i];
160 }
161 else
162 {
163 Char_32 t = from[i] - 0x10000;
164
165 result[index++] |= (t >> 10) + 0xD800;
166 result[index++] |= t + 0xDC00;
167 }
168 }
169
170 result.Resize(index);
171
172 return result;
173 }
174
179 template<typename N = UInt_64>
181 {
182 Str<Char_16, N> result(from.Size() * sizeof(Char_16));
183
184 N index = 0;
185
186 for (N i = 0; i < from.Size(); ++i)
187 {
188 if (from[i] <= 0xFFFF)
189 {
190 result[index++] = (Char_16)from[i];
191 }
192 else
193 {
194 Char_32 t = from[i] - 0x10000;
195
196 result[index++] |= (t >> 10) + 0xD800;
197 result[index++] |= t + 0xDC00;
198 }
199 }
200
201 result.Resize(index);
202
203 return result;
204 }
205
211 template<typename N = UInt_64>
212 static Str<Char_16, N> To_16(const Char_8* const from, const N size = 0)
213 {
214 N rSize = size ? size : Str<Char_8, N>::Len(from);
215
216 const Byte* const data = (const Byte* const)from;
217
218 Str<Char_16, N> r(rSize);
219
220 N c = 0;
221
222 for (N i = 0; i < rSize; ++i)
223 {
224 if (data[i] >= 0b11110000 && i + 3 < rSize && data[i + 1] <= 0b10111111 && data[i + 2] <= 0b10111111 && data[i + 3] <= 0b10111111)
225 r[c++] = (0b00000011111111110000001111111111 &
226 ((Char_16)(data[i++] & 0b00000111) << 23) |
227 ((Char_16)(data[i++] & 0b00111111) << 18) |
228 ((Char_16)(data[i++] & 0b00111111) << 12) |
229 ((Char_16)(data[i++] & 0b00111111) << 6) |
230 (Char_16)(data[i] & 0b00111111)) |
231 0b11011000000000001101110000000000;
232 else if (data[i] >= 0b11100000 && i + 2 < rSize && data[i + 1] <= 0b10111111 && data[i + 2] <= 0b10111111)
233 r[c++] = ((Char_16)(data[i++] & 0b00001111) << 12) | ((Char_16)(data[i++] & 0b00111111) << 6) | (Char_16)(data[i] & 0b00111111);
234 else if (data[i] >= 0b11000000 && i + 1 < rSize && data[i + 1] <= 0b10111111)
235 r[c++] = (Char_16)(data[i++] & 0b00011111) << 6 | (Char_16 )(data[i] & 0b00111111);
236 else
237 r[c++] = (Char_16 )data[i];
238 }
239
240 r.Resize(c);
241
242 return r;
243 }
244
249 template<typename N = UInt_64>
251 {
252 const Byte* const data = from.ToBytes();
253
254 Str<Char_16, N> r(from.Size());
255
256 N c = 0;
257
258 for (N i = 0; i < from.Size(); ++i)
259 {
260 if (data[i] >= 0b11110000 && i + 3 < from.Size() && data[i + 1] <= 0b10111111 && data[i + 2] <= 0b10111111 && data[i + 3] <= 0b10111111)
261 r[c++] = (0b00000011111111110000001111111111 &
262 ((Char_16)(data[i++] & 0b00000111) << 23) |
263 ((Char_16)(data[i++] & 0b00111111) << 18) |
264 ((Char_16)(data[i++] & 0b00111111) << 12) |
265 ((Char_16)(data[i++] & 0b00111111) << 6) |
266 (Char_16)(data[i] & 0b00111111)) |
267 0b11011000000000001101110000000000;
268 else if (data[i] >= 0b11100000 && i + 2 < from.Size() && data[i + 1] <= 0b10111111 && data[i + 2] <= 0b10111111)
269 r[c++] = ((Char_16)(data[i++] & 0b00001111) << 12) | ((Char_16)(data[i++] & 0b00111111) << 6) | (Char_16)(data[i] & 0b00111111);
270 else if (data[i] >= 0b11000000 && i + 1 < from.Size() && data[i + 1] <= 0b10111111)
271 r[c++] = (Char_16)(data[i++] & 0b00011111) << 6 | (Char_16 )(data[i] & 0b00111111);
272 else
273 r[c++] = (Char_16 )data[i];
274 }
275
276 r.Resize(c);
277
278 return r;
279 }
280
286 template<typename N = UInt_64>
287 static Str<Char_8, N> To_8(const Char_16* const from, const N size = 0)
288 {
289 N rSize = size ? size : Str<Char_16, N>::Len(from);
290
291 Str<Char_8, N> r(rSize * sizeof(Char_16));
292
293 N c = 0;
294
295 for (N i = 0; i < rSize; ++i)
296 {
297 if (from[i] & 0b1101100000000000 && i + 1 < rSize && from[i] & 0b1101110000000000)
298 {
299 r[c++] = ((Byte*)&from[i])[1] & 00000111 | 0b11110000;
300 r[c++] = ((Byte*)&from[i])[0] >> 2 & 0b00111111 | 0b10000000;
301 r[c++] = ((Byte*)&from[i])[0] << 4 | (((Byte*)&from[i + 1])[1] & 0b00000011) << 2 | ((Byte*)&from[i + 1])[0] >> 6 & 0b00111111 | 0b10000000;
302 r[c++] = ((Byte*)&from[++i])[0] & 0b00111111 | 0b10000000;
303 }
304 else if (from[i] <= 0b11111111)
305 {
306 r[c++] = (Byte)from[i];
307 }
308 else if (from[i] > 0b11111111 && from[i] <= 0b0000011111111111)
309 {
310 r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 | 0b11000000;
311 r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
312 }
313 else if (from[i] > 0b0000011111111111)
314 {
315 r[c++] = ((Byte*)&from[i])[1] >> 4 | 0b11100000;
316 r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 & 0b00111111 | 0b10000000;
317 r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
318 }
319 }
320
321 r.Resize(c);
322
323 return r;
324 }
325
330 template<typename N = UInt_64>
332 {
333 Str<Char_8, N> r(from.Size(true) * sizeof(Char_16));
334
335 N c = 0;
336
337 for (N i = 0; i < from.Size(); ++i)
338 {
339 if (from[i] & 0b1101100000000000 && i + 1 < from.Size() && from[i] & 0b1101110000000000)
340 {
341 r[c++] = ((Byte*)&from[i])[1] & 00000111 | 0b11110000;
342 r[c++] = ((Byte*)&from[i])[0] >> 2 & 0b00111111 | 0b10000000;
343 r[c++] = ((Byte*)&from[i])[0] << 4 | (((Byte*)&from[i + 1])[1] & 0b00000011) << 2 | ((Byte*)&from[i + 1])[0] >> 6 & 0b00111111 | 0b10000000;
344 r[c++] = ((Byte*)&from[++i])[0] & 0b00111111 | 0b10000000;
345 }
346 else if (from[i] <= 0b11111111)
347 {
348 r[c++] = (Byte)from[i];
349 }
350 else if (from[i] > 0b11111111 && from[i] <= 0b0000011111111111)
351 {
352 r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 | 0b11000000;
353 r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
354 }
355 else if (from[i] > 0b0000011111111111)
356 {
357 r[c++] = ((Byte*)&from[i])[1] >> 4 | 0b11100000;
358 r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 & 0b00111111 | 0b10000000;
359 r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
360 }
361 }
362
363 r.Resize(c);
364
365 return r;
366 }
367
373 template<typename N = UInt_64>
374 static Str<Char_8, N> To_8(const Char_32* const from, const N size = 0)
375 {
376 N rSize = size ? size : Str<Char_32, N>::Len(from);
377
378 Str<Char_8, N> r(rSize * sizeof(Char_32));
379
380 N c = 0;
381
382 for (N i = 0; i < rSize; ++i)
383 {
384 if (from[i] <= 0b11111111)
385 {
386 r[c++] = (Char_8)from[i];
387 }
388 else if (from[i] > 0b11111111 && from[i] <= 0b0000011111111111)
389 {
390 r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 | 0b11000000;
391 r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
392 }
393 else if (from[i] > 0b0000011111111111 && from[i] <= 0b1111111111111111)
394 {
395 r[c++] = ((Byte*)&from[i])[2] << 2 | ((Byte*)&from[i])[1] >> 6 | 0b11100000;
396 r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 & 0b00111111 | 0b10000000;
397 r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
398 }
399 else if (from[i] > 0b1111111111111111)
400 {
401 r[c++] = ((Byte*)&from[i])[3] << 2 | ((Byte*)&from[i])[3] >> 2 & 0b00000111 | 0b11110000;
402 r[c++] = ((Byte*)&from[i])[2] << 2 | ((Byte*)&from[i])[2] >> 6 & 0b00111111 | 0b11100000;
403 r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[1] >> 6 & 0b00111111 | 0b10000000;
404 r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
405 }
406 }
407
408 r.Resize(c);
409
410 return r;
411 }
412
417 template<typename N = UInt_64>
419 {
420 Str<Char_8, N> r(from.Size() * sizeof(Char_32));
421
422 N c = 0;
423
424 for (N i = 0; i < from.Size(); ++i)
425 {
426 if (from[i] <= 0b11111111)
427 {
428 r[c++] = (Char_8)from[i];
429 }
430 else if (from[i] > 0b11111111 && from[i] <= 0b0000011111111111)
431 {
432 r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 | 0b11000000;
433 r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
434 }
435 else if (from[i] > 0b0000011111111111 && from[i] <= 0b1111111111111111)
436 {
437 r[c++] = ((Byte*)&from[i])[2] << 2 | ((Byte*)&from[i])[1] >> 6 | 0b11100000;
438 r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[0] >> 6 & 0b00111111 | 0b10000000;
439 r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
440 }
441 else if (from[i] > 0b1111111111111111)
442 {
443 r[c++] = ((Byte*)&from[i])[3] << 2 | ((Byte*)&from[i])[3] >> 2 & 0b00000111 | 0b11110000;
444 r[c++] = ((Byte*)&from[i])[2] << 2 | ((Byte*)&from[i])[2] >> 6 & 0b00111111 | 0b11100000;
445 r[c++] = ((Byte*)&from[i])[1] << 2 | ((Byte*)&from[i])[1] >> 6 & 0b00111111 | 0b10000000;
446 r[c++] = ((Byte*)&from[i])[0] & 0b00111111 | 0b10000000;
447 }
448 }
449
450 r.Resize(c);
451
452 return r;
453 }
454 };
455}
Definition Str.h:29
static N Len(const T *const str)
Definition Str.h:1869
N Size(bool inBytes=false) const
Definition Str.h:523
const Byte * ToBytes() const
Definition Str.h:787
void Resize(const N newSize)
Definition Str.h:500
A helper class for converting between UTF8, 16 and 32.
Definition UTF.h:17
static Str< Char_32, N > To_32(const Str< Char_16, N > &from)
Definition UTF.h:56
static Str< Char_32, N > To_32(const Char_16 *const from, const N size=0)
Definition UTF.h:25
static Str< Char_8, N > To_8(const Char_16 *const from, const N size=0)
Definition UTF.h:287
static Str< Char_32, N > To_32(const Str< Char_8, N > &from)
Definition UTF.h:118
static Str< Char_16, N > To_16(const Str< Char_8, N > &from)
Definition UTF.h:250
static Str< Char_16, N > To_16(const Char_8 *const from, const N size=0)
Definition UTF.h:212
static Str< Char_8, N > To_8(const Char_32 *const from, const N size=0)
Definition UTF.h:374
static Str< Char_16, N > To_16(const Str< Char_32, N > &from)
Definition UTF.h:180
static Str< Char_16, N > To_16(const Char_32 *const from, const N size=0)
Definition UTF.h:147
static Str< Char_8, N > To_8(const Str< Char_32, N > &from)
Definition UTF.h:418
static Str< Char_8, N > To_8(const Str< Char_16, N > &from)
Definition UTF.h:331
static Str< Char_32, N > To_32(const Char_8 *from, const N size=0)
Definition UTF.h:88
Definition Anchor.h:6
char32_t Char_32
Definition Types.h:42
char Char_8
Definition Types.h:40
unsigned char Byte
Definition Types.h:39
wchar_t Char_16
Definition Types.h:41
CharEncoding
Definition UTF.h:9
@ UTF_16
Definition UTF.h:11
@ UTF_32
Definition UTF.h:10
@ UTF_8
Definition UTF.h:12