1 // Compiler implementation of the D programming language
2 // Copyright (c) 1999-2015 by Digital Mars
3 // All Rights Reserved
4 // written by Walter Bright
5 // http://www.digitalmars.com
6 // Distributed under the Boost Software License, Version 1.0.
7 // http://www.boost.org/LICENSE_1_0.txt
8 
9 module ddmd.utf;
10 
11 nothrow pure @nogc:
12 
/**
 * Tests whether c is accepted as a Unicode code point.
 *
 * A value is rejected when it is larger than 0x10FFFF, when it lies in the
 * UTF-16 surrogate range [0xD800,0xDFFF], or when it is one of the two
 * non-characters 0xFFFE and 0xFFFF. Other code points ending in 0xFFFE or
 * 0xFFFF (for example 0x1FFFE) are accepted by this check.
 *
 * Params:
 *      c = code point to test
 * Returns:
 *      true if none of the rejection rules applies, false otherwise
 */
bool utf_isValidDchar(dchar c)
{
    // TODO: Whether non-char code points should be rejected is pending review
    // beyond the largest character code point
    const bool beyondCodeSpace = c > 0x10FFFF;
    // surrogate pairs
    const bool isSurrogate = 0xD800 <= c && c <= 0xDFFF;
    // non-characters: the mask drops only the lowest bit, so inside the
    // code space this matches 0xFFFE and 0xFFFF and nothing else
    const bool isNonCharacter = (c & 0xFFFFFE) == 0x00FFFE;
    return !(beyondCodeSpace || isSurrogate || isNonCharacter);
}
30 
/*******************************
 * Reports whether c is a unicode alpha, i.e. whether it falls inside one of
 * the inclusive [first, last] ranges of the table below.
 * The table comes from C99 Appendix D. It starts at 0x00AA, so ASCII
 * characters are not covered, and it ends at 0xD7A3, so every code point
 * above that yields false.
 * Params:
 *      c = code point to classify
 * Returns:
 *      true if c lies in one of the table ranges, false otherwise
 */
bool isUniAlpha(dchar c)
{
    // Inclusive ranges, sorted ascending and non-overlapping (required by the binary search)
    static immutable wchar[2][] ALPHA_TABLE =
    [
        [0x00AA, 0x00AA], [0x00B5, 0x00B5], [0x00B7, 0x00B7], [0x00BA, 0x00BA],
        [0x00C0, 0x00D6], [0x00D8, 0x00F6], [0x00F8, 0x01F5], [0x01FA, 0x0217],
        [0x0250, 0x02A8], [0x02B0, 0x02B8], [0x02BB, 0x02BB], [0x02BD, 0x02C1],
        [0x02D0, 0x02D1], [0x02E0, 0x02E4], [0x037A, 0x037A], [0x0386, 0x0386],
        [0x0388, 0x038A], [0x038C, 0x038C], [0x038E, 0x03A1], [0x03A3, 0x03CE],
        [0x03D0, 0x03D6], [0x03DA, 0x03DA], [0x03DC, 0x03DC], [0x03DE, 0x03DE],
        [0x03E0, 0x03E0], [0x03E2, 0x03F3], [0x0401, 0x040C], [0x040E, 0x044F],
        [0x0451, 0x045C], [0x045E, 0x0481], [0x0490, 0x04C4], [0x04C7, 0x04C8],
        [0x04CB, 0x04CC], [0x04D0, 0x04EB], [0x04EE, 0x04F5], [0x04F8, 0x04F9],
        [0x0531, 0x0556], [0x0559, 0x0559], [0x0561, 0x0587], [0x05B0, 0x05B9],
        [0x05BB, 0x05BD], [0x05BF, 0x05BF], [0x05C1, 0x05C2], [0x05D0, 0x05EA],
        [0x05F0, 0x05F2], [0x0621, 0x063A], [0x0640, 0x0652], [0x0660, 0x0669],
        [0x0670, 0x06B7], [0x06BA, 0x06BE], [0x06C0, 0x06CE], [0x06D0, 0x06DC],
        [0x06E5, 0x06E8], [0x06EA, 0x06ED], [0x06F0, 0x06F9], [0x0901, 0x0903],
        [0x0905, 0x0939], [0x093D, 0x094D], [0x0950, 0x0952], [0x0958, 0x0963],
        [0x0966, 0x096F], [0x0981, 0x0983], [0x0985, 0x098C], [0x098F, 0x0990],
        [0x0993, 0x09A8], [0x09AA, 0x09B0], [0x09B2, 0x09B2], [0x09B6, 0x09B9],
        [0x09BE, 0x09C4], [0x09C7, 0x09C8], [0x09CB, 0x09CD], [0x09DC, 0x09DD],
        [0x09DF, 0x09E3], [0x09E6, 0x09F1], [0x0A02, 0x0A02], [0x0A05, 0x0A0A],
        [0x0A0F, 0x0A10], [0x0A13, 0x0A28], [0x0A2A, 0x0A30], [0x0A32, 0x0A33],
        [0x0A35, 0x0A36], [0x0A38, 0x0A39], [0x0A3E, 0x0A42], [0x0A47, 0x0A48],
        [0x0A4B, 0x0A4D], [0x0A59, 0x0A5C], [0x0A5E, 0x0A5E], [0x0A66, 0x0A6F],
        [0x0A74, 0x0A74], [0x0A81, 0x0A83], [0x0A85, 0x0A8B], [0x0A8D, 0x0A8D],
        [0x0A8F, 0x0A91], [0x0A93, 0x0AA8], [0x0AAA, 0x0AB0], [0x0AB2, 0x0AB3],
        [0x0AB5, 0x0AB9], [0x0ABD, 0x0AC5], [0x0AC7, 0x0AC9], [0x0ACB, 0x0ACD],
        [0x0AD0, 0x0AD0], [0x0AE0, 0x0AE0], [0x0AE6, 0x0AEF], [0x0B01, 0x0B03],
        [0x0B05, 0x0B0C], [0x0B0F, 0x0B10], [0x0B13, 0x0B28], [0x0B2A, 0x0B30],
        [0x0B32, 0x0B33], [0x0B36, 0x0B39], [0x0B3D, 0x0B43], [0x0B47, 0x0B48],
        [0x0B4B, 0x0B4D], [0x0B5C, 0x0B5D], [0x0B5F, 0x0B61], [0x0B66, 0x0B6F],
        [0x0B82, 0x0B83], [0x0B85, 0x0B8A], [0x0B8E, 0x0B90], [0x0B92, 0x0B95],
        [0x0B99, 0x0B9A], [0x0B9C, 0x0B9C], [0x0B9E, 0x0B9F], [0x0BA3, 0x0BA4],
        [0x0BA8, 0x0BAA], [0x0BAE, 0x0BB5], [0x0BB7, 0x0BB9], [0x0BBE, 0x0BC2],
        [0x0BC6, 0x0BC8], [0x0BCA, 0x0BCD], [0x0BE7, 0x0BEF], [0x0C01, 0x0C03],
        [0x0C05, 0x0C0C], [0x0C0E, 0x0C10], [0x0C12, 0x0C28], [0x0C2A, 0x0C33],
        [0x0C35, 0x0C39], [0x0C3E, 0x0C44], [0x0C46, 0x0C48], [0x0C4A, 0x0C4D],
        [0x0C60, 0x0C61], [0x0C66, 0x0C6F], [0x0C82, 0x0C83], [0x0C85, 0x0C8C],
        [0x0C8E, 0x0C90], [0x0C92, 0x0CA8], [0x0CAA, 0x0CB3], [0x0CB5, 0x0CB9],
        [0x0CBE, 0x0CC4], [0x0CC6, 0x0CC8], [0x0CCA, 0x0CCD], [0x0CDE, 0x0CDE],
        [0x0CE0, 0x0CE1], [0x0CE6, 0x0CEF], [0x0D02, 0x0D03], [0x0D05, 0x0D0C],
        [0x0D0E, 0x0D10], [0x0D12, 0x0D28], [0x0D2A, 0x0D39], [0x0D3E, 0x0D43],
        [0x0D46, 0x0D48], [0x0D4A, 0x0D4D], [0x0D60, 0x0D61], [0x0D66, 0x0D6F],
        [0x0E01, 0x0E3A], [0x0E40, 0x0E5B], [0x0E81, 0x0E82], [0x0E84, 0x0E84],
        [0x0E87, 0x0E88], [0x0E8A, 0x0E8A], [0x0E8D, 0x0E8D], [0x0E94, 0x0E97],
        [0x0E99, 0x0E9F], [0x0EA1, 0x0EA3], [0x0EA5, 0x0EA5], [0x0EA7, 0x0EA7],
        [0x0EAA, 0x0EAB], [0x0EAD, 0x0EAE], [0x0EB0, 0x0EB9], [0x0EBB, 0x0EBD],
        [0x0EC0, 0x0EC4], [0x0EC6, 0x0EC6], [0x0EC8, 0x0ECD], [0x0ED0, 0x0ED9],
        [0x0EDC, 0x0EDD], [0x0F00, 0x0F00], [0x0F18, 0x0F19], [0x0F20, 0x0F33],
        [0x0F35, 0x0F35], [0x0F37, 0x0F37], [0x0F39, 0x0F39], [0x0F3E, 0x0F47],
        [0x0F49, 0x0F69], [0x0F71, 0x0F84], [0x0F86, 0x0F8B], [0x0F90, 0x0F95],
        [0x0F97, 0x0F97], [0x0F99, 0x0FAD], [0x0FB1, 0x0FB7], [0x0FB9, 0x0FB9],
        [0x10A0, 0x10C5], [0x10D0, 0x10F6], [0x1E00, 0x1E9B], [0x1EA0, 0x1EF9],
        [0x1F00, 0x1F15], [0x1F18, 0x1F1D], [0x1F20, 0x1F45], [0x1F48, 0x1F4D],
        [0x1F50, 0x1F57], [0x1F59, 0x1F59], [0x1F5B, 0x1F5B], [0x1F5D, 0x1F5D],
        [0x1F5F, 0x1F7D], [0x1F80, 0x1FB4], [0x1FB6, 0x1FBC], [0x1FBE, 0x1FBE],
        [0x1FC2, 0x1FC4], [0x1FC6, 0x1FCC], [0x1FD0, 0x1FD3], [0x1FD6, 0x1FDB],
        [0x1FE0, 0x1FEC], [0x1FF2, 0x1FF4], [0x1FF6, 0x1FFC], [0x203F, 0x2040],
        [0x207F, 0x207F], [0x2102, 0x2102], [0x2107, 0x2107], [0x210A, 0x2113],
        [0x2115, 0x2115], [0x2118, 0x211D], [0x2124, 0x2124], [0x2126, 0x2126],
        [0x2128, 0x2128], [0x212A, 0x2131], [0x2133, 0x2138], [0x2160, 0x2182],
        [0x3005, 0x3007], [0x3021, 0x3029], [0x3041, 0x3093], [0x309B, 0x309C],
        [0x30A1, 0x30F6], [0x30FB, 0x30FC], [0x3105, 0x312C], [0x4E00, 0x9FA5],
        [0xAC00, 0xD7A3]
    ];

    // Quick reject: c lies below the first range or above the last one
    if (c < ALPHA_TABLE[0][0] || ALPHA_TABLE[$ - 1][1] < c)
        return false;

    // Binary search over the half-open index interval [lo, hi)
    size_t lo = 0;
    size_t hi = ALPHA_TABLE.length;
    while (lo < hi)
    {
        const size_t probe = lo + (hi - lo) / 2;
        if (c < ALPHA_TABLE[probe][0])
            hi = probe; // c can only be in a range before probe
        else if (ALPHA_TABLE[probe][1] < c)
            lo = probe + 1; // c can only be in a range after probe
        else
            return true; // ALPHA_TABLE[probe][0] <= c <= ALPHA_TABLE[probe][1]
    }
    return false;
}
305 
/**
 * Returns the number of UTF-8 code units needed to encode c.
 * Params:
 *      c = code point; values above 0x10FFFF trip assert(false)
 * Returns:
 *      1, 2, 3 or 4
 */
int utf_codeLengthChar(dchar c)
{
    if (c <= 0x7F)
        return 1;
    if (c <= 0x7FF)
        return 2;
    if (c <= 0xFFFF)
        return 3;
    if (c <= 0x10FFFF)
        return 4;
    // outside the Unicode code space: no encoding length exists
    assert(false);
}
313 
/**
 * Returns the number of UTF-16 code units needed to encode c:
 * 2 when c is above 0xFFFF (a surrogate pair is required), otherwise 1.
 */
int utf_codeLengthWchar(dchar c)
{
    if (c > 0xFFFF)
        return 2;
    return 1;
}
318 
/**
 * Returns the code length of c in code units for the given encoding.
 * Params:
 *      sz = size of a code unit in the encoding: 1 = utf8, 2 = utf16, 4 = utf32
 *      c = code point to measure
 * Returns:
 *      the result of utf_codeLengthChar for sz 1, of utf_codeLengthWchar
 *      for sz 2, and 1 otherwise (sz is then asserted to be 4)
 */
int utf_codeLength(int sz, dchar c)
{
    switch (sz)
    {
    case 1:
        return utf_codeLengthChar(c);
    case 2:
        return utf_codeLengthWchar(c);
    default:
        // utf32: every code point is exactly one code unit
        assert(sz == 4);
        return 1;
    }
}
332 
/**
 * Encodes c as UTF-8 and stores the code units at s[0 .. n], where n is
 * between 1 and 4 depending on the magnitude of c. Nothing is written past
 * the encoded sequence (no terminator).
 * Params:
 *      s = destination, asserted non-null; up to s[3] may be written
 *      c = code point to encode, asserted valid per utf_isValidDchar
 */
void utf_encodeChar(char* s, dchar c)
{
    assert(s !is null);
    assert(utf_isValidDchar(c));

    // 0xxxxxxx
    if (c <= 0x7F)
    {
        s[0] = cast(char)c;
        return;
    }

    // Every multi-unit form ends with the low six bits of c
    const char last = cast(char)(0x80 | (c & 0x3F));

    // 110xxxxx 10xxxxxx
    if (c <= 0x07FF)
    {
        s[0] = cast(char)(0xC0 | (c >> 6));
        s[1] = last;
        return;
    }

    // Bits 6..11 of c as a continuation unit
    const char middle = cast(char)(0x80 | ((c >> 6) & 0x3F));

    // 1110xxxx 10xxxxxx 10xxxxxx
    if (c <= 0xFFFF)
    {
        s[0] = cast(char)(0xE0 | (c >> 12));
        s[1] = middle;
        s[2] = last;
        return;
    }

    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    if (c <= 0x10FFFF)
    {
        s[0] = cast(char)(0xF0 | (c >> 18));
        s[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
        s[2] = middle;
        s[3] = last;
        return;
    }

    assert(0);
}
362 
/**
 * Encodes c as UTF-16 and stores the code units at s: one unit when
 * c <= 0xFFFF, otherwise a high/low surrogate pair in s[0] and s[1].
 * Params:
 *      s = destination, asserted non-null; up to s[1] may be written
 *      c = code point to encode, asserted valid per utf_isValidDchar
 */
void utf_encodeWchar(wchar* s, dchar c)
{
    assert(s !is null);
    assert(utf_isValidDchar(c));

    if (c > 0xFFFF)
    {
        // Split the 20-bit offset above 0x10000 into two 10-bit halves
        const uint offset = c - 0x010000;
        s[0] = cast(wchar)(((offset >> 10) & 0x03FF) + 0xD800);
        s[1] = cast(wchar)((offset & 0x03FF) + 0xDC00);
        return;
    }

    s[0] = cast(wchar)c;
}
377 
/**
 * Encodes c into the buffer s using the encoding selected by sz.
 * Params:
 *      sz = size of a code unit in the encoding: 1 = utf8, 2 = utf16, 4 = utf32
 *      s = destination, reinterpreted as char*, wchar* or dchar* according to sz
 *      c = code point to encode
 */
void utf_encode(int sz, void* s, dchar c)
{
    switch (sz)
    {
    case 1:
        utf_encodeChar(cast(char*)s, c);
        break;
    case 2:
        utf_encodeWchar(cast(wchar*)s, c);
        break;
    default:
        // utf32: the code point is stored as is
        assert(sz == 4);
        *(cast(dchar*)s) = c;
        break;
    }
}
390 
/********************************************
 * Decode a UTF-8 sequence as a single UTF-32 code point.
 *
 * On success ridx is advanced past the whole sequence and rresult holds the
 * decoded code point. On any error ridx is advanced by exactly one code unit
 * and rresult holds the first code unit of the attempted sequence.
 * Params:
 *      s = UTF-8 sequence, asserted non-null
 *      len = number of code units in s[]
 *      ridx = starting index in s[] (asserted < len), updated to reflect number of code units decoded
 *      rresult = set to character decoded
 * Returns:
 *      null on success, otherwise error message string
 */
immutable(char*) utf_decodeChar(const(char)* s, size_t len, ref size_t ridx, out dchar rresult)
{
    // UTF-8 decoding errors
    static immutable char* UTF8_DECODE_OK = null; // no error
    static immutable char* UTF8_DECODE_OUTSIDE_CODE_SPACE = "Outside Unicode code space";
    static immutable char* UTF8_DECODE_TRUNCATED_SEQUENCE = "Truncated UTF-8 sequence";
    static immutable char* UTF8_DECODE_OVERLONG = "Overlong UTF-8 sequence";
    static immutable char* UTF8_DECODE_INVALID_TRAILER = "Invalid trailing code unit";
    static immutable char* UTF8_DECODE_INVALID_CODE_POINT = "Invalid code point decoded";

    /* Sequence length indexed by the first code unit. Of the following
     * encodings only the 1 to 4 unit forms are accepted:
     *      0xxxxxxx
     *      110xxxxx 10xxxxxx
     *      1110xxxx 10xxxxxx 10xxxxxx
     *      11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     *      111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     *      1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     * 0xFF marks units that cannot start a sequence.
     */
    static immutable uint[] UTF8_STRIDE =
    [
        // 0x00 - 0x7F: 0xxxxxxx
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        // 0x80 - 0xBF: 10xxxxxx
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        // 0xC0 - 0xDF: 110xxxxx
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        // 0xE0 - 0xEF: 1110xxxx
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        // 0xF0 - 0xFF: 11110xxx, 111110xx, 1111110x, then 0xFE and 0xFF
        4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0xFF, 0xFF
    ];

    assert(s !is null);
    const size_t start = ridx;
    // Pre-stage the index for ASCII and error cases: consume one code unit
    ridx = start + 1;
    assert(start < len);
    const char lead = s[start];
    // Pre-stage results for ASCII and error cases
    rresult = lead;
    //printf("utf_decodeChar(s = %02x, %02x, %02x len = %d)\n", lead, s[1], s[2], len);

    // Get expected sequence length
    const size_t seqLen = UTF8_STRIDE[lead];
    if (seqLen == 1) // ASCII
        return UTF8_DECODE_OK;
    // Anything other than a 2-, 3- or 4-unit lead (5, 6 or the 0xFF marker) is rejected here
    if (seqLen != 2 && seqLen != 3 && seqLen != 4)
        return UTF8_DECODE_OUTSIDE_CODE_SPACE;

    // Index one past the last code unit of the sequence
    const size_t end = start + seqLen;
    if (len < end) // source too short
        return UTF8_DECODE_TRUNCATED_SEQUENCE;

    // Pick off 7 - seqLen low bits from first code unit
    dchar decoded = lead & ((1 << (7 - seqLen)) - 1);

    /* The following combinations are overlong, and illegal:
     *      1100000x (10xxxxxx)
     *      11100000 100xxxxx (10xxxxxx)
     *      11110000 1000xxxx (10xxxxxx 10xxxxxx)
     * The 5- and 6-unit overlong forms need no check: those leads have
     * already been rejected above.
     */
    const char second = s[start + 1];
    if ((lead & 0xFE) == 0xC0 ||
        (lead == 0xE0 && (second & 0xE0) == 0x80) ||
        (lead == 0xF0 && (second & 0xF0) == 0x80))
        return UTF8_DECODE_OVERLONG;

    // Decode remaining bits, six per trailing code unit
    for (size_t k = start + 1; k != end; ++k)
    {
        const char unit = s[k];
        if ((unit & 0xC0) != 0x80) // trailing bytes are 10xxxxxx
            return UTF8_DECODE_INVALID_TRAILER;
        decoded = (decoded << 6) | (unit & 0x3F);
    }

    if (!utf_isValidDchar(decoded))
        return UTF8_DECODE_INVALID_CODE_POINT;

    ridx = end;
    rresult = decoded;
    return UTF8_DECODE_OK;
}
732 
/********************************************
 * Decode a UTF-16 sequence as a single UTF-32 code point.
 *
 * On success ridx is advanced past the one or two code units consumed and
 * rresult holds the decoded code point. On any error ridx is advanced by
 * exactly one code unit and rresult holds the first code unit.
 * Params:
 *      s = UTF-16 sequence, asserted non-null
 *      len = number of code units in s[]
 *      ridx = starting index in s[] (asserted < len), updated to reflect number of code units decoded
 *      rresult = set to character decoded
 * Returns:
 *      null on success, otherwise error message string
 */
immutable(char*) utf_decodeWchar(const(wchar)* s, size_t len, ref size_t ridx, out dchar rresult)
{
    // UTF-16 decoding errors
    static immutable char* UTF16_DECODE_OK = null; // no error
    static immutable char* UTF16_DECODE_TRUNCATED_SEQUENCE = "Truncated UTF-16 sequence";
    static immutable char* UTF16_DECODE_INVALID_SURROGATE = "Invalid low surrogate";
    static immutable char* UTF16_DECODE_UNPAIRED_SURROGATE = "Unpaired surrogate";
    static immutable char* UTF16_DECODE_INVALID_CODE_POINT = "Invalid code point decoded";

    assert(s !is null);
    size_t i = ridx++;
    assert(i < len);
    // Pre-stage results for ASCII and error cases
    dchar u = rresult = s[i];
    if (u < 0x80) // ASCII
        return UTF16_DECODE_OK;
    if (0xD800 <= u && u <= 0xDBFF) // High surrogate: a low surrogate must follow
    {
        if (len <= i + 1)
            return UTF16_DECODE_TRUNCATED_SEQUENCE;
        wchar u2 = s[i + 1];
        // The second unit must lie in the low surrogate range [0xDC00,0xDFFF].
        // The upper bound has to be tested on u2: u is a high surrogate here
        // and can never exceed 0xDFFF.
        if (u2 < 0xDC00 || 0xDFFF < u2)
            return UTF16_DECODE_INVALID_SURROGATE;
        // 0xD7C0 is 0xD800 - (0x10000 >> 10), so the 0x10000 offset of the
        // supplementary planes is folded into the high surrogate bias
        u = ((u - 0xD7C0) << 10) + (u2 - 0xDC00);
        ++ridx;
    }
    else if (0xDC00 <= u && u <= 0xDFFF) // Low surrogate without a preceding high surrogate
        return UTF16_DECODE_UNPAIRED_SURROGATE;
    if (!utf_isValidDchar(u))
        return UTF16_DECODE_INVALID_CODE_POINT;
    rresult = u;
    return UTF16_DECODE_OK;
}