1 // Compiler implementation of the D programming language
2 // Copyright (c) 1999-2015 by Digital Mars
3 // All Rights Reserved
4 // written by Walter Bright
5 // http://www.digitalmars.com
6 // Distributed under the Boost Software License, Version 1.0.
7 // http://www.boost.org/LICENSE_1_0.txt
8 
9 module ddmd.utf;
10 
// UTF-8 code units have no alias in this module; the UTF-8 routines use `char` directly.
/// A UTF-16 code unit
alias utf16_t = ushort;
/// A UTF-32 code unit
alias utf32_t = uint;
/// A decoded code point; the same type as utf32_t
alias dchar_t = utf32_t;
17 
/// Number of [first, last] range entries in ALPHA_TABLE.
enum ALPHA_TABLE_LENGTH = 245;
/**
 * Code point ranges searched by isUniAlpha().
 * Each entry is an inclusive [first, last] pair of UTF-16 code units. The
 * entries are listed in ascending order and do not overlap, which the binary
 * search in isUniAlpha() relies on.
 * NOTE(review): the variable is declared as a pointer-to-pointer but is
 * initialized from a nested array literal -- confirm this is what the
 * compiler in use and the C++ side expect.
 */
extern (C++) __gshared const(utf16_t)** ALPHA_TABLE =
[
    [0x00AA, 0x00AA],
    [0x00B5, 0x00B5],
    [0x00B7, 0x00B7],
    [0x00BA, 0x00BA],
    [0x00C0, 0x00D6],
    [0x00D8, 0x00F6],
    [0x00F8, 0x01F5],
    [0x01FA, 0x0217],
    [0x0250, 0x02A8],
    [0x02B0, 0x02B8],
    [0x02BB, 0x02BB],
    [0x02BD, 0x02C1],
    [0x02D0, 0x02D1],
    [0x02E0, 0x02E4],
    [0x037A, 0x037A],
    [0x0386, 0x0386],
    [0x0388, 0x038A],
    [0x038C, 0x038C],
    [0x038E, 0x03A1],
    [0x03A3, 0x03CE],
    [0x03D0, 0x03D6],
    [0x03DA, 0x03DA],
    [0x03DC, 0x03DC],
    [0x03DE, 0x03DE],
    [0x03E0, 0x03E0],
    [0x03E2, 0x03F3],
    [0x0401, 0x040C],
    [0x040E, 0x044F],
    [0x0451, 0x045C],
    [0x045E, 0x0481],
    [0x0490, 0x04C4],
    [0x04C7, 0x04C8],
    [0x04CB, 0x04CC],
    [0x04D0, 0x04EB],
    [0x04EE, 0x04F5],
    [0x04F8, 0x04F9],
    [0x0531, 0x0556],
    [0x0559, 0x0559],
    [0x0561, 0x0587],
    [0x05B0, 0x05B9],
    [0x05BB, 0x05BD],
    [0x05BF, 0x05BF],
    [0x05C1, 0x05C2],
    [0x05D0, 0x05EA],
    [0x05F0, 0x05F2],
    [0x0621, 0x063A],
    [0x0640, 0x0652],
    [0x0660, 0x0669],
    [0x0670, 0x06B7],
    [0x06BA, 0x06BE],
    [0x06C0, 0x06CE],
    [0x06D0, 0x06DC],
    [0x06E5, 0x06E8],
    [0x06EA, 0x06ED],
    [0x06F0, 0x06F9],
    [0x0901, 0x0903],
    [0x0905, 0x0939],
    [0x093D, 0x094D],
    [0x0950, 0x0952],
    [0x0958, 0x0963],
    [0x0966, 0x096F],
    [0x0981, 0x0983],
    [0x0985, 0x098C],
    [0x098F, 0x0990],
    [0x0993, 0x09A8],
    [0x09AA, 0x09B0],
    [0x09B2, 0x09B2],
    [0x09B6, 0x09B9],
    [0x09BE, 0x09C4],
    [0x09C7, 0x09C8],
    [0x09CB, 0x09CD],
    [0x09DC, 0x09DD],
    [0x09DF, 0x09E3],
    [0x09E6, 0x09F1],
    [0x0A02, 0x0A02],
    [0x0A05, 0x0A0A],
    [0x0A0F, 0x0A10],
    [0x0A13, 0x0A28],
    [0x0A2A, 0x0A30],
    [0x0A32, 0x0A33],
    [0x0A35, 0x0A36],
    [0x0A38, 0x0A39],
    [0x0A3E, 0x0A42],
    [0x0A47, 0x0A48],
    [0x0A4B, 0x0A4D],
    [0x0A59, 0x0A5C],
    [0x0A5E, 0x0A5E],
    [0x0A66, 0x0A6F],
    [0x0A74, 0x0A74],
    [0x0A81, 0x0A83],
    [0x0A85, 0x0A8B],
    [0x0A8D, 0x0A8D],
    [0x0A8F, 0x0A91],
    [0x0A93, 0x0AA8],
    [0x0AAA, 0x0AB0],
    [0x0AB2, 0x0AB3],
    [0x0AB5, 0x0AB9],
    [0x0ABD, 0x0AC5],
    [0x0AC7, 0x0AC9],
    [0x0ACB, 0x0ACD],
    [0x0AD0, 0x0AD0],
    [0x0AE0, 0x0AE0],
    [0x0AE6, 0x0AEF],
    [0x0B01, 0x0B03],
    [0x0B05, 0x0B0C],
    [0x0B0F, 0x0B10],
    [0x0B13, 0x0B28],
    [0x0B2A, 0x0B30],
    [0x0B32, 0x0B33],
    [0x0B36, 0x0B39],
    [0x0B3D, 0x0B43],
    [0x0B47, 0x0B48],
    [0x0B4B, 0x0B4D],
    [0x0B5C, 0x0B5D],
    [0x0B5F, 0x0B61],
    [0x0B66, 0x0B6F],
    [0x0B82, 0x0B83],
    [0x0B85, 0x0B8A],
    [0x0B8E, 0x0B90],
    [0x0B92, 0x0B95],
    [0x0B99, 0x0B9A],
    [0x0B9C, 0x0B9C],
    [0x0B9E, 0x0B9F],
    [0x0BA3, 0x0BA4],
    [0x0BA8, 0x0BAA],
    [0x0BAE, 0x0BB5],
    [0x0BB7, 0x0BB9],
    [0x0BBE, 0x0BC2],
    [0x0BC6, 0x0BC8],
    [0x0BCA, 0x0BCD],
    [0x0BE7, 0x0BEF],
    [0x0C01, 0x0C03],
    [0x0C05, 0x0C0C],
    [0x0C0E, 0x0C10],
    [0x0C12, 0x0C28],
    [0x0C2A, 0x0C33],
    [0x0C35, 0x0C39],
    [0x0C3E, 0x0C44],
    [0x0C46, 0x0C48],
    [0x0C4A, 0x0C4D],
    [0x0C60, 0x0C61],
    [0x0C66, 0x0C6F],
    [0x0C82, 0x0C83],
    [0x0C85, 0x0C8C],
    [0x0C8E, 0x0C90],
    [0x0C92, 0x0CA8],
    [0x0CAA, 0x0CB3],
    [0x0CB5, 0x0CB9],
    [0x0CBE, 0x0CC4],
    [0x0CC6, 0x0CC8],
    [0x0CCA, 0x0CCD],
    [0x0CDE, 0x0CDE],
    [0x0CE0, 0x0CE1],
    [0x0CE6, 0x0CEF],
    [0x0D02, 0x0D03],
    [0x0D05, 0x0D0C],
    [0x0D0E, 0x0D10],
    [0x0D12, 0x0D28],
    [0x0D2A, 0x0D39],
    [0x0D3E, 0x0D43],
    [0x0D46, 0x0D48],
    [0x0D4A, 0x0D4D],
    [0x0D60, 0x0D61],
    [0x0D66, 0x0D6F],
    [0x0E01, 0x0E3A],
    [0x0E40, 0x0E5B],
    [0x0E81, 0x0E82],
    [0x0E84, 0x0E84],
    [0x0E87, 0x0E88],
    [0x0E8A, 0x0E8A],
    [0x0E8D, 0x0E8D],
    [0x0E94, 0x0E97],
    [0x0E99, 0x0E9F],
    [0x0EA1, 0x0EA3],
    [0x0EA5, 0x0EA5],
    [0x0EA7, 0x0EA7],
    [0x0EAA, 0x0EAB],
    [0x0EAD, 0x0EAE],
    [0x0EB0, 0x0EB9],
    [0x0EBB, 0x0EBD],
    [0x0EC0, 0x0EC4],
    [0x0EC6, 0x0EC6],
    [0x0EC8, 0x0ECD],
    [0x0ED0, 0x0ED9],
    [0x0EDC, 0x0EDD],
    [0x0F00, 0x0F00],
    [0x0F18, 0x0F19],
    [0x0F20, 0x0F33],
    [0x0F35, 0x0F35],
    [0x0F37, 0x0F37],
    [0x0F39, 0x0F39],
    [0x0F3E, 0x0F47],
    [0x0F49, 0x0F69],
    [0x0F71, 0x0F84],
    [0x0F86, 0x0F8B],
    [0x0F90, 0x0F95],
    [0x0F97, 0x0F97],
    [0x0F99, 0x0FAD],
    [0x0FB1, 0x0FB7],
    [0x0FB9, 0x0FB9],
    [0x10A0, 0x10C5],
    [0x10D0, 0x10F6],
    [0x1E00, 0x1E9B],
    [0x1EA0, 0x1EF9],
    [0x1F00, 0x1F15],
    [0x1F18, 0x1F1D],
    [0x1F20, 0x1F45],
    [0x1F48, 0x1F4D],
    [0x1F50, 0x1F57],
    [0x1F59, 0x1F59],
    [0x1F5B, 0x1F5B],
    [0x1F5D, 0x1F5D],
    [0x1F5F, 0x1F7D],
    [0x1F80, 0x1FB4],
    [0x1FB6, 0x1FBC],
    [0x1FBE, 0x1FBE],
    [0x1FC2, 0x1FC4],
    [0x1FC6, 0x1FCC],
    [0x1FD0, 0x1FD3],
    [0x1FD6, 0x1FDB],
    [0x1FE0, 0x1FEC],
    [0x1FF2, 0x1FF4],
    [0x1FF6, 0x1FFC],
    [0x203F, 0x2040],
    [0x207F, 0x207F],
    [0x2102, 0x2102],
    [0x2107, 0x2107],
    [0x210A, 0x2113],
    [0x2115, 0x2115],
    [0x2118, 0x211D],
    [0x2124, 0x2124],
    [0x2126, 0x2126],
    [0x2128, 0x2128],
    [0x212A, 0x2131],
    [0x2133, 0x2138],
    [0x2160, 0x2182],
    [0x3005, 0x3007],
    [0x3021, 0x3029],
    [0x3041, 0x3093],
    [0x309B, 0x309C],
    [0x30A1, 0x30F6],
    [0x30FB, 0x30FC],
    [0x3105, 0x312C],
    [0x4E00, 0x9FA5],
    [0xAC00, 0xD7A3]
];
/// Success value returned by utf_decodeChar(): null, i.e. no error message.
extern (C++) __gshared const(const(char)*) UTF8_DECODE_OK = null;
/// Success value returned by utf_decodeWchar(): null, i.e. no error message.
extern (C++) __gshared const(const(char)*) UTF16_DECODE_OK = null;
/* Expected length, in code units, of the UTF-8 sequence introduced by each
 * of the 256 possible first code units. 0xFF marks a code unit that cannot
 * start a sequence.
 *
 * The following encodings are valid, except for the 5 and 6 byte
 * combinations:
 *      0xxxxxxx
 *      110xxxxx 10xxxxxx
 *      1110xxxx 10xxxxxx 10xxxxxx
 *      11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *      111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *      1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 */
extern (C++) __gshared const(uint)* UTF8_STRIDE =
[
    // 0x00 .. 0x7F: 0xxxxxxx
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x80 .. 0xBF: 10xxxxxx, not a valid first code unit
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    // 0xC0 .. 0xDF: 110xxxxx
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    // 0xE0 .. 0xEF: 1110xxxx
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    // 0xF0 .. 0xF7: 11110xxx, 0xF8 .. 0xFB: 111110xx,
    // 0xFC .. 0xFD: 1111110x, 0xFE .. 0xFF: not a valid first code unit
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0xFF, 0xFF
];
// UTF-8 decoding errors
// These are the non-null message strings returned by utf_decodeChar().
/// The first code unit's UTF8_STRIDE entry is not 1, 2, 3 or 4.
extern (C++) __gshared const(char)* UTF8_DECODE_OUTSIDE_CODE_SPACE = "Outside Unicode code space";
/// Fewer code units remain in the input than the first code unit calls for.
extern (C++) __gshared const(char)* UTF8_DECODE_TRUNCATED_SEQUENCE = "Truncated UTF-8 sequence";
/// The sequence is one of the overlong forms listed in utf_decodeChar().
extern (C++) __gshared const(char)* UTF8_DECODE_OVERLONG = "Overlong UTF-8 sequence";
/// A code unit after the first is not of the form 10xxxxxx.
extern (C++) __gshared const(char)* UTF8_DECODE_INVALID_TRAILER = "Invalid trailing code unit";
/// The decoded value is rejected by utf_isValidDchar().
extern (C++) __gshared const(char)* UTF8_DECODE_INVALID_CODE_POINT = "Invalid code point decoded";
// UTF-16 decoding errors
// These are the non-null message strings returned by utf_decodeWchar().
/// A code unit in 0xD800..0xDBFF is the last code unit of the input.
extern (C++) __gshared const(char)* UTF16_DECODE_TRUNCATED_SEQUENCE = "Truncated UTF-16 sequence";
/// The code unit following one in 0xD800..0xDBFF fails the low surrogate check.
extern (C++) __gshared const(char)* UTF16_DECODE_INVALID_SURROGATE = "Invalid low surrogate";
/// The first code unit is in 0xDC00..0xDFFF.
extern (C++) __gshared const(char)* UTF16_DECODE_UNPAIRED_SURROGATE = "Unpaired surrogate";
/// The decoded value is rejected by utf_isValidDchar().
extern (C++) __gshared const(char)* UTF16_DECODE_INVALID_CODE_POINT = "Invalid code point decoded";
548 
/// Returns true if c is accepted as a code point.
/// The Unicode code space is the range of code points [0x000000,0x10FFFF]
/// except the UTF-16 surrogate pairs in the range [0xD800,0xDFFF]
/// and non-characters.
/// Note: the non-character test as written only matches 0xFFFE and 0xFFFF;
/// values ending in 0xFFFE or 0xFFFF in higher planes (e.g. 0x1FFFE) are accepted.
extern (C++) bool utf_isValidDchar(dchar_t c)
{
    // TODO: Whether non-char code points should be rejected is pending review
    // above the largest character code point
    const bool beyondCodeSpace = c > 0x10FFFF;
    // surrogate pairs
    const bool isSurrogate = c >= 0xD800 && c <= 0xDFFF;
    // non-characters
    const bool isNonCharacter = (c & 0xFFFFFE) == 0x00FFFE;
    return !(beyondCodeSpace || isSurrogate || isNonCharacter);
}
566 
/*******************************
 * Return !=0 if unicode alpha.
 * Use table from C99 Appendix D.
 *
 * c is looked up in ALPHA_TABLE, whose entries are inclusive
 * [first, last] ranges, using a binary search.
 */
extern (C++) bool isUniAlpha(dchar_t c)
{
    // Quick reject: below the first range or above the last one
    if (c < ALPHA_TABLE[0][0] || c > ALPHA_TABLE[ALPHA_TABLE_LENGTH - 1][1])
        return false;
    // Binary search over the half-open index interval [lo, hi)
    size_t lo = 0;
    size_t hi = ALPHA_TABLE_LENGTH;
    while (lo < hi)
    {
        const size_t probe = lo + (hi - lo) / 2;
        if (c < ALPHA_TABLE[probe][0])
            hi = probe; // c lies before this range
        else if (c > ALPHA_TABLE[probe][1])
            lo = probe + 1; // c lies after this range
        else
            return true; // first <= c <= last
    }
    return false;
}
592 
/**
 * Returns the number of UTF-8 code units needed to encode c.
 * c must not be greater than 0x10FFFF; larger values hit assert(false).
 */
extern (C++) int utf_codeLengthChar(dchar_t c)
{
    if (c <= 0x7F)
        return 1;
    if (c <= 0x7FF)
        return 2;
    if (c <= 0xFFFF)
        return 3;
    if (c <= 0x10FFFF)
        return 4;
    assert(false);
}
600 
/**
 * Returns the number of UTF-16 code units needed to encode c:
 * 1 when c <= 0xFFFF, otherwise 2.
 */
extern (C++) int utf_codeLengthWchar(dchar_t c)
{
    if (c > 0xFFFF)
        return 2;
    return 1;
}
605 
/**
 * Returns the code length of c in code units for the encoding.
 * sz is the encoding: 1 = utf8, 2 = utf16, 4 = utf32.
 * Any sz other than 1, 2 or 4 fails the assert.
 */
extern (C++) int utf_codeLength(int sz, dchar_t c)
{
    switch (sz)
    {
    case 1:
        return utf_codeLengthChar(c);
    case 2:
        return utf_codeLengthWchar(c);
    default:
        // UTF-32: every code point is a single code unit
        assert(sz == 4);
        return 1;
    }
}
619 
/**
 * Encodes c as UTF-8 and stores the 1 to 4 resulting code units at s[0..].
 * s must be non-null and c must satisfy utf_isValidDchar(); both are asserted.
 * No terminator is written. The caller must provide enough room at s
 * (see utf_codeLengthChar()).
 */
extern (C++) void utf_encodeChar(char* s, dchar_t c)
{
    assert(s !is null);
    assert(utf_isValidDchar(c));

    // Low six bits of v packaged as a 10xxxxxx trailing code unit
    static char trailer(dchar_t v)
    {
        return cast(char)(0x80 | (v & 0x3F));
    }

    if (c <= 0x7F)
    {
        // 0xxxxxxx
        s[0] = cast(char)c;
        return;
    }
    if (c <= 0x07FF)
    {
        // 110xxxxx 10xxxxxx
        s[0] = cast(char)(0xC0 | (c >> 6));
        s[1] = trailer(c);
        return;
    }
    if (c <= 0xFFFF)
    {
        // 1110xxxx 10xxxxxx 10xxxxxx
        s[0] = cast(char)(0xE0 | (c >> 12));
        s[1] = trailer(c >> 6);
        s[2] = trailer(c);
        return;
    }
    if (c <= 0x10FFFF)
    {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        s[0] = cast(char)(0xF0 | (c >> 18));
        s[1] = trailer(c >> 12);
        s[2] = trailer(c >> 6);
        s[3] = trailer(c);
        return;
    }
    assert(0);
}
649 
/**
 * Encodes c as UTF-16 and stores the 1 or 2 resulting code units at s[0..].
 * s must be non-null and c must satisfy utf_isValidDchar(); both are asserted.
 * Values above 0xFFFF are written as a pair: s[0] in the 0xD800 range and
 * s[1] in the 0xDC00 range, each carrying 10 bits of (c - 0x010000).
 */
extern (C++) void utf_encodeWchar(utf16_t* s, dchar_t c)
{
    assert(s !is null);
    assert(utf_isValidDchar(c));
    if (c > 0xFFFF)
    {
        const dchar_t offset = c - 0x010000;
        s[0] = cast(utf16_t)(((offset >> 10) & 0x03FF) + 0xD800);
        s[1] = cast(utf16_t)((offset & 0x03FF) + 0xDC00);
        return;
    }
    s[0] = cast(utf16_t)c;
}
664 
/**
 * Encodes c at s using the encoding selected by sz:
 * 1 = utf8 (s is treated as char*), 2 = utf16 (utf16_t*), 4 = utf32 (utf32_t*).
 * Any sz other than 1, 2 or 4 fails the assert.
 */
extern (C++) void utf_encode(int sz, void* s, dchar_t c)
{
    switch (sz)
    {
    case 1:
        utf_encodeChar(cast(char*)s, c);
        break;
    case 2:
        utf_encodeWchar(cast(utf16_t*)s, c);
        break;
    default:
        // UTF-32: the code point is stored as is
        assert(sz == 4);
        *(cast(utf32_t*)s) = c;
        break;
    }
}
677 
/********************************************
 * Decode a UTF-8 sequence as a single UTF-32 code point.
 * Params:
 *      s       = UTF-8 code units
 *      len     = number of code units in s
 *      pidx    = on entry, index of the first code unit to decode (must be
 *                less than len); on success, index just past the decoded
 *                sequence; on error, index just past the first code unit
 *      presult = receives the decoded code point on success, or the first
 *                code unit on error
 * Returns:
 *      NULL    success
 *      !=NULL  error message string
 */
extern (C++) const(char)* utf_decodeChar(const(char)* s, size_t len, size_t* pidx, dchar_t* presult)
{
    assert(s !is null);
    assert(pidx !is null);
    assert(presult !is null);
    const size_t start = *pidx;
    *pidx = start + 1;
    assert(start < len);
    const char lead = s[start];
    // Pre-stage results for ASCII and error cases
    *presult = lead;
    // Sequence length announced by the first code unit
    const size_t stride = UTF8_STRIDE[lead];
    if (stride == 1)
        return UTF8_DECODE_OK; // ASCII
    if (stride != 2 && stride != 3 && stride != 4)
        return UTF8_DECODE_OUTSIDE_CODE_SPACE; // table entry is 5, 6 or 0xFF
    const size_t end = start + stride;
    if (len < end) // source too short
        return UTF8_DECODE_TRUNCATED_SEQUENCE;
    // Pick off the 7 - stride low bits of the first code unit
    utf32_t cp = lead & ((1 << (7 - stride)) - 1);
    /* The following combinations are overlong, and illegal:
     *      1100000x (10xxxxxx)
     *      11100000 100xxxxx (10xxxxxx)
     *      11110000 1000xxxx (10xxxxxx 10xxxxxx)
     * The 5- and 6-unit forms (first code unit 0xF8 / 0xFC) need no test
     * here: their strides were already rejected above.
     */
    size_t pos = start + 1;
    const char second = s[pos];
    const bool overlong = (lead & 0xFE) == 0xC0
        || (lead == 0xE0 && (second & 0xE0) == 0x80)
        || (lead == 0xF0 && (second & 0xF0) == 0x80);
    if (overlong)
        return UTF8_DECODE_OVERLONG;
    // Fold in six bits from each trailing code unit
    for (; pos != end; ++pos)
    {
        const char trail = s[pos];
        if ((trail & 0xC0) != 0x80) // trailing code units are 10xxxxxx
            return UTF8_DECODE_INVALID_TRAILER;
        cp = (cp << 6) | (trail & 0x3F);
    }
    if (!utf_isValidDchar(cp))
        return UTF8_DECODE_INVALID_CODE_POINT;
    *pidx = end;
    *presult = cp;
    return UTF8_DECODE_OK;
}
740 
/********************************************
 * Decode a UTF-16 sequence as a single UTF-32 code point.
 * Params:
 *      s       = UTF-16 code units
 *      len     = number of code units in s
 *      pidx    = on entry, index of the first code unit to decode (must be
 *                less than len); on success, index just past the decoded
 *                sequence; on error, index just past the first code unit
 *      presult = receives the decoded code point on success, or the first
 *                code unit on error
 * Returns:
 *      NULL    success
 *      !=NULL  error message string
 */
extern (C++) const(char)* utf_decodeWchar(const(utf16_t)* s, size_t len, size_t* pidx, dchar_t* presult)
{
    assert(s !is null);
    assert(pidx !is null);
    assert(presult !is null);
    size_t i = (*pidx)++;
    assert(i < len);
    // Pre-stage results for ASCII and error cases
    utf32_t u = *presult = s[i];
    if (u < 0x80) // ASCII
        return UTF16_DECODE_OK;
    if (0xD800 <= u && u <= 0xDBFF) // Surrogate pair
    {
        if (len <= i + 1)
            return UTF16_DECODE_TRUNCATED_SEQUENCE;
        utf16_t u2 = s[i + 1];
        // The second code unit must lie in 0xDC00..0xDFFF. Both bounds are
        // tested against u2: u is already known to be at most 0xDBFF, so
        // testing u against 0xDFFF would never reject anything.
        if (u2 < 0xDC00 || 0xDFFF < u2)
            return UTF16_DECODE_INVALID_SURROGATE;
        // 0xD7C0 == 0xD800 - (0x10000 >> 10), so this also adds back 0x10000
        u = ((u - 0xD7C0) << 10) + (u2 - 0xDC00);
        ++*pidx;
    }
    else if (0xDC00 <= u && u <= 0xDFFF)
        return UTF16_DECODE_UNPAIRED_SURROGATE;
    if (!utf_isValidDchar(u))
        return UTF16_DECODE_INVALID_CODE_POINT;
    *presult = u;
    return UTF16_DECODE_OK;
}