summaryrefslogtreecommitdiffstats
path: root/gl/mbrtowc-impl-utf8.h
diff options
context:
space:
mode:
Diffstat (limited to 'gl/mbrtowc-impl-utf8.h')
-rw-r--r--gl/mbrtowc-impl-utf8.h138
1 files changed, 138 insertions, 0 deletions
diff --git a/gl/mbrtowc-impl-utf8.h b/gl/mbrtowc-impl-utf8.h
new file mode 100644
index 00000000..4fdd65d8
--- /dev/null
+++ b/gl/mbrtowc-impl-utf8.h
@@ -0,0 +1,138 @@
1/* Convert multibyte character to wide character.
2 Copyright (C) 1999-2002, 2005-2023 Free Software Foundation, Inc.
3
4 This file is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as
6 published by the Free Software Foundation; either version 2.1 of the
7 License, or (at your option) any later version.
8
9 This file is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
16
17/* Written by Bruno Haible <bruno@clisp.org>, 2008. */
18
19/* This file contains the part of the body of the mbrtowc and mbrtoc32 functions
20 that handles the special case of the UTF-8 encoding. */
21
22 /* Cf. unistr/u8-mbtouc.c. */
23 unsigned char c = (unsigned char) p[0];
24
25 if (c < 0x80)
26 {
27 if (pwc != NULL)
28 *pwc = c;
29 res = (c == 0 ? 0 : 1);
30 goto success;
31 }
32 if (c >= 0xc2)
33 {
34 if (c < 0xe0)
35 {
36 if (m == 1)
37 goto incomplete;
38 else /* m >= 2 */
39 {
40 unsigned char c2 = (unsigned char) p[1];
41
42 if ((c2 ^ 0x80) < 0x40)
43 {
44 if (pwc != NULL)
45 *pwc = ((unsigned int) (c & 0x1f) << 6)
46 | (unsigned int) (c2 ^ 0x80);
47 res = 2;
48 goto success;
49 }
50 }
51 }
52 else if (c < 0xf0)
53 {
54 if (m == 1)
55 goto incomplete;
56 else
57 {
58 unsigned char c2 = (unsigned char) p[1];
59
60 if ((c2 ^ 0x80) < 0x40
61 && (c >= 0xe1 || c2 >= 0xa0)
62 && (c != 0xed || c2 < 0xa0))
63 {
64 if (m == 2)
65 goto incomplete;
66 else /* m >= 3 */
67 {
68 unsigned char c3 = (unsigned char) p[2];
69
70 if ((c3 ^ 0x80) < 0x40)
71 {
72 unsigned int wc =
73 (((unsigned int) (c & 0x0f) << 12)
74 | ((unsigned int) (c2 ^ 0x80) << 6)
75 | (unsigned int) (c3 ^ 0x80));
76
77 if (FITS_IN_CHAR_TYPE (wc))
78 {
79 if (pwc != NULL)
80 *pwc = wc;
81 res = 3;
82 goto success;
83 }
84 }
85 }
86 }
87 }
88 }
89 else if (c <= 0xf4)
90 {
91 if (m == 1)
92 goto incomplete;
93 else
94 {
95 unsigned char c2 = (unsigned char) p[1];
96
97 if ((c2 ^ 0x80) < 0x40
98 && (c >= 0xf1 || c2 >= 0x90)
99 && (c < 0xf4 || (/* c == 0xf4 && */ c2 < 0x90)))
100 {
101 if (m == 2)
102 goto incomplete;
103 else
104 {
105 unsigned char c3 = (unsigned char) p[2];
106
107 if ((c3 ^ 0x80) < 0x40)
108 {
109 if (m == 3)
110 goto incomplete;
111 else /* m >= 4 */
112 {
113 unsigned char c4 = (unsigned char) p[3];
114
115 if ((c4 ^ 0x80) < 0x40)
116 {
117 unsigned int wc =
118 (((unsigned int) (c & 0x07) << 18)
119 | ((unsigned int) (c2 ^ 0x80) << 12)
120 | ((unsigned int) (c3 ^ 0x80) << 6)
121 | (unsigned int) (c4 ^ 0x80));
122
123 if (FITS_IN_CHAR_TYPE (wc))
124 {
125 if (pwc != NULL)
126 *pwc = wc;
127 res = 4;
128 goto success;
129 }
130 }
131 }
132 }
133 }
134 }
135 }
136 }
137 }
138 goto invalid;