summary refs log tree commit diff stats
path: root/gl/mbrtowc-impl.h
diff options
context:
space:
mode:
Diffstat (limited to 'gl/mbrtowc-impl.h')
-rw-r--r-- gl/mbrtowc-impl.h 262
1 files changed, 262 insertions, 0 deletions
diff --git a/gl/mbrtowc-impl.h b/gl/mbrtowc-impl.h
new file mode 100644
index 00000000..e9c04ed7
--- /dev/null
+++ b/gl/mbrtowc-impl.h
@@ -0,0 +1,262 @@
1/* Convert multibyte character to wide character.
2 Copyright (C) 1999-2002, 2005-2023 Free Software Foundation, Inc.
3
4 This file is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as
6 published by the Free Software Foundation; either version 2.1 of the
7 License, or (at your option) any later version.
8
9 This file is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
16
17/* Written by Bruno Haible <bruno@clisp.org>, 2008. */
18
19/* This file contains the body of the mbrtowc and mbrtoc32 functions,
20 when GNULIB_defined_mbstate_t is defined. */
21
/* Prologue: normalize the arguments of the enclosing function.
   View the conversion state PS as a byte array; the code further down
   reads and writes it through pstate[0] .. pstate[3].  */
22 char *pstate = (char *)ps;
23
/* A null S is handled like a call on the empty string "" with N = 1,
   and the resulting wide character is discarded (PWC is set to NULL).  */
24 if (s == NULL)
25 {
26 pwc = NULL;
27 s = "";
28 n = 1;
29 }
30
/* With no input bytes available, report (size_t)(-2) right away,
   without touching the state.  */
31 if (n == 0)
32 return (size_t)(-2);
33
34 /* Here n > 0. */
35
/* When no state object was passed, fall back to internal_state.
   NOTE(review): internal_state is declared outside this fragment;
   presumably it is a static buffer of the enclosing function - confirm.  */
36 if (pstate == NULL)
37 pstate = internal_state;
38
/* Main conversion.  Layout of the state as used below: pstate[0] is the
   number of bytes (0..3) saved from a previous incomplete call, and
   pstate[1] .. pstate[pstate[0]] are those bytes.  The block ends in one
   of three labels: success, incomplete, invalid.  */
39 {
40 size_t nstate = pstate[0];
/* Scratch buffer for the saved bytes followed by new bytes from S;
   the code below never stores more than 4 bytes into it.  */
41 char buf[4];
/* P points to the M bytes that are handed to the converter.  */
42 const char *p;
43 size_t m;
44 enc_t enc;
45 int res;
46
/* Assemble the input to convert, depending on how many bytes are
   pending in the state.  */
47 switch (nstate)
48 {
/* No pending bytes: convert directly from S, with all N bytes.  */
49 case 0:
50 p = s;
51 m = n;
52 break;
/* 1 to 3 pending bytes: the cases fall through so that all saved bytes
   pstate[1..nstate] are copied to buf[0..nstate-1].  */
53 case 3:
54 buf[2] = pstate[3];
55 FALLTHROUGH;
56 case 2:
57 buf[1] = pstate[2];
58 FALLTHROUGH;
59 case 1:
60 buf[0] = pstate[1];
61 p = buf;
62 m = nstate;
/* Append s[0] unconditionally (n > 0 here), then up to two more bytes
   of S, as long as N permits and buf holds fewer than 4 bytes.  */
63 buf[m++] = s[0];
64 if (n >= 2 && m < 4)
65 {
66 buf[m++] = s[1];
67 if (n >= 3 && m < 4)
68 buf[m++] = s[2];
69 }
70 break;
/* A count byte outside 0..3 is not a state produced by this code:
   fail with EINVAL.  */
71 default:
72 errno = EINVAL;
73 return (size_t)(-1);
74 }
75
76 /* Here m > 0. */
77
/* Classify the locale's encoding; the result selects the conversion
   strategy below.  locale_encoding_classification is defined outside
   this fragment.  */
78 enc = locale_encoding_classification ();
79
80 if (enc == enc_utf8) /* UTF-8 */
81 {
82 /* Achieve
83 - multi-thread safety and
84 - the ability to produce wide character values > WCHAR_MAX
85 by not calling mbtowc() at all. */
/* NOTE(review): the included file is not visible here; presumably it
   decodes the M bytes at P and jumps to one of the labels success,
   incomplete or invalid defined below - confirm.  */
86#include "mbrtowc-impl-utf8.h"
87 }
88 else
89 {
90 /* The hidden internal state of mbtowc would make this function not
91 multi-thread safe. Achieve multi-thread safety through a lock. */
92 wchar_t wc;
93 res = mbtowc_with_lock (&wc, p, m);
94
/* A non-negative result means a character was converted.  */
95 if (res >= 0)
96 {
/* Consistency check: a result of 0 must go together with a null wide
   character, and vice versa; otherwise abort.  */
97 if ((wc == 0) != (res == 0))
98 abort ();
/* Store the wide character only if the caller asked for it.  */
99 if (pwc != NULL)
100 *pwc = wc;
101 goto success;
102 }
103
104 /* mbtowc does not distinguish between invalid and incomplete multibyte
105 sequences. But mbrtowc needs to make this distinction.
106 There are two possible approaches:
107 - Use iconv() and its return value.
108 - Use built-in knowledge about the possible encodings.
109 Given the low quality of implementation of iconv() on the systems
110 that lack mbrtowc(), we use the second approach.
111 The possible encodings are:
112 - 8-bit encodings,
113 - EUC-JP, EUC-KR, GB2312, EUC-TW, BIG5, GB18030, SJIS,
114 - UTF-8 (already handled above).
115 Use specialized code for each. */
/* With 4 or more bytes, or at least MB_CUR_MAX bytes, available, the
   failure is reported as an invalid sequence, never as incomplete.  */
116 if (m >= 4 || m >= MB_CUR_MAX)
117 goto invalid;
118 /* Here MB_CUR_MAX > 1 and 0 < m < 4. */
/* Per encoding: decide whether the M bytes at P form a prefix that is
   treated as incomplete; everything else is invalid.  */
119 switch (enc)
120 {
121 /* As a reference for this code, you can use the GNU libiconv
122 implementation. Look for uses of the RET_TOOFEW macro. */
123
124 case enc_eucjp: /* EUC-JP */
125 {
/* One byte: incomplete if it is 0xa1..0xfe, 0x8e or 0x8f.  */
126 if (m == 1)
127 {
128 unsigned char c = (unsigned char) p[0];
129
130 if ((c >= 0xa1 && c < 0xff) || c == 0x8e || c == 0x8f)
131 goto incomplete;
132 }
/* Two bytes: incomplete only for 0x8f followed by 0xa1..0xfe.  */
133 if (m == 2)
134 {
135 unsigned char c = (unsigned char) p[0];
136
137 if (c == 0x8f)
138 {
139 unsigned char c2 = (unsigned char) p[1];
140
141 if (c2 >= 0xa1 && c2 < 0xff)
142 goto incomplete;
143 }
144 }
145 goto invalid;
146 }
147
148 case enc_94: /* EUC-KR, GB2312, BIG5 */
149 {
/* One byte: incomplete if it is 0xa1..0xfe.  Longer inputs are
   invalid.  */
150 if (m == 1)
151 {
152 unsigned char c = (unsigned char) p[0];
153
154 if (c >= 0xa1 && c < 0xff)
155 goto incomplete;
156 }
157 goto invalid;
158 }
159
160 case enc_euctw: /* EUC-TW */
161 {
/* One byte: incomplete if it is 0xa1..0xfe or 0x8e.  */
162 if (m == 1)
163 {
164 unsigned char c = (unsigned char) p[0];
165
166 if ((c >= 0xa1 && c < 0xff) || c == 0x8e)
167 goto incomplete;
168 }
/* Two or three bytes: incomplete only when the first byte is 0x8e.  */
169 else /* m == 2 || m == 3 */
170 {
171 unsigned char c = (unsigned char) p[0];
172
173 if (c == 0x8e)
174 goto incomplete;
175 }
176 goto invalid;
177 }
178
179 case enc_gb18030: /* GB18030 */
180 {
/* One byte: incomplete if it is 0x90..0xe3 or 0xf8..0xfe.  */
181 if (m == 1)
182 {
183 unsigned char c = (unsigned char) p[0];
184
185 if ((c >= 0x90 && c <= 0xe3) || (c >= 0xf8 && c <= 0xfe))
186 goto incomplete;
187 }
/* Two or three bytes: incomplete only for a first byte 0x90..0xe3
   followed by 0x30..0x39 and, if present, a third byte 0x81..0xfe.  */
188 else /* m == 2 || m == 3 */
189 {
190 unsigned char c = (unsigned char) p[0];
191
192 if (c >= 0x90 && c <= 0xe3)
193 {
194 unsigned char c2 = (unsigned char) p[1];
195
196 if (c2 >= 0x30 && c2 <= 0x39)
197 {
198 if (m == 2)
199 goto incomplete;
200 else /* m == 3 */
201 {
202 unsigned char c3 = (unsigned char) p[2];
203
204 if (c3 >= 0x81 && c3 <= 0xfe)
205 goto incomplete;
206 }
207 }
208 }
209 }
210 goto invalid;
211 }
212
213 case enc_sjis: /* SJIS */
214 {
/* One byte: incomplete if it is 0x81..0x9f, 0xe0..0xea or 0xf0..0xf9.
   Longer inputs are invalid.  */
215 if (m == 1)
216 {
217 unsigned char c = (unsigned char) p[0];
218
219 if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)
220 || (c >= 0xf0 && c <= 0xf9))
221 goto incomplete;
222 }
223 goto invalid;
224 }
225
/* No built-in knowledge about this encoding: any failure with fewer
   than 4 and fewer than MB_CUR_MAX bytes is treated as incomplete.  */
226 default:
227 /* An unknown multibyte encoding. */
228 goto incomplete;
229 }
230 }
231
232 success:
233 /* res >= 0 is the corrected return value of
234 mbtowc_with_lock (&wc, p, m). */
/* The character must extend beyond the NSTATE bytes that were already
   saved (a result of 0 counts as 1 byte here); otherwise abort.  */
235 if (nstate >= (res > 0 ? res : 1))
236 abort ();
/* Return only the number of bytes taken from S in this call, and reset
   the state to "no pending bytes".  */
237 res -= nstate;
238 pstate[0] = 0;
239 return res;
240
241 incomplete:
242 {
/* Append the m - nstate bytes taken from S to the bytes already saved in
   pstate[1..nstate].  */
243 size_t k = nstate;
244 /* Here 0 <= k < m < 4. */
245 pstate[++k] = s[0];
246 if (k < m)
247 {
248 pstate[++k] = s[1];
249 if (k < m)
250 pstate[++k] = s[2];
251 }
/* At most three bytes are stored; ending with k != m means the
   precondition above was violated, so abort.  */
252 if (k != m)
253 abort ();
254 }
/* Record the new count of pending bytes and report "incomplete".  */
255 pstate[0] = m;
256 return (size_t)(-2);
257
258 invalid:
259 errno = EILSEQ;
260 /* The conversion state is undefined, says POSIX. */
261 return (size_t)(-1);
262 }