diff options
Diffstat (limited to 'gl/localcharset.c')
-rw-r--r-- | gl/localcharset.c | 482 |
1 files changed, 266 insertions, 216 deletions
diff --git a/gl/localcharset.c b/gl/localcharset.c index a7ca94c..a04dc44 100644 --- a/gl/localcharset.c +++ b/gl/localcharset.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* Determine a canonical name for the current locale's character encoding. | 1 | /* Determine a canonical name for the current locale's character encoding. |
2 | 2 | ||
3 | Copyright (C) 2000-2006, 2008-2009 Free Software Foundation, Inc. | 3 | Copyright (C) 2000-2006, 2008-2010 Free Software Foundation, Inc. |
4 | 4 | ||
5 | This program is free software; you can redistribute it and/or modify | 5 | This program is free software; you can redistribute it and/or modify |
6 | it under the terms of the GNU General Public License as published by | 6 | it under the terms of the GNU General Public License as published by |
@@ -23,6 +23,7 @@ | |||
23 | /* Specification. */ | 23 | /* Specification. */ |
24 | #include "localcharset.h" | 24 | #include "localcharset.h" |
25 | 25 | ||
26 | #include <fcntl.h> | ||
26 | #include <stddef.h> | 27 | #include <stddef.h> |
27 | #include <stdio.h> | 28 | #include <stdio.h> |
28 | #include <string.h> | 29 | #include <string.h> |
@@ -44,6 +45,7 @@ | |||
44 | #endif | 45 | #endif |
45 | 46 | ||
46 | #if !defined WIN32_NATIVE | 47 | #if !defined WIN32_NATIVE |
48 | # include <unistd.h> | ||
47 | # if HAVE_LANGINFO_CODESET | 49 | # if HAVE_LANGINFO_CODESET |
48 | # include <langinfo.h> | 50 | # include <langinfo.h> |
49 | # else | 51 | # else |
@@ -75,6 +77,11 @@ | |||
75 | # include "configmake.h" | 77 | # include "configmake.h" |
76 | #endif | 78 | #endif |
77 | 79 | ||
80 | /* Define O_NOFOLLOW to 0 on platforms where it does not exist. */ | ||
81 | #ifndef O_NOFOLLOW | ||
82 | # define O_NOFOLLOW 0 | ||
83 | #endif | ||
84 | |||
78 | #if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__ | 85 | #if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__ |
79 | /* Win32, Cygwin, OS/2, DOS */ | 86 | /* Win32, Cygwin, OS/2, DOS */ |
80 | # define ISSLASH(C) ((C) == '/' || (C) == '\\') | 87 | # define ISSLASH(C) ((C) == '/' || (C) == '\\') |
@@ -117,192 +124,219 @@ get_charset_aliases (void) | |||
117 | if (cp == NULL) | 124 | if (cp == NULL) |
118 | { | 125 | { |
119 | #if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE || defined __CYGWIN__) | 126 | #if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE || defined __CYGWIN__) |
120 | FILE *fp; | ||
121 | const char *dir; | 127 | const char *dir; |
122 | const char *base = "charset.alias"; | 128 | const char *base = "charset.alias"; |
123 | char *file_name; | 129 | char *file_name; |
124 | 130 | ||
125 | /* Make it possible to override the charset.alias location. This is | 131 | /* Make it possible to override the charset.alias location. This is |
126 | necessary for running the testsuite before "make install". */ | 132 | necessary for running the testsuite before "make install". */ |
127 | dir = getenv ("CHARSETALIASDIR"); | 133 | dir = getenv ("CHARSETALIASDIR"); |
128 | if (dir == NULL || dir[0] == '\0') | 134 | if (dir == NULL || dir[0] == '\0') |
129 | dir = relocate (LIBDIR); | 135 | dir = relocate (LIBDIR); |
130 | 136 | ||
131 | /* Concatenate dir and base into freshly allocated file_name. */ | 137 | /* Concatenate dir and base into freshly allocated file_name. */ |
132 | { | 138 | { |
133 | size_t dir_len = strlen (dir); | 139 | size_t dir_len = strlen (dir); |
134 | size_t base_len = strlen (base); | 140 | size_t base_len = strlen (base); |
135 | int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1])); | 141 | int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1])); |
136 | file_name = (char *) malloc (dir_len + add_slash + base_len + 1); | 142 | file_name = (char *) malloc (dir_len + add_slash + base_len + 1); |
137 | if (file_name != NULL) | 143 | if (file_name != NULL) |
138 | { | 144 | { |
139 | memcpy (file_name, dir, dir_len); | 145 | memcpy (file_name, dir, dir_len); |
140 | if (add_slash) | 146 | if (add_slash) |
141 | file_name[dir_len] = DIRECTORY_SEPARATOR; | 147 | file_name[dir_len] = DIRECTORY_SEPARATOR; |
142 | memcpy (file_name + dir_len + add_slash, base, base_len + 1); | 148 | memcpy (file_name + dir_len + add_slash, base, base_len + 1); |
143 | } | 149 | } |
144 | } | 150 | } |
145 | 151 | ||
146 | if (file_name == NULL || (fp = fopen (file_name, "r")) == NULL) | 152 | if (file_name == NULL) |
147 | /* Out of memory or file not found, treat it as empty. */ | 153 | /* Out of memory. Treat the file as empty. */ |
148 | cp = ""; | 154 | cp = ""; |
149 | else | 155 | else |
150 | { | 156 | { |
151 | /* Parse the file's contents. */ | 157 | int fd; |
152 | char *res_ptr = NULL; | 158 | |
153 | size_t res_size = 0; | 159 | /* Open the file. Reject symbolic links on platforms that support |
154 | 160 | O_NOFOLLOW. This is a security feature. Without it, an attacker | |
155 | for (;;) | 161 | could retrieve parts of the contents (namely, the tail of the |
156 | { | 162 | first line that starts with "* ") of an arbitrary file by placing |
157 | int c; | 163 | a symbolic link to that file under the name "charset.alias" in |
158 | char buf1[50+1]; | 164 | some writable directory and defining the environment variable |
159 | char buf2[50+1]; | 165 | CHARSETALIASDIR to point to that directory. */ |
160 | size_t l1, l2; | 166 | fd = open (file_name, |
161 | char *old_res_ptr; | 167 | O_RDONLY | (HAVE_WORKING_O_NOFOLLOW ? O_NOFOLLOW : 0)); |
162 | 168 | if (fd < 0) | |
163 | c = getc (fp); | 169 | /* File not found. Treat it as empty. */ |
164 | if (c == EOF) | 170 | cp = ""; |
165 | break; | 171 | else |
166 | if (c == '\n' || c == ' ' || c == '\t') | 172 | { |
167 | continue; | 173 | FILE *fp; |
168 | if (c == '#') | 174 | |
169 | { | 175 | fp = fdopen (fd, "r"); |
170 | /* Skip comment, to end of line. */ | 176 | if (fp == NULL) |
171 | do | 177 | { |
172 | c = getc (fp); | 178 | /* Out of memory. Treat the file as empty. */ |
173 | while (!(c == EOF || c == '\n')); | 179 | close (fd); |
174 | if (c == EOF) | 180 | cp = ""; |
175 | break; | 181 | } |
176 | continue; | 182 | else |
177 | } | 183 | { |
178 | ungetc (c, fp); | 184 | /* Parse the file's contents. */ |
179 | if (fscanf (fp, "%50s %50s", buf1, buf2) < 2) | 185 | char *res_ptr = NULL; |
180 | break; | 186 | size_t res_size = 0; |
181 | l1 = strlen (buf1); | 187 | |
182 | l2 = strlen (buf2); | 188 | for (;;) |
183 | old_res_ptr = res_ptr; | 189 | { |
184 | if (res_size == 0) | 190 | int c; |
185 | { | 191 | char buf1[50+1]; |
186 | res_size = l1 + 1 + l2 + 1; | 192 | char buf2[50+1]; |
187 | res_ptr = (char *) malloc (res_size + 1); | 193 | size_t l1, l2; |
188 | } | 194 | char *old_res_ptr; |
189 | else | 195 | |
190 | { | 196 | c = getc (fp); |
191 | res_size += l1 + 1 + l2 + 1; | 197 | if (c == EOF) |
192 | res_ptr = (char *) realloc (res_ptr, res_size + 1); | 198 | break; |
193 | } | 199 | if (c == '\n' || c == ' ' || c == '\t') |
194 | if (res_ptr == NULL) | 200 | continue; |
195 | { | 201 | if (c == '#') |
196 | /* Out of memory. */ | 202 | { |
197 | res_size = 0; | 203 | /* Skip comment, to end of line. */ |
198 | if (old_res_ptr != NULL) | 204 | do |
199 | free (old_res_ptr); | 205 | c = getc (fp); |
200 | break; | 206 | while (!(c == EOF || c == '\n')); |
201 | } | 207 | if (c == EOF) |
202 | strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1); | 208 | break; |
203 | strcpy (res_ptr + res_size - (l2 + 1), buf2); | 209 | continue; |
204 | } | 210 | } |
205 | fclose (fp); | 211 | ungetc (c, fp); |
206 | if (res_size == 0) | 212 | if (fscanf (fp, "%50s %50s", buf1, buf2) < 2) |
207 | cp = ""; | 213 | break; |
208 | else | 214 | l1 = strlen (buf1); |
209 | { | 215 | l2 = strlen (buf2); |
210 | *(res_ptr + res_size) = '\0'; | 216 | old_res_ptr = res_ptr; |
211 | cp = res_ptr; | 217 | if (res_size == 0) |
212 | } | 218 | { |
213 | } | 219 | res_size = l1 + 1 + l2 + 1; |
214 | 220 | res_ptr = (char *) malloc (res_size + 1); | |
215 | if (file_name != NULL) | 221 | } |
216 | free (file_name); | 222 | else |
223 | { | ||
224 | res_size += l1 + 1 + l2 + 1; | ||
225 | res_ptr = (char *) realloc (res_ptr, res_size + 1); | ||
226 | } | ||
227 | if (res_ptr == NULL) | ||
228 | { | ||
229 | /* Out of memory. */ | ||
230 | res_size = 0; | ||
231 | if (old_res_ptr != NULL) | ||
232 | free (old_res_ptr); | ||
233 | break; | ||
234 | } | ||
235 | strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1); | ||
236 | strcpy (res_ptr + res_size - (l2 + 1), buf2); | ||
237 | } | ||
238 | fclose (fp); | ||
239 | if (res_size == 0) | ||
240 | cp = ""; | ||
241 | else | ||
242 | { | ||
243 | *(res_ptr + res_size) = '\0'; | ||
244 | cp = res_ptr; | ||
245 | } | ||
246 | } | ||
247 | } | ||
248 | |||
249 | free (file_name); | ||
250 | } | ||
217 | 251 | ||
218 | #else | 252 | #else |
219 | 253 | ||
220 | # if defined DARWIN7 | 254 | # if defined DARWIN7 |
221 | /* To avoid the trouble of installing a file that is shared by many | 255 | /* To avoid the trouble of installing a file that is shared by many |
222 | GNU packages -- many packaging systems have problems with this --, | 256 | GNU packages -- many packaging systems have problems with this --, |
223 | simply inline the aliases here. */ | 257 | simply inline the aliases here. */ |
224 | cp = "ISO8859-1" "\0" "ISO-8859-1" "\0" | 258 | cp = "ISO8859-1" "\0" "ISO-8859-1" "\0" |
225 | "ISO8859-2" "\0" "ISO-8859-2" "\0" | 259 | "ISO8859-2" "\0" "ISO-8859-2" "\0" |
226 | "ISO8859-4" "\0" "ISO-8859-4" "\0" | 260 | "ISO8859-4" "\0" "ISO-8859-4" "\0" |
227 | "ISO8859-5" "\0" "ISO-8859-5" "\0" | 261 | "ISO8859-5" "\0" "ISO-8859-5" "\0" |
228 | "ISO8859-7" "\0" "ISO-8859-7" "\0" | 262 | "ISO8859-7" "\0" "ISO-8859-7" "\0" |
229 | "ISO8859-9" "\0" "ISO-8859-9" "\0" | 263 | "ISO8859-9" "\0" "ISO-8859-9" "\0" |
230 | "ISO8859-13" "\0" "ISO-8859-13" "\0" | 264 | "ISO8859-13" "\0" "ISO-8859-13" "\0" |
231 | "ISO8859-15" "\0" "ISO-8859-15" "\0" | 265 | "ISO8859-15" "\0" "ISO-8859-15" "\0" |
232 | "KOI8-R" "\0" "KOI8-R" "\0" | 266 | "KOI8-R" "\0" "KOI8-R" "\0" |
233 | "KOI8-U" "\0" "KOI8-U" "\0" | 267 | "KOI8-U" "\0" "KOI8-U" "\0" |
234 | "CP866" "\0" "CP866" "\0" | 268 | "CP866" "\0" "CP866" "\0" |
235 | "CP949" "\0" "CP949" "\0" | 269 | "CP949" "\0" "CP949" "\0" |
236 | "CP1131" "\0" "CP1131" "\0" | 270 | "CP1131" "\0" "CP1131" "\0" |
237 | "CP1251" "\0" "CP1251" "\0" | 271 | "CP1251" "\0" "CP1251" "\0" |
238 | "eucCN" "\0" "GB2312" "\0" | 272 | "eucCN" "\0" "GB2312" "\0" |
239 | "GB2312" "\0" "GB2312" "\0" | 273 | "GB2312" "\0" "GB2312" "\0" |
240 | "eucJP" "\0" "EUC-JP" "\0" | 274 | "eucJP" "\0" "EUC-JP" "\0" |
241 | "eucKR" "\0" "EUC-KR" "\0" | 275 | "eucKR" "\0" "EUC-KR" "\0" |
242 | "Big5" "\0" "BIG5" "\0" | 276 | "Big5" "\0" "BIG5" "\0" |
243 | "Big5HKSCS" "\0" "BIG5-HKSCS" "\0" | 277 | "Big5HKSCS" "\0" "BIG5-HKSCS" "\0" |
244 | "GBK" "\0" "GBK" "\0" | 278 | "GBK" "\0" "GBK" "\0" |
245 | "GB18030" "\0" "GB18030" "\0" | 279 | "GB18030" "\0" "GB18030" "\0" |
246 | "SJIS" "\0" "SHIFT_JIS" "\0" | 280 | "SJIS" "\0" "SHIFT_JIS" "\0" |
247 | "ARMSCII-8" "\0" "ARMSCII-8" "\0" | 281 | "ARMSCII-8" "\0" "ARMSCII-8" "\0" |
248 | "PT154" "\0" "PT154" "\0" | 282 | "PT154" "\0" "PT154" "\0" |
249 | /*"ISCII-DEV" "\0" "?" "\0"*/ | 283 | /*"ISCII-DEV" "\0" "?" "\0"*/ |
250 | "*" "\0" "UTF-8" "\0"; | 284 | "*" "\0" "UTF-8" "\0"; |
251 | # endif | 285 | # endif |
252 | 286 | ||
253 | # if defined VMS | 287 | # if defined VMS |
254 | /* To avoid the troubles of an extra file charset.alias_vms in the | 288 | /* To avoid the troubles of an extra file charset.alias_vms in the |
255 | sources of many GNU packages, simply inline the aliases here. */ | 289 | sources of many GNU packages, simply inline the aliases here. */ |
256 | /* The list of encodings is taken from the OpenVMS 7.3-1 documentation | 290 | /* The list of encodings is taken from the OpenVMS 7.3-1 documentation |
257 | "Compaq C Run-Time Library Reference Manual for OpenVMS systems" | 291 | "Compaq C Run-Time Library Reference Manual for OpenVMS systems" |
258 | section 10.7 "Handling Different Character Sets". */ | 292 | section 10.7 "Handling Different Character Sets". */ |
259 | cp = "ISO8859-1" "\0" "ISO-8859-1" "\0" | 293 | cp = "ISO8859-1" "\0" "ISO-8859-1" "\0" |
260 | "ISO8859-2" "\0" "ISO-8859-2" "\0" | 294 | "ISO8859-2" "\0" "ISO-8859-2" "\0" |
261 | "ISO8859-5" "\0" "ISO-8859-5" "\0" | 295 | "ISO8859-5" "\0" "ISO-8859-5" "\0" |
262 | "ISO8859-7" "\0" "ISO-8859-7" "\0" | 296 | "ISO8859-7" "\0" "ISO-8859-7" "\0" |
263 | "ISO8859-8" "\0" "ISO-8859-8" "\0" | 297 | "ISO8859-8" "\0" "ISO-8859-8" "\0" |
264 | "ISO8859-9" "\0" "ISO-8859-9" "\0" | 298 | "ISO8859-9" "\0" "ISO-8859-9" "\0" |
265 | /* Japanese */ | 299 | /* Japanese */ |
266 | "eucJP" "\0" "EUC-JP" "\0" | 300 | "eucJP" "\0" "EUC-JP" "\0" |
267 | "SJIS" "\0" "SHIFT_JIS" "\0" | 301 | "SJIS" "\0" "SHIFT_JIS" "\0" |
268 | "DECKANJI" "\0" "DEC-KANJI" "\0" | 302 | "DECKANJI" "\0" "DEC-KANJI" "\0" |
269 | "SDECKANJI" "\0" "EUC-JP" "\0" | 303 | "SDECKANJI" "\0" "EUC-JP" "\0" |
270 | /* Chinese */ | 304 | /* Chinese */ |
271 | "eucTW" "\0" "EUC-TW" "\0" | 305 | "eucTW" "\0" "EUC-TW" "\0" |
272 | "DECHANYU" "\0" "DEC-HANYU" "\0" | 306 | "DECHANYU" "\0" "DEC-HANYU" "\0" |
273 | "DECHANZI" "\0" "GB2312" "\0" | 307 | "DECHANZI" "\0" "GB2312" "\0" |
274 | /* Korean */ | 308 | /* Korean */ |
275 | "DECKOREAN" "\0" "EUC-KR" "\0"; | 309 | "DECKOREAN" "\0" "EUC-KR" "\0"; |
276 | # endif | 310 | # endif |
277 | 311 | ||
278 | # if defined WIN32_NATIVE || defined __CYGWIN__ | 312 | # if defined WIN32_NATIVE || defined __CYGWIN__ |
279 | /* To avoid the troubles of installing a separate file in the same | 313 | /* To avoid the troubles of installing a separate file in the same |
280 | directory as the DLL and of retrieving the DLL's directory at | 314 | directory as the DLL and of retrieving the DLL's directory at |
281 | runtime, simply inline the aliases here. */ | 315 | runtime, simply inline the aliases here. */ |
282 | 316 | ||
283 | cp = "CP936" "\0" "GBK" "\0" | 317 | cp = "CP936" "\0" "GBK" "\0" |
284 | "CP1361" "\0" "JOHAB" "\0" | 318 | "CP1361" "\0" "JOHAB" "\0" |
285 | "CP20127" "\0" "ASCII" "\0" | 319 | "CP20127" "\0" "ASCII" "\0" |
286 | "CP20866" "\0" "KOI8-R" "\0" | 320 | "CP20866" "\0" "KOI8-R" "\0" |
287 | "CP20936" "\0" "GB2312" "\0" | 321 | "CP20936" "\0" "GB2312" "\0" |
288 | "CP21866" "\0" "KOI8-RU" "\0" | 322 | "CP21866" "\0" "KOI8-RU" "\0" |
289 | "CP28591" "\0" "ISO-8859-1" "\0" | 323 | "CP28591" "\0" "ISO-8859-1" "\0" |
290 | "CP28592" "\0" "ISO-8859-2" "\0" | 324 | "CP28592" "\0" "ISO-8859-2" "\0" |
291 | "CP28593" "\0" "ISO-8859-3" "\0" | 325 | "CP28593" "\0" "ISO-8859-3" "\0" |
292 | "CP28594" "\0" "ISO-8859-4" "\0" | 326 | "CP28594" "\0" "ISO-8859-4" "\0" |
293 | "CP28595" "\0" "ISO-8859-5" "\0" | 327 | "CP28595" "\0" "ISO-8859-5" "\0" |
294 | "CP28596" "\0" "ISO-8859-6" "\0" | 328 | "CP28596" "\0" "ISO-8859-6" "\0" |
295 | "CP28597" "\0" "ISO-8859-7" "\0" | 329 | "CP28597" "\0" "ISO-8859-7" "\0" |
296 | "CP28598" "\0" "ISO-8859-8" "\0" | 330 | "CP28598" "\0" "ISO-8859-8" "\0" |
297 | "CP28599" "\0" "ISO-8859-9" "\0" | 331 | "CP28599" "\0" "ISO-8859-9" "\0" |
298 | "CP28605" "\0" "ISO-8859-15" "\0" | 332 | "CP28605" "\0" "ISO-8859-15" "\0" |
299 | "CP38598" "\0" "ISO-8859-8" "\0" | 333 | "CP38598" "\0" "ISO-8859-8" "\0" |
300 | "CP51932" "\0" "EUC-JP" "\0" | 334 | "CP51932" "\0" "EUC-JP" "\0" |
301 | "CP51936" "\0" "GB2312" "\0" | 335 | "CP51936" "\0" "GB2312" "\0" |
302 | "CP51949" "\0" "EUC-KR" "\0" | 336 | "CP51949" "\0" "EUC-KR" "\0" |
303 | "CP51950" "\0" "EUC-TW" "\0" | 337 | "CP51950" "\0" "EUC-TW" "\0" |
304 | "CP54936" "\0" "GB18030" "\0" | 338 | "CP54936" "\0" "GB18030" "\0" |
305 | "CP65001" "\0" "UTF-8" "\0"; | 339 | "CP65001" "\0" "UTF-8" "\0"; |
306 | # endif | 340 | # endif |
307 | #endif | 341 | #endif |
308 | 342 | ||
@@ -335,7 +369,7 @@ locale_charset (void) | |||
335 | codeset = nl_langinfo (CODESET); | 369 | codeset = nl_langinfo (CODESET); |
336 | 370 | ||
337 | # ifdef __CYGWIN__ | 371 | # ifdef __CYGWIN__ |
338 | /* Cygwin 2006 does not have locales. nl_langinfo (CODESET) always | 372 | /* Cygwin 1.5.x does not have locales. nl_langinfo (CODESET) always |
339 | returns "US-ASCII". As long as this is not fixed, return the suffix | 373 | returns "US-ASCII". As long as this is not fixed, return the suffix |
340 | of the locale name from the environment variables (if present) or | 374 | of the locale name from the environment variables (if present) or |
341 | the codepage as a number. */ | 375 | the codepage as a number. */ |
@@ -346,36 +380,46 @@ locale_charset (void) | |||
346 | 380 | ||
347 | locale = getenv ("LC_ALL"); | 381 | locale = getenv ("LC_ALL"); |
348 | if (locale == NULL || locale[0] == '\0') | 382 | if (locale == NULL || locale[0] == '\0') |
349 | { | 383 | { |
350 | locale = getenv ("LC_CTYPE"); | 384 | locale = getenv ("LC_CTYPE"); |
351 | if (locale == NULL || locale[0] == '\0') | 385 | if (locale == NULL || locale[0] == '\0') |
352 | locale = getenv ("LANG"); | 386 | locale = getenv ("LANG"); |
353 | } | 387 | } |
354 | if (locale != NULL && locale[0] != '\0') | 388 | if (locale != NULL && locale[0] != '\0') |
355 | { | 389 | { |
356 | /* If the locale name contains an encoding after the dot, return | 390 | /* If the locale name contains an encoding after the dot, return |
357 | it. */ | 391 | it. */ |
358 | const char *dot = strchr (locale, '.'); | 392 | const char *dot = strchr (locale, '.'); |
359 | 393 | ||
360 | if (dot != NULL) | 394 | if (dot != NULL) |
361 | { | 395 | { |
362 | const char *modifier; | 396 | const char *modifier; |
363 | 397 | ||
364 | dot++; | 398 | dot++; |
365 | /* Look for the possible @... trailer and remove it, if any. */ | 399 | /* Look for the possible @... trailer and remove it, if any. */ |
366 | modifier = strchr (dot, '@'); | 400 | modifier = strchr (dot, '@'); |
367 | if (modifier == NULL) | 401 | if (modifier == NULL) |
368 | return dot; | 402 | return dot; |
369 | if (modifier - dot < sizeof (buf)) | 403 | if (modifier - dot < sizeof (buf)) |
370 | { | 404 | { |
371 | memcpy (buf, dot, modifier - dot); | 405 | memcpy (buf, dot, modifier - dot); |
372 | buf [modifier - dot] = '\0'; | 406 | buf [modifier - dot] = '\0'; |
373 | return buf; | 407 | return buf; |
374 | } | 408 | } |
375 | } | 409 | } |
376 | } | 410 | } |
377 | 411 | ||
378 | /* Woe32 has a function returning the locale's codepage as a number. */ | 412 | /* Woe32 has a function returning the locale's codepage as a number: |
413 | GetACP(). This encoding is used by Cygwin, unless the user has set | ||
414 | the environment variable CYGWIN=codepage:oem (which very few people | ||
415 | do). | ||
416 | Output directed to console windows needs to be converted (to | ||
417 | GetOEMCP() if the console is using a raster font, or to | ||
418 | GetConsoleOutputCP() if it is using a TrueType font). Cygwin does | ||
419 | this conversion transparently (see winsup/cygwin/fhandler_console.cc), | ||
420 | converting to GetConsoleOutputCP(). This leads to correct results, | ||
421 | except when SetConsoleOutputCP has been called and a raster font is | ||
422 | in use. */ | ||
379 | sprintf (buf, "CP%u", GetACP ()); | 423 | sprintf (buf, "CP%u", GetACP ()); |
380 | codeset = buf; | 424 | codeset = buf; |
381 | } | 425 | } |
@@ -397,11 +441,11 @@ locale_charset (void) | |||
397 | { | 441 | { |
398 | locale = getenv ("LC_ALL"); | 442 | locale = getenv ("LC_ALL"); |
399 | if (locale == NULL || locale[0] == '\0') | 443 | if (locale == NULL || locale[0] == '\0') |
400 | { | 444 | { |
401 | locale = getenv ("LC_CTYPE"); | 445 | locale = getenv ("LC_CTYPE"); |
402 | if (locale == NULL || locale[0] == '\0') | 446 | if (locale == NULL || locale[0] == '\0') |
403 | locale = getenv ("LANG"); | 447 | locale = getenv ("LANG"); |
404 | } | 448 | } |
405 | } | 449 | } |
406 | 450 | ||
407 | /* On some old systems, one used to set locale = "iso8859_1". On others, | 451 | /* On some old systems, one used to set locale = "iso8859_1". On others, |
@@ -415,7 +459,13 @@ locale_charset (void) | |||
415 | 459 | ||
416 | static char buf[2 + 10 + 1]; | 460 | static char buf[2 + 10 + 1]; |
417 | 461 | ||
418 | /* Woe32 has a function returning the locale's codepage as a number. */ | 462 | /* Woe32 has a function returning the locale's codepage as a number: |
463 | GetACP(). | ||
464 | When the output goes to a console window, it needs to be provided in | ||
465 | GetOEMCP() encoding if the console is using a raster font, or in | ||
466 | GetConsoleOutputCP() encoding if it is using a TrueType font. | ||
467 | But in GUI programs and for output sent to files and pipes, GetACP() | ||
468 | encoding is the best bet. */ | ||
419 | sprintf (buf, "CP%u", GetACP ()); | 469 | sprintf (buf, "CP%u", GetACP ()); |
420 | codeset = buf; | 470 | codeset = buf; |
421 | 471 | ||
@@ -433,7 +483,7 @@ locale_charset (void) | |||
433 | { | 483 | { |
434 | locale = getenv ("LC_CTYPE"); | 484 | locale = getenv ("LC_CTYPE"); |
435 | if (locale == NULL || locale[0] == '\0') | 485 | if (locale == NULL || locale[0] == '\0') |
436 | locale = getenv ("LANG"); | 486 | locale = getenv ("LANG"); |
437 | } | 487 | } |
438 | if (locale != NULL && locale[0] != '\0') | 488 | if (locale != NULL && locale[0] != '\0') |
439 | { | 489 | { |
@@ -441,21 +491,21 @@ locale_charset (void) | |||
441 | const char *dot = strchr (locale, '.'); | 491 | const char *dot = strchr (locale, '.'); |
442 | 492 | ||
443 | if (dot != NULL) | 493 | if (dot != NULL) |
444 | { | 494 | { |
445 | const char *modifier; | 495 | const char *modifier; |
446 | 496 | ||
447 | dot++; | 497 | dot++; |
448 | /* Look for the possible @... trailer and remove it, if any. */ | 498 | /* Look for the possible @... trailer and remove it, if any. */ |
449 | modifier = strchr (dot, '@'); | 499 | modifier = strchr (dot, '@'); |
450 | if (modifier == NULL) | 500 | if (modifier == NULL) |
451 | return dot; | 501 | return dot; |
452 | if (modifier - dot < sizeof (buf)) | 502 | if (modifier - dot < sizeof (buf)) |
453 | { | 503 | { |
454 | memcpy (buf, dot, modifier - dot); | 504 | memcpy (buf, dot, modifier - dot); |
455 | buf [modifier - dot] = '\0'; | 505 | buf [modifier - dot] = '\0'; |
456 | return buf; | 506 | return buf; |
457 | } | 507 | } |
458 | } | 508 | } |
459 | 509 | ||
460 | /* Resolve through the charset.alias file. */ | 510 | /* Resolve through the charset.alias file. */ |
461 | codeset = locale; | 511 | codeset = locale; |
@@ -464,12 +514,12 @@ locale_charset (void) | |||
464 | { | 514 | { |
465 | /* OS/2 has a function returning the locale's codepage as a number. */ | 515 | /* OS/2 has a function returning the locale's codepage as a number. */ |
466 | if (DosQueryCp (sizeof (cp), cp, &cplen)) | 516 | if (DosQueryCp (sizeof (cp), cp, &cplen)) |
467 | codeset = ""; | 517 | codeset = ""; |
468 | else | 518 | else |
469 | { | 519 | { |
470 | sprintf (buf, "CP%u", cp[0]); | 520 | sprintf (buf, "CP%u", cp[0]); |
471 | codeset = buf; | 521 | codeset = buf; |
472 | } | 522 | } |
473 | } | 523 | } |
474 | 524 | ||
475 | #endif | 525 | #endif |
@@ -483,10 +533,10 @@ locale_charset (void) | |||
483 | *aliases != '\0'; | 533 | *aliases != '\0'; |
484 | aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1) | 534 | aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1) |
485 | if (strcmp (codeset, aliases) == 0 | 535 | if (strcmp (codeset, aliases) == 0 |
486 | || (aliases[0] == '*' && aliases[1] == '\0')) | 536 | || (aliases[0] == '*' && aliases[1] == '\0')) |
487 | { | 537 | { |
488 | codeset = aliases + strlen (aliases) + 1; | 538 | codeset = aliases + strlen (aliases) + 1; |
489 | break; | 539 | break; |
490 | } | 540 | } |
491 | 541 | ||
492 | /* Don't return an empty string. GNU libc and GNU libiconv interpret | 542 | /* Don't return an empty string. GNU libc and GNU libiconv interpret |