1 files changed, 266 insertions, 216 deletions
diff --git a/gl/localcharset.c b/gl/localcharset.c
index a7ca94c1..a04dc446 100644
--- a/gl/localcharset.c
+++ b/gl/localcharset.c
@@ -1,6 +1,6 @@
 /* Determine a canonical name for the current locale's character encoding.
-   Copyright (C) 2000-2006, 2008-2009 Free Software Foundation, Inc.
+   Copyright (C) 2000-2006, 2008-2010 Free Software Foundation, Inc.
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -23,6 +23,7 @@
 /* Specification.  */
 #include "localcharset.h"
+#include <fcntl.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <string.h>
@@ -44,6 +45,7 @@
 #endif
 #if !defined WIN32_NATIVE
+# include <unistd.h>
 # if HAVE_LANGINFO_CODESET
 #  include <langinfo.h>
 # else
@@ -75,6 +77,11 @@
 # include "configmake.h"
 #endif
+/* Define O_NOFOLLOW to 0 on platforms where it does not exist.  */
+#ifndef O_NOFOLLOW
+# define O_NOFOLLOW 0
+#endif
 #if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__
  /* Win32, Cygwin, OS/2, DOS */
 # define ISSLASH(C) ((C) == '/' || (C) == '\\')
@@ -117,192 +124,219 @@ get_charset_aliases (void)
  if (cp == NULL)
    {
 #if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE || defined __CYGWIN__)
-      FILE *fp;
      const char *dir;
      const char *base = "charset.alias";
      char *file_name;
      /* Make it possible to override the charset.alias location.  This is
-         necessary for running the testsuite before "make install".  */
+         necessary for running the testsuite before "make install".  */
      dir = getenv ("CHARSETALIASDIR");
      if (dir == NULL || dir[0] == '\0')
-        dir = relocate (LIBDIR);
+        dir = relocate (LIBDIR);
      /* Concatenate dir and base into freshly allocated file_name.  */
      {
-        size_t dir_len = strlen (dir);
+        size_t dir_len = strlen (dir);
-        size_t base_len = strlen (base);
+        size_t base_len = strlen (base);
-        int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
+        int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
-        file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
+        file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
-        if (file_name != NULL)
+        if (file_name != NULL)
-          {
+          {
-            memcpy (file_name, dir, dir_len);
+            memcpy (file_name, dir, dir_len);
-            if (add_slash)
+            if (add_slash)
-              file_name[dir_len] = DIRECTORY_SEPARATOR;
+              file_name[dir_len] = DIRECTORY_SEPARATOR;
-            memcpy (file_name + dir_len + add_slash, base, base_len + 1);
+            memcpy (file_name + dir_len + add_slash, base, base_len + 1);
-          }
+          }
      }
-      if (file_name == NULL || (fp = fopen (file_name, "r")) == NULL)
+      if (file_name == NULL)
-        /* Out of memory or file not found, treat it as empty.  */
+        /* Out of memory.  Treat the file as empty.  */
-        cp = "";
+        cp = "";
      else
-        {
+        {
-          /* Parse the file's contents.  */
+          int fd;
-          char *res_ptr = NULL;
-          size_t res_size = 0;
+          /* Open the file.  Reject symbolic links on platforms that support
+             O_NOFOLLOW.  This is a security feature.  Without it, an attacker
-          for (;;)
+             could retrieve parts of the contents (namely, the tail of the
-            {
+             first line that starts with "* ") of an arbitrary file by placing
-              int c;
+             a symbolic link to that file under the name "charset.alias" in
-              char buf1[50+1];
+             some writable directory and defining the environment variable
-              char buf2[50+1];
+             CHARSETALIASDIR to point to that directory.  */
-              size_t l1, l2;
+          fd = open (file_name,
-              char *old_res_ptr;
+                     O_RDONLY | (HAVE_WORKING_O_NOFOLLOW ? O_NOFOLLOW : 0));
+          if (fd < 0)
-              c = getc (fp);
+            /* File not found.  Treat it as empty.  */
-              if (c == EOF)
+            cp = "";
-                break;
+          else
-              if (c == '\n' || c == ' ' || c == '\t')
+            {
-                continue;
+              FILE *fp;
-              if (c == '#')
-                {
+              fp = fdopen (fd, "r");
-                  /* Skip comment, to end of line.  */
+              if (fp == NULL)
-                  do
+                {
-                    c = getc (fp);
+                  /* Out of memory.  Treat the file as empty.  */
-                  while (!(c == EOF || c == '\n'));
+                  close (fd);
-                  if (c == EOF)
+                  cp = "";
-                    break;
+                }
-                  continue;
+              else
-                }
+                {
-              ungetc (c, fp);
+                  /* Parse the file's contents.  */
-              if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)
+                  char *res_ptr = NULL;
-                break;
+                  size_t res_size = 0;
-              l1 = strlen (buf1);
-              l2 = strlen (buf2);
+                  for (;;)
-              old_res_ptr = res_ptr;
+                    {
-              if (res_size == 0)
+                      int c;
-                {
+                      char buf1[50+1];
-                  res_size = l1 + 1 + l2 + 1;
+                      char buf2[50+1];
-                  res_ptr = (char *) malloc (res_size + 1);
+                      size_t l1, l2;
-                }
+                      char *old_res_ptr;
-              else
-                {
+                      c = getc (fp);
-                  res_size += l1 + 1 + l2 + 1;
+                      if (c == EOF)
-                  res_ptr = (char *) realloc (res_ptr, res_size + 1);
+                        break;
-                }
+                      if (c == '\n' || c == ' ' || c == '\t')
-              if (res_ptr == NULL)
+                        continue;
-                {
+                      if (c == '#')
-                  /* Out of memory. */
+                        {
-                  res_size = 0;
+                          /* Skip comment, to end of line.  */
-                  if (old_res_ptr != NULL)
+                          do
-                    free (old_res_ptr);
+                            c = getc (fp);
-                  break;
+                          while (!(c == EOF || c == '\n'));
-                }
+                          if (c == EOF)
-              strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
+                            break;
-              strcpy (res_ptr + res_size - (l2 + 1), buf2);
+                          continue;
-            }
+                        }
-          fclose (fp);
+                      ungetc (c, fp);
-          if (res_size == 0)
+                      if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)
-            cp = "";
+                        break;
-          else
+                      l1 = strlen (buf1);
-            {
+                      l2 = strlen (buf2);
-              *(res_ptr + res_size) = '\0';
+                      old_res_ptr = res_ptr;
-              cp = res_ptr;
+                      if (res_size == 0)
-            }
+                        {
-        }
+                          res_size = l1 + 1 + l2 + 1;
+                          res_ptr = (char *) malloc (res_size + 1);
-      if (file_name != NULL)
+                        }
-        free (file_name);
+                      else
+                        {
+                          res_size += l1 + 1 + l2 + 1;
+                          res_ptr = (char *) realloc (res_ptr, res_size + 1);
+                        }
+                      if (res_ptr == NULL)
+                        {
+                          /* Out of memory. */
+                          res_size = 0;
+                          if (old_res_ptr != NULL)
+                            free (old_res_ptr);
+                          break;
+                        }
+                      strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
+                      strcpy (res_ptr + res_size - (l2 + 1), buf2);
+                    }
+                  fclose (fp);
+                  if (res_size == 0)
+                    cp = "";
+                  else
+                    {
+                      *(res_ptr + res_size) = '\0';
+                      cp = res_ptr;
+                    }
+                }
+            }
+          free (file_name);
+        }
 #else
 # if defined DARWIN7
      /* To avoid the trouble of installing a file that is shared by many
-         GNU packages -- many packaging systems have problems with this --,
+         GNU packages -- many packaging systems have problems with this --,
-         simply inline the aliases here.  */
+         simply inline the aliases here.  */
      cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
-           "ISO8859-2" "\0" "ISO-8859-2" "\0"
+           "ISO8859-2" "\0" "ISO-8859-2" "\0"
-           "ISO8859-4" "\0" "ISO-8859-4" "\0"
+           "ISO8859-4" "\0" "ISO-8859-4" "\0"
-           "ISO8859-5" "\0" "ISO-8859-5" "\0"
+           "ISO8859-5" "\0" "ISO-8859-5" "\0"
-           "ISO8859-7" "\0" "ISO-8859-7" "\0"
+           "ISO8859-7" "\0" "ISO-8859-7" "\0"
-           "ISO8859-9" "\0" "ISO-8859-9" "\0"
+           "ISO8859-9" "\0" "ISO-8859-9" "\0"
-           "ISO8859-13" "\0" "ISO-8859-13" "\0"
+           "ISO8859-13" "\0" "ISO-8859-13" "\0"
-           "ISO8859-15" "\0" "ISO-8859-15" "\0"
+           "ISO8859-15" "\0" "ISO-8859-15" "\0"
-           "KOI8-R" "\0" "KOI8-R" "\0"
+           "KOI8-R" "\0" "KOI8-R" "\0"
-           "KOI8-U" "\0" "KOI8-U" "\0"
+           "KOI8-U" "\0" "KOI8-U" "\0"
-           "CP866" "\0" "CP866" "\0"
+           "CP866" "\0" "CP866" "\0"
-           "CP949" "\0" "CP949" "\0"
+           "CP949" "\0" "CP949" "\0"
-           "CP1131" "\0" "CP1131" "\0"
+           "CP1131" "\0" "CP1131" "\0"
-           "CP1251" "\0" "CP1251" "\0"
+           "CP1251" "\0" "CP1251" "\0"
-           "eucCN" "\0" "GB2312" "\0"
+           "eucCN" "\0" "GB2312" "\0"
-           "GB2312" "\0" "GB2312" "\0"
+           "GB2312" "\0" "GB2312" "\0"
-           "eucJP" "\0" "EUC-JP" "\0"
+           "eucJP" "\0" "EUC-JP" "\0"
-           "eucKR" "\0" "EUC-KR" "\0"
+           "eucKR" "\0" "EUC-KR" "\0"
-           "Big5" "\0" "BIG5" "\0"
+           "Big5" "\0" "BIG5" "\0"
-           "Big5HKSCS" "\0" "BIG5-HKSCS" "\0"
+           "Big5HKSCS" "\0" "BIG5-HKSCS" "\0"
-           "GBK" "\0" "GBK" "\0"
+           "GBK" "\0" "GBK" "\0"
-           "GB18030" "\0" "GB18030" "\0"
+           "GB18030" "\0" "GB18030" "\0"
-           "SJIS" "\0" "SHIFT_JIS" "\0"
+           "SJIS" "\0" "SHIFT_JIS" "\0"
-           "ARMSCII-8" "\0" "ARMSCII-8" "\0"
+           "ARMSCII-8" "\0" "ARMSCII-8" "\0"
-           "PT154" "\0" "PT154" "\0"
+           "PT154" "\0" "PT154" "\0"
-         /*"ISCII-DEV" "\0" "?" "\0"*/
+         /*"ISCII-DEV" "\0" "?" "\0"*/
-           "*" "\0" "UTF-8" "\0";
+           "*" "\0" "UTF-8" "\0";
 # endif
 # if defined VMS
      /* To avoid the troubles of an extra file charset.alias_vms in the
-         sources of many GNU packages, simply inline the aliases here.  */
+         sources of many GNU packages, simply inline the aliases here.  */
      /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
-         "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
+         "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
-         section 10.7 "Handling Different Character Sets".  */
+         section 10.7 "Handling Different Character Sets".  */
      cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
-           "ISO8859-2" "\0" "ISO-8859-2" "\0"
+           "ISO8859-2" "\0" "ISO-8859-2" "\0"
-           "ISO8859-5" "\0" "ISO-8859-5" "\0"
+           "ISO8859-5" "\0" "ISO-8859-5" "\0"
-           "ISO8859-7" "\0" "ISO-8859-7" "\0"
+           "ISO8859-7" "\0" "ISO-8859-7" "\0"
-           "ISO8859-8" "\0" "ISO-8859-8" "\0"
+           "ISO8859-8" "\0" "ISO-8859-8" "\0"
-           "ISO8859-9" "\0" "ISO-8859-9" "\0"
+           "ISO8859-9" "\0" "ISO-8859-9" "\0"
-           /* Japanese */
+           /* Japanese */
-           "eucJP" "\0" "EUC-JP" "\0"
+           "eucJP" "\0" "EUC-JP" "\0"
-           "SJIS" "\0" "SHIFT_JIS" "\0"
+           "SJIS" "\0" "SHIFT_JIS" "\0"
-           "DECKANJI" "\0" "DEC-KANJI" "\0"
+           "DECKANJI" "\0" "DEC-KANJI" "\0"
-           "SDECKANJI" "\0" "EUC-JP" "\0"
+           "SDECKANJI" "\0" "EUC-JP" "\0"
-           /* Chinese */
+           /* Chinese */
-           "eucTW" "\0" "EUC-TW" "\0"
+           "eucTW" "\0" "EUC-TW" "\0"
-           "DECHANYU" "\0" "DEC-HANYU" "\0"
+           "DECHANYU" "\0" "DEC-HANYU" "\0"
-           "DECHANZI" "\0" "GB2312" "\0"
+           "DECHANZI" "\0" "GB2312" "\0"
-           /* Korean */
+           /* Korean */
-           "DECKOREAN" "\0" "EUC-KR" "\0";
+           "DECKOREAN" "\0" "EUC-KR" "\0";
 # endif
 # if defined WIN32_NATIVE || defined __CYGWIN__
      /* To avoid the troubles of installing a separate file in the same
-         directory as the DLL and of retrieving the DLL's directory at
+         directory as the DLL and of retrieving the DLL's directory at
-         runtime, simply inline the aliases here.  */
+         runtime, simply inline the aliases here.  */
      cp = "CP936" "\0" "GBK" "\0"
-           "CP1361" "\0" "JOHAB" "\0"
+           "CP1361" "\0" "JOHAB" "\0"
-           "CP20127" "\0" "ASCII" "\0"
+           "CP20127" "\0" "ASCII" "\0"
-           "CP20866" "\0" "KOI8-R" "\0"
+           "CP20866" "\0" "KOI8-R" "\0"
-           "CP20936" "\0" "GB2312" "\0"
+           "CP20936" "\0" "GB2312" "\0"
-           "CP21866" "\0" "KOI8-RU" "\0"
+           "CP21866" "\0" "KOI8-RU" "\0"
-           "CP28591" "\0" "ISO-8859-1" "\0"
+           "CP28591" "\0" "ISO-8859-1" "\0"
-           "CP28592" "\0" "ISO-8859-2" "\0"
+           "CP28592" "\0" "ISO-8859-2" "\0"
-           "CP28593" "\0" "ISO-8859-3" "\0"
+           "CP28593" "\0" "ISO-8859-3" "\0"
-           "CP28594" "\0" "ISO-8859-4" "\0"
+           "CP28594" "\0" "ISO-8859-4" "\0"
-           "CP28595" "\0" "ISO-8859-5" "\0"
+           "CP28595" "\0" "ISO-8859-5" "\0"
-           "CP28596" "\0" "ISO-8859-6" "\0"
+           "CP28596" "\0" "ISO-8859-6" "\0"
-           "CP28597" "\0" "ISO-8859-7" "\0"
+           "CP28597" "\0" "ISO-8859-7" "\0"
-           "CP28598" "\0" "ISO-8859-8" "\0"
+           "CP28598" "\0" "ISO-8859-8" "\0"
-           "CP28599" "\0" "ISO-8859-9" "\0"
+           "CP28599" "\0" "ISO-8859-9" "\0"
-           "CP28605" "\0" "ISO-8859-15" "\0"
+           "CP28605" "\0" "ISO-8859-15" "\0"
-           "CP38598" "\0" "ISO-8859-8" "\0"
+           "CP38598" "\0" "ISO-8859-8" "\0"
-           "CP51932" "\0" "EUC-JP" "\0"
+           "CP51932" "\0" "EUC-JP" "\0"
-           "CP51936" "\0" "GB2312" "\0"
+           "CP51936" "\0" "GB2312" "\0"
-           "CP51949" "\0" "EUC-KR" "\0"
+           "CP51949" "\0" "EUC-KR" "\0"
-           "CP51950" "\0" "EUC-TW" "\0"
+           "CP51950" "\0" "EUC-TW" "\0"
-           "CP54936" "\0" "GB18030" "\0"
+           "CP54936" "\0" "GB18030" "\0"
-           "CP65001" "\0" "UTF-8" "\0";
+           "CP65001" "\0" "UTF-8" "\0";
 # endif
 #endif
@@ -335,7 +369,7 @@ locale_charset (void)
  codeset = nl_langinfo (CODESET);
 #  ifdef __CYGWIN__
-  /* Cygwin 2006 does not have locales.  nl_langinfo (CODESET) always
+  /* Cygwin 1.5.x does not have locales.  nl_langinfo (CODESET) always
     returns "US-ASCII".  As long as this is not fixed, return the suffix
     of the locale name from the environment variables (if present) or
     the codepage as a number.  */
@@ -346,36 +380,46 @@ locale_charset (void)
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
-        {
+        {
-          locale = getenv ("LC_CTYPE");
+          locale = getenv ("LC_CTYPE");
-          if (locale == NULL || locale[0] == '\0')
+          if (locale == NULL || locale[0] == '\0')
-            locale = getenv ("LANG");
+            locale = getenv ("LANG");
-        }
+        }
      if (locale != NULL && locale[0] != '\0')
-        {
+        {
-          /* If the locale name contains an encoding after the dot, return
+          /* If the locale name contains an encoding after the dot, return
-             it.  */
+             it.  */
-          const char *dot = strchr (locale, '.');
+          const char *dot = strchr (locale, '.');
-          if (dot != NULL)
+          if (dot != NULL)
-            {
+            {
-              const char *modifier;
+              const char *modifier;
-              dot++;
+              dot++;
-              /* Look for the possible @... trailer and remove it, if any.  */
+              /* Look for the possible @... trailer and remove it, if any.  */
-              modifier = strchr (dot, '@');
+              modifier = strchr (dot, '@');
-              if (modifier == NULL)
+              if (modifier == NULL)
-                return dot;
+                return dot;
-              if (modifier - dot < sizeof (buf))
+              if (modifier - dot < sizeof (buf))
-                {
+                {
-                  memcpy (buf, dot, modifier - dot);
+                  memcpy (buf, dot, modifier - dot);
-                  buf [modifier - dot] = '\0';
+                  buf [modifier - dot] = '\0';
-                  return buf;
+                  return buf;
-                }
+                }
-            }
+            }
-        }
+        }
-      /* Woe32 has a function returning the locale's codepage as a number.  */
+      /* Woe32 has a function returning the locale's codepage as a number:
+         GetACP().  This encoding is used by Cygwin, unless the user has set
+         the environment variable CYGWIN=codepage:oem (which very few people
+         do).
+         Output directed to console windows needs to be converted (to
+         GetOEMCP() if the console is using a raster font, or to
+         GetConsoleOutputCP() if it is using a TrueType font).  Cygwin does
+         this conversion transparently (see winsup/cygwin/fhandler_console.cc),
+         converting to GetConsoleOutputCP().  This leads to correct results,
+         except when SetConsoleOutputCP has been called and a raster font is
+         in use.  */
      sprintf (buf, "CP%u", GetACP ());
      codeset = buf;
    }
@@ -397,11 +441,11 @@ locale_charset (void)
    {
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
-        {
+        {
-          locale = getenv ("LC_CTYPE");
+          locale = getenv ("LC_CTYPE");
-          if (locale == NULL || locale[0] == '\0')
+          if (locale == NULL || locale[0] == '\0')
-            locale = getenv ("LANG");
+            locale = getenv ("LANG");
-        }
+        }
    }
  /* On some old systems, one used to set locale = "iso8859_1". On others,
@@ -415,7 +459,13 @@ locale_charset (void)
  static char buf[2 + 10 + 1];
-  /* Woe32 has a function returning the locale's codepage as a number.  */
+  /* Woe32 has a function returning the locale's codepage as a number:
+     GetACP().
+     When the output goes to a console window, it needs to be provided in
+     GetOEMCP() encoding if the console is using a raster font, or in
+     GetConsoleOutputCP() encoding if it is using a TrueType font.
+     But in GUI programs and for output sent to files and pipes, GetACP()
+     encoding is the best bet.  */
  sprintf (buf, "CP%u", GetACP ());
  codeset = buf;
@@ -433,7 +483,7 @@ locale_charset (void)
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
-        locale = getenv ("LANG");
+        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
@@ -441,21 +491,21 @@ locale_charset (void)
      const char *dot = strchr (locale, '.');
      if (dot != NULL)
-        {
+        {
-          const char *modifier;
+          const char *modifier;
-          dot++;
+          dot++;
-          /* Look for the possible @... trailer and remove it, if any.  */
+          /* Look for the possible @... trailer and remove it, if any.  */
-          modifier = strchr (dot, '@');
+          modifier = strchr (dot, '@');
-          if (modifier == NULL)
+          if (modifier == NULL)
-            return dot;
+            return dot;
-          if (modifier - dot < sizeof (buf))
+          if (modifier - dot < sizeof (buf))
-            {
+            {
-              memcpy (buf, dot, modifier - dot);
+              memcpy (buf, dot, modifier - dot);
-              buf [modifier - dot] = '\0';
+              buf [modifier - dot] = '\0';
-              return buf;
+              return buf;
-            }
+            }
-        }
+        }
      /* Resolve through the charset.alias file.  */
      codeset = locale;
@@ -464,12 +514,12 @@ locale_charset (void)
    {
      /* OS/2 has a function returning the locale's codepage as a number.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
-        codeset = "";
+        codeset = "";
      else
-        {
+        {
-          sprintf (buf, "CP%u", cp[0]);
+          sprintf (buf, "CP%u", cp[0]);
-          codeset = buf;
+          codeset = buf;
-        }
+        }
    }
 #endif
@@ -483,10 +533,10 @@ locale_charset (void)
       *aliases != '\0';
       aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
    if (strcmp (codeset, aliases) == 0
-        || (aliases[0] == '*' && aliases[1] == '\0'))
+        || (aliases[0] == '*' && aliases[1] == '\0'))
      {
-        codeset = aliases + strlen (aliases) + 1;
+        codeset = aliases + strlen (aliases) + 1;
-        break;
+        break;
      }
  /* Don't return an empty string.  GNU libc and GNU libiconv interpret

diff --git a/gl/localcharset.c b/gl/localcharset.c
index a7ca94c1..a04dc446 100644
--- a/gl/localcharset.c
+++ b/gl/localcharset.c
@@ -1,6 +1,6 @@
1	/* Determine a canonical name for the current locale's character encoding.	1	/* Determine a canonical name for the current locale's character encoding.
2		2
3	Copyright (C) 2000-2006, 2008-2009 Free Software Foundation, Inc.	3	Copyright (C) 2000-2006, 2008-2010 Free Software Foundation, Inc.
4		4
5	This program is free software; you can redistribute it and/or modify	5	This program is free software; you can redistribute it and/or modify
6	it under the terms of the GNU General Public License as published by	6	it under the terms of the GNU General Public License as published by
@@ -23,6 +23,7 @@
23	/* Specification. */	23	/* Specification. */
24	#include "localcharset.h"	24	#include "localcharset.h"
25		25
		26	#include <fcntl.h>
26	#include <stddef.h>	27	#include <stddef.h>
27	#include <stdio.h>	28	#include <stdio.h>
28	#include <string.h>	29	#include <string.h>
@@ -44,6 +45,7 @@
44	#endif	45	#endif
45		46
46	#if !defined WIN32_NATIVE	47	#if !defined WIN32_NATIVE
		48	# include <unistd.h>
47	# if HAVE_LANGINFO_CODESET	49	# if HAVE_LANGINFO_CODESET
48	# include <langinfo.h>	50	# include <langinfo.h>
49	# else	51	# else
@@ -75,6 +77,11 @@
75	# include "configmake.h"	77	# include "configmake.h"
76	#endif	78	#endif
77		79
		80	/* Define O_NOFOLLOW to 0 on platforms where it does not exist. */
		81	#ifndef O_NOFOLLOW
		82	# define O_NOFOLLOW 0
		83	#endif
		84
78	#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__	85	#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__
79	/* Win32, Cygwin, OS/2, DOS */	86	/* Win32, Cygwin, OS/2, DOS */
80	# define ISSLASH(C) ((C) == '/' || (C) == '\\')	87	# define ISSLASH(C) ((C) == '/' || (C) == '\\')
@@ -117,192 +124,219 @@ get_charset_aliases (void)
117	if (cp == NULL)	124	if (cp == NULL)
118	{	125	{
119	#if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE || defined __CYGWIN__)	126	#if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE || defined __CYGWIN__)
120	FILE *fp;
121	const char *dir;	127	const char *dir;
122	const char *base = "charset.alias";	128	const char *base = "charset.alias";
123	char *file_name;	129	char *file_name;
124		130
125	/* Make it possible to override the charset.alias location. This is	131	/* Make it possible to override the charset.alias location. This is
126	necessary for running the testsuite before "make install". */	132	necessary for running the testsuite before "make install". */
127	dir = getenv ("CHARSETALIASDIR");	133	dir = getenv ("CHARSETALIASDIR");
128	if (dir == NULL || dir[0] == '\0')	134	if (dir == NULL || dir[0] == '\0')
129	dir = relocate (LIBDIR);	135	dir = relocate (LIBDIR);
130		136
131	/* Concatenate dir and base into freshly allocated file_name. */	137	/* Concatenate dir and base into freshly allocated file_name. */
132	{	138	{
133	size_t dir_len = strlen (dir);	139	size_t dir_len = strlen (dir);
134	size_t base_len = strlen (base);	140	size_t base_len = strlen (base);
135	int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));	141	int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
136	file_name = (char *) malloc (dir_len + add_slash + base_len + 1);	142	file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
137	if (file_name != NULL)	143	if (file_name != NULL)
138	{	144	{
139	memcpy (file_name, dir, dir_len);	145	memcpy (file_name, dir, dir_len);
140	if (add_slash)	146	if (add_slash)
141	file_name[dir_len] = DIRECTORY_SEPARATOR;	147	file_name[dir_len] = DIRECTORY_SEPARATOR;
142	memcpy (file_name + dir_len + add_slash, base, base_len + 1);	148	memcpy (file_name + dir_len + add_slash, base, base_len + 1);
143	}	149	}
144	}	150	}
145		151
146	if (file_name == NULL || (fp = fopen (file_name, "r")) == NULL)	152	if (file_name == NULL)
147	/* Out of memory or file not found, treat it as empty. */	153	/* Out of memory. Treat the file as empty. */
148	cp = "";	154	cp = "";
149	else	155	else
150	{	156	{
151	/* Parse the file's contents. */	157	int fd;
152	char *res_ptr = NULL;	158
153	size_t res_size = 0;	159	/* Open the file. Reject symbolic links on platforms that support
154		160	O_NOFOLLOW. This is a security feature. Without it, an attacker
155	for (;;)	161	could retrieve parts of the contents (namely, the tail of the
156	{	162	first line that starts with "* ") of an arbitrary file by placing
157	int c;	163	a symbolic link to that file under the name "charset.alias" in
158	char buf1[50+1];	164	some writable directory and defining the environment variable
159	char buf2[50+1];	165	CHARSETALIASDIR to point to that directory. */
160	size_t l1, l2;	166	fd = open (file_name,
161	char *old_res_ptr;	167	O_RDONLY | (HAVE_WORKING_O_NOFOLLOW ? O_NOFOLLOW : 0));
162		168	if (fd < 0)
163	c = getc (fp);	169	/* File not found. Treat it as empty. */
164	if (c == EOF)	170	cp = "";
165	break;	171	else
166	if (c == '\n' || c == ' ' || c == '\t')	172	{
167	continue;	173	FILE *fp;
168	if (c == '#')	174
169	{	175	fp = fdopen (fd, "r");
170	/* Skip comment, to end of line. */	176	if (fp == NULL)
171	do	177	{
172	c = getc (fp);	178	/* Out of memory. Treat the file as empty. */
173	while (!(c == EOF || c == '\n'));	179	close (fd);
174	if (c == EOF)	180	cp = "";
175	break;	181	}
176	continue;	182	else
177	}	183	{
178	ungetc (c, fp);	184	/* Parse the file's contents. */
179	if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)	185	char *res_ptr = NULL;
180	break;	186	size_t res_size = 0;
181	l1 = strlen (buf1);	187
182	l2 = strlen (buf2);	188	for (;;)
183	old_res_ptr = res_ptr;	189	{
184	if (res_size == 0)	190	int c;
185	{	191	char buf1[50+1];
186	res_size = l1 + 1 + l2 + 1;	192	char buf2[50+1];
187	res_ptr = (char *) malloc (res_size + 1);	193	size_t l1, l2;
188	}	194	char *old_res_ptr;
189	else	195
190	{	196	c = getc (fp);
191	res_size += l1 + 1 + l2 + 1;	197	if (c == EOF)
192	res_ptr = (char *) realloc (res_ptr, res_size + 1);	198	break;
193	}	199	if (c == '\n' || c == ' ' || c == '\t')
194	if (res_ptr == NULL)	200	continue;
195	{	201	if (c == '#')
196	/* Out of memory. */	202	{
197	res_size = 0;	203	/* Skip comment, to end of line. */
198	if (old_res_ptr != NULL)	204	do
199	free (old_res_ptr);	205	c = getc (fp);
200	break;	206	while (!(c == EOF || c == '\n'));
201	}	207	if (c == EOF)
202	strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);	208	break;
203	strcpy (res_ptr + res_size - (l2 + 1), buf2);	209	continue;
204	}	210	}
205	fclose (fp);	211	ungetc (c, fp);
206	if (res_size == 0)	212	if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)
207	cp = "";	213	break;
208	else	214	l1 = strlen (buf1);
209	{	215	l2 = strlen (buf2);
210	*(res_ptr + res_size) = '\0';	216	old_res_ptr = res_ptr;
211	cp = res_ptr;	217	if (res_size == 0)
212	}	218	{
213	}	219	res_size = l1 + 1 + l2 + 1;
214		220	res_ptr = (char *) malloc (res_size + 1);
215	if (file_name != NULL)	221	}
216	free (file_name);	222	else
		223	{
		224	res_size += l1 + 1 + l2 + 1;
		225	res_ptr = (char *) realloc (res_ptr, res_size + 1);
		226	}
		227	if (res_ptr == NULL)
		228	{
		229	/* Out of memory. */
		230	res_size = 0;
		231	if (old_res_ptr != NULL)
		232	free (old_res_ptr);
		233	break;
		234	}
		235	strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
		236	strcpy (res_ptr + res_size - (l2 + 1), buf2);
		237	}
		238	fclose (fp);
		239	if (res_size == 0)
		240	cp = "";
		241	else
		242	{
		243	*(res_ptr + res_size) = '\0';
		244	cp = res_ptr;
		245	}
		246	}
		247	}
		248
		249	free (file_name);
		250	}
217		251
218	#else	252	#else
219		253
220	# if defined DARWIN7	254	# if defined DARWIN7
221	/* To avoid the trouble of installing a file that is shared by many	255	/* To avoid the trouble of installing a file that is shared by many
222	GNU packages -- many packaging systems have problems with this --,	256	GNU packages -- many packaging systems have problems with this --,
223	simply inline the aliases here. */	257	simply inline the aliases here. */
224	cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"	258	cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
225	"ISO8859-2" "\0" "ISO-8859-2" "\0"	259	"ISO8859-2" "\0" "ISO-8859-2" "\0"
226	"ISO8859-4" "\0" "ISO-8859-4" "\0"	260	"ISO8859-4" "\0" "ISO-8859-4" "\0"
227	"ISO8859-5" "\0" "ISO-8859-5" "\0"	261	"ISO8859-5" "\0" "ISO-8859-5" "\0"
228	"ISO8859-7" "\0" "ISO-8859-7" "\0"	262	"ISO8859-7" "\0" "ISO-8859-7" "\0"
229	"ISO8859-9" "\0" "ISO-8859-9" "\0"	263	"ISO8859-9" "\0" "ISO-8859-9" "\0"
230	"ISO8859-13" "\0" "ISO-8859-13" "\0"	264	"ISO8859-13" "\0" "ISO-8859-13" "\0"
231	"ISO8859-15" "\0" "ISO-8859-15" "\0"	265	"ISO8859-15" "\0" "ISO-8859-15" "\0"
232	"KOI8-R" "\0" "KOI8-R" "\0"	266	"KOI8-R" "\0" "KOI8-R" "\0"
233	"KOI8-U" "\0" "KOI8-U" "\0"	267	"KOI8-U" "\0" "KOI8-U" "\0"
234	"CP866" "\0" "CP866" "\0"	268	"CP866" "\0" "CP866" "\0"
235	"CP949" "\0" "CP949" "\0"	269	"CP949" "\0" "CP949" "\0"
236	"CP1131" "\0" "CP1131" "\0"	270	"CP1131" "\0" "CP1131" "\0"
237	"CP1251" "\0" "CP1251" "\0"	271	"CP1251" "\0" "CP1251" "\0"
238	"eucCN" "\0" "GB2312" "\0"	272	"eucCN" "\0" "GB2312" "\0"
239	"GB2312" "\0" "GB2312" "\0"	273	"GB2312" "\0" "GB2312" "\0"
240	"eucJP" "\0" "EUC-JP" "\0"	274	"eucJP" "\0" "EUC-JP" "\0"
241	"eucKR" "\0" "EUC-KR" "\0"	275	"eucKR" "\0" "EUC-KR" "\0"
242	"Big5" "\0" "BIG5" "\0"	276	"Big5" "\0" "BIG5" "\0"
243	"Big5HKSCS" "\0" "BIG5-HKSCS" "\0"	277	"Big5HKSCS" "\0" "BIG5-HKSCS" "\0"
244	"GBK" "\0" "GBK" "\0"	278	"GBK" "\0" "GBK" "\0"
245	"GB18030" "\0" "GB18030" "\0"	279	"GB18030" "\0" "GB18030" "\0"
246	"SJIS" "\0" "SHIFT_JIS" "\0"	280	"SJIS" "\0" "SHIFT_JIS" "\0"
247	"ARMSCII-8" "\0" "ARMSCII-8" "\0"	281	"ARMSCII-8" "\0" "ARMSCII-8" "\0"
248	"PT154" "\0" "PT154" "\0"	282	"PT154" "\0" "PT154" "\0"
249	/*"ISCII-DEV" "\0" "?" "\0"*/	283	/*"ISCII-DEV" "\0" "?" "\0"*/
250	"*" "\0" "UTF-8" "\0";	284	"*" "\0" "UTF-8" "\0";
251	# endif	285	# endif
252		286
253	# if defined VMS	287	# if defined VMS
254	/* To avoid the troubles of an extra file charset.alias_vms in the	288	/* To avoid the troubles of an extra file charset.alias_vms in the
255	sources of many GNU packages, simply inline the aliases here. */	289	sources of many GNU packages, simply inline the aliases here. */
256	/* The list of encodings is taken from the OpenVMS 7.3-1 documentation	290	/* The list of encodings is taken from the OpenVMS 7.3-1 documentation
257	"Compaq C Run-Time Library Reference Manual for OpenVMS systems"	291	"Compaq C Run-Time Library Reference Manual for OpenVMS systems"
258	section 10.7 "Handling Different Character Sets". */	292	section 10.7 "Handling Different Character Sets". */
259	cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"	293	cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
260	"ISO8859-2" "\0" "ISO-8859-2" "\0"	294	"ISO8859-2" "\0" "ISO-8859-2" "\0"
261	"ISO8859-5" "\0" "ISO-8859-5" "\0"	295	"ISO8859-5" "\0" "ISO-8859-5" "\0"
262	"ISO8859-7" "\0" "ISO-8859-7" "\0"	296	"ISO8859-7" "\0" "ISO-8859-7" "\0"
263	"ISO8859-8" "\0" "ISO-8859-8" "\0"	297	"ISO8859-8" "\0" "ISO-8859-8" "\0"
264	"ISO8859-9" "\0" "ISO-8859-9" "\0"	298	"ISO8859-9" "\0" "ISO-8859-9" "\0"
265	/* Japanese */	299	/* Japanese */
266	"eucJP" "\0" "EUC-JP" "\0"	300	"eucJP" "\0" "EUC-JP" "\0"
267	"SJIS" "\0" "SHIFT_JIS" "\0"	301	"SJIS" "\0" "SHIFT_JIS" "\0"
268	"DECKANJI" "\0" "DEC-KANJI" "\0"	302	"DECKANJI" "\0" "DEC-KANJI" "\0"
269	"SDECKANJI" "\0" "EUC-JP" "\0"	303	"SDECKANJI" "\0" "EUC-JP" "\0"
270	/* Chinese */	304	/* Chinese */
271	"eucTW" "\0" "EUC-TW" "\0"	305	"eucTW" "\0" "EUC-TW" "\0"
272	"DECHANYU" "\0" "DEC-HANYU" "\0"	306	"DECHANYU" "\0" "DEC-HANYU" "\0"
273	"DECHANZI" "\0" "GB2312" "\0"	307	"DECHANZI" "\0" "GB2312" "\0"
274	/* Korean */	308	/* Korean */
275	"DECKOREAN" "\0" "EUC-KR" "\0";	309	"DECKOREAN" "\0" "EUC-KR" "\0";
276	# endif	310	# endif
277		311
278	# if defined WIN32_NATIVE \|\| defined __CYGWIN__	312	# if defined WIN32_NATIVE \|\| defined __CYGWIN__
279	/* To avoid the troubles of installing a separate file in the same	313	/* To avoid the troubles of installing a separate file in the same
280	directory as the DLL and of retrieving the DLL's directory at	314	directory as the DLL and of retrieving the DLL's directory at
281	runtime, simply inline the aliases here. */	315	runtime, simply inline the aliases here. */
282		316
283	cp = "CP936" "\0" "GBK" "\0"	317	cp = "CP936" "\0" "GBK" "\0"
284	"CP1361" "\0" "JOHAB" "\0"	318	"CP1361" "\0" "JOHAB" "\0"
285	"CP20127" "\0" "ASCII" "\0"	319	"CP20127" "\0" "ASCII" "\0"
286	"CP20866" "\0" "KOI8-R" "\0"	320	"CP20866" "\0" "KOI8-R" "\0"
287	"CP20936" "\0" "GB2312" "\0"	321	"CP20936" "\0" "GB2312" "\0"
288	"CP21866" "\0" "KOI8-RU" "\0"	322	"CP21866" "\0" "KOI8-RU" "\0"
289	"CP28591" "\0" "ISO-8859-1" "\0"	323	"CP28591" "\0" "ISO-8859-1" "\0"
290	"CP28592" "\0" "ISO-8859-2" "\0"	324	"CP28592" "\0" "ISO-8859-2" "\0"
291	"CP28593" "\0" "ISO-8859-3" "\0"	325	"CP28593" "\0" "ISO-8859-3" "\0"
292	"CP28594" "\0" "ISO-8859-4" "\0"	326	"CP28594" "\0" "ISO-8859-4" "\0"
293	"CP28595" "\0" "ISO-8859-5" "\0"	327	"CP28595" "\0" "ISO-8859-5" "\0"
294	"CP28596" "\0" "ISO-8859-6" "\0"	328	"CP28596" "\0" "ISO-8859-6" "\0"
295	"CP28597" "\0" "ISO-8859-7" "\0"	329	"CP28597" "\0" "ISO-8859-7" "\0"
296	"CP28598" "\0" "ISO-8859-8" "\0"	330	"CP28598" "\0" "ISO-8859-8" "\0"
297	"CP28599" "\0" "ISO-8859-9" "\0"	331	"CP28599" "\0" "ISO-8859-9" "\0"
298	"CP28605" "\0" "ISO-8859-15" "\0"	332	"CP28605" "\0" "ISO-8859-15" "\0"
299	"CP38598" "\0" "ISO-8859-8" "\0"	333	"CP38598" "\0" "ISO-8859-8" "\0"
300	"CP51932" "\0" "EUC-JP" "\0"	334	"CP51932" "\0" "EUC-JP" "\0"
301	"CP51936" "\0" "GB2312" "\0"	335	"CP51936" "\0" "GB2312" "\0"
302	"CP51949" "\0" "EUC-KR" "\0"	336	"CP51949" "\0" "EUC-KR" "\0"
303	"CP51950" "\0" "EUC-TW" "\0"	337	"CP51950" "\0" "EUC-TW" "\0"
304	"CP54936" "\0" "GB18030" "\0"	338	"CP54936" "\0" "GB18030" "\0"
305	"CP65001" "\0" "UTF-8" "\0";	339	"CP65001" "\0" "UTF-8" "\0";
306	# endif	340	# endif
307	#endif	341	#endif
308		342
@@ -335,7 +369,7 @@ locale_charset (void)
335	codeset = nl_langinfo (CODESET);	369	codeset = nl_langinfo (CODESET);
336		370
337	# ifdef __CYGWIN__	371	# ifdef __CYGWIN__
338	/* Cygwin 2006 does not have locales. nl_langinfo (CODESET) always	372	/* Cygwin 1.5.x does not have locales. nl_langinfo (CODESET) always
339	returns "US-ASCII". As long as this is not fixed, return the suffix	373	returns "US-ASCII". As long as this is not fixed, return the suffix
340	of the locale name from the environment variables (if present) or	374	of the locale name from the environment variables (if present) or
341	the codepage as a number. */	375	the codepage as a number. */
@@ -346,36 +380,46 @@ locale_charset (void)
346		380
347	locale = getenv ("LC_ALL");	381	locale = getenv ("LC_ALL");
348	if (locale == NULL \|\| locale[0] == '\0')	382	if (locale == NULL \|\| locale[0] == '\0')
349	{	383	{
350	locale = getenv ("LC_CTYPE");	384	locale = getenv ("LC_CTYPE");
351	if (locale == NULL \|\| locale[0] == '\0')	385	if (locale == NULL \|\| locale[0] == '\0')
352	locale = getenv ("LANG");	386	locale = getenv ("LANG");
353	}	387	}
354	if (locale != NULL && locale[0] != '\0')	388	if (locale != NULL && locale[0] != '\0')
355	{	389	{
356	/* If the locale name contains an encoding after the dot, return	390	/* If the locale name contains an encoding after the dot, return
357	it. */	391	it. */
358	const char *dot = strchr (locale, '.');	392	const char *dot = strchr (locale, '.');
359		393
360	if (dot != NULL)	394	if (dot != NULL)
361	{	395	{
362	const char *modifier;	396	const char *modifier;
363		397
364	dot++;	398	dot++;
365	/* Look for the possible @... trailer and remove it, if any. */	399	/* Look for the possible @... trailer and remove it, if any. */
366	modifier = strchr (dot, '@');	400	modifier = strchr (dot, '@');
367	if (modifier == NULL)	401	if (modifier == NULL)
368	return dot;	402	return dot;
369	if (modifier - dot < sizeof (buf))	403	if (modifier - dot < sizeof (buf))
370	{	404	{
371	memcpy (buf, dot, modifier - dot);	405	memcpy (buf, dot, modifier - dot);
372	buf [modifier - dot] = '\0';	406	buf [modifier - dot] = '\0';
373	return buf;	407	return buf;
374	}	408	}
375	}	409	}
376	}	410	}
377		411
378	/* Woe32 has a function returning the locale's codepage as a number. */	412	/* Woe32 has a function returning the locale's codepage as a number:
		413	GetACP(). This encoding is used by Cygwin, unless the user has set
		414	the environment variable CYGWIN=codepage:oem (which very few people
		415	do).
		416	Output directed to console windows needs to be converted (to
		417	GetOEMCP() if the console is using a raster font, or to
		418	GetConsoleOutputCP() if it is using a TrueType font). Cygwin does
		419	this conversion transparently (see winsup/cygwin/fhandler_console.cc),
		420	converting to GetConsoleOutputCP(). This leads to correct results,
		421	except when SetConsoleOutputCP has been called and a raster font is
		422	in use. */
379	sprintf (buf, "CP%u", GetACP ());	423	sprintf (buf, "CP%u", GetACP ());
380	codeset = buf;	424	codeset = buf;
381	}	425	}
@@ -397,11 +441,11 @@ locale_charset (void)
397	{	441	{
398	locale = getenv ("LC_ALL");	442	locale = getenv ("LC_ALL");
399	if (locale == NULL \|\| locale[0] == '\0')	443	if (locale == NULL \|\| locale[0] == '\0')
400	{	444	{
401	locale = getenv ("LC_CTYPE");	445	locale = getenv ("LC_CTYPE");
402	if (locale == NULL \|\| locale[0] == '\0')	446	if (locale == NULL \|\| locale[0] == '\0')
403	locale = getenv ("LANG");	447	locale = getenv ("LANG");
404	}	448	}
405	}	449	}
406		450
407	/* On some old systems, one used to set locale = "iso8859_1". On others,	451	/* On some old systems, one used to set locale = "iso8859_1". On others,
@@ -415,7 +459,13 @@ locale_charset (void)
415		459
416	static char buf[2 + 10 + 1];	460	static char buf[2 + 10 + 1];
417		461
418	/* Woe32 has a function returning the locale's codepage as a number. */	462	/* Woe32 has a function returning the locale's codepage as a number:
		463	GetACP().
		464	When the output goes to a console window, it needs to be provided in
		465	GetOEMCP() encoding if the console is using a raster font, or in
		466	GetConsoleOutputCP() encoding if it is using a TrueType font.
		467	But in GUI programs and for output sent to files and pipes, GetACP()
		468	encoding is the best bet. */
419	sprintf (buf, "CP%u", GetACP ());	469	sprintf (buf, "CP%u", GetACP ());
420	codeset = buf;	470	codeset = buf;
421		471
@@ -433,7 +483,7 @@ locale_charset (void)
433	{	483	{
434	locale = getenv ("LC_CTYPE");	484	locale = getenv ("LC_CTYPE");
435	if (locale == NULL \|\| locale[0] == '\0')	485	if (locale == NULL \|\| locale[0] == '\0')
436	locale = getenv ("LANG");	486	locale = getenv ("LANG");
437	}	487	}
438	if (locale != NULL && locale[0] != '\0')	488	if (locale != NULL && locale[0] != '\0')
439	{	489	{
@@ -441,21 +491,21 @@ locale_charset (void)
441	const char *dot = strchr (locale, '.');	491	const char *dot = strchr (locale, '.');
442		492
443	if (dot != NULL)	493	if (dot != NULL)
444	{	494	{
445	const char *modifier;	495	const char *modifier;
446		496
447	dot++;	497	dot++;
448	/* Look for the possible @... trailer and remove it, if any. */	498	/* Look for the possible @... trailer and remove it, if any. */
449	modifier = strchr (dot, '@');	499	modifier = strchr (dot, '@');
450	if (modifier == NULL)	500	if (modifier == NULL)
451	return dot;	501	return dot;
452	if (modifier - dot < sizeof (buf))	502	if (modifier - dot < sizeof (buf))
453	{	503	{
454	memcpy (buf, dot, modifier - dot);	504	memcpy (buf, dot, modifier - dot);
455	buf [modifier - dot] = '\0';	505	buf [modifier - dot] = '\0';
456	return buf;	506	return buf;
457	}	507	}
458	}	508	}
459		509
460	/* Resolve through the charset.alias file. */	510	/* Resolve through the charset.alias file. */
461	codeset = locale;	511	codeset = locale;
@@ -464,12 +514,12 @@ locale_charset (void)
464	{	514	{
465	/* OS/2 has a function returning the locale's codepage as a number. */	515	/* OS/2 has a function returning the locale's codepage as a number. */
466	if (DosQueryCp (sizeof (cp), cp, &cplen))	516	if (DosQueryCp (sizeof (cp), cp, &cplen))
467	codeset = "";	517	codeset = "";
468	else	518	else
469	{	519	{
470	sprintf (buf, "CP%u", cp[0]);	520	sprintf (buf, "CP%u", cp[0]);
471	codeset = buf;	521	codeset = buf;
472	}	522	}
473	}	523	}
474		524
475	#endif	525	#endif
@@ -483,10 +533,10 @@ locale_charset (void)
483	*aliases != '\0';	533	*aliases != '\0';
484	aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)	534	aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
485	if (strcmp (codeset, aliases) == 0	535	if (strcmp (codeset, aliases) == 0
486	\|\| (aliases[0] == '*' && aliases[1] == '\0'))	536	\|\| (aliases[0] == '*' && aliases[1] == '\0'))
487	{	537	{
488	codeset = aliases + strlen (aliases) + 1;	538	codeset = aliases + strlen (aliases) + 1;
489	break;	539	break;
490	}	540	}
491		541
492	/* Don't return an empty string. GNU libc and GNU libiconv interpret	542	/* Don't return an empty string. GNU libc and GNU libiconv interpret