summary refs log tree commit diff stats
path: root/gl/regex_internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'gl/regex_internal.h')
-rw-r--r-- gl/regex_internal.h 194
1 files changed, 115 insertions, 79 deletions
diff --git a/gl/regex_internal.h b/gl/regex_internal.h
index 5aa5aa28..a2b8f16f 100644
--- a/gl/regex_internal.h
+++ b/gl/regex_internal.h
@@ -1,50 +1,81 @@
1/* Extended regular expression matching and search library. 1/* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free 2 Copyright (C) 2002-2013 Free Software Foundation, Inc.
3 Software Foundation, Inc.
4 This file is part of the GNU C Library. 3 This file is part of the GNU C Library.
5 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. 4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 5
7 This program is free software; you can redistribute it and/or modify 6 The GNU C Library is free software; you can redistribute it and/or
8 it under the terms of the GNU General Public License as published by 7 modify it under the terms of the GNU General Public
9 the Free Software Foundation; either version 3, or (at your option) 8 License as published by the Free Software Foundation; either
10 any later version. 9 version 3 of the License, or (at your option) any later version.
11 10
12 This program is distributed in the hope that it will be useful, 11 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 GNU General Public License for more details. 14 General Public License for more details.
16 15
17 You should have received a copy of the GNU General Public License along 16 You should have received a copy of the GNU General Public
18 with this program; if not, write to the Free Software Foundation, 17 License along with the GNU C Library; if not, see
19 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 18 <http://www.gnu.org/licenses/>. */
20 19
21#ifndef _REGEX_INTERNAL_H 20#ifndef _REGEX_INTERNAL_H
22#define _REGEX_INTERNAL_H 1 21#define _REGEX_INTERNAL_H 1
23 22
24#include <assert.h> 23#include <assert.h>
25#include <ctype.h> 24#include <ctype.h>
26#include <stdbool.h>
27#include <stdio.h> 25#include <stdio.h>
28#include <stdlib.h> 26#include <stdlib.h>
29#include <string.h> 27#include <string.h>
30 28
31#include <langinfo.h> 29#include <langinfo.h>
32#ifndef _LIBC 30#include <locale.h>
33# include "localcharset.h"
34#endif
35#if defined HAVE_LOCALE_H || defined _LIBC
36# include <locale.h>
37#endif
38
39#include <wchar.h> 31#include <wchar.h>
40#include <wctype.h> 32#include <wctype.h>
33#include <stdbool.h>
41#include <stdint.h> 34#include <stdint.h>
42#if defined _LIBC 35
36#ifdef _LIBC
43# include <bits/libc-lock.h> 37# include <bits/libc-lock.h>
38# define lock_define(name) __libc_lock_define (, name)
39# define lock_init(lock) (__libc_lock_init (lock), 0)
40# define lock_fini(lock) 0
41# define lock_lock(lock) __libc_lock_lock (lock)
42# define lock_unlock(lock) __libc_lock_unlock (lock)
43#elif defined GNULIB_LOCK
44# include "glthread/lock.h"
45 /* Use gl_lock_define if empty macro arguments are known to work.
46 Otherwise, fall back on less-portable substitutes. */
47# if ((defined __GNUC__ && !defined __STRICT_ANSI__) \
48 || (defined __STDC_VERSION__ && 199901L <= __STDC_VERSION__))
49# define lock_define(name) gl_lock_define (, name)
50# elif USE_POSIX_THREADS
51# define lock_define(name) pthread_mutex_t name;
52# elif USE_PTH_THREADS
53# define lock_define(name) pth_mutex_t name;
54# elif USE_SOLARIS_THREADS
55# define lock_define(name) mutex_t name;
56# elif USE_WINDOWS_THREADS
57# define lock_define(name) gl_lock_t name;
58# else
59# define lock_define(name)
60# endif
61# define lock_init(lock) glthread_lock_init (&(lock))
62# define lock_fini(lock) glthread_lock_destroy (&(lock))
63# define lock_lock(lock) glthread_lock_lock (&(lock))
64# define lock_unlock(lock) glthread_lock_unlock (&(lock))
65#elif defined GNULIB_PTHREAD
66# include <pthread.h>
67# define lock_define(name) pthread_mutex_t name;
68# define lock_init(lock) pthread_mutex_init (&(lock), 0)
69# define lock_fini(lock) pthread_mutex_destroy (&(lock))
70# define lock_lock(lock) pthread_mutex_lock (&(lock))
71# define lock_unlock(lock) pthread_mutex_unlock (&(lock))
44#else 72#else
45# define __libc_lock_init(NAME) do { } while (0) 73# define lock_define(name)
46# define __libc_lock_lock(NAME) do { } while (0) 74# define lock_init(lock) 0
47# define __libc_lock_unlock(NAME) do { } while (0) 75# define lock_fini(lock) ((void) 0)
76 /* The 'dfa' avoids an "unused variable 'dfa'" warning from GCC. */
77# define lock_lock(lock) ((void) dfa)
78# define lock_unlock(lock) ((void) 0)
48#endif 79#endif
49 80
50/* In case that the system doesn't have isblank(). */ 81/* In case that the system doesn't have isblank(). */
@@ -67,7 +98,7 @@
67# ifdef _LIBC 98# ifdef _LIBC
68# undef gettext 99# undef gettext
69# define gettext(msgid) \ 100# define gettext(msgid) \
70 INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES) 101 __dcgettext (_libc_intl_domainname, msgid, LC_MESSAGES)
71# endif 102# endif
72#else 103#else
73# define gettext(msgid) (msgid) 104# define gettext(msgid) (msgid)
@@ -79,12 +110,7 @@
79# define gettext_noop(String) String 110# define gettext_noop(String) String
80#endif 111#endif
81 112
82/* For loser systems without the definition. */ 113#if (defined MB_CUR_MAX && HAVE_WCTYPE_H && HAVE_ISWCTYPE && HAVE_WCSCOLL) || _LIBC
83#ifndef SIZE_MAX
84# define SIZE_MAX ((size_t) -1)
85#endif
86
87#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_ISWCTYPE && HAVE_WCSCOLL) || _LIBC
88# define RE_ENABLE_I18N 114# define RE_ENABLE_I18N
89#endif 115#endif
90 116
@@ -92,9 +118,6 @@
92# define BE(expr, val) __builtin_expect (expr, val) 118# define BE(expr, val) __builtin_expect (expr, val)
93#else 119#else
94# define BE(expr, val) (expr) 120# define BE(expr, val) (expr)
95# ifdef _LIBC
96# define inline
97# endif
98#endif 121#endif
99 122
100/* Number of ASCII characters. */ 123/* Number of ASCII characters. */
@@ -111,22 +134,27 @@
111 134
112/* Rename to standard API for using out of glibc. */ 135/* Rename to standard API for using out of glibc. */
113#ifndef _LIBC 136#ifndef _LIBC
137# undef __wctype
138# undef __iswctype
114# define __wctype wctype 139# define __wctype wctype
115# define __iswctype iswctype 140# define __iswctype iswctype
116# define __btowc btowc 141# define __btowc btowc
117# define __wcrtomb wcrtomb
118# define __mbrtowc mbrtowc 142# define __mbrtowc mbrtowc
143# define __wcrtomb wcrtomb
119# define __regfree regfree 144# define __regfree regfree
120# define attribute_hidden 145# define attribute_hidden
121#endif /* not _LIBC */ 146#endif /* not _LIBC */
122 147
123#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) 148#if __GNUC__ < 3 + (__GNUC_MINOR__ < 1)
124# define __attribute(arg) __attribute__ (arg) 149# define __attribute__(arg)
125#else
126# define __attribute(arg)
127#endif 150#endif
128 151
129typedef __re_idx_t Idx; 152typedef __re_idx_t Idx;
153#ifdef _REGEX_LARGE_OFFSETS
154# define IDX_MAX (SIZE_MAX - 2)
155#else
156# define IDX_MAX INT_MAX
157#endif
130 158
131/* Special return value for failure to match. */ 159/* Special return value for failure to match. */
132#define REG_MISSING ((Idx) -1) 160#define REG_MISSING ((Idx) -1)
@@ -337,7 +365,7 @@ typedef struct
337 Idx idx; /* for BACK_REF */ 365 Idx idx; /* for BACK_REF */
338 re_context_type ctx_type; /* for ANCHOR */ 366 re_context_type ctx_type; /* for ANCHOR */
339 } opr; 367 } opr;
340#if __GNUC__ >= 2 && !__STRICT_ANSI__ 368#if __GNUC__ >= 2 && !defined __STRICT_ANSI__
341 re_token_type_t type : 8; 369 re_token_type_t type : 8;
342#else 370#else
343 re_token_type_t type; 371 re_token_type_t type;
@@ -418,26 +446,24 @@ struct re_dfa_t;
418typedef struct re_dfa_t re_dfa_t; 446typedef struct re_dfa_t re_dfa_t;
419 447
420#ifndef _LIBC 448#ifndef _LIBC
421# if defined __i386__ && !defined __EMX__ 449# define internal_function
422# define internal_function __attribute ((regparm (3), stdcall))
423# else
424# define internal_function
425# endif
426#endif 450#endif
427 451
452#ifndef NOT_IN_libc
428static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, 453static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
429 Idx new_buf_len) 454 Idx new_buf_len)
430 internal_function; 455 internal_function;
431#ifdef RE_ENABLE_I18N 456# ifdef RE_ENABLE_I18N
432static void build_wcs_buffer (re_string_t *pstr) internal_function; 457static void build_wcs_buffer (re_string_t *pstr) internal_function;
433static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr) 458static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr)
434 internal_function; 459 internal_function;
435#endif /* RE_ENABLE_I18N */ 460# endif /* RE_ENABLE_I18N */
436static void build_upper_buffer (re_string_t *pstr) internal_function; 461static void build_upper_buffer (re_string_t *pstr) internal_function;
437static void re_string_translate_buffer (re_string_t *pstr) internal_function; 462static void re_string_translate_buffer (re_string_t *pstr) internal_function;
438static unsigned int re_string_context_at (const re_string_t *input, Idx idx, 463static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
439 int eflags) 464 int eflags)
440 internal_function __attribute ((pure)); 465 internal_function __attribute__ ((pure));
466#endif
441#define re_string_peek_byte(pstr, offset) \ 467#define re_string_peek_byte(pstr, offset) \
442 ((pstr)->mbs[(pstr)->cur_idx + offset]) 468 ((pstr)->mbs[(pstr)->cur_idx + offset])
443#define re_string_fetch_byte(pstr) \ 469#define re_string_fetch_byte(pstr) \
@@ -455,7 +481,9 @@ static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
455#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx)) 481#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
456#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) 482#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
457 483
458#include <alloca.h> 484#if defined _LIBC || HAVE_ALLOCA
485# include <alloca.h>
486#endif
459 487
460#ifndef _LIBC 488#ifndef _LIBC
461# if HAVE_ALLOCA 489# if HAVE_ALLOCA
@@ -472,9 +500,18 @@ static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
472# endif 500# endif
473#endif 501#endif
474 502
503#ifdef _LIBC
504# define MALLOC_0_IS_NONNULL 1
505#elif !defined MALLOC_0_IS_NONNULL
506# define MALLOC_0_IS_NONNULL 0
507#endif
508
475#ifndef MAX 509#ifndef MAX
476# define MAX(a,b) ((a) < (b) ? (b) : (a)) 510# define MAX(a,b) ((a) < (b) ? (b) : (a))
477#endif 511#endif
512#ifndef MIN
513# define MIN(a,b) ((a) < (b) ? (a) : (b))
514#endif
478 515
479#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t))) 516#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
480#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t))) 517#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
@@ -490,8 +527,8 @@ struct bin_tree_t
490 527
491 re_token_t token; 528 re_token_t token;
492 529
493 /* `node_idx' is the index in dfa->nodes, if `type' == 0. 530 /* 'node_idx' is the index in dfa->nodes, if 'type' == 0.
494 Otherwise `type' indicate the type of this node. */ 531 Otherwise 'type' indicate the type of this node. */
495 Idx node_idx; 532 Idx node_idx;
496}; 533};
497typedef struct bin_tree_t bin_tree_t; 534typedef struct bin_tree_t bin_tree_t;
@@ -544,9 +581,9 @@ struct re_dfastate_t
544 struct re_dfastate_t **trtable, **word_trtable; 581 struct re_dfastate_t **trtable, **word_trtable;
545 unsigned int context : 4; 582 unsigned int context : 4;
546 unsigned int halt : 1; 583 unsigned int halt : 1;
547 /* If this state can accept `multi byte'. 584 /* If this state can accept "multi byte".
548 Note that we refer to multibyte characters, and multi character 585 Note that we refer to multibyte characters, and multi character
549 collating elements as `multi byte'. */ 586 collating elements as "multi byte". */
550 unsigned int accept_mb : 1; 587 unsigned int accept_mb : 1;
551 /* If this state has backreference node(s). */ 588 /* If this state has backreference node(s). */
552 unsigned int has_backref : 1; 589 unsigned int has_backref : 1;
@@ -675,7 +712,7 @@ struct re_dfa_t
675 re_bitset_ptr_t sb_char; 712 re_bitset_ptr_t sb_char;
676 int str_tree_storage_idx; 713 int str_tree_storage_idx;
677 714
678 /* number of subexpressions `re_nsub' is in regex_t. */ 715 /* number of subexpressions 're_nsub' is in regex_t. */
679 re_hashval_t state_hash_mask; 716 re_hashval_t state_hash_mask;
680 Idx init_node; 717 Idx init_node;
681 Idx nbackref; /* The number of backreference in this dfa. */ 718 Idx nbackref; /* The number of backreference in this dfa. */
@@ -699,9 +736,7 @@ struct re_dfa_t
699#ifdef DEBUG 736#ifdef DEBUG
700 char* re_str; 737 char* re_str;
701#endif 738#endif
702#ifdef _LIBC 739 lock_define (lock)
703 __libc_lock_define (, lock)
704#endif
705}; 740};
706 741
707#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) 742#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
@@ -732,33 +767,33 @@ typedef struct
732} bracket_elem_t; 767} bracket_elem_t;
733 768
734 769
735/* Inline functions for bitset_t operation. */ 770/* Functions for bitset_t operation. */
736 771
737static inline void 772static void
738bitset_set (bitset_t set, Idx i) 773bitset_set (bitset_t set, Idx i)
739{ 774{
740 set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS; 775 set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS;
741} 776}
742 777
743static inline void 778static void
744bitset_clear (bitset_t set, Idx i) 779bitset_clear (bitset_t set, Idx i)
745{ 780{
746 set[i / BITSET_WORD_BITS] &= ~ ((bitset_word_t) 1 << i % BITSET_WORD_BITS); 781 set[i / BITSET_WORD_BITS] &= ~ ((bitset_word_t) 1 << i % BITSET_WORD_BITS);
747} 782}
748 783
749static inline bool 784static bool
750bitset_contain (const bitset_t set, Idx i) 785bitset_contain (const bitset_t set, Idx i)
751{ 786{
752 return (set[i / BITSET_WORD_BITS] >> i % BITSET_WORD_BITS) & 1; 787 return (set[i / BITSET_WORD_BITS] >> i % BITSET_WORD_BITS) & 1;
753} 788}
754 789
755static inline void 790static void
756bitset_empty (bitset_t set) 791bitset_empty (bitset_t set)
757{ 792{
758 memset (set, '\0', sizeof (bitset_t)); 793 memset (set, '\0', sizeof (bitset_t));
759} 794}
760 795
761static inline void 796static void
762bitset_set_all (bitset_t set) 797bitset_set_all (bitset_t set)
763{ 798{
764 memset (set, -1, sizeof (bitset_word_t) * (SBC_MAX / BITSET_WORD_BITS)); 799 memset (set, -1, sizeof (bitset_word_t) * (SBC_MAX / BITSET_WORD_BITS));
@@ -767,13 +802,13 @@ bitset_set_all (bitset_t set)
767 ((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1; 802 ((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1;
768} 803}
769 804
770static inline void 805static void
771bitset_copy (bitset_t dest, const bitset_t src) 806bitset_copy (bitset_t dest, const bitset_t src)
772{ 807{
773 memcpy (dest, src, sizeof (bitset_t)); 808 memcpy (dest, src, sizeof (bitset_t));
774} 809}
775 810
776static inline void 811static void __attribute__ ((unused))
777bitset_not (bitset_t set) 812bitset_not (bitset_t set)
778{ 813{
779 int bitset_i; 814 int bitset_i;
@@ -785,7 +820,7 @@ bitset_not (bitset_t set)
785 & ~set[BITSET_WORDS - 1]); 820 & ~set[BITSET_WORDS - 1]);
786} 821}
787 822
788static inline void 823static void __attribute__ ((unused))
789bitset_merge (bitset_t dest, const bitset_t src) 824bitset_merge (bitset_t dest, const bitset_t src)
790{ 825{
791 int bitset_i; 826 int bitset_i;
@@ -793,7 +828,7 @@ bitset_merge (bitset_t dest, const bitset_t src)
793 dest[bitset_i] |= src[bitset_i]; 828 dest[bitset_i] |= src[bitset_i];
794} 829}
795 830
796static inline void 831static void __attribute__ ((unused))
797bitset_mask (bitset_t dest, const bitset_t src) 832bitset_mask (bitset_t dest, const bitset_t src)
798{ 833{
799 int bitset_i; 834 int bitset_i;
@@ -802,9 +837,9 @@ bitset_mask (bitset_t dest, const bitset_t src)
802} 837}
803 838
804#ifdef RE_ENABLE_I18N 839#ifdef RE_ENABLE_I18N
805/* Inline functions for re_string. */ 840/* Functions for re_string. */
806static inline int 841static int
807internal_function __attribute ((pure)) 842internal_function __attribute__ ((pure, unused))
808re_string_char_size_at (const re_string_t *pstr, Idx idx) 843re_string_char_size_at (const re_string_t *pstr, Idx idx)
809{ 844{
810 int byte_idx; 845 int byte_idx;
@@ -816,8 +851,8 @@ re_string_char_size_at (const re_string_t *pstr, Idx idx)
816 return byte_idx; 851 return byte_idx;
817} 852}
818 853
819static inline wint_t 854static wint_t
820internal_function __attribute ((pure)) 855internal_function __attribute__ ((pure, unused))
821re_string_wchar_at (const re_string_t *pstr, Idx idx) 856re_string_wchar_at (const re_string_t *pstr, Idx idx)
822{ 857{
823 if (pstr->mb_cur_max == 1) 858 if (pstr->mb_cur_max == 1)
@@ -825,15 +860,15 @@ re_string_wchar_at (const re_string_t *pstr, Idx idx)
825 return (wint_t) pstr->wcs[idx]; 860 return (wint_t) pstr->wcs[idx];
826} 861}
827 862
863# ifndef NOT_IN_libc
828static int 864static int
829internal_function __attribute ((pure)) 865internal_function __attribute__ ((pure, unused))
830re_string_elem_size_at (const re_string_t *pstr, Idx idx) 866re_string_elem_size_at (const re_string_t *pstr, Idx idx)
831{ 867{
832# ifdef _LIBC 868# ifdef _LIBC
833 const unsigned char *p, *extra; 869 const unsigned char *p, *extra;
834 const int32_t *table, *indirect; 870 const int32_t *table, *indirect;
835 int32_t tmp; 871# include <locale/weight.h>
836# include <locale/weight.h>
837 uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 872 uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
838 873
839 if (nrules != 0) 874 if (nrules != 0)
@@ -844,13 +879,14 @@ re_string_elem_size_at (const re_string_t *pstr, Idx idx)
844 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, 879 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
845 _NL_COLLATE_INDIRECTMB); 880 _NL_COLLATE_INDIRECTMB);
846 p = pstr->mbs + idx; 881 p = pstr->mbs + idx;
847 tmp = findidx (&p); 882 findidx (&p, pstr->len - idx);
848 return p - pstr->mbs - idx; 883 return p - pstr->mbs - idx;
849 } 884 }
850 else 885 else
851# endif /* _LIBC */ 886# endif /* _LIBC */
852 return 1; 887 return 1;
853} 888}
889# endif
854#endif /* RE_ENABLE_I18N */ 890#endif /* RE_ENABLE_I18N */
855 891
856#ifndef __GNUC_PREREQ 892#ifndef __GNUC_PREREQ