summaryrefslogtreecommitdiffstats
path: root/gl/regcomp.c
diff options
context:
space:
mode:
authorAndreas Baumann <mail@andreasbaumann.cc>2023-02-11 07:20:24 +0100
committerAndreas Baumann <mail@andreasbaumann.cc>2023-02-11 07:20:24 +0100
commitf867d7b44080fa9716deeff4476275f9a489879f (patch)
treebc964662fc3300dc626fb6d833d8ed5c8f46eca7 /gl/regcomp.c
parent9734c439cba0a02b087e50789e94ec9b07754608 (diff)
parentc07206f2ccc2356aa74bc6813a94c2190017d44e (diff)
downloadmonitoring-plugins-f867d7b44080fa9716deeff4476275f9a489879f.tar.gz
Merge branch 'master' into curlfixes
Diffstat (limited to 'gl/regcomp.c')
-rw-r--r--gl/regcomp.c1364
1 files changed, 615 insertions, 749 deletions
diff --git a/gl/regcomp.c b/gl/regcomp.c
index f0b2e522..89478396 100644
--- a/gl/regcomp.c
+++ b/gl/regcomp.c
@@ -1,21 +1,25 @@
1/* Extended regular expression matching and search library. 1/* Extended regular expression matching and search library.
2 Copyright (C) 2002-2013 Free Software Foundation, Inc. 2 Copyright (C) 2002-2023 Free Software Foundation, Inc.
3 This file is part of the GNU C Library. 3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. 4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5 5
6 The GNU C Library is free software; you can redistribute it and/or 6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public 7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either 8 License as published by the Free Software Foundation; either
9 version 3 of the License, or (at your option) any later version. 9 version 2.1 of the License, or (at your option) any later version.
10 10
11 The GNU C Library is distributed in the hope that it will be useful, 11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details. 14 Lesser General Public License for more details.
15 15
16 You should have received a copy of the GNU General Public 16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see 17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */ 18 <https://www.gnu.org/licenses/>. */
19
20#ifdef _LIBC
21# include <locale/weight.h>
22#endif
19 23
20static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, 24static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
21 size_t length, reg_syntax_t syntax); 25 size_t length, reg_syntax_t syntax);
@@ -23,14 +27,10 @@ static void re_compile_fastmap_iter (regex_t *bufp,
23 const re_dfastate_t *init_state, 27 const re_dfastate_t *init_state,
24 char *fastmap); 28 char *fastmap);
25static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len); 29static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
26#ifdef RE_ENABLE_I18N
27static void free_charset (re_charset_t *cset); 30static void free_charset (re_charset_t *cset);
28#endif /* RE_ENABLE_I18N */
29static void free_workarea_compile (regex_t *preg); 31static void free_workarea_compile (regex_t *preg);
30static reg_errcode_t create_initial_state (re_dfa_t *dfa); 32static reg_errcode_t create_initial_state (re_dfa_t *dfa);
31#ifdef RE_ENABLE_I18N
32static void optimize_utf8 (re_dfa_t *dfa); 33static void optimize_utf8 (re_dfa_t *dfa);
33#endif
34static reg_errcode_t analyze (regex_t *preg); 34static reg_errcode_t analyze (regex_t *preg);
35static reg_errcode_t preorder (bin_tree_t *root, 35static reg_errcode_t preorder (bin_tree_t *root,
36 reg_errcode_t (fn (void *, bin_tree_t *)), 36 reg_errcode_t (fn (void *, bin_tree_t *)),
@@ -55,7 +55,7 @@ static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
55static Idx fetch_number (re_string_t *input, re_token_t *token, 55static Idx fetch_number (re_string_t *input, re_token_t *token,
56 reg_syntax_t syntax); 56 reg_syntax_t syntax);
57static int peek_token (re_token_t *token, re_string_t *input, 57static int peek_token (re_token_t *token, re_string_t *input,
58 reg_syntax_t syntax) internal_function; 58 reg_syntax_t syntax);
59static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, 59static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
60 reg_syntax_t syntax, reg_errcode_t *err); 60 reg_syntax_t syntax, reg_errcode_t *err);
61static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, 61static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
@@ -85,7 +85,6 @@ static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
85static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, 85static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
86 re_string_t *regexp, 86 re_string_t *regexp,
87 re_token_t *token); 87 re_token_t *token);
88#ifdef RE_ENABLE_I18N
89static reg_errcode_t build_equiv_class (bitset_t sbcset, 88static reg_errcode_t build_equiv_class (bitset_t sbcset,
90 re_charset_t *mbcset, 89 re_charset_t *mbcset,
91 Idx *equiv_class_alloc, 90 Idx *equiv_class_alloc,
@@ -96,14 +95,6 @@ static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
96 Idx *char_class_alloc, 95 Idx *char_class_alloc,
97 const char *class_name, 96 const char *class_name,
98 reg_syntax_t syntax); 97 reg_syntax_t syntax);
99#else /* not RE_ENABLE_I18N */
100static reg_errcode_t build_equiv_class (bitset_t sbcset,
101 const unsigned char *name);
102static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
103 bitset_t sbcset,
104 const char *class_name,
105 reg_syntax_t syntax);
106#endif /* not RE_ENABLE_I18N */
107static bin_tree_t *build_charclass_op (re_dfa_t *dfa, 98static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
108 RE_TRANSLATE_TYPE trans, 99 RE_TRANSLATE_TYPE trans,
109 const char *class_name, 100 const char *class_name,
@@ -149,9 +140,9 @@ static const char __re_error_msgid[] =
149 gettext_noop ("Invalid back reference") /* REG_ESUBREG */ 140 gettext_noop ("Invalid back reference") /* REG_ESUBREG */
150 "\0" 141 "\0"
151#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") 142#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
152 gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ 143 gettext_noop ("Unmatched [, [^, [:, [., or [=") /* REG_EBRACK */
153 "\0" 144 "\0"
154#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") 145#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [, [^, [:, [., or [=")
155 gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ 146 gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
156 "\0" 147 "\0"
157#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") 148#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
@@ -209,17 +200,9 @@ static const size_t __re_error_msgid_idx[] =
209 Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields 200 Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields
210 are set in BUFP on entry. */ 201 are set in BUFP on entry. */
211 202
212#ifdef _LIBC
213const char *
214re_compile_pattern (pattern, length, bufp)
215 const char *pattern;
216 size_t length;
217 struct re_pattern_buffer *bufp;
218#else /* size_t might promote */
219const char * 203const char *
220re_compile_pattern (const char *pattern, size_t length, 204re_compile_pattern (const char *pattern, size_t length,
221 struct re_pattern_buffer *bufp) 205 struct re_pattern_buffer *bufp)
222#endif
223{ 206{
224 reg_errcode_t ret; 207 reg_errcode_t ret;
225 208
@@ -237,9 +220,7 @@ re_compile_pattern (const char *pattern, size_t length,
237 return NULL; 220 return NULL;
238 return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); 221 return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
239} 222}
240#ifdef _LIBC
241weak_alias (__re_compile_pattern, re_compile_pattern) 223weak_alias (__re_compile_pattern, re_compile_pattern)
242#endif
243 224
244/* Set by 're_set_syntax' to the current regexp syntax to recognize. Can 225/* Set by 're_set_syntax' to the current regexp syntax to recognize. Can
245 also be assigned to arbitrarily: each pattern buffer stores its own 226 also be assigned to arbitrarily: each pattern buffer stores its own
@@ -257,21 +238,17 @@ reg_syntax_t re_syntax_options;
257 defined in regex.h. We return the old syntax. */ 238 defined in regex.h. We return the old syntax. */
258 239
259reg_syntax_t 240reg_syntax_t
260re_set_syntax (syntax) 241re_set_syntax (reg_syntax_t syntax)
261 reg_syntax_t syntax;
262{ 242{
263 reg_syntax_t ret = re_syntax_options; 243 reg_syntax_t ret = re_syntax_options;
264 244
265 re_syntax_options = syntax; 245 re_syntax_options = syntax;
266 return ret; 246 return ret;
267} 247}
268#ifdef _LIBC
269weak_alias (__re_set_syntax, re_set_syntax) 248weak_alias (__re_set_syntax, re_set_syntax)
270#endif
271 249
272int 250int
273re_compile_fastmap (bufp) 251re_compile_fastmap (struct re_pattern_buffer *bufp)
274 struct re_pattern_buffer *bufp;
275{ 252{
276 re_dfa_t *dfa = bufp->buffer; 253 re_dfa_t *dfa = bufp->buffer;
277 char *fastmap = bufp->fastmap; 254 char *fastmap = bufp->fastmap;
@@ -287,12 +264,9 @@ re_compile_fastmap (bufp)
287 bufp->fastmap_accurate = 1; 264 bufp->fastmap_accurate = 1;
288 return 0; 265 return 0;
289} 266}
290#ifdef _LIBC
291weak_alias (__re_compile_fastmap, re_compile_fastmap) 267weak_alias (__re_compile_fastmap, re_compile_fastmap)
292#endif
293 268
294static inline void 269static __always_inline void
295__attribute__ ((always_inline))
296re_set_fastmap (char *fastmap, bool icase, int ch) 270re_set_fastmap (char *fastmap, bool icase, int ch)
297{ 271{
298 fastmap[ch] = 1; 272 fastmap[ch] = 1;
@@ -318,7 +292,6 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
318 if (type == CHARACTER) 292 if (type == CHARACTER)
319 { 293 {
320 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); 294 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
321#ifdef RE_ENABLE_I18N
322 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) 295 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
323 { 296 {
324 unsigned char buf[MB_LEN_MAX]; 297 unsigned char buf[MB_LEN_MAX];
@@ -335,11 +308,10 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
335 memset (&state, '\0', sizeof (state)); 308 memset (&state, '\0', sizeof (state));
336 if (__mbrtowc (&wc, (const char *) buf, p - buf, 309 if (__mbrtowc (&wc, (const char *) buf, p - buf,
337 &state) == p - buf 310 &state) == p - buf
338 && (__wcrtomb ((char *) buf, towlower (wc), &state) 311 && (__wcrtomb ((char *) buf, __towlower (wc), &state)
339 != (size_t) -1)) 312 != (size_t) -1))
340 re_set_fastmap (fastmap, false, buf[0]); 313 re_set_fastmap (fastmap, false, buf[0]);
341 } 314 }
342#endif
343 } 315 }
344 else if (type == SIMPLE_BRACKET) 316 else if (type == SIMPLE_BRACKET)
345 { 317 {
@@ -353,13 +325,12 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
353 re_set_fastmap (fastmap, icase, ch); 325 re_set_fastmap (fastmap, icase, ch);
354 } 326 }
355 } 327 }
356#ifdef RE_ENABLE_I18N
357 else if (type == COMPLEX_BRACKET) 328 else if (type == COMPLEX_BRACKET)
358 { 329 {
359 re_charset_t *cset = dfa->nodes[node].opr.mbcset; 330 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
360 Idx i; 331 Idx i;
361 332
362# ifdef _LIBC 333#ifdef _LIBC
363 /* See if we have to try all bytes which start multiple collation 334 /* See if we have to try all bytes which start multiple collation
364 elements. 335 elements.
365 e.g. In da_DK, we want to catch 'a' since "aa" is a valid 336 e.g. In da_DK, we want to catch 'a' since "aa" is a valid
@@ -375,7 +346,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
375 if (table[i] < 0) 346 if (table[i] < 0)
376 re_set_fastmap (fastmap, icase, i); 347 re_set_fastmap (fastmap, icase, i);
377 } 348 }
378# endif /* _LIBC */ 349#endif /* _LIBC */
379 350
380 /* See if we have to start the match at all multibyte characters, 351 /* See if we have to start the match at all multibyte characters,
381 i.e. where we would not find an invalid sequence. This only 352 i.e. where we would not find an invalid sequence. This only
@@ -383,9 +354,9 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
383 sets, the SIMPLE_BRACKET again suffices. */ 354 sets, the SIMPLE_BRACKET again suffices. */
384 if (dfa->mb_cur_max > 1 355 if (dfa->mb_cur_max > 1
385 && (cset->nchar_classes || cset->non_match || cset->nranges 356 && (cset->nchar_classes || cset->non_match || cset->nranges
386# ifdef _LIBC 357#ifdef _LIBC
387 || cset->nequiv_classes 358 || cset->nequiv_classes
388# endif /* _LIBC */ 359#endif /* _LIBC */
389 )) 360 ))
390 { 361 {
391 unsigned char c = 0; 362 unsigned char c = 0;
@@ -411,19 +382,14 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
411 re_set_fastmap (fastmap, icase, *(unsigned char *) buf); 382 re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
412 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) 383 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
413 { 384 {
414 if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) 385 if (__wcrtomb (buf, __towlower (cset->mbchars[i]), &state)
415 != (size_t) -1) 386 != (size_t) -1)
416 re_set_fastmap (fastmap, false, *(unsigned char *) buf); 387 re_set_fastmap (fastmap, false, *(unsigned char *) buf);
417 } 388 }
418 } 389 }
419 } 390 }
420 } 391 }
421#endif /* RE_ENABLE_I18N */ 392 else if (type == OP_PERIOD || type == OP_UTF8_PERIOD || type == END_OF_RE)
422 else if (type == OP_PERIOD
423#ifdef RE_ENABLE_I18N
424 || type == OP_UTF8_PERIOD
425#endif /* RE_ENABLE_I18N */
426 || type == END_OF_RE)
427 { 393 {
428 memset (fastmap, '\1', sizeof (char) * SBC_MAX); 394 memset (fastmap, '\1', sizeof (char) * SBC_MAX);
429 if (type == END_OF_RE) 395 if (type == END_OF_RE)
@@ -470,10 +436,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
470 the return codes and their meanings.) */ 436 the return codes and their meanings.) */
471 437
472int 438int
473regcomp (preg, pattern, cflags) 439regcomp (regex_t *__restrict preg, const char *__restrict pattern, int cflags)
474 regex_t *_Restrict_ preg;
475 const char *_Restrict_ pattern;
476 int cflags;
477{ 440{
478 reg_errcode_t ret; 441 reg_errcode_t ret;
479 reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED 442 reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
@@ -485,7 +448,7 @@ regcomp (preg, pattern, cflags)
485 448
486 /* Try to allocate space for the fastmap. */ 449 /* Try to allocate space for the fastmap. */
487 preg->fastmap = re_malloc (char, SBC_MAX); 450 preg->fastmap = re_malloc (char, SBC_MAX);
488 if (BE (preg->fastmap == NULL, 0)) 451 if (__glibc_unlikely (preg->fastmap == NULL))
489 return REG_ESPACE; 452 return REG_ESPACE;
490 453
491 syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0; 454 syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
@@ -511,7 +474,7 @@ regcomp (preg, pattern, cflags)
511 ret = REG_EPAREN; 474 ret = REG_EPAREN;
512 475
513 /* We have already checked preg->fastmap != NULL. */ 476 /* We have already checked preg->fastmap != NULL. */
514 if (BE (ret == REG_NOERROR, 1)) 477 if (__glibc_likely (ret == REG_NOERROR))
515 /* Compute the fastmap now, since regexec cannot modify the pattern 478 /* Compute the fastmap now, since regexec cannot modify the pattern
516 buffer. This function never fails in this implementation. */ 479 buffer. This function never fails in this implementation. */
517 (void) re_compile_fastmap (preg); 480 (void) re_compile_fastmap (preg);
@@ -524,32 +487,21 @@ regcomp (preg, pattern, cflags)
524 487
525 return (int) ret; 488 return (int) ret;
526} 489}
527#ifdef _LIBC 490libc_hidden_def (__regcomp)
528weak_alias (__regcomp, regcomp) 491weak_alias (__regcomp, regcomp)
529#endif
530 492
531/* Returns a message corresponding to an error code, ERRCODE, returned 493/* Returns a message corresponding to an error code, ERRCODE, returned
532 from either regcomp or regexec. We don't use PREG here. */ 494 from either regcomp or regexec. We don't use PREG here. */
533 495
534#ifdef _LIBC
535size_t
536regerror (errcode, preg, errbuf, errbuf_size)
537 int errcode;
538 const regex_t *_Restrict_ preg;
539 char *_Restrict_ errbuf;
540 size_t errbuf_size;
541#else /* size_t might promote */
542size_t 496size_t
543regerror (int errcode, const regex_t *_Restrict_ preg, 497regerror (int errcode, const regex_t *__restrict preg, char *__restrict errbuf,
544 char *_Restrict_ errbuf, size_t errbuf_size) 498 size_t errbuf_size)
545#endif
546{ 499{
547 const char *msg; 500 const char *msg;
548 size_t msg_size; 501 size_t msg_size;
502 int nerrcodes = sizeof __re_error_msgid_idx / sizeof __re_error_msgid_idx[0];
549 503
550 if (BE (errcode < 0 504 if (__glibc_unlikely (errcode < 0 || errcode >= nerrcodes))
551 || errcode >= (int) (sizeof (__re_error_msgid_idx)
552 / sizeof (__re_error_msgid_idx[0])), 0))
553 /* Only error codes returned by the rest of the code should be passed 505 /* Only error codes returned by the rest of the code should be passed
554 to this routine. If we are given anything else, or if other regex 506 to this routine. If we are given anything else, or if other regex
555 code generates an invalid error code, then the program has a bug. 507 code generates an invalid error code, then the program has a bug.
@@ -560,10 +512,10 @@ regerror (int errcode, const regex_t *_Restrict_ preg,
560 512
561 msg_size = strlen (msg) + 1; /* Includes the null. */ 513 msg_size = strlen (msg) + 1; /* Includes the null. */
562 514
563 if (BE (errbuf_size != 0, 1)) 515 if (__glibc_likely (errbuf_size != 0))
564 { 516 {
565 size_t cpy_size = msg_size; 517 size_t cpy_size = msg_size;
566 if (BE (msg_size > errbuf_size, 0)) 518 if (__glibc_unlikely (msg_size > errbuf_size))
567 { 519 {
568 cpy_size = errbuf_size - 1; 520 cpy_size = errbuf_size - 1;
569 errbuf[cpy_size] = '\0'; 521 errbuf[cpy_size] = '\0';
@@ -573,12 +525,9 @@ regerror (int errcode, const regex_t *_Restrict_ preg,
573 525
574 return msg_size; 526 return msg_size;
575} 527}
576#ifdef _LIBC
577weak_alias (__regerror, regerror) 528weak_alias (__regerror, regerror)
578#endif
579 529
580 530
581#ifdef RE_ENABLE_I18N
582/* This static array is used for the map to single-byte characters when 531/* This static array is used for the map to single-byte characters when
583 UTF-8 is used. Otherwise we would allocate memory just to initialize 532 UTF-8 is used. Otherwise we would allocate memory just to initialize
584 it the same all the time. UTF-8 is the preferred encoding so this is 533 it the same all the time. UTF-8 is the preferred encoding so this is
@@ -586,25 +535,24 @@ weak_alias (__regerror, regerror)
586static const bitset_t utf8_sb_map = 535static const bitset_t utf8_sb_map =
587{ 536{
588 /* Set the first 128 bits. */ 537 /* Set the first 128 bits. */
589# if defined __GNUC__ && !defined __STRICT_ANSI__ 538#if (defined __GNUC__ || __clang_major__ >= 4) && !defined __STRICT_ANSI__
590 [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX 539 [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
591# else 540#else
592# if 4 * BITSET_WORD_BITS < ASCII_CHARS 541# if 4 * BITSET_WORD_BITS < ASCII_CHARS
593# error "bitset_word_t is narrower than 32 bits" 542# error "bitset_word_t is narrower than 32 bits"
594# elif 3 * BITSET_WORD_BITS < ASCII_CHARS 543# elif 3 * BITSET_WORD_BITS < ASCII_CHARS
595 BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX, 544 BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX,
596# elif 2 * BITSET_WORD_BITS < ASCII_CHARS 545# elif 2 * BITSET_WORD_BITS < ASCII_CHARS
597 BITSET_WORD_MAX, BITSET_WORD_MAX, 546 BITSET_WORD_MAX, BITSET_WORD_MAX,
598# elif 1 * BITSET_WORD_BITS < ASCII_CHARS 547# elif 1 * BITSET_WORD_BITS < ASCII_CHARS
599 BITSET_WORD_MAX, 548 BITSET_WORD_MAX,
600# endif 549# endif
601 (BITSET_WORD_MAX 550 (BITSET_WORD_MAX
602 >> (SBC_MAX % BITSET_WORD_BITS == 0 551 >> (SBC_MAX % BITSET_WORD_BITS == 0
603 ? 0 552 ? 0
604 : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS)) 553 : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))
605# endif
606};
607#endif 554#endif
555};
608 556
609 557
610static void 558static void
@@ -642,10 +590,8 @@ free_dfa_content (re_dfa_t *dfa)
642 re_free (entry->array); 590 re_free (entry->array);
643 } 591 }
644 re_free (dfa->state_table); 592 re_free (dfa->state_table);
645#ifdef RE_ENABLE_I18N
646 if (dfa->sb_char != utf8_sb_map) 593 if (dfa->sb_char != utf8_sb_map)
647 re_free (dfa->sb_char); 594 re_free (dfa->sb_char);
648#endif
649 re_free (dfa->subexp_map); 595 re_free (dfa->subexp_map);
650#ifdef DEBUG 596#ifdef DEBUG
651 re_free (dfa->re_str); 597 re_free (dfa->re_str);
@@ -658,11 +604,10 @@ free_dfa_content (re_dfa_t *dfa)
658/* Free dynamically allocated space used by PREG. */ 604/* Free dynamically allocated space used by PREG. */
659 605
660void 606void
661regfree (preg) 607regfree (regex_t *preg)
662 regex_t *preg;
663{ 608{
664 re_dfa_t *dfa = preg->buffer; 609 re_dfa_t *dfa = preg->buffer;
665 if (BE (dfa != NULL, 1)) 610 if (__glibc_likely (dfa != NULL))
666 { 611 {
667 lock_fini (dfa->lock); 612 lock_fini (dfa->lock);
668 free_dfa_content (dfa); 613 free_dfa_content (dfa);
@@ -676,9 +621,8 @@ regfree (preg)
676 re_free (preg->translate); 621 re_free (preg->translate);
677 preg->translate = NULL; 622 preg->translate = NULL;
678} 623}
679#ifdef _LIBC 624libc_hidden_def (__regfree)
680weak_alias (__regfree, regfree) 625weak_alias (__regfree, regfree)
681#endif
682 626
683/* Entry points compatible with 4.2 BSD regex library. We don't define 627/* Entry points compatible with 4.2 BSD regex library. We don't define
684 them unless specifically requested. */ 628 them unless specifically requested. */
@@ -695,8 +639,7 @@ char *
695 regcomp/regexec above without link errors. */ 639 regcomp/regexec above without link errors. */
696weak_function 640weak_function
697# endif 641# endif
698re_comp (s) 642re_comp (const char *s)
699 const char *s;
700{ 643{
701 reg_errcode_t ret; 644 reg_errcode_t ret;
702 char *fastmap; 645 char *fastmap;
@@ -719,7 +662,7 @@ re_comp (s)
719 662
720 if (re_comp_buf.fastmap == NULL) 663 if (re_comp_buf.fastmap == NULL)
721 { 664 {
722 re_comp_buf.fastmap = (char *) malloc (SBC_MAX); 665 re_comp_buf.fastmap = re_malloc (char, SBC_MAX);
723 if (re_comp_buf.fastmap == NULL) 666 if (re_comp_buf.fastmap == NULL)
724 return (char *) gettext (__re_error_msgid 667 return (char *) gettext (__re_error_msgid
725 + __re_error_msgid_idx[(int) REG_ESPACE]); 668 + __re_error_msgid_idx[(int) REG_ESPACE]);
@@ -772,7 +715,7 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length,
772 715
773 /* Initialize the dfa. */ 716 /* Initialize the dfa. */
774 dfa = preg->buffer; 717 dfa = preg->buffer;
775 if (BE (preg->allocated < sizeof (re_dfa_t), 0)) 718 if (__glibc_unlikely (preg->allocated < sizeof (re_dfa_t)))
776 { 719 {
777 /* If zero allocated, but buffer is non-null, try to realloc 720 /* If zero allocated, but buffer is non-null, try to realloc
778 enough space. This loses if buffer's address is bogus, but 721 enough space. This loses if buffer's address is bogus, but
@@ -787,9 +730,9 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length,
787 preg->used = sizeof (re_dfa_t); 730 preg->used = sizeof (re_dfa_t);
788 731
789 err = init_dfa (dfa, length); 732 err = init_dfa (dfa, length);
790 if (BE (err == REG_NOERROR && lock_init (dfa->lock) != 0, 0)) 733 if (__glibc_unlikely (err == REG_NOERROR && lock_init (dfa->lock) != 0))
791 err = REG_ESPACE; 734 err = REG_ESPACE;
792 if (BE (err != REG_NOERROR, 0)) 735 if (__glibc_unlikely (err != REG_NOERROR))
793 { 736 {
794 free_dfa_content (dfa); 737 free_dfa_content (dfa);
795 preg->buffer = NULL; 738 preg->buffer = NULL;
@@ -804,7 +747,7 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length,
804 747
805 err = re_string_construct (&regexp, pattern, length, preg->translate, 748 err = re_string_construct (&regexp, pattern, length, preg->translate,
806 (syntax & RE_ICASE) != 0, dfa); 749 (syntax & RE_ICASE) != 0, dfa);
807 if (BE (err != REG_NOERROR, 0)) 750 if (__glibc_unlikely (err != REG_NOERROR))
808 { 751 {
809 re_compile_internal_free_return: 752 re_compile_internal_free_return:
810 free_workarea_compile (preg); 753 free_workarea_compile (preg);
@@ -819,19 +762,17 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length,
819 /* Parse the regular expression, and build a structure tree. */ 762 /* Parse the regular expression, and build a structure tree. */
820 preg->re_nsub = 0; 763 preg->re_nsub = 0;
821 dfa->str_tree = parse (&regexp, preg, syntax, &err); 764 dfa->str_tree = parse (&regexp, preg, syntax, &err);
822 if (BE (dfa->str_tree == NULL, 0)) 765 if (__glibc_unlikely (dfa->str_tree == NULL))
823 goto re_compile_internal_free_return; 766 goto re_compile_internal_free_return;
824 767
825 /* Analyze the tree and create the nfa. */ 768 /* Analyze the tree and create the nfa. */
826 err = analyze (preg); 769 err = analyze (preg);
827 if (BE (err != REG_NOERROR, 0)) 770 if (__glibc_unlikely (err != REG_NOERROR))
828 goto re_compile_internal_free_return; 771 goto re_compile_internal_free_return;
829 772
830#ifdef RE_ENABLE_I18N
831 /* If possible, do searching in single byte encoding to speed things up. */ 773 /* If possible, do searching in single byte encoding to speed things up. */
832 if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL) 774 if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
833 optimize_utf8 (dfa); 775 optimize_utf8 (dfa);
834#endif
835 776
836 /* Then create the initial state of the dfa. */ 777 /* Then create the initial state of the dfa. */
837 err = create_initial_state (dfa); 778 err = create_initial_state (dfa);
@@ -840,7 +781,7 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length,
840 free_workarea_compile (preg); 781 free_workarea_compile (preg);
841 re_string_destruct (&regexp); 782 re_string_destruct (&regexp);
842 783
843 if (BE (err != REG_NOERROR, 0)) 784 if (__glibc_unlikely (err != REG_NOERROR))
844 { 785 {
845 lock_fini (dfa->lock); 786 lock_fini (dfa->lock);
846 free_dfa_content (dfa); 787 free_dfa_content (dfa);
@@ -861,11 +802,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
861#ifndef _LIBC 802#ifndef _LIBC
862 const char *codeset_name; 803 const char *codeset_name;
863#endif 804#endif
864#ifdef RE_ENABLE_I18N
865 size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t)); 805 size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));
866#else
867 size_t max_i18n_object_size = 0;
868#endif
869 size_t max_object_size = 806 size_t max_object_size =
870 MAX (sizeof (struct re_state_table_entry), 807 MAX (sizeof (struct re_state_table_entry),
871 MAX (sizeof (re_token_t), 808 MAX (sizeof (re_token_t),
@@ -882,7 +819,8 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
882 calculation below, and for similar doubling calculations 819 calculation below, and for similar doubling calculations
883 elsewhere. And it's <= rather than <, because some of the 820 elsewhere. And it's <= rather than <, because some of the
884 doubling calculations add 1 afterwards. */ 821 doubling calculations add 1 afterwards. */
885 if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) / 2 <= pat_len, 0)) 822 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) / 2
823 <= pat_len))
886 return REG_ESPACE; 824 return REG_ESPACE;
887 825
888 dfa->nodes_alloc = pat_len + 1; 826 dfa->nodes_alloc = pat_len + 1;
@@ -916,7 +854,6 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
916 dfa->map_notascii = 0; 854 dfa->map_notascii = 0;
917#endif 855#endif
918 856
919#ifdef RE_ENABLE_I18N
920 if (dfa->mb_cur_max > 1) 857 if (dfa->mb_cur_max > 1)
921 { 858 {
922 if (dfa->is_utf8) 859 if (dfa->is_utf8)
@@ -926,7 +863,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
926 int i, j, ch; 863 int i, j, ch;
927 864
928 dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); 865 dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
929 if (BE (dfa->sb_char == NULL, 0)) 866 if (__glibc_unlikely (dfa->sb_char == NULL))
930 return REG_ESPACE; 867 return REG_ESPACE;
931 868
932 /* Set the bits corresponding to single byte chars. */ 869 /* Set the bits corresponding to single byte chars. */
@@ -936,16 +873,15 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
936 wint_t wch = __btowc (ch); 873 wint_t wch = __btowc (ch);
937 if (wch != WEOF) 874 if (wch != WEOF)
938 dfa->sb_char[i] |= (bitset_word_t) 1 << j; 875 dfa->sb_char[i] |= (bitset_word_t) 1 << j;
939# ifndef _LIBC 876#ifndef _LIBC
940 if (isascii (ch) && wch != ch) 877 if (isascii (ch) && wch != ch)
941 dfa->map_notascii = 1; 878 dfa->map_notascii = 1;
942# endif 879#endif
943 } 880 }
944 } 881 }
945 } 882 }
946#endif
947 883
948 if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0)) 884 if (__glibc_unlikely (dfa->nodes == NULL || dfa->state_table == NULL))
949 return REG_ESPACE; 885 return REG_ESPACE;
950 return REG_NOERROR; 886 return REG_NOERROR;
951} 887}
@@ -955,14 +891,13 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
955 character used by some operators like "\<", "\>", etc. */ 891 character used by some operators like "\<", "\>", etc. */
956 892
957static void 893static void
958internal_function
959init_word_char (re_dfa_t *dfa) 894init_word_char (re_dfa_t *dfa)
960{ 895{
961 int i = 0; 896 int i = 0;
962 int j; 897 int j;
963 int ch = 0; 898 int ch = 0;
964 dfa->word_ops_used = 1; 899 dfa->word_ops_used = 1;
965 if (BE (dfa->map_notascii == 0, 1)) 900 if (__glibc_likely (dfa->map_notascii == 0))
966 { 901 {
967 bitset_word_t bits0 = 0x00000000; 902 bitset_word_t bits0 = 0x00000000;
968 bitset_word_t bits1 = 0x03ff0000; 903 bitset_word_t bits1 = 0x03ff0000;
@@ -970,6 +905,7 @@ init_word_char (re_dfa_t *dfa)
970 bitset_word_t bits3 = 0x07fffffe; 905 bitset_word_t bits3 = 0x07fffffe;
971 if (BITSET_WORD_BITS == 64) 906 if (BITSET_WORD_BITS == 64)
972 { 907 {
908 /* Pacify gcc -Woverflow on 32-bit platformns. */
973 dfa->word_char[0] = bits1 << 31 << 1 | bits0; 909 dfa->word_char[0] = bits1 << 31 << 1 | bits0;
974 dfa->word_char[1] = bits3 << 31 << 1 | bits2; 910 dfa->word_char[1] = bits3 << 31 << 1 | bits2;
975 i = 2; 911 i = 2;
@@ -986,7 +922,7 @@ init_word_char (re_dfa_t *dfa)
986 goto general_case; 922 goto general_case;
987 ch = 128; 923 ch = 128;
988 924
989 if (BE (dfa->is_utf8, 1)) 925 if (__glibc_likely (dfa->is_utf8))
990 { 926 {
991 memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8); 927 memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8);
992 return; 928 return;
@@ -1033,7 +969,7 @@ create_initial_state (re_dfa_t *dfa)
1033 first = dfa->str_tree->first->node_idx; 969 first = dfa->str_tree->first->node_idx;
1034 dfa->init_node = first; 970 dfa->init_node = first;
1035 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first); 971 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
1036 if (BE (err != REG_NOERROR, 0)) 972 if (__glibc_unlikely (err != REG_NOERROR))
1037 return err; 973 return err;
1038 974
1039 /* The back-references which are in initial states can epsilon transit, 975 /* The back-references which are in initial states can epsilon transit,
@@ -1077,7 +1013,7 @@ create_initial_state (re_dfa_t *dfa)
1077 /* It must be the first time to invoke acquire_state. */ 1013 /* It must be the first time to invoke acquire_state. */
1078 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); 1014 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
1079 /* We don't check ERR here, since the initial state must not be NULL. */ 1015 /* We don't check ERR here, since the initial state must not be NULL. */
1080 if (BE (dfa->init_state == NULL, 0)) 1016 if (__glibc_unlikely (dfa->init_state == NULL))
1081 return err; 1017 return err;
1082 if (dfa->init_state->has_constraint) 1018 if (dfa->init_state->has_constraint)
1083 { 1019 {
@@ -1089,8 +1025,9 @@ create_initial_state (re_dfa_t *dfa)
1089 &init_nodes, 1025 &init_nodes,
1090 CONTEXT_NEWLINE 1026 CONTEXT_NEWLINE
1091 | CONTEXT_BEGBUF); 1027 | CONTEXT_BEGBUF);
1092 if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL 1028 if (__glibc_unlikely (dfa->init_state_word == NULL
1093 || dfa->init_state_begbuf == NULL, 0)) 1029 || dfa->init_state_nl == NULL
1030 || dfa->init_state_begbuf == NULL))
1094 return err; 1031 return err;
1095 } 1032 }
1096 else 1033 else
@@ -1101,7 +1038,6 @@ create_initial_state (re_dfa_t *dfa)
1101 return REG_NOERROR; 1038 return REG_NOERROR;
1102} 1039}
1103 1040
1104#ifdef RE_ENABLE_I18N
1105/* If it is possible to do searching in single byte encoding instead of UTF-8 1041/* If it is possible to do searching in single byte encoding instead of UTF-8
1106 to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change 1042 to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
1107 DFA nodes where needed. */ 1043 DFA nodes where needed. */
@@ -1181,7 +1117,6 @@ optimize_utf8 (re_dfa_t *dfa)
1181 dfa->is_utf8 = 0; 1117 dfa->is_utf8 = 0;
1182 dfa->has_mb_node = dfa->nbackref > 0 || has_period; 1118 dfa->has_mb_node = dfa->nbackref > 0 || has_period;
1183} 1119}
1184#endif
1185 1120
1186/* Analyze the structure tree, and calculate "first", "next", "edest", 1121/* Analyze the structure tree, and calculate "first", "next", "edest",
1187 "eclosure", and "inveclosure". */ 1122 "eclosure", and "inveclosure". */
@@ -1197,8 +1132,8 @@ analyze (regex_t *preg)
1197 dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc); 1132 dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc);
1198 dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); 1133 dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
1199 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); 1134 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
1200 if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL 1135 if (__glibc_unlikely (dfa->nexts == NULL || dfa->org_indices == NULL
1201 || dfa->eclosures == NULL, 0)) 1136 || dfa->edests == NULL || dfa->eclosures == NULL))
1202 return REG_ESPACE; 1137 return REG_ESPACE;
1203 1138
1204 dfa->subexp_map = re_malloc (Idx, preg->re_nsub); 1139 dfa->subexp_map = re_malloc (Idx, preg->re_nsub);
@@ -1213,23 +1148,23 @@ analyze (regex_t *preg)
1213 break; 1148 break;
1214 if (i == preg->re_nsub) 1149 if (i == preg->re_nsub)
1215 { 1150 {
1216 free (dfa->subexp_map); 1151 re_free (dfa->subexp_map);
1217 dfa->subexp_map = NULL; 1152 dfa->subexp_map = NULL;
1218 } 1153 }
1219 } 1154 }
1220 1155
1221 ret = postorder (dfa->str_tree, lower_subexps, preg); 1156 ret = postorder (dfa->str_tree, lower_subexps, preg);
1222 if (BE (ret != REG_NOERROR, 0)) 1157 if (__glibc_unlikely (ret != REG_NOERROR))
1223 return ret; 1158 return ret;
1224 ret = postorder (dfa->str_tree, calc_first, dfa); 1159 ret = postorder (dfa->str_tree, calc_first, dfa);
1225 if (BE (ret != REG_NOERROR, 0)) 1160 if (__glibc_unlikely (ret != REG_NOERROR))
1226 return ret; 1161 return ret;
1227 preorder (dfa->str_tree, calc_next, dfa); 1162 preorder (dfa->str_tree, calc_next, dfa);
1228 ret = preorder (dfa->str_tree, link_nfa_nodes, dfa); 1163 ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
1229 if (BE (ret != REG_NOERROR, 0)) 1164 if (__glibc_unlikely (ret != REG_NOERROR))
1230 return ret; 1165 return ret;
1231 ret = calc_eclosure (dfa); 1166 ret = calc_eclosure (dfa);
1232 if (BE (ret != REG_NOERROR, 0)) 1167 if (__glibc_unlikely (ret != REG_NOERROR))
1233 return ret; 1168 return ret;
1234 1169
1235 /* We only need this during the prune_impossible_nodes pass in regexec.c; 1170 /* We only need this during the prune_impossible_nodes pass in regexec.c;
@@ -1238,7 +1173,7 @@ analyze (regex_t *preg)
1238 || dfa->nbackref) 1173 || dfa->nbackref)
1239 { 1174 {
1240 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); 1175 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
1241 if (BE (dfa->inveclosures == NULL, 0)) 1176 if (__glibc_unlikely (dfa->inveclosures == NULL))
1242 return REG_ESPACE; 1177 return REG_ESPACE;
1243 ret = calc_inveclosure (dfa); 1178 ret = calc_inveclosure (dfa);
1244 } 1179 }
@@ -1268,7 +1203,7 @@ postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
1268 do 1203 do
1269 { 1204 {
1270 reg_errcode_t err = fn (extra, node); 1205 reg_errcode_t err = fn (extra, node);
1271 if (BE (err != REG_NOERROR, 0)) 1206 if (__glibc_unlikely (err != REG_NOERROR))
1272 return err; 1207 return err;
1273 if (node->parent == NULL) 1208 if (node->parent == NULL)
1274 return REG_NOERROR; 1209 return REG_NOERROR;
@@ -1290,7 +1225,7 @@ preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
1290 for (node = root; ; ) 1225 for (node = root; ; )
1291 { 1226 {
1292 reg_errcode_t err = fn (extra, node); 1227 reg_errcode_t err = fn (extra, node);
1293 if (BE (err != REG_NOERROR, 0)) 1228 if (__glibc_unlikely (err != REG_NOERROR))
1294 return err; 1229 return err;
1295 1230
1296 /* Go to the left node, or up and to the right. */ 1231 /* Go to the left node, or up and to the right. */
@@ -1391,7 +1326,8 @@ lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
1391 cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP); 1326 cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
1392 tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls; 1327 tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
1393 tree = create_tree (dfa, op, tree1, CONCAT); 1328 tree = create_tree (dfa, op, tree1, CONCAT);
1394 if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0)) 1329 if (__glibc_unlikely (tree == NULL || tree1 == NULL
1330 || op == NULL || cls == NULL))
1395 { 1331 {
1396 *err = REG_ESPACE; 1332 *err = REG_ESPACE;
1397 return NULL; 1333 return NULL;
@@ -1417,7 +1353,7 @@ calc_first (void *extra, bin_tree_t *node)
1417 { 1353 {
1418 node->first = node; 1354 node->first = node;
1419 node->node_idx = re_dfa_add_node (dfa, node->token); 1355 node->node_idx = re_dfa_add_node (dfa, node->token);
1420 if (BE (node->node_idx == REG_MISSING, 0)) 1356 if (__glibc_unlikely (node->node_idx == -1))
1421 return REG_ESPACE; 1357 return REG_ESPACE;
1422 if (node->token.type == ANCHOR) 1358 if (node->token.type == ANCHOR)
1423 dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type; 1359 dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
@@ -1462,7 +1398,7 @@ link_nfa_nodes (void *extra, bin_tree_t *node)
1462 break; 1398 break;
1463 1399
1464 case END_OF_RE: 1400 case END_OF_RE:
1465 assert (node->next == NULL); 1401 DEBUG_ASSERT (node->next == NULL);
1466 break; 1402 break;
1467 1403
1468 case OP_DUP_ASTERISK: 1404 case OP_DUP_ASTERISK:
@@ -1478,8 +1414,8 @@ link_nfa_nodes (void *extra, bin_tree_t *node)
1478 right = node->right->first->node_idx; 1414 right = node->right->first->node_idx;
1479 else 1415 else
1480 right = node->next->node_idx; 1416 right = node->next->node_idx;
1481 assert (REG_VALID_INDEX (left)); 1417 DEBUG_ASSERT (left > -1);
1482 assert (REG_VALID_INDEX (right)); 1418 DEBUG_ASSERT (right > -1);
1483 err = re_node_set_init_2 (dfa->edests + idx, left, right); 1419 err = re_node_set_init_2 (dfa->edests + idx, left, right);
1484 } 1420 }
1485 break; 1421 break;
@@ -1497,7 +1433,7 @@ link_nfa_nodes (void *extra, bin_tree_t *node)
1497 break; 1433 break;
1498 1434
1499 default: 1435 default:
1500 assert (!IS_EPSILON_NODE (node->token.type)); 1436 DEBUG_ASSERT (!IS_EPSILON_NODE (node->token.type));
1501 dfa->nexts[idx] = node->next->node_idx; 1437 dfa->nexts[idx] = node->next->node_idx;
1502 break; 1438 break;
1503 } 1439 }
@@ -1510,7 +1446,6 @@ link_nfa_nodes (void *extra, bin_tree_t *node)
1510 to their own constraint. */ 1446 to their own constraint. */
1511 1447
1512static reg_errcode_t 1448static reg_errcode_t
1513internal_function
1514duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node, 1449duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
1515 Idx root_node, unsigned int init_constraint) 1450 Idx root_node, unsigned int init_constraint)
1516{ 1451{
@@ -1529,11 +1464,11 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
1529 org_dest = dfa->nexts[org_node]; 1464 org_dest = dfa->nexts[org_node];
1530 re_node_set_empty (dfa->edests + clone_node); 1465 re_node_set_empty (dfa->edests + clone_node);
1531 clone_dest = duplicate_node (dfa, org_dest, constraint); 1466 clone_dest = duplicate_node (dfa, org_dest, constraint);
1532 if (BE (clone_dest == REG_MISSING, 0)) 1467 if (__glibc_unlikely (clone_dest == -1))
1533 return REG_ESPACE; 1468 return REG_ESPACE;
1534 dfa->nexts[clone_node] = dfa->nexts[org_node]; 1469 dfa->nexts[clone_node] = dfa->nexts[org_node];
1535 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1470 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1536 if (BE (! ok, 0)) 1471 if (__glibc_unlikely (! ok))
1537 return REG_ESPACE; 1472 return REG_ESPACE;
1538 } 1473 }
1539 else if (dfa->edests[org_node].nelem == 0) 1474 else if (dfa->edests[org_node].nelem == 0)
@@ -1555,17 +1490,17 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
1555 if (org_node == root_node && clone_node != org_node) 1490 if (org_node == root_node && clone_node != org_node)
1556 { 1491 {
1557 ok = re_node_set_insert (dfa->edests + clone_node, org_dest); 1492 ok = re_node_set_insert (dfa->edests + clone_node, org_dest);
1558 if (BE (! ok, 0)) 1493 if (__glibc_unlikely (! ok))
1559 return REG_ESPACE; 1494 return REG_ESPACE;
1560 break; 1495 break;
1561 } 1496 }
1562 /* In case the node has another constraint, append it. */ 1497 /* In case the node has another constraint, append it. */
1563 constraint |= dfa->nodes[org_node].constraint; 1498 constraint |= dfa->nodes[org_node].constraint;
1564 clone_dest = duplicate_node (dfa, org_dest, constraint); 1499 clone_dest = duplicate_node (dfa, org_dest, constraint);
1565 if (BE (clone_dest == REG_MISSING, 0)) 1500 if (__glibc_unlikely (clone_dest == -1))
1566 return REG_ESPACE; 1501 return REG_ESPACE;
1567 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1502 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1568 if (BE (! ok, 0)) 1503 if (__glibc_unlikely (! ok))
1569 return REG_ESPACE; 1504 return REG_ESPACE;
1570 } 1505 }
1571 else /* dfa->edests[org_node].nelem == 2 */ 1506 else /* dfa->edests[org_node].nelem == 2 */
@@ -1576,19 +1511,19 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
1576 re_node_set_empty (dfa->edests + clone_node); 1511 re_node_set_empty (dfa->edests + clone_node);
1577 /* Search for a duplicated node which satisfies the constraint. */ 1512 /* Search for a duplicated node which satisfies the constraint. */
1578 clone_dest = search_duplicated_node (dfa, org_dest, constraint); 1513 clone_dest = search_duplicated_node (dfa, org_dest, constraint);
1579 if (clone_dest == REG_MISSING) 1514 if (clone_dest == -1)
1580 { 1515 {
1581 /* There is no such duplicated node, create a new one. */ 1516 /* There is no such duplicated node, create a new one. */
1582 reg_errcode_t err; 1517 reg_errcode_t err;
1583 clone_dest = duplicate_node (dfa, org_dest, constraint); 1518 clone_dest = duplicate_node (dfa, org_dest, constraint);
1584 if (BE (clone_dest == REG_MISSING, 0)) 1519 if (__glibc_unlikely (clone_dest == -1))
1585 return REG_ESPACE; 1520 return REG_ESPACE;
1586 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1521 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1587 if (BE (! ok, 0)) 1522 if (__glibc_unlikely (! ok))
1588 return REG_ESPACE; 1523 return REG_ESPACE;
1589 err = duplicate_node_closure (dfa, org_dest, clone_dest, 1524 err = duplicate_node_closure (dfa, org_dest, clone_dest,
1590 root_node, constraint); 1525 root_node, constraint);
1591 if (BE (err != REG_NOERROR, 0)) 1526 if (__glibc_unlikely (err != REG_NOERROR))
1592 return err; 1527 return err;
1593 } 1528 }
1594 else 1529 else
@@ -1596,16 +1531,16 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
1596 /* There is a duplicated node which satisfies the constraint, 1531 /* There is a duplicated node which satisfies the constraint,
1597 use it to avoid infinite loop. */ 1532 use it to avoid infinite loop. */
1598 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1533 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1599 if (BE (! ok, 0)) 1534 if (__glibc_unlikely (! ok))
1600 return REG_ESPACE; 1535 return REG_ESPACE;
1601 } 1536 }
1602 1537
1603 org_dest = dfa->edests[org_node].elems[1]; 1538 org_dest = dfa->edests[org_node].elems[1];
1604 clone_dest = duplicate_node (dfa, org_dest, constraint); 1539 clone_dest = duplicate_node (dfa, org_dest, constraint);
1605 if (BE (clone_dest == REG_MISSING, 0)) 1540 if (__glibc_unlikely (clone_dest == -1))
1606 return REG_ESPACE; 1541 return REG_ESPACE;
1607 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1542 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1608 if (BE (! ok, 0)) 1543 if (__glibc_unlikely (! ok))
1609 return REG_ESPACE; 1544 return REG_ESPACE;
1610 } 1545 }
1611 org_node = org_dest; 1546 org_node = org_dest;
@@ -1628,18 +1563,18 @@ search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
1628 && constraint == dfa->nodes[idx].constraint) 1563 && constraint == dfa->nodes[idx].constraint)
1629 return idx; /* Found. */ 1564 return idx; /* Found. */
1630 } 1565 }
1631 return REG_MISSING; /* Not found. */ 1566 return -1; /* Not found. */
1632} 1567}
1633 1568
1634/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. 1569/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
1635 Return the index of the new node, or REG_MISSING if insufficient storage is 1570 Return the index of the new node, or -1 if insufficient storage is
1636 available. */ 1571 available. */
1637 1572
1638static Idx 1573static Idx
1639duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint) 1574duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint)
1640{ 1575{
1641 Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); 1576 Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
1642 if (BE (dup_idx != REG_MISSING, 1)) 1577 if (__glibc_likely (dup_idx != -1))
1643 { 1578 {
1644 dfa->nodes[dup_idx].constraint = constraint; 1579 dfa->nodes[dup_idx].constraint = constraint;
1645 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint; 1580 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint;
@@ -1665,7 +1600,7 @@ calc_inveclosure (re_dfa_t *dfa)
1665 for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) 1600 for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
1666 { 1601 {
1667 ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src); 1602 ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
1668 if (BE (! ok, 0)) 1603 if (__glibc_unlikely (! ok))
1669 return REG_ESPACE; 1604 return REG_ESPACE;
1670 } 1605 }
1671 } 1606 }
@@ -1680,9 +1615,7 @@ calc_eclosure (re_dfa_t *dfa)
1680{ 1615{
1681 Idx node_idx; 1616 Idx node_idx;
1682 bool incomplete; 1617 bool incomplete;
1683#ifdef DEBUG 1618 DEBUG_ASSERT (dfa->nodes_len > 0);
1684 assert (dfa->nodes_len > 0);
1685#endif
1686 incomplete = false; 1619 incomplete = false;
1687 /* For each nodes, calculate epsilon closure. */ 1620 /* For each nodes, calculate epsilon closure. */
1688 for (node_idx = 0; ; ++node_idx) 1621 for (node_idx = 0; ; ++node_idx)
@@ -1697,16 +1630,14 @@ calc_eclosure (re_dfa_t *dfa)
1697 node_idx = 0; 1630 node_idx = 0;
1698 } 1631 }
1699 1632
1700#ifdef DEBUG 1633 DEBUG_ASSERT (dfa->eclosures[node_idx].nelem != -1);
1701 assert (dfa->eclosures[node_idx].nelem != REG_MISSING);
1702#endif
1703 1634
1704 /* If we have already calculated, skip it. */ 1635 /* If we have already calculated, skip it. */
1705 if (dfa->eclosures[node_idx].nelem != 0) 1636 if (dfa->eclosures[node_idx].nelem != 0)
1706 continue; 1637 continue;
1707 /* Calculate epsilon closure of 'node_idx'. */ 1638 /* Calculate epsilon closure of 'node_idx'. */
1708 err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true); 1639 err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true);
1709 if (BE (err != REG_NOERROR, 0)) 1640 if (__glibc_unlikely (err != REG_NOERROR))
1710 return err; 1641 return err;
1711 1642
1712 if (dfa->eclosures[node_idx].nelem == 0) 1643 if (dfa->eclosures[node_idx].nelem == 0)
@@ -1726,15 +1657,17 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
1726 reg_errcode_t err; 1657 reg_errcode_t err;
1727 Idx i; 1658 Idx i;
1728 re_node_set eclosure; 1659 re_node_set eclosure;
1729 bool ok;
1730 bool incomplete = false; 1660 bool incomplete = false;
1731 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); 1661 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
1732 if (BE (err != REG_NOERROR, 0)) 1662 if (__glibc_unlikely (err != REG_NOERROR))
1733 return err; 1663 return err;
1734 1664
1665 /* An epsilon closure includes itself. */
1666 eclosure.elems[eclosure.nelem++] = node;
1667
1735 /* This indicates that we are calculating this node now. 1668 /* This indicates that we are calculating this node now.
1736 We reference this value to avoid infinite loop. */ 1669 We reference this value to avoid infinite loop. */
1737 dfa->eclosures[node].nelem = REG_MISSING; 1670 dfa->eclosures[node].nelem = -1;
1738 1671
1739 /* If the current node has constraints, duplicate all nodes 1672 /* If the current node has constraints, duplicate all nodes
1740 since they must inherit the constraints. */ 1673 since they must inherit the constraints. */
@@ -1744,7 +1677,7 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
1744 { 1677 {
1745 err = duplicate_node_closure (dfa, node, node, node, 1678 err = duplicate_node_closure (dfa, node, node, node,
1746 dfa->nodes[node].constraint); 1679 dfa->nodes[node].constraint);
1747 if (BE (err != REG_NOERROR, 0)) 1680 if (__glibc_unlikely (err != REG_NOERROR))
1748 return err; 1681 return err;
1749 } 1682 }
1750 1683
@@ -1756,7 +1689,7 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
1756 Idx edest = dfa->edests[node].elems[i]; 1689 Idx edest = dfa->edests[node].elems[i];
1757 /* If calculating the epsilon closure of 'edest' is in progress, 1690 /* If calculating the epsilon closure of 'edest' is in progress,
1758 return intermediate result. */ 1691 return intermediate result. */
1759 if (dfa->eclosures[edest].nelem == REG_MISSING) 1692 if (dfa->eclosures[edest].nelem == -1)
1760 { 1693 {
1761 incomplete = true; 1694 incomplete = true;
1762 continue; 1695 continue;
@@ -1766,14 +1699,14 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
1766 if (dfa->eclosures[edest].nelem == 0) 1699 if (dfa->eclosures[edest].nelem == 0)
1767 { 1700 {
1768 err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false); 1701 err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false);
1769 if (BE (err != REG_NOERROR, 0)) 1702 if (__glibc_unlikely (err != REG_NOERROR))
1770 return err; 1703 return err;
1771 } 1704 }
1772 else 1705 else
1773 eclosure_elem = dfa->eclosures[edest]; 1706 eclosure_elem = dfa->eclosures[edest];
1774 /* Merge the epsilon closure of 'edest'. */ 1707 /* Merge the epsilon closure of 'edest'. */
1775 err = re_node_set_merge (&eclosure, &eclosure_elem); 1708 err = re_node_set_merge (&eclosure, &eclosure_elem);
1776 if (BE (err != REG_NOERROR, 0)) 1709 if (__glibc_unlikely (err != REG_NOERROR))
1777 return err; 1710 return err;
1778 /* If the epsilon closure of 'edest' is incomplete, 1711 /* If the epsilon closure of 'edest' is incomplete,
1779 the epsilon closure of this node is also incomplete. */ 1712 the epsilon closure of this node is also incomplete. */
@@ -1784,10 +1717,6 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
1784 } 1717 }
1785 } 1718 }
1786 1719
1787 /* An epsilon closure includes itself. */
1788 ok = re_node_set_insert (&eclosure, node);
1789 if (BE (! ok, 0))
1790 return REG_ESPACE;
1791 if (incomplete && !root) 1720 if (incomplete && !root)
1792 dfa->eclosures[node].nelem = 0; 1721 dfa->eclosures[node].nelem = 0;
1793 else 1722 else
@@ -1802,7 +1731,6 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
1802 We must not use this function inside bracket expressions. */ 1731 We must not use this function inside bracket expressions. */
1803 1732
1804static void 1733static void
1805internal_function
1806fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax) 1734fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
1807{ 1735{
1808 re_string_skip_bytes (input, peek_token (result, input, syntax)); 1736 re_string_skip_bytes (input, peek_token (result, input, syntax));
@@ -1812,7 +1740,6 @@ fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
1812 We must not use this function inside bracket expressions. */ 1740 We must not use this function inside bracket expressions. */
1813 1741
1814static int 1742static int
1815internal_function
1816peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) 1743peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1817{ 1744{
1818 unsigned char c; 1745 unsigned char c;
@@ -1827,16 +1754,14 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1827 token->opr.c = c; 1754 token->opr.c = c;
1828 1755
1829 token->word_char = 0; 1756 token->word_char = 0;
1830#ifdef RE_ENABLE_I18N
1831 token->mb_partial = 0; 1757 token->mb_partial = 0;
1832 if (input->mb_cur_max > 1 && 1758 if (input->mb_cur_max > 1
1833 !re_string_first_byte (input, re_string_cur_idx (input))) 1759 && !re_string_first_byte (input, re_string_cur_idx (input)))
1834 { 1760 {
1835 token->type = CHARACTER; 1761 token->type = CHARACTER;
1836 token->mb_partial = 1; 1762 token->mb_partial = 1;
1837 return 1; 1763 return 1;
1838 } 1764 }
1839#endif
1840 if (c == '\\') 1765 if (c == '\\')
1841 { 1766 {
1842 unsigned char c2; 1767 unsigned char c2;
@@ -1849,7 +1774,6 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1849 c2 = re_string_peek_byte_case (input, 1); 1774 c2 = re_string_peek_byte_case (input, 1);
1850 token->opr.c = c2; 1775 token->opr.c = c2;
1851 token->type = CHARACTER; 1776 token->type = CHARACTER;
1852#ifdef RE_ENABLE_I18N
1853 if (input->mb_cur_max > 1) 1777 if (input->mb_cur_max > 1)
1854 { 1778 {
1855 wint_t wc = re_string_wchar_at (input, 1779 wint_t wc = re_string_wchar_at (input,
@@ -1857,7 +1781,6 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1857 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; 1781 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1858 } 1782 }
1859 else 1783 else
1860#endif
1861 token->word_char = IS_WORD_CHAR (c2) != 0; 1784 token->word_char = IS_WORD_CHAR (c2) != 0;
1862 1785
1863 switch (c2) 1786 switch (c2)
@@ -1963,14 +1886,12 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1963 } 1886 }
1964 1887
1965 token->type = CHARACTER; 1888 token->type = CHARACTER;
1966#ifdef RE_ENABLE_I18N
1967 if (input->mb_cur_max > 1) 1889 if (input->mb_cur_max > 1)
1968 { 1890 {
1969 wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)); 1891 wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
1970 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; 1892 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1971 } 1893 }
1972 else 1894 else
1973#endif
1974 token->word_char = IS_WORD_CHAR (token->opr.c); 1895 token->word_char = IS_WORD_CHAR (token->opr.c);
1975 1896
1976 switch (c) 1897 switch (c)
@@ -2017,8 +1938,8 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
2017 token->type = OP_PERIOD; 1938 token->type = OP_PERIOD;
2018 break; 1939 break;
2019 case '^': 1940 case '^':
2020 if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) && 1941 if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE))
2021 re_string_cur_idx (input) != 0) 1942 && re_string_cur_idx (input) != 0)
2022 { 1943 {
2023 char prev = re_string_peek_byte (input, -1); 1944 char prev = re_string_peek_byte (input, -1);
2024 if (!(syntax & RE_NEWLINE_ALT) || prev != '\n') 1945 if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
@@ -2028,8 +1949,8 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
2028 token->opr.ctx_type = LINE_FIRST; 1949 token->opr.ctx_type = LINE_FIRST;
2029 break; 1950 break;
2030 case '$': 1951 case '$':
2031 if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && 1952 if (!(syntax & RE_CONTEXT_INDEP_ANCHORS)
2032 re_string_cur_idx (input) + 1 != re_string_length (input)) 1953 && re_string_cur_idx (input) + 1 != re_string_length (input))
2033 { 1954 {
2034 re_token_t next; 1955 re_token_t next;
2035 re_string_skip_bytes (input, 1); 1956 re_string_skip_bytes (input, 1);
@@ -2051,7 +1972,6 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
2051 We must not use this function out of bracket expressions. */ 1972 We must not use this function out of bracket expressions. */
2052 1973
2053static int 1974static int
2054internal_function
2055peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) 1975peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
2056{ 1976{
2057 unsigned char c; 1977 unsigned char c;
@@ -2063,14 +1983,12 @@ peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
2063 c = re_string_peek_byte (input, 0); 1983 c = re_string_peek_byte (input, 0);
2064 token->opr.c = c; 1984 token->opr.c = c;
2065 1985
2066#ifdef RE_ENABLE_I18N 1986 if (input->mb_cur_max > 1
2067 if (input->mb_cur_max > 1 && 1987 && !re_string_first_byte (input, re_string_cur_idx (input)))
2068 !re_string_first_byte (input, re_string_cur_idx (input)))
2069 { 1988 {
2070 token->type = CHARACTER; 1989 token->type = CHARACTER;
2071 return 1; 1990 return 1;
2072 } 1991 }
2073#endif /* RE_ENABLE_I18N */
2074 1992
2075 if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) 1993 if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
2076 && re_string_cur_idx (input) + 1 < re_string_length (input)) 1994 && re_string_cur_idx (input) + 1 < re_string_length (input))
@@ -2098,16 +2016,18 @@ peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
2098 case '.': 2016 case '.':
2099 token->type = OP_OPEN_COLL_ELEM; 2017 token->type = OP_OPEN_COLL_ELEM;
2100 break; 2018 break;
2019
2101 case '=': 2020 case '=':
2102 token->type = OP_OPEN_EQUIV_CLASS; 2021 token->type = OP_OPEN_EQUIV_CLASS;
2103 break; 2022 break;
2023
2104 case ':': 2024 case ':':
2105 if (syntax & RE_CHAR_CLASSES) 2025 if (syntax & RE_CHAR_CLASSES)
2106 { 2026 {
2107 token->type = OP_OPEN_CHAR_CLASS; 2027 token->type = OP_OPEN_CHAR_CLASS;
2108 break; 2028 break;
2109 } 2029 }
2110 /* else fall through. */ 2030 FALLTHROUGH;
2111 default: 2031 default:
2112 token->type = CHARACTER; 2032 token->type = CHARACTER;
2113 token->opr.c = c; 2033 token->opr.c = c;
@@ -2118,15 +2038,25 @@ peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
2118 } 2038 }
2119 switch (c) 2039 switch (c)
2120 { 2040 {
2121 case '-':
2122 token->type = OP_CHARSET_RANGE;
2123 break;
2124 case ']': 2041 case ']':
2125 token->type = OP_CLOSE_BRACKET; 2042 token->type = OP_CLOSE_BRACKET;
2126 break; 2043 break;
2127 case '^': 2044 case '^':
2128 token->type = OP_NON_MATCH_LIST; 2045 token->type = OP_NON_MATCH_LIST;
2129 break; 2046 break;
2047 case '-':
2048 /* In V7 Unix grep and Unix awk and mawk, [...---...]
2049 (3 adjacent minus signs) stands for a single minus sign.
2050 Support that without breaking anything else. */
2051 if (! (re_string_cur_idx (input) + 2 < re_string_length (input)
2052 && re_string_peek_byte (input, 1) == '-'
2053 && re_string_peek_byte (input, 2) == '-'))
2054 {
2055 token->type = OP_CHARSET_RANGE;
2056 break;
2057 }
2058 re_string_skip_bytes (input, 2);
2059 FALLTHROUGH;
2130 default: 2060 default:
2131 token->type = CHARACTER; 2061 token->type = CHARACTER;
2132 } 2062 }
@@ -2157,14 +2087,14 @@ parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
2157 dfa->syntax = syntax; 2087 dfa->syntax = syntax;
2158 fetch_token (&current_token, regexp, syntax | RE_CARET_ANCHORS_HERE); 2088 fetch_token (&current_token, regexp, syntax | RE_CARET_ANCHORS_HERE);
2159 tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err); 2089 tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
2160 if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2090 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL))
2161 return NULL; 2091 return NULL;
2162 eor = create_tree (dfa, NULL, NULL, END_OF_RE); 2092 eor = create_tree (dfa, NULL, NULL, END_OF_RE);
2163 if (tree != NULL) 2093 if (tree != NULL)
2164 root = create_tree (dfa, tree, eor, CONCAT); 2094 root = create_tree (dfa, tree, eor, CONCAT);
2165 else 2095 else
2166 root = eor; 2096 root = eor;
2167 if (BE (eor == NULL || root == NULL, 0)) 2097 if (__glibc_unlikely (eor == NULL || root == NULL))
2168 { 2098 {
2169 *err = REG_ESPACE; 2099 *err = REG_ESPACE;
2170 return NULL; 2100 return NULL;
@@ -2187,8 +2117,9 @@ parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2187{ 2117{
2188 re_dfa_t *dfa = preg->buffer; 2118 re_dfa_t *dfa = preg->buffer;
2189 bin_tree_t *tree, *branch = NULL; 2119 bin_tree_t *tree, *branch = NULL;
2120 bitset_word_t initial_bkref_map = dfa->completed_bkref_map;
2190 tree = parse_branch (regexp, preg, token, syntax, nest, err); 2121 tree = parse_branch (regexp, preg, token, syntax, nest, err);
2191 if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2122 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL))
2192 return NULL; 2123 return NULL;
2193 2124
2194 while (token->type == OP_ALT) 2125 while (token->type == OP_ALT)
@@ -2197,14 +2128,21 @@ parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2197 if (token->type != OP_ALT && token->type != END_OF_RE 2128 if (token->type != OP_ALT && token->type != END_OF_RE
2198 && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) 2129 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
2199 { 2130 {
2131 bitset_word_t accumulated_bkref_map = dfa->completed_bkref_map;
2132 dfa->completed_bkref_map = initial_bkref_map;
2200 branch = parse_branch (regexp, preg, token, syntax, nest, err); 2133 branch = parse_branch (regexp, preg, token, syntax, nest, err);
2201 if (BE (*err != REG_NOERROR && branch == NULL, 0)) 2134 if (__glibc_unlikely (*err != REG_NOERROR && branch == NULL))
2202 return NULL; 2135 {
2136 if (tree != NULL)
2137 postorder (tree, free_tree, NULL);
2138 return NULL;
2139 }
2140 dfa->completed_bkref_map |= accumulated_bkref_map;
2203 } 2141 }
2204 else 2142 else
2205 branch = NULL; 2143 branch = NULL;
2206 tree = create_tree (dfa, tree, branch, OP_ALT); 2144 tree = create_tree (dfa, tree, branch, OP_ALT);
2207 if (BE (tree == NULL, 0)) 2145 if (__glibc_unlikely (tree == NULL))
2208 { 2146 {
2209 *err = REG_ESPACE; 2147 *err = REG_ESPACE;
2210 return NULL; 2148 return NULL;
@@ -2229,14 +2167,14 @@ parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
2229 bin_tree_t *tree, *expr; 2167 bin_tree_t *tree, *expr;
2230 re_dfa_t *dfa = preg->buffer; 2168 re_dfa_t *dfa = preg->buffer;
2231 tree = parse_expression (regexp, preg, token, syntax, nest, err); 2169 tree = parse_expression (regexp, preg, token, syntax, nest, err);
2232 if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2170 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL))
2233 return NULL; 2171 return NULL;
2234 2172
2235 while (token->type != OP_ALT && token->type != END_OF_RE 2173 while (token->type != OP_ALT && token->type != END_OF_RE
2236 && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) 2174 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
2237 { 2175 {
2238 expr = parse_expression (regexp, preg, token, syntax, nest, err); 2176 expr = parse_expression (regexp, preg, token, syntax, nest, err);
2239 if (BE (*err != REG_NOERROR && expr == NULL, 0)) 2177 if (__glibc_unlikely (*err != REG_NOERROR && expr == NULL))
2240 { 2178 {
2241 if (tree != NULL) 2179 if (tree != NULL)
2242 postorder (tree, free_tree, NULL); 2180 postorder (tree, free_tree, NULL);
@@ -2277,12 +2215,11 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2277 { 2215 {
2278 case CHARACTER: 2216 case CHARACTER:
2279 tree = create_token_tree (dfa, NULL, NULL, token); 2217 tree = create_token_tree (dfa, NULL, NULL, token);
2280 if (BE (tree == NULL, 0)) 2218 if (__glibc_unlikely (tree == NULL))
2281 { 2219 {
2282 *err = REG_ESPACE; 2220 *err = REG_ESPACE;
2283 return NULL; 2221 return NULL;
2284 } 2222 }
2285#ifdef RE_ENABLE_I18N
2286 if (dfa->mb_cur_max > 1) 2223 if (dfa->mb_cur_max > 1)
2287 { 2224 {
2288 while (!re_string_eoi (regexp) 2225 while (!re_string_eoi (regexp)
@@ -2292,34 +2229,36 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2292 fetch_token (token, regexp, syntax); 2229 fetch_token (token, regexp, syntax);
2293 mbc_remain = create_token_tree (dfa, NULL, NULL, token); 2230 mbc_remain = create_token_tree (dfa, NULL, NULL, token);
2294 tree = create_tree (dfa, tree, mbc_remain, CONCAT); 2231 tree = create_tree (dfa, tree, mbc_remain, CONCAT);
2295 if (BE (mbc_remain == NULL || tree == NULL, 0)) 2232 if (__glibc_unlikely (mbc_remain == NULL || tree == NULL))
2296 { 2233 {
2297 *err = REG_ESPACE; 2234 *err = REG_ESPACE;
2298 return NULL; 2235 return NULL;
2299 } 2236 }
2300 } 2237 }
2301 } 2238 }
2302#endif
2303 break; 2239 break;
2240
2304 case OP_OPEN_SUBEXP: 2241 case OP_OPEN_SUBEXP:
2305 tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); 2242 tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
2306 if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2243 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL))
2307 return NULL; 2244 return NULL;
2308 break; 2245 break;
2246
2309 case OP_OPEN_BRACKET: 2247 case OP_OPEN_BRACKET:
2310 tree = parse_bracket_exp (regexp, dfa, token, syntax, err); 2248 tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
2311 if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2249 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL))
2312 return NULL; 2250 return NULL;
2313 break; 2251 break;
2252
2314 case OP_BACK_REF: 2253 case OP_BACK_REF:
2315 if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1)) 2254 if (!__glibc_likely (dfa->completed_bkref_map & (1 << token->opr.idx)))
2316 { 2255 {
2317 *err = REG_ESUBREG; 2256 *err = REG_ESUBREG;
2318 return NULL; 2257 return NULL;
2319 } 2258 }
2320 dfa->used_bkref_map |= 1 << token->opr.idx; 2259 dfa->used_bkref_map |= 1 << token->opr.idx;
2321 tree = create_token_tree (dfa, NULL, NULL, token); 2260 tree = create_token_tree (dfa, NULL, NULL, token);
2322 if (BE (tree == NULL, 0)) 2261 if (__glibc_unlikely (tree == NULL))
2323 { 2262 {
2324 *err = REG_ESPACE; 2263 *err = REG_ESPACE;
2325 return NULL; 2264 return NULL;
@@ -2327,13 +2266,14 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2327 ++dfa->nbackref; 2266 ++dfa->nbackref;
2328 dfa->has_mb_node = 1; 2267 dfa->has_mb_node = 1;
2329 break; 2268 break;
2269
2330 case OP_OPEN_DUP_NUM: 2270 case OP_OPEN_DUP_NUM:
2331 if (syntax & RE_CONTEXT_INVALID_DUP) 2271 if (syntax & RE_CONTEXT_INVALID_DUP)
2332 { 2272 {
2333 *err = REG_BADRPT; 2273 *err = REG_BADRPT;
2334 return NULL; 2274 return NULL;
2335 } 2275 }
2336 /* FALLTHROUGH */ 2276 FALLTHROUGH;
2337 case OP_DUP_ASTERISK: 2277 case OP_DUP_ASTERISK:
2338 case OP_DUP_PLUS: 2278 case OP_DUP_PLUS:
2339 case OP_DUP_QUESTION: 2279 case OP_DUP_QUESTION:
@@ -2347,15 +2287,15 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2347 fetch_token (token, regexp, syntax); 2287 fetch_token (token, regexp, syntax);
2348 return parse_expression (regexp, preg, token, syntax, nest, err); 2288 return parse_expression (regexp, preg, token, syntax, nest, err);
2349 } 2289 }
2350 /* else fall through */ 2290 FALLTHROUGH;
2351 case OP_CLOSE_SUBEXP: 2291 case OP_CLOSE_SUBEXP:
2352 if ((token->type == OP_CLOSE_SUBEXP) && 2292 if ((token->type == OP_CLOSE_SUBEXP)
2353 !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) 2293 && !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
2354 { 2294 {
2355 *err = REG_ERPAREN; 2295 *err = REG_ERPAREN;
2356 return NULL; 2296 return NULL;
2357 } 2297 }
2358 /* else fall through */ 2298 FALLTHROUGH;
2359 case OP_CLOSE_DUP_NUM: 2299 case OP_CLOSE_DUP_NUM:
2360 /* We treat it as a normal character. */ 2300 /* We treat it as a normal character. */
2361 2301
@@ -2364,12 +2304,13 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2364 /* mb_partial and word_char bits should be initialized already 2304 /* mb_partial and word_char bits should be initialized already
2365 by peek_token. */ 2305 by peek_token. */
2366 tree = create_token_tree (dfa, NULL, NULL, token); 2306 tree = create_token_tree (dfa, NULL, NULL, token);
2367 if (BE (tree == NULL, 0)) 2307 if (__glibc_unlikely (tree == NULL))
2368 { 2308 {
2369 *err = REG_ESPACE; 2309 *err = REG_ESPACE;
2370 return NULL; 2310 return NULL;
2371 } 2311 }
2372 break; 2312 break;
2313
2373 case ANCHOR: 2314 case ANCHOR:
2374 if ((token->opr.ctx_type 2315 if ((token->opr.ctx_type
2375 & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST)) 2316 & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
@@ -2393,7 +2334,8 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2393 } 2334 }
2394 tree_last = create_token_tree (dfa, NULL, NULL, token); 2335 tree_last = create_token_tree (dfa, NULL, NULL, token);
2395 tree = create_tree (dfa, tree_first, tree_last, OP_ALT); 2336 tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
2396 if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)) 2337 if (__glibc_unlikely (tree_first == NULL || tree_last == NULL
2338 || tree == NULL))
2397 { 2339 {
2398 *err = REG_ESPACE; 2340 *err = REG_ESPACE;
2399 return NULL; 2341 return NULL;
@@ -2402,7 +2344,7 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2402 else 2344 else
2403 { 2345 {
2404 tree = create_token_tree (dfa, NULL, NULL, token); 2346 tree = create_token_tree (dfa, NULL, NULL, token);
2405 if (BE (tree == NULL, 0)) 2347 if (__glibc_unlikely (tree == NULL))
2406 { 2348 {
2407 *err = REG_ESPACE; 2349 *err = REG_ESPACE;
2408 return NULL; 2350 return NULL;
@@ -2414,9 +2356,10 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2414 it must not be "<ANCHOR(^)><REPEAT(*)>". */ 2356 it must not be "<ANCHOR(^)><REPEAT(*)>". */
2415 fetch_token (token, regexp, syntax); 2357 fetch_token (token, regexp, syntax);
2416 return tree; 2358 return tree;
2359
2417 case OP_PERIOD: 2360 case OP_PERIOD:
2418 tree = create_token_tree (dfa, NULL, NULL, token); 2361 tree = create_token_tree (dfa, NULL, NULL, token);
2419 if (BE (tree == NULL, 0)) 2362 if (__glibc_unlikely (tree == NULL))
2420 { 2363 {
2421 *err = REG_ESPACE; 2364 *err = REG_ESPACE;
2422 return NULL; 2365 return NULL;
@@ -2424,35 +2367,38 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2424 if (dfa->mb_cur_max > 1) 2367 if (dfa->mb_cur_max > 1)
2425 dfa->has_mb_node = 1; 2368 dfa->has_mb_node = 1;
2426 break; 2369 break;
2370
2427 case OP_WORD: 2371 case OP_WORD:
2428 case OP_NOTWORD: 2372 case OP_NOTWORD:
2429 tree = build_charclass_op (dfa, regexp->trans, 2373 tree = build_charclass_op (dfa, regexp->trans,
2430 "alnum", 2374 "alnum",
2431 "_", 2375 "_",
2432 token->type == OP_NOTWORD, err); 2376 token->type == OP_NOTWORD, err);
2433 if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2377 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL))
2434 return NULL; 2378 return NULL;
2435 break; 2379 break;
2380
2436 case OP_SPACE: 2381 case OP_SPACE:
2437 case OP_NOTSPACE: 2382 case OP_NOTSPACE:
2438 tree = build_charclass_op (dfa, regexp->trans, 2383 tree = build_charclass_op (dfa, regexp->trans,
2439 "space", 2384 "space",
2440 "", 2385 "",
2441 token->type == OP_NOTSPACE, err); 2386 token->type == OP_NOTSPACE, err);
2442 if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2387 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL))
2443 return NULL; 2388 return NULL;
2444 break; 2389 break;
2390
2445 case OP_ALT: 2391 case OP_ALT:
2446 case END_OF_RE: 2392 case END_OF_RE:
2447 return NULL; 2393 return NULL;
2394
2448 case BACK_SLASH: 2395 case BACK_SLASH:
2449 *err = REG_EESCAPE; 2396 *err = REG_EESCAPE;
2450 return NULL; 2397 return NULL;
2398
2451 default: 2399 default:
2452 /* Must not happen? */ 2400 /* Must not happen? */
2453#ifdef DEBUG 2401 DEBUG_ASSERT (false);
2454 assert (0);
2455#endif
2456 return NULL; 2402 return NULL;
2457 } 2403 }
2458 fetch_token (token, regexp, syntax); 2404 fetch_token (token, regexp, syntax);
@@ -2460,14 +2406,22 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2460 while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS 2406 while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
2461 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) 2407 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
2462 { 2408 {
2463 tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); 2409 bin_tree_t *dup_tree = parse_dup_op (tree, regexp, dfa, token,
2464 if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2410 syntax, err);
2465 return NULL; 2411 if (__glibc_unlikely (*err != REG_NOERROR && dup_tree == NULL))
2412 {
2413 if (tree != NULL)
2414 postorder (tree, free_tree, NULL);
2415 return NULL;
2416 }
2417 tree = dup_tree;
2466 /* In BRE consecutive duplications are not allowed. */ 2418 /* In BRE consecutive duplications are not allowed. */
2467 if ((syntax & RE_CONTEXT_INVALID_DUP) 2419 if ((syntax & RE_CONTEXT_INVALID_DUP)
2468 && (token->type == OP_DUP_ASTERISK 2420 && (token->type == OP_DUP_ASTERISK
2469 || token->type == OP_OPEN_DUP_NUM)) 2421 || token->type == OP_OPEN_DUP_NUM))
2470 { 2422 {
2423 if (tree != NULL)
2424 postorder (tree, free_tree, NULL);
2471 *err = REG_BADRPT; 2425 *err = REG_BADRPT;
2472 return NULL; 2426 return NULL;
2473 } 2427 }
@@ -2500,13 +2454,14 @@ parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2500 else 2454 else
2501 { 2455 {
2502 tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); 2456 tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
2503 if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)) 2457 if (__glibc_unlikely (*err == REG_NOERROR
2458 && token->type != OP_CLOSE_SUBEXP))
2504 { 2459 {
2505 if (tree != NULL) 2460 if (tree != NULL)
2506 postorder (tree, free_tree, NULL); 2461 postorder (tree, free_tree, NULL);
2507 *err = REG_EPAREN; 2462 *err = REG_EPAREN;
2508 } 2463 }
2509 if (BE (*err != REG_NOERROR, 0)) 2464 if (__glibc_unlikely (*err != REG_NOERROR))
2510 return NULL; 2465 return NULL;
2511 } 2466 }
2512 2467
@@ -2514,7 +2469,7 @@ parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2514 dfa->completed_bkref_map |= 1 << cur_nsub; 2469 dfa->completed_bkref_map |= 1 << cur_nsub;
2515 2470
2516 tree = create_tree (dfa, tree, NULL, SUBEXP); 2471 tree = create_tree (dfa, tree, NULL, SUBEXP);
2517 if (BE (tree == NULL, 0)) 2472 if (__glibc_unlikely (tree == NULL))
2518 { 2473 {
2519 *err = REG_ESPACE; 2474 *err = REG_ESPACE;
2520 return NULL; 2475 return NULL;
@@ -2537,7 +2492,7 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2537 { 2492 {
2538 end = 0; 2493 end = 0;
2539 start = fetch_number (regexp, token, syntax); 2494 start = fetch_number (regexp, token, syntax);
2540 if (start == REG_MISSING) 2495 if (start == -1)
2541 { 2496 {
2542 if (token->type == CHARACTER && token->opr.c == ',') 2497 if (token->type == CHARACTER && token->opr.c == ',')
2543 start = 0; /* We treat "{,m}" as "{0,m}". */ 2498 start = 0; /* We treat "{,m}" as "{0,m}". */
@@ -2547,17 +2502,17 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2547 return NULL; 2502 return NULL;
2548 } 2503 }
2549 } 2504 }
2550 if (BE (start != REG_ERROR, 1)) 2505 if (__glibc_likely (start != -2))
2551 { 2506 {
2552 /* We treat "{n}" as "{n,n}". */ 2507 /* We treat "{n}" as "{n,n}". */
2553 end = ((token->type == OP_CLOSE_DUP_NUM) ? start 2508 end = ((token->type == OP_CLOSE_DUP_NUM) ? start
2554 : ((token->type == CHARACTER && token->opr.c == ',') 2509 : ((token->type == CHARACTER && token->opr.c == ',')
2555 ? fetch_number (regexp, token, syntax) : REG_ERROR)); 2510 ? fetch_number (regexp, token, syntax) : -2));
2556 } 2511 }
2557 if (BE (start == REG_ERROR || end == REG_ERROR, 0)) 2512 if (__glibc_unlikely (start == -2 || end == -2))
2558 { 2513 {
2559 /* Invalid sequence. */ 2514 /* Invalid sequence. */
2560 if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0)) 2515 if (__glibc_unlikely (!(syntax & RE_INVALID_INTERVAL_ORD)))
2561 { 2516 {
2562 if (token->type == END_OF_RE) 2517 if (token->type == END_OF_RE)
2563 *err = REG_EBRACE; 2518 *err = REG_EBRACE;
@@ -2576,15 +2531,15 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2576 return elem; 2531 return elem;
2577 } 2532 }
2578 2533
2579 if (BE ((end != REG_MISSING && start > end) 2534 if (__glibc_unlikely ((end != -1 && start > end)
2580 || token->type != OP_CLOSE_DUP_NUM, 0)) 2535 || token->type != OP_CLOSE_DUP_NUM))
2581 { 2536 {
2582 /* First number greater than second. */ 2537 /* First number greater than second. */
2583 *err = REG_BADBR; 2538 *err = REG_BADBR;
2584 return NULL; 2539 return NULL;
2585 } 2540 }
2586 2541
2587 if (BE (RE_DUP_MAX < (end == REG_MISSING ? start : end), 0)) 2542 if (__glibc_unlikely (RE_DUP_MAX < (end == -1 ? start : end)))
2588 { 2543 {
2589 *err = REG_ESIZE; 2544 *err = REG_ESIZE;
2590 return NULL; 2545 return NULL;
@@ -2593,28 +2548,28 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2593 else 2548 else
2594 { 2549 {
2595 start = (token->type == OP_DUP_PLUS) ? 1 : 0; 2550 start = (token->type == OP_DUP_PLUS) ? 1 : 0;
2596 end = (token->type == OP_DUP_QUESTION) ? 1 : REG_MISSING; 2551 end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
2597 } 2552 }
2598 2553
2599 fetch_token (token, regexp, syntax); 2554 fetch_token (token, regexp, syntax);
2600 2555
2601 if (BE (elem == NULL, 0)) 2556 if (__glibc_unlikely (elem == NULL))
2602 return NULL; 2557 return NULL;
2603 if (BE (start == 0 && end == 0, 0)) 2558 if (__glibc_unlikely (start == 0 && end == 0))
2604 { 2559 {
2605 postorder (elem, free_tree, NULL); 2560 postorder (elem, free_tree, NULL);
2606 return NULL; 2561 return NULL;
2607 } 2562 }
2608 2563
2609 /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */ 2564 /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
2610 if (BE (start > 0, 0)) 2565 if (__glibc_unlikely (start > 0))
2611 { 2566 {
2612 tree = elem; 2567 tree = elem;
2613 for (i = 2; i <= start; ++i) 2568 for (i = 2; i <= start; ++i)
2614 { 2569 {
2615 elem = duplicate_tree (elem, dfa); 2570 elem = duplicate_tree (elem, dfa);
2616 tree = create_tree (dfa, tree, elem, CONCAT); 2571 tree = create_tree (dfa, tree, elem, CONCAT);
2617 if (BE (elem == NULL || tree == NULL, 0)) 2572 if (__glibc_unlikely (elem == NULL || tree == NULL))
2618 goto parse_dup_op_espace; 2573 goto parse_dup_op_espace;
2619 } 2574 }
2620 2575
@@ -2623,6 +2578,8 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2623 2578
2624 /* Duplicate ELEM before it is marked optional. */ 2579 /* Duplicate ELEM before it is marked optional. */
2625 elem = duplicate_tree (elem, dfa); 2580 elem = duplicate_tree (elem, dfa);
2581 if (__glibc_unlikely (elem == NULL))
2582 goto parse_dup_op_espace;
2626 old_tree = tree; 2583 old_tree = tree;
2627 } 2584 }
2628 else 2585 else
@@ -2635,27 +2592,23 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2635 } 2592 }
2636 2593
2637 tree = create_tree (dfa, elem, NULL, 2594 tree = create_tree (dfa, elem, NULL,
2638 (end == REG_MISSING ? OP_DUP_ASTERISK : OP_ALT)); 2595 (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
2639 if (BE (tree == NULL, 0)) 2596 if (__glibc_unlikely (tree == NULL))
2640 goto parse_dup_op_espace; 2597 goto parse_dup_op_espace;
2641 2598
2642/* From gnulib's "intprops.h": 2599 /* This loop is actually executed only when end != -1,
2643 True if the arithmetic type T is signed. */
2644#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))
2645
2646 /* This loop is actually executed only when end != REG_MISSING,
2647 to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have 2600 to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
2648 already created the start+1-th copy. */ 2601 already created the start+1-th copy. */
2649 if (TYPE_SIGNED (Idx) || end != REG_MISSING) 2602 if (TYPE_SIGNED (Idx) || end != -1)
2650 for (i = start + 2; i <= end; ++i) 2603 for (i = start + 2; i <= end; ++i)
2651 { 2604 {
2652 elem = duplicate_tree (elem, dfa); 2605 elem = duplicate_tree (elem, dfa);
2653 tree = create_tree (dfa, tree, elem, CONCAT); 2606 tree = create_tree (dfa, tree, elem, CONCAT);
2654 if (BE (elem == NULL || tree == NULL, 0)) 2607 if (__glibc_unlikely (elem == NULL || tree == NULL))
2655 goto parse_dup_op_espace; 2608 goto parse_dup_op_espace;
2656 2609
2657 tree = create_tree (dfa, tree, NULL, OP_ALT); 2610 tree = create_tree (dfa, tree, NULL, OP_ALT);
2658 if (BE (tree == NULL, 0)) 2611 if (__glibc_unlikely (tree == NULL))
2659 goto parse_dup_op_espace; 2612 goto parse_dup_op_espace;
2660 } 2613 }
2661 2614
@@ -2674,146 +2627,131 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2674#define BRACKET_NAME_BUF_SIZE 32 2627#define BRACKET_NAME_BUF_SIZE 32
2675 2628
2676#ifndef _LIBC 2629#ifndef _LIBC
2677 /* Local function for parse_bracket_exp only used in case of NOT _LIBC. 2630
2678 Build the range expression which starts from START_ELEM, and ends 2631/* Convert the byte B to the corresponding wide character. In a
2679 at END_ELEM. The result are written to MBCSET and SBCSET. 2632 unibyte locale, treat B as itself. In a multibyte locale, return
2680 RANGE_ALLOC is the allocated size of mbcset->range_starts, and 2633 WEOF if B is an encoding error. */
2681 mbcset->range_ends, is a pointer argument since we may 2634static wint_t
2682 update it. */ 2635parse_byte (unsigned char b, re_dfa_t const *dfa)
2636{
2637 return dfa->mb_cur_max > 1 ? __btowc (b) : b;
2638}
2639
2640/* Local function for parse_bracket_exp used in _LIBC environment.
2641 Build the range expression which starts from START_ELEM, and ends
2642 at END_ELEM. The result are written to MBCSET and SBCSET.
2643 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2644 mbcset->range_ends, is a pointer argument since we may
2645 update it. */
2683 2646
2684static reg_errcode_t 2647static reg_errcode_t
2685internal_function 2648build_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc,
2686# ifdef RE_ENABLE_I18N 2649 bracket_elem_t *start_elem, bracket_elem_t *end_elem,
2687build_range_exp (const reg_syntax_t syntax, 2650 re_dfa_t *dfa, reg_syntax_t syntax, uint_fast32_t nrules,
2688 bitset_t sbcset, 2651 const unsigned char *collseqmb, const char *collseqwc,
2689 re_charset_t *mbcset, 2652 int_fast32_t table_size, const void *symb_table,
2690 Idx *range_alloc, 2653 const unsigned char *extra)
2691 const bracket_elem_t *start_elem,
2692 const bracket_elem_t *end_elem)
2693# else /* not RE_ENABLE_I18N */
2694build_range_exp (const reg_syntax_t syntax,
2695 bitset_t sbcset,
2696 const bracket_elem_t *start_elem,
2697 const bracket_elem_t *end_elem)
2698# endif /* not RE_ENABLE_I18N */
2699{ 2654{
2700 unsigned int start_ch, end_ch;
2701 /* Equivalence Classes and Character Classes can't be a range start/end. */ 2655 /* Equivalence Classes and Character Classes can't be a range start/end. */
2702 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS 2656 if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
2703 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, 2657 || start_elem->type == CHAR_CLASS
2704 0)) 2658 || end_elem->type == EQUIV_CLASS
2659 || end_elem->type == CHAR_CLASS))
2705 return REG_ERANGE; 2660 return REG_ERANGE;
2706 2661
2707 /* We can handle no multi character collating elements without libc 2662 /* We can handle no multi character collating elements without libc
2708 support. */ 2663 support. */
2709 if (BE ((start_elem->type == COLL_SYM 2664 if (__glibc_unlikely ((start_elem->type == COLL_SYM
2710 && strlen ((char *) start_elem->opr.name) > 1) 2665 && strlen ((char *) start_elem->opr.name) > 1)
2711 || (end_elem->type == COLL_SYM 2666 || (end_elem->type == COLL_SYM
2712 && strlen ((char *) end_elem->opr.name) > 1), 0)) 2667 && strlen ((char *) end_elem->opr.name) > 1)))
2713 return REG_ECOLLATE; 2668 return REG_ECOLLATE;
2714 2669
2715# ifdef RE_ENABLE_I18N 2670 unsigned int
2716 {
2717 wchar_t wc;
2718 wint_t start_wc;
2719 wint_t end_wc;
2720
2721 start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch 2671 start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
2722 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] 2672 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2723 : 0)); 2673 : 0)),
2724 end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch 2674 end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
2725 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] 2675 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2726 : 0)); 2676 : 0));
2677 wint_t
2727 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) 2678 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
2728 ? __btowc (start_ch) : start_elem->opr.wch); 2679 ? parse_byte (start_ch, dfa) : start_elem->opr.wch),
2729 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) 2680 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
2730 ? __btowc (end_ch) : end_elem->opr.wch); 2681 ? parse_byte (end_ch, dfa) : end_elem->opr.wch);
2731 if (start_wc == WEOF || end_wc == WEOF)
2732 return REG_ECOLLATE;
2733 else if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc, 0))
2734 return REG_ERANGE;
2735
2736 /* Got valid collation sequence values, add them as a new entry.
2737 However, for !_LIBC we have no collation elements: if the
2738 character set is single byte, the single byte character set
2739 that we build below suffices. parse_bracket_exp passes
2740 no MBCSET if dfa->mb_cur_max == 1. */
2741 if (mbcset)
2742 {
2743 /* Check the space of the arrays. */
2744 if (BE (*range_alloc == mbcset->nranges, 0))
2745 {
2746 /* There is not enough space, need realloc. */
2747 wchar_t *new_array_start, *new_array_end;
2748 Idx new_nranges;
2749
2750 /* +1 in case of mbcset->nranges is 0. */
2751 new_nranges = 2 * mbcset->nranges + 1;
2752 /* Use realloc since mbcset->range_starts and mbcset->range_ends
2753 are NULL if *range_alloc == 0. */
2754 new_array_start = re_realloc (mbcset->range_starts, wchar_t,
2755 new_nranges);
2756 new_array_end = re_realloc (mbcset->range_ends, wchar_t,
2757 new_nranges);
2758 2682
2759 if (BE (new_array_start == NULL || new_array_end == NULL, 0)) 2683 if (start_wc == WEOF || end_wc == WEOF)
2760 return REG_ESPACE; 2684 return REG_ECOLLATE;
2685 else if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
2686 && start_wc > end_wc))
2687 return REG_ERANGE;
2761 2688
2762 mbcset->range_starts = new_array_start; 2689 /* Got valid collation sequence values, add them as a new entry.
2763 mbcset->range_ends = new_array_end; 2690 However, for !_LIBC we have no collation elements: if the
2764 *range_alloc = new_nranges; 2691 character set is single byte, the single byte character set
2765 } 2692 that we build below suffices. parse_bracket_exp passes
2693 no MBCSET if dfa->mb_cur_max == 1. */
2694 if (dfa->mb_cur_max > 1)
2695 {
2696 /* Check the space of the arrays. */
2697 if (__glibc_unlikely (*range_alloc == mbcset->nranges))
2698 {
2699 /* There is not enough space, need realloc. */
2700 wchar_t *new_array_start, *new_array_end;
2701 Idx new_nranges;
2766 2702
2767 mbcset->range_starts[mbcset->nranges] = start_wc; 2703 /* +1 in case of mbcset->nranges is 0. */
2768 mbcset->range_ends[mbcset->nranges++] = end_wc; 2704 new_nranges = 2 * mbcset->nranges + 1;
2769 } 2705 /* Use realloc since mbcset->range_starts and mbcset->range_ends
2706 are NULL if *range_alloc == 0. */
2707 new_array_start = re_realloc (mbcset->range_starts, wchar_t,
2708 new_nranges);
2709 new_array_end = re_realloc (mbcset->range_ends, wchar_t,
2710 new_nranges);
2711
2712 if (__glibc_unlikely (new_array_start == NULL
2713 || new_array_end == NULL))
2714 {
2715 re_free (new_array_start);
2716 re_free (new_array_end);
2717 return REG_ESPACE;
2718 }
2719
2720 mbcset->range_starts = new_array_start;
2721 mbcset->range_ends = new_array_end;
2722 *range_alloc = new_nranges;
2723 }
2724
2725 mbcset->range_starts[mbcset->nranges] = start_wc;
2726 mbcset->range_ends[mbcset->nranges++] = end_wc;
2727 }
2728
2729 /* Build the table for single byte characters. */
2730 for (wchar_t wc = 0; wc < SBC_MAX; ++wc)
2731 {
2732 if (start_wc <= wc && wc <= end_wc)
2733 bitset_set (sbcset, wc);
2734 }
2770 2735
2771 /* Build the table for single byte characters. */
2772 for (wc = 0; wc < SBC_MAX; ++wc)
2773 {
2774 if (start_wc <= wc && wc <= end_wc)
2775 bitset_set (sbcset, wc);
2776 }
2777 }
2778# else /* not RE_ENABLE_I18N */
2779 {
2780 unsigned int ch;
2781 start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
2782 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2783 : 0));
2784 end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
2785 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2786 : 0));
2787 if (start_ch > end_ch)
2788 return REG_ERANGE;
2789 /* Build the table for single byte characters. */
2790 for (ch = 0; ch < SBC_MAX; ++ch)
2791 if (start_ch <= ch && ch <= end_ch)
2792 bitset_set (sbcset, ch);
2793 }
2794# endif /* not RE_ENABLE_I18N */
2795 return REG_NOERROR; 2736 return REG_NOERROR;
2796} 2737}
2797#endif /* not _LIBC */ 2738#endif /* not _LIBC */
2798 2739
2799#ifndef _LIBC 2740#ifndef _LIBC
2800/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.. 2741/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.
2801 Build the collating element which is represented by NAME. 2742 Build the collating element which is represented by NAME.
2802 The result are written to MBCSET and SBCSET. 2743 The result are written to MBCSET and SBCSET.
2803 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a 2744 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2804 pointer argument since we may update it. */ 2745 pointer argument since we may update it. */
2805 2746
2806static reg_errcode_t 2747static reg_errcode_t
2807internal_function
2808# ifdef RE_ENABLE_I18N
2809build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, 2748build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
2810 Idx *coll_sym_alloc, const unsigned char *name) 2749 Idx *coll_sym_alloc, const unsigned char *name,
2811# else /* not RE_ENABLE_I18N */ 2750 uint_fast32_t nrules, int_fast32_t table_size,
2812build_collating_symbol (bitset_t sbcset, const unsigned char *name) 2751 const void *symb_table, const unsigned char *extra)
2813# endif /* not RE_ENABLE_I18N */
2814{ 2752{
2815 size_t name_len = strlen ((const char *) name); 2753 size_t name_len = strlen ((const char *) name);
2816 if (BE (name_len != 1, 0)) 2754 if (__glibc_unlikely (name_len != 1))
2817 return REG_ECOLLATE; 2755 return REG_ECOLLATE;
2818 else 2756 else
2819 { 2757 {
@@ -2823,267 +2761,280 @@ build_collating_symbol (bitset_t sbcset, const unsigned char *name)
2823} 2761}
2824#endif /* not _LIBC */ 2762#endif /* not _LIBC */
2825 2763
2826/* This function parse bracket expression like "[abc]", "[a-c]",
2827 "[[.a-a.]]" etc. */
2828
2829static bin_tree_t *
2830parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
2831 reg_syntax_t syntax, reg_errcode_t *err)
2832{
2833#ifdef _LIBC 2764#ifdef _LIBC
2834 const unsigned char *collseqmb; 2765/* Local function for parse_bracket_exp used in _LIBC environment.
2835 const char *collseqwc; 2766 Seek the collating symbol entry corresponding to NAME.
2836 uint32_t nrules; 2767 Return the index of the symbol in the SYMB_TABLE,
2837 int32_t table_size; 2768 or -1 if not found. */
2838 const int32_t *symb_table; 2769
2839 const unsigned char *extra; 2770static __always_inline int32_t
2840 2771seek_collating_symbol_entry (const unsigned char *name, size_t name_len,
2841 /* Local function for parse_bracket_exp used in _LIBC environment. 2772 const int32_t *symb_table,
2842 Seek the collating symbol entry corresponding to NAME. 2773 int_fast32_t table_size,
2843 Return the index of the symbol in the SYMB_TABLE, 2774 const unsigned char *extra)
2844 or -1 if not found. */ 2775{
2845 2776 int_fast32_t elem;
2846 auto inline int32_t
2847 __attribute__ ((always_inline))
2848 seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
2849 {
2850 int32_t elem;
2851
2852 for (elem = 0; elem < table_size; elem++)
2853 if (symb_table[2 * elem] != 0)
2854 {
2855 int32_t idx = symb_table[2 * elem + 1];
2856 /* Skip the name of collating element name. */
2857 idx += 1 + extra[idx];
2858 if (/* Compare the length of the name. */
2859 name_len == extra[idx]
2860 /* Compare the name. */
2861 && memcmp (name, &extra[idx + 1], name_len) == 0)
2862 /* Yep, this is the entry. */
2863 return elem;
2864 }
2865 return -1;
2866 }
2867 2777
2868 /* Local function for parse_bracket_exp used in _LIBC environment. 2778 for (elem = 0; elem < table_size; elem++)
2869 Look up the collation sequence value of BR_ELEM. 2779 if (symb_table[2 * elem] != 0)
2870 Return the value if succeeded, UINT_MAX otherwise. */ 2780 {
2781 int32_t idx = symb_table[2 * elem + 1];
2782 /* Skip the name of collating element name. */
2783 idx += 1 + extra[idx];
2784 if (/* Compare the length of the name. */
2785 name_len == extra[idx]
2786 /* Compare the name. */
2787 && memcmp (name, &extra[idx + 1], name_len) == 0)
2788 /* Yep, this is the entry. */
2789 return elem;
2790 }
2791 return -1;
2792}
2871 2793
2872 auto inline unsigned int 2794/* Local function for parse_bracket_exp used in _LIBC environment.
2873 __attribute__ ((always_inline)) 2795 Look up the collation sequence value of BR_ELEM.
2874 lookup_collation_sequence_value (bracket_elem_t *br_elem) 2796 Return the value if succeeded, UINT_MAX otherwise. */
2797
2798static __always_inline unsigned int
2799lookup_collation_sequence_value (bracket_elem_t *br_elem, uint32_t nrules,
2800 const unsigned char *collseqmb,
2801 const char *collseqwc,
2802 int_fast32_t table_size,
2803 const int32_t *symb_table,
2804 const unsigned char *extra)
2805{
2806 if (br_elem->type == SB_CHAR)
2875 { 2807 {
2876 if (br_elem->type == SB_CHAR) 2808 /* if (MB_CUR_MAX == 1) */
2877 { 2809 if (nrules == 0)
2878 /* 2810 return collseqmb[br_elem->opr.ch];
2879 if (MB_CUR_MAX == 1) 2811 else
2880 */
2881 if (nrules == 0)
2882 return collseqmb[br_elem->opr.ch];
2883 else
2884 {
2885 wint_t wc = __btowc (br_elem->opr.ch);
2886 return __collseq_table_lookup (collseqwc, wc);
2887 }
2888 }
2889 else if (br_elem->type == MB_CHAR)
2890 { 2812 {
2891 if (nrules != 0) 2813 wint_t wc = __btowc (br_elem->opr.ch);
2892 return __collseq_table_lookup (collseqwc, br_elem->opr.wch); 2814 return __collseq_table_lookup (collseqwc, wc);
2893 } 2815 }
2894 else if (br_elem->type == COLL_SYM) 2816 }
2817 else if (br_elem->type == MB_CHAR)
2818 {
2819 if (nrules != 0)
2820 return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
2821 }
2822 else if (br_elem->type == COLL_SYM)
2823 {
2824 size_t sym_name_len = strlen ((char *) br_elem->opr.name);
2825 if (nrules != 0)
2895 { 2826 {
2896 size_t sym_name_len = strlen ((char *) br_elem->opr.name); 2827 int32_t elem, idx;
2897 if (nrules != 0) 2828 elem = seek_collating_symbol_entry (br_elem->opr.name,
2829 sym_name_len,
2830 symb_table, table_size,
2831 extra);
2832 if (elem != -1)
2898 { 2833 {
2899 int32_t elem, idx; 2834 /* We found the entry. */
2900 elem = seek_collating_symbol_entry (br_elem->opr.name, 2835 idx = symb_table[2 * elem + 1];
2901 sym_name_len); 2836 /* Skip the name of collating element name. */
2902 if (elem != -1) 2837 idx += 1 + extra[idx];
2903 { 2838 /* Skip the byte sequence of the collating element. */
2904 /* We found the entry. */ 2839 idx += 1 + extra[idx];
2905 idx = symb_table[2 * elem + 1]; 2840 /* Adjust for the alignment. */
2906 /* Skip the name of collating element name. */ 2841 idx = (idx + 3) & ~3;
2907 idx += 1 + extra[idx]; 2842 /* Skip the multibyte collation sequence value. */
2908 /* Skip the byte sequence of the collating element. */ 2843 idx += sizeof (unsigned int);
2909 idx += 1 + extra[idx]; 2844 /* Skip the wide char sequence of the collating element. */
2910 /* Adjust for the alignment. */ 2845 idx += sizeof (unsigned int) *
2911 idx = (idx + 3) & ~3; 2846 (1 + *(unsigned int *) (extra + idx));
2912 /* Skip the multibyte collation sequence value. */ 2847 /* Return the collation sequence value. */
2913 idx += sizeof (unsigned int); 2848 return *(unsigned int *) (extra + idx);
2914 /* Skip the wide char sequence of the collating element. */
2915 idx += sizeof (unsigned int) *
2916 (1 + *(unsigned int *) (extra + idx));
2917 /* Return the collation sequence value. */
2918 return *(unsigned int *) (extra + idx);
2919 }
2920 else if (sym_name_len == 1)
2921 {
2922 /* No valid character. Match it as a single byte
2923 character. */
2924 return collseqmb[br_elem->opr.name[0]];
2925 }
2926 } 2849 }
2927 else if (sym_name_len == 1) 2850 else if (sym_name_len == 1)
2928 return collseqmb[br_elem->opr.name[0]]; 2851 {
2852 /* No valid character. Match it as a single byte
2853 character. */
2854 return collseqmb[br_elem->opr.name[0]];
2855 }
2929 } 2856 }
2930 return UINT_MAX; 2857 else if (sym_name_len == 1)
2858 return collseqmb[br_elem->opr.name[0]];
2931 } 2859 }
2860 return UINT_MAX;
2861}
2932 2862
2933 /* Local function for parse_bracket_exp used in _LIBC environment. 2863/* Local function for parse_bracket_exp used in _LIBC environment.
2934 Build the range expression which starts from START_ELEM, and ends 2864 Build the range expression which starts from START_ELEM, and ends
2935 at END_ELEM. The result are written to MBCSET and SBCSET. 2865 at END_ELEM. The result are written to MBCSET and SBCSET.
2936 RANGE_ALLOC is the allocated size of mbcset->range_starts, and 2866 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2937 mbcset->range_ends, is a pointer argument since we may 2867 mbcset->range_ends, is a pointer argument since we may
2938 update it. */ 2868 update it. */
2939 2869
2940 auto inline reg_errcode_t 2870static __always_inline reg_errcode_t
2941 __attribute__ ((always_inline)) 2871build_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc,
2942 build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, 2872 bracket_elem_t *start_elem, bracket_elem_t *end_elem,
2943 bracket_elem_t *start_elem, bracket_elem_t *end_elem) 2873 re_dfa_t *dfa, reg_syntax_t syntax, uint32_t nrules,
2944 { 2874 const unsigned char *collseqmb, const char *collseqwc,
2945 unsigned int ch; 2875 int_fast32_t table_size, const int32_t *symb_table,
2946 uint32_t start_collseq; 2876 const unsigned char *extra)
2947 uint32_t end_collseq; 2877{
2948 2878 unsigned int ch;
2949 /* Equivalence Classes and Character Classes can't be a range 2879 uint32_t start_collseq;
2950 start/end. */ 2880 uint32_t end_collseq;
2951 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS 2881
2952 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, 2882 /* Equivalence Classes and Character Classes can't be a range
2953 0)) 2883 start/end. */
2954 return REG_ERANGE; 2884 if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
2885 || start_elem->type == CHAR_CLASS
2886 || end_elem->type == EQUIV_CLASS
2887 || end_elem->type == CHAR_CLASS))
2888 return REG_ERANGE;
2955 2889
2956 /* FIXME: Implement rational ranges here, too. */ 2890 /* FIXME: Implement rational ranges here, too. */
2957 start_collseq = lookup_collation_sequence_value (start_elem); 2891 start_collseq = lookup_collation_sequence_value (start_elem, nrules, collseqmb, collseqwc,
2958 end_collseq = lookup_collation_sequence_value (end_elem); 2892 table_size, symb_table, extra);
2959 /* Check start/end collation sequence values. */ 2893 end_collseq = lookup_collation_sequence_value (end_elem, nrules, collseqmb, collseqwc,
2960 if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)) 2894 table_size, symb_table, extra);
2961 return REG_ECOLLATE; 2895 /* Check start/end collation sequence values. */
2962 if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) 2896 if (__glibc_unlikely (start_collseq == UINT_MAX
2963 return REG_ERANGE; 2897 || end_collseq == UINT_MAX))
2898 return REG_ECOLLATE;
2899 if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
2900 && start_collseq > end_collseq))
2901 return REG_ERANGE;
2964 2902
2965 /* Got valid collation sequence values, add them as a new entry. 2903 /* Got valid collation sequence values, add them as a new entry.
2966 However, if we have no collation elements, and the character set 2904 However, if we have no collation elements, and the character set
2967 is single byte, the single byte character set that we 2905 is single byte, the single byte character set that we
2968 build below suffices. */ 2906 build below suffices. */
2969 if (nrules > 0 || dfa->mb_cur_max > 1) 2907 if (nrules > 0 || dfa->mb_cur_max > 1)
2908 {
2909 /* Check the space of the arrays. */
2910 if (__glibc_unlikely (*range_alloc == mbcset->nranges))
2970 { 2911 {
2971 /* Check the space of the arrays. */ 2912 /* There is not enough space, need realloc. */
2972 if (BE (*range_alloc == mbcset->nranges, 0)) 2913 uint32_t *new_array_start;
2973 { 2914 uint32_t *new_array_end;
2974 /* There is not enough space, need realloc. */ 2915 int new_nranges;
2975 uint32_t *new_array_start;
2976 uint32_t *new_array_end;
2977 Idx new_nranges;
2978
2979 /* +1 in case of mbcset->nranges is 0. */
2980 new_nranges = 2 * mbcset->nranges + 1;
2981 new_array_start = re_realloc (mbcset->range_starts, uint32_t,
2982 new_nranges);
2983 new_array_end = re_realloc (mbcset->range_ends, uint32_t,
2984 new_nranges);
2985
2986 if (BE (new_array_start == NULL || new_array_end == NULL, 0))
2987 return REG_ESPACE;
2988 2916
2989 mbcset->range_starts = new_array_start; 2917 /* +1 in case of mbcset->nranges is 0. */
2990 mbcset->range_ends = new_array_end; 2918 new_nranges = 2 * mbcset->nranges + 1;
2991 *range_alloc = new_nranges; 2919 new_array_start = re_realloc (mbcset->range_starts, uint32_t,
2992 } 2920 new_nranges);
2921 new_array_end = re_realloc (mbcset->range_ends, uint32_t,
2922 new_nranges);
2993 2923
2994 mbcset->range_starts[mbcset->nranges] = start_collseq; 2924 if (__glibc_unlikely (new_array_start == NULL
2995 mbcset->range_ends[mbcset->nranges++] = end_collseq; 2925 || new_array_end == NULL))
2996 } 2926 return REG_ESPACE;
2997 2927
2998 /* Build the table for single byte characters. */ 2928 mbcset->range_starts = new_array_start;
2999 for (ch = 0; ch < SBC_MAX; ch++) 2929 mbcset->range_ends = new_array_end;
3000 { 2930 *range_alloc = new_nranges;
3001 uint32_t ch_collseq;
3002 /*
3003 if (MB_CUR_MAX == 1)
3004 */
3005 if (nrules == 0)
3006 ch_collseq = collseqmb[ch];
3007 else
3008 ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
3009 if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
3010 bitset_set (sbcset, ch);
3011 } 2931 }
3012 return REG_NOERROR; 2932
2933 mbcset->range_starts[mbcset->nranges] = start_collseq;
2934 mbcset->range_ends[mbcset->nranges++] = end_collseq;
3013 } 2935 }
3014 2936
3015 /* Local function for parse_bracket_exp used in _LIBC environment. 2937 /* Build the table for single byte characters. */
3016 Build the collating element which is represented by NAME. 2938 for (ch = 0; ch < SBC_MAX; ch++)
3017 The result are written to MBCSET and SBCSET. 2939 {
3018 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a 2940 uint32_t ch_collseq;
3019 pointer argument since we may update it. */ 2941 /* if (MB_CUR_MAX == 1) */
2942 if (nrules == 0)
2943 ch_collseq = collseqmb[ch];
2944 else
2945 ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
2946 if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
2947 bitset_set (sbcset, ch);
2948 }
2949 return REG_NOERROR;
2950}
3020 2951
3021 auto inline reg_errcode_t 2952/* Local function for parse_bracket_exp used in _LIBC environment.
3022 __attribute__ ((always_inline)) 2953 Build the collating element which is represented by NAME.
3023 build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, 2954 The result are written to MBCSET and SBCSET.
3024 Idx *coll_sym_alloc, const unsigned char *name) 2955 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2956 pointer argument since we may update it. */
2957
2958static __always_inline reg_errcode_t
2959build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
2960 Idx *coll_sym_alloc, const unsigned char *name,
2961 uint_fast32_t nrules, int_fast32_t table_size,
2962 const int32_t *symb_table, const unsigned char *extra)
2963{
2964 int32_t elem, idx;
2965 size_t name_len = strlen ((const char *) name);
2966 if (nrules != 0)
3025 { 2967 {
3026 int32_t elem, idx; 2968 elem = seek_collating_symbol_entry (name, name_len, symb_table,
3027 size_t name_len = strlen ((const char *) name); 2969 table_size, extra);
3028 if (nrules != 0) 2970 if (elem != -1)
3029 { 2971 {
3030 elem = seek_collating_symbol_entry (name, name_len); 2972 /* We found the entry. */
3031 if (elem != -1) 2973 idx = symb_table[2 * elem + 1];
3032 { 2974 /* Skip the name of collating element name. */
3033 /* We found the entry. */ 2975 idx += 1 + extra[idx];
3034 idx = symb_table[2 * elem + 1]; 2976 }
3035 /* Skip the name of collating element name. */ 2977 else if (name_len == 1)
3036 idx += 1 + extra[idx]; 2978 {
3037 } 2979 /* No valid character, treat it as a normal
3038 else if (name_len == 1) 2980 character. */
3039 { 2981 bitset_set (sbcset, name[0]);
3040 /* No valid character, treat it as a normal
3041 character. */
3042 bitset_set (sbcset, name[0]);
3043 return REG_NOERROR;
3044 }
3045 else
3046 return REG_ECOLLATE;
3047
3048 /* Got valid collation sequence, add it as a new entry. */
3049 /* Check the space of the arrays. */
3050 if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
3051 {
3052 /* Not enough, realloc it. */
3053 /* +1 in case of mbcset->ncoll_syms is 0. */
3054 Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
3055 /* Use realloc since mbcset->coll_syms is NULL
3056 if *alloc == 0. */
3057 int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
3058 new_coll_sym_alloc);
3059 if (BE (new_coll_syms == NULL, 0))
3060 return REG_ESPACE;
3061 mbcset->coll_syms = new_coll_syms;
3062 *coll_sym_alloc = new_coll_sym_alloc;
3063 }
3064 mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
3065 return REG_NOERROR; 2982 return REG_NOERROR;
3066 } 2983 }
3067 else 2984 else
2985 return REG_ECOLLATE;
2986
2987 /* Got valid collation sequence, add it as a new entry. */
2988 /* Check the space of the arrays. */
2989 if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
3068 { 2990 {
3069 if (BE (name_len != 1, 0)) 2991 /* Not enough, realloc it. */
3070 return REG_ECOLLATE; 2992 /* +1 in case of mbcset->ncoll_syms is 0. */
3071 else 2993 int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
3072 { 2994 /* Use realloc since mbcset->coll_syms is NULL
3073 bitset_set (sbcset, name[0]); 2995 if *alloc == 0. */
3074 return REG_NOERROR; 2996 int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
3075 } 2997 new_coll_sym_alloc);
2998 if (__glibc_unlikely (new_coll_syms == NULL))
2999 return REG_ESPACE;
3000 mbcset->coll_syms = new_coll_syms;
3001 *coll_sym_alloc = new_coll_sym_alloc;
3076 } 3002 }
3003 mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
3004 return REG_NOERROR;
3077 } 3005 }
3078#endif 3006 else
3007 {
3008 if (__glibc_unlikely (name_len != 1))
3009 return REG_ECOLLATE;
3010 else
3011 {
3012 bitset_set (sbcset, name[0]);
3013 return REG_NOERROR;
3014 }
3015 }
3016}
3017#endif /* _LIBC */
3018
3019/* This function parse bracket expression like "[abc]", "[a-c]",
3020 "[[.a-a.]]" etc. */
3021
3022static bin_tree_t *
3023parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3024 reg_syntax_t syntax, reg_errcode_t *err)
3025{
3026 const unsigned char *collseqmb = NULL;
3027 const char *collseqwc = NULL;
3028 uint_fast32_t nrules = 0;
3029 int_fast32_t table_size = 0;
3030 const void *symb_table = NULL;
3031 const unsigned char *extra = NULL;
3079 3032
3080 re_token_t br_token; 3033 re_token_t br_token;
3081 re_bitset_ptr_t sbcset; 3034 re_bitset_ptr_t sbcset;
3082#ifdef RE_ENABLE_I18N
3083 re_charset_t *mbcset; 3035 re_charset_t *mbcset;
3084 Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; 3036 Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
3085 Idx equiv_class_alloc = 0, char_class_alloc = 0; 3037 Idx equiv_class_alloc = 0, char_class_alloc = 0;
3086#endif /* not RE_ENABLE_I18N */
3087 bool non_match = false; 3038 bool non_match = false;
3088 bin_tree_t *work_tree; 3039 bin_tree_t *work_tree;
3089 int token_len; 3040 int token_len;
@@ -3099,47 +3050,36 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3099 */ 3050 */
3100 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); 3051 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3101 table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); 3052 table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
3102 symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, 3053 symb_table = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_TABLEMB);
3103 _NL_COLLATE_SYMB_TABLEMB);
3104 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, 3054 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3105 _NL_COLLATE_SYMB_EXTRAMB); 3055 _NL_COLLATE_SYMB_EXTRAMB);
3106 } 3056 }
3107#endif 3057#endif
3108 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); 3058 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3109#ifdef RE_ENABLE_I18N
3110 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); 3059 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3111#endif /* RE_ENABLE_I18N */ 3060 if (__glibc_unlikely (sbcset == NULL || mbcset == NULL))
3112#ifdef RE_ENABLE_I18N
3113 if (BE (sbcset == NULL || mbcset == NULL, 0))
3114#else
3115 if (BE (sbcset == NULL, 0))
3116#endif /* RE_ENABLE_I18N */
3117 { 3061 {
3118 re_free (sbcset); 3062 re_free (sbcset);
3119#ifdef RE_ENABLE_I18N
3120 re_free (mbcset); 3063 re_free (mbcset);
3121#endif
3122 *err = REG_ESPACE; 3064 *err = REG_ESPACE;
3123 return NULL; 3065 return NULL;
3124 } 3066 }
3125 3067
3126 token_len = peek_token_bracket (token, regexp, syntax); 3068 token_len = peek_token_bracket (token, regexp, syntax);
3127 if (BE (token->type == END_OF_RE, 0)) 3069 if (__glibc_unlikely (token->type == END_OF_RE))
3128 { 3070 {
3129 *err = REG_BADPAT; 3071 *err = REG_BADPAT;
3130 goto parse_bracket_exp_free_return; 3072 goto parse_bracket_exp_free_return;
3131 } 3073 }
3132 if (token->type == OP_NON_MATCH_LIST) 3074 if (token->type == OP_NON_MATCH_LIST)
3133 { 3075 {
3134#ifdef RE_ENABLE_I18N
3135 mbcset->non_match = 1; 3076 mbcset->non_match = 1;
3136#endif /* not RE_ENABLE_I18N */
3137 non_match = true; 3077 non_match = true;
3138 if (syntax & RE_HAT_LISTS_NOT_NEWLINE) 3078 if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
3139 bitset_set (sbcset, '\n'); 3079 bitset_set (sbcset, '\n');
3140 re_string_skip_bytes (regexp, token_len); /* Skip a token. */ 3080 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3141 token_len = peek_token_bracket (token, regexp, syntax); 3081 token_len = peek_token_bracket (token, regexp, syntax);
3142 if (BE (token->type == END_OF_RE, 0)) 3082 if (__glibc_unlikely (token->type == END_OF_RE))
3143 { 3083 {
3144 *err = REG_BADPAT; 3084 *err = REG_BADPAT;
3145 goto parse_bracket_exp_free_return; 3085 goto parse_bracket_exp_free_return;
@@ -3161,9 +3101,10 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3161 re_token_t token2; 3101 re_token_t token2;
3162 3102
3163 start_elem.opr.name = start_name_buf; 3103 start_elem.opr.name = start_name_buf;
3104 start_elem.type = COLL_SYM;
3164 ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa, 3105 ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
3165 syntax, first_round); 3106 syntax, first_round);
3166 if (BE (ret != REG_NOERROR, 0)) 3107 if (__glibc_unlikely (ret != REG_NOERROR))
3167 { 3108 {
3168 *err = ret; 3109 *err = ret;
3169 goto parse_bracket_exp_free_return; 3110 goto parse_bracket_exp_free_return;
@@ -3176,7 +3117,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3176 /* Do not check for ranges if we know they are not allowed. */ 3117 /* Do not check for ranges if we know they are not allowed. */
3177 if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS) 3118 if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
3178 { 3119 {
3179 if (BE (token->type == END_OF_RE, 0)) 3120 if (__glibc_unlikely (token->type == END_OF_RE))
3180 { 3121 {
3181 *err = REG_EBRACK; 3122 *err = REG_EBRACK;
3182 goto parse_bracket_exp_free_return; 3123 goto parse_bracket_exp_free_return;
@@ -3185,7 +3126,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3185 { 3126 {
3186 re_string_skip_bytes (regexp, token_len); /* Skip '-'. */ 3127 re_string_skip_bytes (regexp, token_len); /* Skip '-'. */
3187 token_len2 = peek_token_bracket (&token2, regexp, syntax); 3128 token_len2 = peek_token_bracket (&token2, regexp, syntax);
3188 if (BE (token2.type == END_OF_RE, 0)) 3129 if (__glibc_unlikely (token2.type == END_OF_RE))
3189 { 3130 {
3190 *err = REG_EBRACK; 3131 *err = REG_EBRACK;
3191 goto parse_bracket_exp_free_return; 3132 goto parse_bracket_exp_free_return;
@@ -3204,9 +3145,10 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3204 if (is_range_exp == true) 3145 if (is_range_exp == true)
3205 { 3146 {
3206 end_elem.opr.name = end_name_buf; 3147 end_elem.opr.name = end_name_buf;
3148 end_elem.type = COLL_SYM;
3207 ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2, 3149 ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
3208 dfa, syntax, true); 3150 dfa, syntax, true);
3209 if (BE (ret != REG_NOERROR, 0)) 3151 if (__glibc_unlikely (ret != REG_NOERROR))
3210 { 3152 {
3211 *err = ret; 3153 *err = ret;
3212 goto parse_bracket_exp_free_return; 3154 goto parse_bracket_exp_free_return;
@@ -3214,19 +3156,11 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3214 3156
3215 token_len = peek_token_bracket (token, regexp, syntax); 3157 token_len = peek_token_bracket (token, regexp, syntax);
3216 3158
3217#ifdef _LIBC
3218 *err = build_range_exp (sbcset, mbcset, &range_alloc, 3159 *err = build_range_exp (sbcset, mbcset, &range_alloc,
3219 &start_elem, &end_elem); 3160 &start_elem, &end_elem,
3220#else 3161 dfa, syntax, nrules, collseqmb, collseqwc,
3221# ifdef RE_ENABLE_I18N 3162 table_size, symb_table, extra);
3222 *err = build_range_exp (syntax, sbcset, 3163 if (__glibc_unlikely (*err != REG_NOERROR))
3223 dfa->mb_cur_max > 1 ? mbcset : NULL,
3224 &range_alloc, &start_elem, &end_elem);
3225# else
3226 *err = build_range_exp (syntax, sbcset, &start_elem, &end_elem);
3227# endif
3228#endif /* RE_ENABLE_I18N */
3229 if (BE (*err != REG_NOERROR, 0))
3230 goto parse_bracket_exp_free_return; 3164 goto parse_bracket_exp_free_return;
3231 } 3165 }
3232 else 3166 else
@@ -3236,10 +3170,9 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3236 case SB_CHAR: 3170 case SB_CHAR:
3237 bitset_set (sbcset, start_elem.opr.ch); 3171 bitset_set (sbcset, start_elem.opr.ch);
3238 break; 3172 break;
3239#ifdef RE_ENABLE_I18N
3240 case MB_CHAR: 3173 case MB_CHAR:
3241 /* Check whether the array has enough space. */ 3174 /* Check whether the array has enough space. */
3242 if (BE (mbchar_alloc == mbcset->nmbchars, 0)) 3175 if (__glibc_unlikely (mbchar_alloc == mbcset->nmbchars))
3243 { 3176 {
3244 wchar_t *new_mbchars; 3177 wchar_t *new_mbchars;
3245 /* Not enough, realloc it. */ 3178 /* Not enough, realloc it. */
@@ -3248,47 +3181,41 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3248 /* Use realloc since array is NULL if *alloc == 0. */ 3181 /* Use realloc since array is NULL if *alloc == 0. */
3249 new_mbchars = re_realloc (mbcset->mbchars, wchar_t, 3182 new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
3250 mbchar_alloc); 3183 mbchar_alloc);
3251 if (BE (new_mbchars == NULL, 0)) 3184 if (__glibc_unlikely (new_mbchars == NULL))
3252 goto parse_bracket_exp_espace; 3185 goto parse_bracket_exp_espace;
3253 mbcset->mbchars = new_mbchars; 3186 mbcset->mbchars = new_mbchars;
3254 } 3187 }
3255 mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; 3188 mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
3256 break; 3189 break;
3257#endif /* RE_ENABLE_I18N */
3258 case EQUIV_CLASS: 3190 case EQUIV_CLASS:
3259 *err = build_equiv_class (sbcset, 3191 *err = build_equiv_class (sbcset,
3260#ifdef RE_ENABLE_I18N
3261 mbcset, &equiv_class_alloc, 3192 mbcset, &equiv_class_alloc,
3262#endif /* RE_ENABLE_I18N */
3263 start_elem.opr.name); 3193 start_elem.opr.name);
3264 if (BE (*err != REG_NOERROR, 0)) 3194 if (__glibc_unlikely (*err != REG_NOERROR))
3265 goto parse_bracket_exp_free_return; 3195 goto parse_bracket_exp_free_return;
3266 break; 3196 break;
3267 case COLL_SYM: 3197 case COLL_SYM:
3268 *err = build_collating_symbol (sbcset, 3198 *err = build_collating_symbol (sbcset,
3269#ifdef RE_ENABLE_I18N
3270 mbcset, &coll_sym_alloc, 3199 mbcset, &coll_sym_alloc,
3271#endif /* RE_ENABLE_I18N */ 3200 start_elem.opr.name,
3272 start_elem.opr.name); 3201 nrules, table_size, symb_table, extra);
3273 if (BE (*err != REG_NOERROR, 0)) 3202 if (__glibc_unlikely (*err != REG_NOERROR))
3274 goto parse_bracket_exp_free_return; 3203 goto parse_bracket_exp_free_return;
3275 break; 3204 break;
3276 case CHAR_CLASS: 3205 case CHAR_CLASS:
3277 *err = build_charclass (regexp->trans, sbcset, 3206 *err = build_charclass (regexp->trans, sbcset,
3278#ifdef RE_ENABLE_I18N
3279 mbcset, &char_class_alloc, 3207 mbcset, &char_class_alloc,
3280#endif /* RE_ENABLE_I18N */
3281 (const char *) start_elem.opr.name, 3208 (const char *) start_elem.opr.name,
3282 syntax); 3209 syntax);
3283 if (BE (*err != REG_NOERROR, 0)) 3210 if (__glibc_unlikely (*err != REG_NOERROR))
3284 goto parse_bracket_exp_free_return; 3211 goto parse_bracket_exp_free_return;
3285 break; 3212 break;
3286 default: 3213 default:
3287 assert (0); 3214 DEBUG_ASSERT (false);
3288 break; 3215 break;
3289 } 3216 }
3290 } 3217 }
3291 if (BE (token->type == END_OF_RE, 0)) 3218 if (__glibc_unlikely (token->type == END_OF_RE))
3292 { 3219 {
3293 *err = REG_EBRACK; 3220 *err = REG_EBRACK;
3294 goto parse_bracket_exp_free_return; 3221 goto parse_bracket_exp_free_return;
@@ -3303,7 +3230,6 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3303 if (non_match) 3230 if (non_match)
3304 bitset_not (sbcset); 3231 bitset_not (sbcset);
3305 3232
3306#ifdef RE_ENABLE_I18N
3307 /* Ensure only single byte characters are set. */ 3233 /* Ensure only single byte characters are set. */
3308 if (dfa->mb_cur_max > 1) 3234 if (dfa->mb_cur_max > 1)
3309 bitset_mask (sbcset, dfa->sb_char); 3235 bitset_mask (sbcset, dfa->sb_char);
@@ -3319,7 +3245,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3319 br_token.type = COMPLEX_BRACKET; 3245 br_token.type = COMPLEX_BRACKET;
3320 br_token.opr.mbcset = mbcset; 3246 br_token.opr.mbcset = mbcset;
3321 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); 3247 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3322 if (BE (mbc_tree == NULL, 0)) 3248 if (__glibc_unlikely (mbc_tree == NULL))
3323 goto parse_bracket_exp_espace; 3249 goto parse_bracket_exp_espace;
3324 for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx) 3250 for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
3325 if (sbcset[sbc_idx]) 3251 if (sbcset[sbc_idx])
@@ -3332,12 +3258,12 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3332 br_token.type = SIMPLE_BRACKET; 3258 br_token.type = SIMPLE_BRACKET;
3333 br_token.opr.sbcset = sbcset; 3259 br_token.opr.sbcset = sbcset;
3334 work_tree = create_token_tree (dfa, NULL, NULL, &br_token); 3260 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3335 if (BE (work_tree == NULL, 0)) 3261 if (__glibc_unlikely (work_tree == NULL))
3336 goto parse_bracket_exp_espace; 3262 goto parse_bracket_exp_espace;
3337 3263
3338 /* Then join them by ALT node. */ 3264 /* Then join them by ALT node. */
3339 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT); 3265 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
3340 if (BE (work_tree == NULL, 0)) 3266 if (__glibc_unlikely (work_tree == NULL))
3341 goto parse_bracket_exp_espace; 3267 goto parse_bracket_exp_espace;
3342 } 3268 }
3343 else 3269 else
@@ -3347,16 +3273,13 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3347 } 3273 }
3348 } 3274 }
3349 else 3275 else
3350#endif /* not RE_ENABLE_I18N */
3351 { 3276 {
3352#ifdef RE_ENABLE_I18N
3353 free_charset (mbcset); 3277 free_charset (mbcset);
3354#endif
3355 /* Build a tree for simple bracket. */ 3278 /* Build a tree for simple bracket. */
3356 br_token.type = SIMPLE_BRACKET; 3279 br_token.type = SIMPLE_BRACKET;
3357 br_token.opr.sbcset = sbcset; 3280 br_token.opr.sbcset = sbcset;
3358 work_tree = create_token_tree (dfa, NULL, NULL, &br_token); 3281 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3359 if (BE (work_tree == NULL, 0)) 3282 if (__glibc_unlikely (work_tree == NULL))
3360 goto parse_bracket_exp_espace; 3283 goto parse_bracket_exp_espace;
3361 } 3284 }
3362 return work_tree; 3285 return work_tree;
@@ -3365,9 +3288,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3365 *err = REG_ESPACE; 3288 *err = REG_ESPACE;
3366 parse_bracket_exp_free_return: 3289 parse_bracket_exp_free_return:
3367 re_free (sbcset); 3290 re_free (sbcset);
3368#ifdef RE_ENABLE_I18N
3369 free_charset (mbcset); 3291 free_charset (mbcset);
3370#endif /* RE_ENABLE_I18N */
3371 return NULL; 3292 return NULL;
3372} 3293}
3373 3294
@@ -3378,7 +3299,6 @@ parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
3378 re_token_t *token, int token_len, re_dfa_t *dfa, 3299 re_token_t *token, int token_len, re_dfa_t *dfa,
3379 reg_syntax_t syntax, bool accept_hyphen) 3300 reg_syntax_t syntax, bool accept_hyphen)
3380{ 3301{
3381#ifdef RE_ENABLE_I18N
3382 int cur_char_size; 3302 int cur_char_size;
3383 cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)); 3303 cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
3384 if (cur_char_size > 1) 3304 if (cur_char_size > 1)
@@ -3388,12 +3308,11 @@ parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
3388 re_string_skip_bytes (regexp, cur_char_size); 3308 re_string_skip_bytes (regexp, cur_char_size);
3389 return REG_NOERROR; 3309 return REG_NOERROR;
3390 } 3310 }
3391#endif /* RE_ENABLE_I18N */
3392 re_string_skip_bytes (regexp, token_len); /* Skip a token. */ 3311 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3393 if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS 3312 if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
3394 || token->type == OP_OPEN_EQUIV_CLASS) 3313 || token->type == OP_OPEN_EQUIV_CLASS)
3395 return parse_bracket_symbol (elem, regexp, token); 3314 return parse_bracket_symbol (elem, regexp, token);
3396 if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen) 3315 if (__glibc_unlikely (token->type == OP_CHARSET_RANGE) && !accept_hyphen)
3397 { 3316 {
3398 /* A '-' must only appear as anything but a range indicator before 3317 /* A '-' must only appear as anything but a range indicator before
3399 the closing bracket. Everything else is an error. */ 3318 the closing bracket. Everything else is an error. */
@@ -3461,12 +3380,8 @@ parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
3461 is a pointer argument since we may update it. */ 3380 is a pointer argument since we may update it. */
3462 3381
3463static reg_errcode_t 3382static reg_errcode_t
3464#ifdef RE_ENABLE_I18N
3465build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, 3383build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
3466 Idx *equiv_class_alloc, const unsigned char *name) 3384 Idx *equiv_class_alloc, const unsigned char *name)
3467#else /* not RE_ENABLE_I18N */
3468build_equiv_class (bitset_t sbcset, const unsigned char *name)
3469#endif /* not RE_ENABLE_I18N */
3470{ 3385{
3471#ifdef _LIBC 3386#ifdef _LIBC
3472 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3387 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
@@ -3478,8 +3393,6 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
3478 int32_t idx1, idx2; 3393 int32_t idx1, idx2;
3479 unsigned int ch; 3394 unsigned int ch;
3480 size_t len; 3395 size_t len;
3481 /* This #include defines a local function! */
3482# include <locale/weight.h>
3483 /* Calculate the index for equivalence class. */ 3396 /* Calculate the index for equivalence class. */
3484 cp = name; 3397 cp = name;
3485 table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); 3398 table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
@@ -3489,8 +3402,8 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
3489 _NL_COLLATE_EXTRAMB); 3402 _NL_COLLATE_EXTRAMB);
3490 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, 3403 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3491 _NL_COLLATE_INDIRECTMB); 3404 _NL_COLLATE_INDIRECTMB);
3492 idx1 = findidx (&cp, -1); 3405 idx1 = findidx (table, indirect, extra, &cp, -1);
3493 if (BE (idx1 == 0 || *cp != '\0', 0)) 3406 if (__glibc_unlikely (idx1 == 0 || *cp != '\0'))
3494 /* This isn't a valid character. */ 3407 /* This isn't a valid character. */
3495 return REG_ECOLLATE; 3408 return REG_ECOLLATE;
3496 3409
@@ -3500,7 +3413,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
3500 { 3413 {
3501 char_buf[0] = ch; 3414 char_buf[0] = ch;
3502 cp = char_buf; 3415 cp = char_buf;
3503 idx2 = findidx (&cp, 1); 3416 idx2 = findidx (table, indirect, extra, &cp, 1);
3504/* 3417/*
3505 idx2 = table[ch]; 3418 idx2 = table[ch];
3506*/ 3419*/
@@ -3509,21 +3422,13 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
3509 continue; 3422 continue;
3510 /* Compare only if the length matches and the collation rule 3423 /* Compare only if the length matches and the collation rule
3511 index is the same. */ 3424 index is the same. */
3512 if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24)) 3425 if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24)
3513 { 3426 && memcmp (weights + (idx1 & 0xffffff) + 1,
3514 int cnt = 0; 3427 weights + (idx2 & 0xffffff) + 1, len) == 0)
3515 3428 bitset_set (sbcset, ch);
3516 while (cnt <= len &&
3517 weights[(idx1 & 0xffffff) + 1 + cnt]
3518 == weights[(idx2 & 0xffffff) + 1 + cnt])
3519 ++cnt;
3520
3521 if (cnt > len)
3522 bitset_set (sbcset, ch);
3523 }
3524 } 3429 }
3525 /* Check whether the array has enough space. */ 3430 /* Check whether the array has enough space. */
3526 if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0)) 3431 if (__glibc_unlikely (*equiv_class_alloc == mbcset->nequiv_classes))
3527 { 3432 {
3528 /* Not enough, realloc it. */ 3433 /* Not enough, realloc it. */
3529 /* +1 in case of mbcset->nequiv_classes is 0. */ 3434 /* +1 in case of mbcset->nequiv_classes is 0. */
@@ -3532,7 +3437,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
3532 int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes, 3437 int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
3533 int32_t, 3438 int32_t,
3534 new_equiv_class_alloc); 3439 new_equiv_class_alloc);
3535 if (BE (new_equiv_classes == NULL, 0)) 3440 if (__glibc_unlikely (new_equiv_classes == NULL))
3536 return REG_ESPACE; 3441 return REG_ESPACE;
3537 mbcset->equiv_classes = new_equiv_classes; 3442 mbcset->equiv_classes = new_equiv_classes;
3538 *equiv_class_alloc = new_equiv_class_alloc; 3443 *equiv_class_alloc = new_equiv_class_alloc;
@@ -3542,7 +3447,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
3542 else 3447 else
3543#endif /* _LIBC */ 3448#endif /* _LIBC */
3544 { 3449 {
3545 if (BE (strlen ((const char *) name) != 1, 0)) 3450 if (__glibc_unlikely (strlen ((const char *) name) != 1))
3546 return REG_ECOLLATE; 3451 return REG_ECOLLATE;
3547 bitset_set (sbcset, *name); 3452 bitset_set (sbcset, *name);
3548 } 3453 }
@@ -3556,14 +3461,9 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
3556 is a pointer argument since we may update it. */ 3461 is a pointer argument since we may update it. */
3557 3462
3558static reg_errcode_t 3463static reg_errcode_t
3559#ifdef RE_ENABLE_I18N
3560build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, 3464build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3561 re_charset_t *mbcset, Idx *char_class_alloc, 3465 re_charset_t *mbcset, Idx *char_class_alloc,
3562 const char *class_name, reg_syntax_t syntax) 3466 const char *class_name, reg_syntax_t syntax)
3563#else /* not RE_ENABLE_I18N */
3564build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3565 const char *class_name, reg_syntax_t syntax)
3566#endif /* not RE_ENABLE_I18N */
3567{ 3467{
3568 int i; 3468 int i;
3569 const char *name = class_name; 3469 const char *name = class_name;
@@ -3574,9 +3474,8 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3574 && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0)) 3474 && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
3575 name = "alpha"; 3475 name = "alpha";
3576 3476
3577#ifdef RE_ENABLE_I18N
3578 /* Check the space of the arrays. */ 3477 /* Check the space of the arrays. */
3579 if (BE (*char_class_alloc == mbcset->nchar_classes, 0)) 3478 if (__glibc_unlikely (*char_class_alloc == mbcset->nchar_classes))
3580 { 3479 {
3581 /* Not enough, realloc it. */ 3480 /* Not enough, realloc it. */
3582 /* +1 in case of mbcset->nchar_classes is 0. */ 3481 /* +1 in case of mbcset->nchar_classes is 0. */
@@ -3584,17 +3483,16 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3584 /* Use realloc since array is NULL if *alloc == 0. */ 3483 /* Use realloc since array is NULL if *alloc == 0. */
3585 wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t, 3484 wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
3586 new_char_class_alloc); 3485 new_char_class_alloc);
3587 if (BE (new_char_classes == NULL, 0)) 3486 if (__glibc_unlikely (new_char_classes == NULL))
3588 return REG_ESPACE; 3487 return REG_ESPACE;
3589 mbcset->char_classes = new_char_classes; 3488 mbcset->char_classes = new_char_classes;
3590 *char_class_alloc = new_char_class_alloc; 3489 *char_class_alloc = new_char_class_alloc;
3591 } 3490 }
3592 mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); 3491 mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
3593#endif /* RE_ENABLE_I18N */
3594 3492
3595#define BUILD_CHARCLASS_LOOP(ctype_func) \ 3493#define BUILD_CHARCLASS_LOOP(ctype_func) \
3596 do { \ 3494 do { \
3597 if (BE (trans != NULL, 0)) \ 3495 if (__glibc_unlikely (trans != NULL)) \
3598 { \ 3496 { \
3599 for (i = 0; i < SBC_MAX; ++i) \ 3497 for (i = 0; i < SBC_MAX; ++i) \
3600 if (ctype_func (i)) \ 3498 if (ctype_func (i)) \
@@ -3645,49 +3543,33 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3645 reg_errcode_t *err) 3543 reg_errcode_t *err)
3646{ 3544{
3647 re_bitset_ptr_t sbcset; 3545 re_bitset_ptr_t sbcset;
3648#ifdef RE_ENABLE_I18N
3649 re_charset_t *mbcset; 3546 re_charset_t *mbcset;
3650 Idx alloc = 0; 3547 Idx alloc = 0;
3651#endif /* not RE_ENABLE_I18N */
3652 reg_errcode_t ret; 3548 reg_errcode_t ret;
3653 re_token_t br_token;
3654 bin_tree_t *tree; 3549 bin_tree_t *tree;
3655 3550
3656 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); 3551 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3657#ifdef RE_ENABLE_I18N 3552 if (__glibc_unlikely (sbcset == NULL))
3658 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3659#endif /* RE_ENABLE_I18N */
3660
3661#ifdef RE_ENABLE_I18N
3662 if (BE (sbcset == NULL || mbcset == NULL, 0))
3663#else /* not RE_ENABLE_I18N */
3664 if (BE (sbcset == NULL, 0))
3665#endif /* not RE_ENABLE_I18N */
3666 { 3553 {
3667 *err = REG_ESPACE; 3554 *err = REG_ESPACE;
3668 return NULL; 3555 return NULL;
3669 } 3556 }
3670 3557 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3671 if (non_match) 3558 if (__glibc_unlikely (mbcset == NULL))
3672 { 3559 {
3673#ifdef RE_ENABLE_I18N 3560 re_free (sbcset);
3674 mbcset->non_match = 1; 3561 *err = REG_ESPACE;
3675#endif /* not RE_ENABLE_I18N */ 3562 return NULL;
3676 } 3563 }
3564 mbcset->non_match = non_match;
3677 3565
3678 /* We don't care the syntax in this case. */ 3566 /* We don't care the syntax in this case. */
3679 ret = build_charclass (trans, sbcset, 3567 ret = build_charclass (trans, sbcset, mbcset, &alloc, class_name, 0);
3680#ifdef RE_ENABLE_I18N
3681 mbcset, &alloc,
3682#endif /* RE_ENABLE_I18N */
3683 class_name, 0);
3684 3568
3685 if (BE (ret != REG_NOERROR, 0)) 3569 if (__glibc_unlikely (ret != REG_NOERROR))
3686 { 3570 {
3687 re_free (sbcset); 3571 re_free (sbcset);
3688#ifdef RE_ENABLE_I18N
3689 free_charset (mbcset); 3572 free_charset (mbcset);
3690#endif /* RE_ENABLE_I18N */
3691 *err = ret; 3573 *err = ret;
3692 return NULL; 3574 return NULL;
3693 } 3575 }
@@ -3699,20 +3581,16 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3699 if (non_match) 3581 if (non_match)
3700 bitset_not (sbcset); 3582 bitset_not (sbcset);
3701 3583
3702#ifdef RE_ENABLE_I18N
3703 /* Ensure only single byte characters are set. */ 3584 /* Ensure only single byte characters are set. */
3704 if (dfa->mb_cur_max > 1) 3585 if (dfa->mb_cur_max > 1)
3705 bitset_mask (sbcset, dfa->sb_char); 3586 bitset_mask (sbcset, dfa->sb_char);
3706#endif
3707 3587
3708 /* Build a tree for simple bracket. */ 3588 /* Build a tree for simple bracket. */
3709 br_token.type = SIMPLE_BRACKET; 3589 re_token_t br_token = { .type = SIMPLE_BRACKET, .opr.sbcset = sbcset };
3710 br_token.opr.sbcset = sbcset;
3711 tree = create_token_tree (dfa, NULL, NULL, &br_token); 3590 tree = create_token_tree (dfa, NULL, NULL, &br_token);
3712 if (BE (tree == NULL, 0)) 3591 if (__glibc_unlikely (tree == NULL))
3713 goto build_word_op_espace; 3592 goto build_word_op_espace;
3714 3593
3715#ifdef RE_ENABLE_I18N
3716 if (dfa->mb_cur_max > 1) 3594 if (dfa->mb_cur_max > 1)
3717 { 3595 {
3718 bin_tree_t *mbc_tree; 3596 bin_tree_t *mbc_tree;
@@ -3721,11 +3599,11 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3721 br_token.opr.mbcset = mbcset; 3599 br_token.opr.mbcset = mbcset;
3722 dfa->has_mb_node = 1; 3600 dfa->has_mb_node = 1;
3723 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); 3601 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3724 if (BE (mbc_tree == NULL, 0)) 3602 if (__glibc_unlikely (mbc_tree == NULL))
3725 goto build_word_op_espace; 3603 goto build_word_op_espace;
3726 /* Then join them by ALT node. */ 3604 /* Then join them by ALT node. */
3727 tree = create_tree (dfa, tree, mbc_tree, OP_ALT); 3605 tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
3728 if (BE (mbc_tree != NULL, 1)) 3606 if (__glibc_likely (mbc_tree != NULL))
3729 return tree; 3607 return tree;
3730 } 3608 }
3731 else 3609 else
@@ -3733,63 +3611,55 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3733 free_charset (mbcset); 3611 free_charset (mbcset);
3734 return tree; 3612 return tree;
3735 } 3613 }
3736#else /* not RE_ENABLE_I18N */
3737 return tree;
3738#endif /* not RE_ENABLE_I18N */
3739 3614
3740 build_word_op_espace: 3615 build_word_op_espace:
3741 re_free (sbcset); 3616 re_free (sbcset);
3742#ifdef RE_ENABLE_I18N
3743 free_charset (mbcset); 3617 free_charset (mbcset);
3744#endif /* RE_ENABLE_I18N */
3745 *err = REG_ESPACE; 3618 *err = REG_ESPACE;
3746 return NULL; 3619 return NULL;
3747} 3620}
3748 3621
3749/* This is intended for the expressions like "a{1,3}". 3622/* This is intended for the expressions like "a{1,3}".
3750 Fetch a number from 'input', and return the number. 3623 Fetch a number from 'input', and return the number.
3751 Return REG_MISSING if the number field is empty like "{,1}". 3624 Return -1 if the number field is empty like "{,1}".
3752 Return RE_DUP_MAX + 1 if the number field is too large. 3625 Return RE_DUP_MAX + 1 if the number field is too large.
3753 Return REG_ERROR if an error occurred. */ 3626 Return -2 if an error occurred. */
3754 3627
3755static Idx 3628static Idx
3756fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax) 3629fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
3757{ 3630{
3758 Idx num = REG_MISSING; 3631 Idx num = -1;
3759 unsigned char c; 3632 unsigned char c;
3760 while (1) 3633 while (1)
3761 { 3634 {
3762 fetch_token (token, input, syntax); 3635 fetch_token (token, input, syntax);
3763 c = token->opr.c; 3636 c = token->opr.c;
3764 if (BE (token->type == END_OF_RE, 0)) 3637 if (__glibc_unlikely (token->type == END_OF_RE))
3765 return REG_ERROR; 3638 return -2;
3766 if (token->type == OP_CLOSE_DUP_NUM || c == ',') 3639 if (token->type == OP_CLOSE_DUP_NUM || c == ',')
3767 break; 3640 break;
3768 num = ((token->type != CHARACTER || c < '0' || '9' < c 3641 num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2)
3769 || num == REG_ERROR) 3642 ? -2
3770 ? REG_ERROR 3643 : num == -1
3771 : num == REG_MISSING
3772 ? c - '0' 3644 ? c - '0'
3773 : MIN (RE_DUP_MAX + 1, num * 10 + c - '0')); 3645 : MIN (RE_DUP_MAX + 1, num * 10 + c - '0'));
3774 } 3646 }
3775 return num; 3647 return num;
3776} 3648}
3777 3649
3778#ifdef RE_ENABLE_I18N
3779static void 3650static void
3780free_charset (re_charset_t *cset) 3651free_charset (re_charset_t *cset)
3781{ 3652{
3782 re_free (cset->mbchars); 3653 re_free (cset->mbchars);
3783# ifdef _LIBC 3654#ifdef _LIBC
3784 re_free (cset->coll_syms); 3655 re_free (cset->coll_syms);
3785 re_free (cset->equiv_classes); 3656 re_free (cset->equiv_classes);
3657#endif
3786 re_free (cset->range_starts); 3658 re_free (cset->range_starts);
3787 re_free (cset->range_ends); 3659 re_free (cset->range_ends);
3788# endif
3789 re_free (cset->char_classes); 3660 re_free (cset->char_classes);
3790 re_free (cset); 3661 re_free (cset);
3791} 3662}
3792#endif /* RE_ENABLE_I18N */
3793 3663
3794/* Functions for binary tree operation. */ 3664/* Functions for binary tree operation. */
3795 3665
@@ -3799,8 +3669,7 @@ static bin_tree_t *
3799create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, 3669create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3800 re_token_type_t type) 3670 re_token_type_t type)
3801{ 3671{
3802 re_token_t t; 3672 re_token_t t = { .type = type };
3803 t.type = type;
3804 return create_token_tree (dfa, left, right, &t); 3673 return create_token_tree (dfa, left, right, &t);
3805} 3674}
3806 3675
@@ -3809,7 +3678,7 @@ create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3809 const re_token_t *token) 3678 const re_token_t *token)
3810{ 3679{
3811 bin_tree_t *tree; 3680 bin_tree_t *tree;
3812 if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0)) 3681 if (__glibc_unlikely (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE))
3813 { 3682 {
3814 bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1); 3683 bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
3815 3684
@@ -3829,7 +3698,7 @@ create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3829 tree->token.opt_subexp = 0; 3698 tree->token.opt_subexp = 0;
3830 tree->first = NULL; 3699 tree->first = NULL;
3831 tree->next = NULL; 3700 tree->next = NULL;
3832 tree->node_idx = REG_MISSING; 3701 tree->node_idx = -1;
3833 3702
3834 if (left != NULL) 3703 if (left != NULL)
3835 left->parent = tree; 3704 left->parent = tree;
@@ -3856,13 +3725,10 @@ mark_opt_subexp (void *extra, bin_tree_t *node)
3856static void 3725static void
3857free_token (re_token_t *node) 3726free_token (re_token_t *node)
3858{ 3727{
3859#ifdef RE_ENABLE_I18N
3860 if (node->type == COMPLEX_BRACKET && node->duplicated == 0) 3728 if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
3861 free_charset (node->opr.mbcset); 3729 free_charset (node->opr.mbcset);
3862 else 3730 else if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
3863#endif /* RE_ENABLE_I18N */ 3731 re_free (node->opr.sbcset);
3864 if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
3865 re_free (node->opr.sbcset);
3866} 3732}
3867 3733
3868/* Worker function for tree walking. Free the allocated memory inside NODE 3734/* Worker function for tree walking. Free the allocated memory inside NODE