summaryrefslogtreecommitdiffstats
path: root/gl/regcomp.c
diff options
context:
space:
mode:
Diffstat (limited to 'gl/regcomp.c')
-rw-r--r--gl/regcomp.c607
1 files changed, 301 insertions, 306 deletions
diff --git a/gl/regcomp.c b/gl/regcomp.c
index f0b2e522..887e5b50 100644
--- a/gl/regcomp.c
+++ b/gl/regcomp.c
@@ -1,21 +1,25 @@
1/* Extended regular expression matching and search library. 1/* Extended regular expression matching and search library.
2 Copyright (C) 2002-2013 Free Software Foundation, Inc. 2 Copyright (C) 2002-2021 Free Software Foundation, Inc.
3 This file is part of the GNU C Library. 3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. 4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5 5
6 The GNU C Library is free software; you can redistribute it and/or 6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public 7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either 8 License as published by the Free Software Foundation; either
9 version 3 of the License, or (at your option) any later version. 9 version 2.1 of the License, or (at your option) any later version.
10 10
11 The GNU C Library is distributed in the hope that it will be useful, 11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details. 14 Lesser General Public License for more details.
15 15
16 You should have received a copy of the GNU General Public 16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see 17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */ 18 <https://www.gnu.org/licenses/>. */
19
20#ifdef _LIBC
21# include <locale/weight.h>
22#endif
19 23
20static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, 24static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
21 size_t length, reg_syntax_t syntax); 25 size_t length, reg_syntax_t syntax);
@@ -55,7 +59,7 @@ static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
55static Idx fetch_number (re_string_t *input, re_token_t *token, 59static Idx fetch_number (re_string_t *input, re_token_t *token,
56 reg_syntax_t syntax); 60 reg_syntax_t syntax);
57static int peek_token (re_token_t *token, re_string_t *input, 61static int peek_token (re_token_t *token, re_string_t *input,
58 reg_syntax_t syntax) internal_function; 62 reg_syntax_t syntax);
59static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, 63static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
60 reg_syntax_t syntax, reg_errcode_t *err); 64 reg_syntax_t syntax, reg_errcode_t *err);
61static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, 65static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
@@ -149,9 +153,9 @@ static const char __re_error_msgid[] =
149 gettext_noop ("Invalid back reference") /* REG_ESUBREG */ 153 gettext_noop ("Invalid back reference") /* REG_ESUBREG */
150 "\0" 154 "\0"
151#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") 155#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
152 gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ 156 gettext_noop ("Unmatched [, [^, [:, [., or [=") /* REG_EBRACK */
153 "\0" 157 "\0"
154#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") 158#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [, [^, [:, [., or [=")
155 gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ 159 gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
156 "\0" 160 "\0"
157#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") 161#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
@@ -209,17 +213,9 @@ static const size_t __re_error_msgid_idx[] =
209 Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields 213 Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields
210 are set in BUFP on entry. */ 214 are set in BUFP on entry. */
211 215
212#ifdef _LIBC
213const char *
214re_compile_pattern (pattern, length, bufp)
215 const char *pattern;
216 size_t length;
217 struct re_pattern_buffer *bufp;
218#else /* size_t might promote */
219const char * 216const char *
220re_compile_pattern (const char *pattern, size_t length, 217re_compile_pattern (const char *pattern, size_t length,
221 struct re_pattern_buffer *bufp) 218 struct re_pattern_buffer *bufp)
222#endif
223{ 219{
224 reg_errcode_t ret; 220 reg_errcode_t ret;
225 221
@@ -237,9 +233,7 @@ re_compile_pattern (const char *pattern, size_t length,
237 return NULL; 233 return NULL;
238 return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); 234 return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
239} 235}
240#ifdef _LIBC
241weak_alias (__re_compile_pattern, re_compile_pattern) 236weak_alias (__re_compile_pattern, re_compile_pattern)
242#endif
243 237
244/* Set by 're_set_syntax' to the current regexp syntax to recognize. Can 238/* Set by 're_set_syntax' to the current regexp syntax to recognize. Can
245 also be assigned to arbitrarily: each pattern buffer stores its own 239 also be assigned to arbitrarily: each pattern buffer stores its own
@@ -257,21 +251,17 @@ reg_syntax_t re_syntax_options;
257 defined in regex.h. We return the old syntax. */ 251 defined in regex.h. We return the old syntax. */
258 252
259reg_syntax_t 253reg_syntax_t
260re_set_syntax (syntax) 254re_set_syntax (reg_syntax_t syntax)
261 reg_syntax_t syntax;
262{ 255{
263 reg_syntax_t ret = re_syntax_options; 256 reg_syntax_t ret = re_syntax_options;
264 257
265 re_syntax_options = syntax; 258 re_syntax_options = syntax;
266 return ret; 259 return ret;
267} 260}
268#ifdef _LIBC
269weak_alias (__re_set_syntax, re_set_syntax) 261weak_alias (__re_set_syntax, re_set_syntax)
270#endif
271 262
272int 263int
273re_compile_fastmap (bufp) 264re_compile_fastmap (struct re_pattern_buffer *bufp)
274 struct re_pattern_buffer *bufp;
275{ 265{
276 re_dfa_t *dfa = bufp->buffer; 266 re_dfa_t *dfa = bufp->buffer;
277 char *fastmap = bufp->fastmap; 267 char *fastmap = bufp->fastmap;
@@ -287,9 +277,7 @@ re_compile_fastmap (bufp)
287 bufp->fastmap_accurate = 1; 277 bufp->fastmap_accurate = 1;
288 return 0; 278 return 0;
289} 279}
290#ifdef _LIBC
291weak_alias (__re_compile_fastmap, re_compile_fastmap) 280weak_alias (__re_compile_fastmap, re_compile_fastmap)
292#endif
293 281
294static inline void 282static inline void
295__attribute__ ((always_inline)) 283__attribute__ ((always_inline))
@@ -335,7 +323,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
335 memset (&state, '\0', sizeof (state)); 323 memset (&state, '\0', sizeof (state));
336 if (__mbrtowc (&wc, (const char *) buf, p - buf, 324 if (__mbrtowc (&wc, (const char *) buf, p - buf,
337 &state) == p - buf 325 &state) == p - buf
338 && (__wcrtomb ((char *) buf, towlower (wc), &state) 326 && (__wcrtomb ((char *) buf, __towlower (wc), &state)
339 != (size_t) -1)) 327 != (size_t) -1))
340 re_set_fastmap (fastmap, false, buf[0]); 328 re_set_fastmap (fastmap, false, buf[0]);
341 } 329 }
@@ -411,7 +399,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
411 re_set_fastmap (fastmap, icase, *(unsigned char *) buf); 399 re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
412 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) 400 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
413 { 401 {
414 if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) 402 if (__wcrtomb (buf, __towlower (cset->mbchars[i]), &state)
415 != (size_t) -1) 403 != (size_t) -1)
416 re_set_fastmap (fastmap, false, *(unsigned char *) buf); 404 re_set_fastmap (fastmap, false, *(unsigned char *) buf);
417 } 405 }
@@ -470,10 +458,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
470 the return codes and their meanings.) */ 458 the return codes and their meanings.) */
471 459
472int 460int
473regcomp (preg, pattern, cflags) 461regcomp (regex_t *__restrict preg, const char *__restrict pattern, int cflags)
474 regex_t *_Restrict_ preg;
475 const char *_Restrict_ pattern;
476 int cflags;
477{ 462{
478 reg_errcode_t ret; 463 reg_errcode_t ret;
479 reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED 464 reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
@@ -485,7 +470,7 @@ regcomp (preg, pattern, cflags)
485 470
486 /* Try to allocate space for the fastmap. */ 471 /* Try to allocate space for the fastmap. */
487 preg->fastmap = re_malloc (char, SBC_MAX); 472 preg->fastmap = re_malloc (char, SBC_MAX);
488 if (BE (preg->fastmap == NULL, 0)) 473 if (__glibc_unlikely (preg->fastmap == NULL))
489 return REG_ESPACE; 474 return REG_ESPACE;
490 475
491 syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0; 476 syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
@@ -511,7 +496,7 @@ regcomp (preg, pattern, cflags)
511 ret = REG_EPAREN; 496 ret = REG_EPAREN;
512 497
513 /* We have already checked preg->fastmap != NULL. */ 498 /* We have already checked preg->fastmap != NULL. */
514 if (BE (ret == REG_NOERROR, 1)) 499 if (__glibc_likely (ret == REG_NOERROR))
515 /* Compute the fastmap now, since regexec cannot modify the pattern 500 /* Compute the fastmap now, since regexec cannot modify the pattern
516 buffer. This function never fails in this implementation. */ 501 buffer. This function never fails in this implementation. */
517 (void) re_compile_fastmap (preg); 502 (void) re_compile_fastmap (preg);
@@ -524,32 +509,21 @@ regcomp (preg, pattern, cflags)
524 509
525 return (int) ret; 510 return (int) ret;
526} 511}
527#ifdef _LIBC 512libc_hidden_def (__regcomp)
528weak_alias (__regcomp, regcomp) 513weak_alias (__regcomp, regcomp)
529#endif
530 514
531/* Returns a message corresponding to an error code, ERRCODE, returned 515/* Returns a message corresponding to an error code, ERRCODE, returned
532 from either regcomp or regexec. We don't use PREG here. */ 516 from either regcomp or regexec. We don't use PREG here. */
533 517
534#ifdef _LIBC
535size_t
536regerror (errcode, preg, errbuf, errbuf_size)
537 int errcode;
538 const regex_t *_Restrict_ preg;
539 char *_Restrict_ errbuf;
540 size_t errbuf_size;
541#else /* size_t might promote */
542size_t 518size_t
543regerror (int errcode, const regex_t *_Restrict_ preg, 519regerror (int errcode, const regex_t *__restrict preg, char *__restrict errbuf,
544 char *_Restrict_ errbuf, size_t errbuf_size) 520 size_t errbuf_size)
545#endif
546{ 521{
547 const char *msg; 522 const char *msg;
548 size_t msg_size; 523 size_t msg_size;
524 int nerrcodes = sizeof __re_error_msgid_idx / sizeof __re_error_msgid_idx[0];
549 525
550 if (BE (errcode < 0 526 if (__glibc_unlikely (errcode < 0 || errcode >= nerrcodes))
551 || errcode >= (int) (sizeof (__re_error_msgid_idx)
552 / sizeof (__re_error_msgid_idx[0])), 0))
553 /* Only error codes returned by the rest of the code should be passed 527 /* Only error codes returned by the rest of the code should be passed
554 to this routine. If we are given anything else, or if other regex 528 to this routine. If we are given anything else, or if other regex
555 code generates an invalid error code, then the program has a bug. 529 code generates an invalid error code, then the program has a bug.
@@ -560,10 +534,10 @@ regerror (int errcode, const regex_t *_Restrict_ preg,
560 534
561 msg_size = strlen (msg) + 1; /* Includes the null. */ 535 msg_size = strlen (msg) + 1; /* Includes the null. */
562 536
563 if (BE (errbuf_size != 0, 1)) 537 if (__glibc_likely (errbuf_size != 0))
564 { 538 {
565 size_t cpy_size = msg_size; 539 size_t cpy_size = msg_size;
566 if (BE (msg_size > errbuf_size, 0)) 540 if (__glibc_unlikely (msg_size > errbuf_size))
567 { 541 {
568 cpy_size = errbuf_size - 1; 542 cpy_size = errbuf_size - 1;
569 errbuf[cpy_size] = '\0'; 543 errbuf[cpy_size] = '\0';
@@ -573,9 +547,7 @@ regerror (int errcode, const regex_t *_Restrict_ preg,
573 547
574 return msg_size; 548 return msg_size;
575} 549}
576#ifdef _LIBC
577weak_alias (__regerror, regerror) 550weak_alias (__regerror, regerror)
578#endif
579 551
580 552
581#ifdef RE_ENABLE_I18N 553#ifdef RE_ENABLE_I18N
@@ -586,7 +558,7 @@ weak_alias (__regerror, regerror)
586static const bitset_t utf8_sb_map = 558static const bitset_t utf8_sb_map =
587{ 559{
588 /* Set the first 128 bits. */ 560 /* Set the first 128 bits. */
589# if defined __GNUC__ && !defined __STRICT_ANSI__ 561# if (defined __GNUC__ || __clang_major__ >= 4) && !defined __STRICT_ANSI__
590 [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX 562 [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
591# else 563# else
592# if 4 * BITSET_WORD_BITS < ASCII_CHARS 564# if 4 * BITSET_WORD_BITS < ASCII_CHARS
@@ -658,11 +630,10 @@ free_dfa_content (re_dfa_t *dfa)
658/* Free dynamically allocated space used by PREG. */ 630/* Free dynamically allocated space used by PREG. */
659 631
660void 632void
661regfree (preg) 633regfree (regex_t *preg)
662 regex_t *preg;
663{ 634{
664 re_dfa_t *dfa = preg->buffer; 635 re_dfa_t *dfa = preg->buffer;
665 if (BE (dfa != NULL, 1)) 636 if (__glibc_likely (dfa != NULL))
666 { 637 {
667 lock_fini (dfa->lock); 638 lock_fini (dfa->lock);
668 free_dfa_content (dfa); 639 free_dfa_content (dfa);
@@ -676,9 +647,8 @@ regfree (preg)
676 re_free (preg->translate); 647 re_free (preg->translate);
677 preg->translate = NULL; 648 preg->translate = NULL;
678} 649}
679#ifdef _LIBC 650libc_hidden_def (__regfree)
680weak_alias (__regfree, regfree) 651weak_alias (__regfree, regfree)
681#endif
682 652
683/* Entry points compatible with 4.2 BSD regex library. We don't define 653/* Entry points compatible with 4.2 BSD regex library. We don't define
684 them unless specifically requested. */ 654 them unless specifically requested. */
@@ -695,8 +665,7 @@ char *
695 regcomp/regexec above without link errors. */ 665 regcomp/regexec above without link errors. */
696weak_function 666weak_function
697# endif 667# endif
698re_comp (s) 668re_comp (const char *s)
699 const char *s;
700{ 669{
701 reg_errcode_t ret; 670 reg_errcode_t ret;
702 char *fastmap; 671 char *fastmap;
@@ -719,7 +688,7 @@ re_comp (s)
719 688
720 if (re_comp_buf.fastmap == NULL) 689 if (re_comp_buf.fastmap == NULL)
721 { 690 {
722 re_comp_buf.fastmap = (char *) malloc (SBC_MAX); 691 re_comp_buf.fastmap = re_malloc (char, SBC_MAX);
723 if (re_comp_buf.fastmap == NULL) 692 if (re_comp_buf.fastmap == NULL)
724 return (char *) gettext (__re_error_msgid 693 return (char *) gettext (__re_error_msgid
725 + __re_error_msgid_idx[(int) REG_ESPACE]); 694 + __re_error_msgid_idx[(int) REG_ESPACE]);
@@ -772,7 +741,7 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length,
772 741
773 /* Initialize the dfa. */ 742 /* Initialize the dfa. */
774 dfa = preg->buffer; 743 dfa = preg->buffer;
775 if (BE (preg->allocated < sizeof (re_dfa_t), 0)) 744 if (__glibc_unlikely (preg->allocated < sizeof (re_dfa_t)))
776 { 745 {
777 /* If zero allocated, but buffer is non-null, try to realloc 746 /* If zero allocated, but buffer is non-null, try to realloc
778 enough space. This loses if buffer's address is bogus, but 747 enough space. This loses if buffer's address is bogus, but
@@ -787,9 +756,9 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length,
787 preg->used = sizeof (re_dfa_t); 756 preg->used = sizeof (re_dfa_t);
788 757
789 err = init_dfa (dfa, length); 758 err = init_dfa (dfa, length);
790 if (BE (err == REG_NOERROR && lock_init (dfa->lock) != 0, 0)) 759 if (__glibc_unlikely (err == REG_NOERROR && lock_init (dfa->lock) != 0))
791 err = REG_ESPACE; 760 err = REG_ESPACE;
792 if (BE (err != REG_NOERROR, 0)) 761 if (__glibc_unlikely (err != REG_NOERROR))
793 { 762 {
794 free_dfa_content (dfa); 763 free_dfa_content (dfa);
795 preg->buffer = NULL; 764 preg->buffer = NULL;
@@ -804,7 +773,7 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length,
804 773
805 err = re_string_construct (&regexp, pattern, length, preg->translate, 774 err = re_string_construct (&regexp, pattern, length, preg->translate,
806 (syntax & RE_ICASE) != 0, dfa); 775 (syntax & RE_ICASE) != 0, dfa);
807 if (BE (err != REG_NOERROR, 0)) 776 if (__glibc_unlikely (err != REG_NOERROR))
808 { 777 {
809 re_compile_internal_free_return: 778 re_compile_internal_free_return:
810 free_workarea_compile (preg); 779 free_workarea_compile (preg);
@@ -819,12 +788,12 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length,
819 /* Parse the regular expression, and build a structure tree. */ 788 /* Parse the regular expression, and build a structure tree. */
820 preg->re_nsub = 0; 789 preg->re_nsub = 0;
821 dfa->str_tree = parse (&regexp, preg, syntax, &err); 790 dfa->str_tree = parse (&regexp, preg, syntax, &err);
822 if (BE (dfa->str_tree == NULL, 0)) 791 if (__glibc_unlikely (dfa->str_tree == NULL))
823 goto re_compile_internal_free_return; 792 goto re_compile_internal_free_return;
824 793
825 /* Analyze the tree and create the nfa. */ 794 /* Analyze the tree and create the nfa. */
826 err = analyze (preg); 795 err = analyze (preg);
827 if (BE (err != REG_NOERROR, 0)) 796 if (__glibc_unlikely (err != REG_NOERROR))
828 goto re_compile_internal_free_return; 797 goto re_compile_internal_free_return;
829 798
830#ifdef RE_ENABLE_I18N 799#ifdef RE_ENABLE_I18N
@@ -840,7 +809,7 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length,
840 free_workarea_compile (preg); 809 free_workarea_compile (preg);
841 re_string_destruct (&regexp); 810 re_string_destruct (&regexp);
842 811
843 if (BE (err != REG_NOERROR, 0)) 812 if (__glibc_unlikely (err != REG_NOERROR))
844 { 813 {
845 lock_fini (dfa->lock); 814 lock_fini (dfa->lock);
846 free_dfa_content (dfa); 815 free_dfa_content (dfa);
@@ -882,7 +851,8 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
882 calculation below, and for similar doubling calculations 851 calculation below, and for similar doubling calculations
883 elsewhere. And it's <= rather than <, because some of the 852 elsewhere. And it's <= rather than <, because some of the
884 doubling calculations add 1 afterwards. */ 853 doubling calculations add 1 afterwards. */
885 if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) / 2 <= pat_len, 0)) 854 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) / 2
855 <= pat_len))
886 return REG_ESPACE; 856 return REG_ESPACE;
887 857
888 dfa->nodes_alloc = pat_len + 1; 858 dfa->nodes_alloc = pat_len + 1;
@@ -926,7 +896,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
926 int i, j, ch; 896 int i, j, ch;
927 897
928 dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); 898 dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
929 if (BE (dfa->sb_char == NULL, 0)) 899 if (__glibc_unlikely (dfa->sb_char == NULL))
930 return REG_ESPACE; 900 return REG_ESPACE;
931 901
932 /* Set the bits corresponding to single byte chars. */ 902 /* Set the bits corresponding to single byte chars. */
@@ -945,7 +915,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
945 } 915 }
946#endif 916#endif
947 917
948 if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0)) 918 if (__glibc_unlikely (dfa->nodes == NULL || dfa->state_table == NULL))
949 return REG_ESPACE; 919 return REG_ESPACE;
950 return REG_NOERROR; 920 return REG_NOERROR;
951} 921}
@@ -955,21 +925,23 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
955 character used by some operators like "\<", "\>", etc. */ 925 character used by some operators like "\<", "\>", etc. */
956 926
957static void 927static void
958internal_function
959init_word_char (re_dfa_t *dfa) 928init_word_char (re_dfa_t *dfa)
960{ 929{
961 int i = 0; 930 int i = 0;
962 int j; 931 int j;
963 int ch = 0; 932 int ch = 0;
964 dfa->word_ops_used = 1; 933 dfa->word_ops_used = 1;
965 if (BE (dfa->map_notascii == 0, 1)) 934 if (__glibc_likely (dfa->map_notascii == 0))
966 { 935 {
936 /* Avoid uint32_t and uint64_t as some non-GCC platforms lack
937 them, an issue when this code is used in Gnulib. */
967 bitset_word_t bits0 = 0x00000000; 938 bitset_word_t bits0 = 0x00000000;
968 bitset_word_t bits1 = 0x03ff0000; 939 bitset_word_t bits1 = 0x03ff0000;
969 bitset_word_t bits2 = 0x87fffffe; 940 bitset_word_t bits2 = 0x87fffffe;
970 bitset_word_t bits3 = 0x07fffffe; 941 bitset_word_t bits3 = 0x07fffffe;
971 if (BITSET_WORD_BITS == 64) 942 if (BITSET_WORD_BITS == 64)
972 { 943 {
944 /* Pacify gcc -Woverflow on 32-bit platforms. */
973 dfa->word_char[0] = bits1 << 31 << 1 | bits0; 945 dfa->word_char[0] = bits1 << 31 << 1 | bits0;
974 dfa->word_char[1] = bits3 << 31 << 1 | bits2; 946 dfa->word_char[1] = bits3 << 31 << 1 | bits2;
975 i = 2; 947 i = 2;
@@ -986,7 +958,7 @@ init_word_char (re_dfa_t *dfa)
986 goto general_case; 958 goto general_case;
987 ch = 128; 959 ch = 128;
988 960
989 if (BE (dfa->is_utf8, 1)) 961 if (__glibc_likely (dfa->is_utf8))
990 { 962 {
991 memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8); 963 memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8);
992 return; 964 return;
@@ -1033,7 +1005,7 @@ create_initial_state (re_dfa_t *dfa)
1033 first = dfa->str_tree->first->node_idx; 1005 first = dfa->str_tree->first->node_idx;
1034 dfa->init_node = first; 1006 dfa->init_node = first;
1035 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first); 1007 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
1036 if (BE (err != REG_NOERROR, 0)) 1008 if (__glibc_unlikely (err != REG_NOERROR))
1037 return err; 1009 return err;
1038 1010
1039 /* The back-references which are in initial states can epsilon transit, 1011 /* The back-references which are in initial states can epsilon transit,
@@ -1077,7 +1049,7 @@ create_initial_state (re_dfa_t *dfa)
1077 /* It must be the first time to invoke acquire_state. */ 1049 /* It must be the first time to invoke acquire_state. */
1078 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); 1050 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
1079 /* We don't check ERR here, since the initial state must not be NULL. */ 1051 /* We don't check ERR here, since the initial state must not be NULL. */
1080 if (BE (dfa->init_state == NULL, 0)) 1052 if (__glibc_unlikely (dfa->init_state == NULL))
1081 return err; 1053 return err;
1082 if (dfa->init_state->has_constraint) 1054 if (dfa->init_state->has_constraint)
1083 { 1055 {
@@ -1089,8 +1061,9 @@ create_initial_state (re_dfa_t *dfa)
1089 &init_nodes, 1061 &init_nodes,
1090 CONTEXT_NEWLINE 1062 CONTEXT_NEWLINE
1091 | CONTEXT_BEGBUF); 1063 | CONTEXT_BEGBUF);
1092 if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL 1064 if (__glibc_unlikely (dfa->init_state_word == NULL
1093 || dfa->init_state_begbuf == NULL, 0)) 1065 || dfa->init_state_nl == NULL
1066 || dfa->init_state_begbuf == NULL))
1094 return err; 1067 return err;
1095 } 1068 }
1096 else 1069 else
@@ -1197,8 +1170,8 @@ analyze (regex_t *preg)
1197 dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc); 1170 dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc);
1198 dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); 1171 dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
1199 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); 1172 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
1200 if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL 1173 if (__glibc_unlikely (dfa->nexts == NULL || dfa->org_indices == NULL
1201 || dfa->eclosures == NULL, 0)) 1174 || dfa->edests == NULL || dfa->eclosures == NULL))
1202 return REG_ESPACE; 1175 return REG_ESPACE;
1203 1176
1204 dfa->subexp_map = re_malloc (Idx, preg->re_nsub); 1177 dfa->subexp_map = re_malloc (Idx, preg->re_nsub);
@@ -1213,23 +1186,23 @@ analyze (regex_t *preg)
1213 break; 1186 break;
1214 if (i == preg->re_nsub) 1187 if (i == preg->re_nsub)
1215 { 1188 {
1216 free (dfa->subexp_map); 1189 re_free (dfa->subexp_map);
1217 dfa->subexp_map = NULL; 1190 dfa->subexp_map = NULL;
1218 } 1191 }
1219 } 1192 }
1220 1193
1221 ret = postorder (dfa->str_tree, lower_subexps, preg); 1194 ret = postorder (dfa->str_tree, lower_subexps, preg);
1222 if (BE (ret != REG_NOERROR, 0)) 1195 if (__glibc_unlikely (ret != REG_NOERROR))
1223 return ret; 1196 return ret;
1224 ret = postorder (dfa->str_tree, calc_first, dfa); 1197 ret = postorder (dfa->str_tree, calc_first, dfa);
1225 if (BE (ret != REG_NOERROR, 0)) 1198 if (__glibc_unlikely (ret != REG_NOERROR))
1226 return ret; 1199 return ret;
1227 preorder (dfa->str_tree, calc_next, dfa); 1200 preorder (dfa->str_tree, calc_next, dfa);
1228 ret = preorder (dfa->str_tree, link_nfa_nodes, dfa); 1201 ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
1229 if (BE (ret != REG_NOERROR, 0)) 1202 if (__glibc_unlikely (ret != REG_NOERROR))
1230 return ret; 1203 return ret;
1231 ret = calc_eclosure (dfa); 1204 ret = calc_eclosure (dfa);
1232 if (BE (ret != REG_NOERROR, 0)) 1205 if (__glibc_unlikely (ret != REG_NOERROR))
1233 return ret; 1206 return ret;
1234 1207
1235 /* We only need this during the prune_impossible_nodes pass in regexec.c; 1208 /* We only need this during the prune_impossible_nodes pass in regexec.c;
@@ -1238,7 +1211,7 @@ analyze (regex_t *preg)
1238 || dfa->nbackref) 1211 || dfa->nbackref)
1239 { 1212 {
1240 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); 1213 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
1241 if (BE (dfa->inveclosures == NULL, 0)) 1214 if (__glibc_unlikely (dfa->inveclosures == NULL))
1242 return REG_ESPACE; 1215 return REG_ESPACE;
1243 ret = calc_inveclosure (dfa); 1216 ret = calc_inveclosure (dfa);
1244 } 1217 }
@@ -1268,7 +1241,7 @@ postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
1268 do 1241 do
1269 { 1242 {
1270 reg_errcode_t err = fn (extra, node); 1243 reg_errcode_t err = fn (extra, node);
1271 if (BE (err != REG_NOERROR, 0)) 1244 if (__glibc_unlikely (err != REG_NOERROR))
1272 return err; 1245 return err;
1273 if (node->parent == NULL) 1246 if (node->parent == NULL)
1274 return REG_NOERROR; 1247 return REG_NOERROR;
@@ -1290,7 +1263,7 @@ preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
1290 for (node = root; ; ) 1263 for (node = root; ; )
1291 { 1264 {
1292 reg_errcode_t err = fn (extra, node); 1265 reg_errcode_t err = fn (extra, node);
1293 if (BE (err != REG_NOERROR, 0)) 1266 if (__glibc_unlikely (err != REG_NOERROR))
1294 return err; 1267 return err;
1295 1268
1296 /* Go to the left node, or up and to the right. */ 1269 /* Go to the left node, or up and to the right. */
@@ -1391,7 +1364,8 @@ lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
1391 cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP); 1364 cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
1392 tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls; 1365 tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
1393 tree = create_tree (dfa, op, tree1, CONCAT); 1366 tree = create_tree (dfa, op, tree1, CONCAT);
1394 if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0)) 1367 if (__glibc_unlikely (tree == NULL || tree1 == NULL
1368 || op == NULL || cls == NULL))
1395 { 1369 {
1396 *err = REG_ESPACE; 1370 *err = REG_ESPACE;
1397 return NULL; 1371 return NULL;
@@ -1417,7 +1391,7 @@ calc_first (void *extra, bin_tree_t *node)
1417 { 1391 {
1418 node->first = node; 1392 node->first = node;
1419 node->node_idx = re_dfa_add_node (dfa, node->token); 1393 node->node_idx = re_dfa_add_node (dfa, node->token);
1420 if (BE (node->node_idx == REG_MISSING, 0)) 1394 if (__glibc_unlikely (node->node_idx == -1))
1421 return REG_ESPACE; 1395 return REG_ESPACE;
1422 if (node->token.type == ANCHOR) 1396 if (node->token.type == ANCHOR)
1423 dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type; 1397 dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
@@ -1462,7 +1436,7 @@ link_nfa_nodes (void *extra, bin_tree_t *node)
1462 break; 1436 break;
1463 1437
1464 case END_OF_RE: 1438 case END_OF_RE:
1465 assert (node->next == NULL); 1439 DEBUG_ASSERT (node->next == NULL);
1466 break; 1440 break;
1467 1441
1468 case OP_DUP_ASTERISK: 1442 case OP_DUP_ASTERISK:
@@ -1478,8 +1452,8 @@ link_nfa_nodes (void *extra, bin_tree_t *node)
1478 right = node->right->first->node_idx; 1452 right = node->right->first->node_idx;
1479 else 1453 else
1480 right = node->next->node_idx; 1454 right = node->next->node_idx;
1481 assert (REG_VALID_INDEX (left)); 1455 DEBUG_ASSERT (left > -1);
1482 assert (REG_VALID_INDEX (right)); 1456 DEBUG_ASSERT (right > -1);
1483 err = re_node_set_init_2 (dfa->edests + idx, left, right); 1457 err = re_node_set_init_2 (dfa->edests + idx, left, right);
1484 } 1458 }
1485 break; 1459 break;
@@ -1497,7 +1471,7 @@ link_nfa_nodes (void *extra, bin_tree_t *node)
1497 break; 1471 break;
1498 1472
1499 default: 1473 default:
1500 assert (!IS_EPSILON_NODE (node->token.type)); 1474 DEBUG_ASSERT (!IS_EPSILON_NODE (node->token.type));
1501 dfa->nexts[idx] = node->next->node_idx; 1475 dfa->nexts[idx] = node->next->node_idx;
1502 break; 1476 break;
1503 } 1477 }
@@ -1510,7 +1484,6 @@ link_nfa_nodes (void *extra, bin_tree_t *node)
1510 to their own constraint. */ 1484 to their own constraint. */
1511 1485
1512static reg_errcode_t 1486static reg_errcode_t
1513internal_function
1514duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node, 1487duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
1515 Idx root_node, unsigned int init_constraint) 1488 Idx root_node, unsigned int init_constraint)
1516{ 1489{
@@ -1529,11 +1502,11 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
1529 org_dest = dfa->nexts[org_node]; 1502 org_dest = dfa->nexts[org_node];
1530 re_node_set_empty (dfa->edests + clone_node); 1503 re_node_set_empty (dfa->edests + clone_node);
1531 clone_dest = duplicate_node (dfa, org_dest, constraint); 1504 clone_dest = duplicate_node (dfa, org_dest, constraint);
1532 if (BE (clone_dest == REG_MISSING, 0)) 1505 if (__glibc_unlikely (clone_dest == -1))
1533 return REG_ESPACE; 1506 return REG_ESPACE;
1534 dfa->nexts[clone_node] = dfa->nexts[org_node]; 1507 dfa->nexts[clone_node] = dfa->nexts[org_node];
1535 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1508 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1536 if (BE (! ok, 0)) 1509 if (__glibc_unlikely (! ok))
1537 return REG_ESPACE; 1510 return REG_ESPACE;
1538 } 1511 }
1539 else if (dfa->edests[org_node].nelem == 0) 1512 else if (dfa->edests[org_node].nelem == 0)
@@ -1555,17 +1528,17 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
1555 if (org_node == root_node && clone_node != org_node) 1528 if (org_node == root_node && clone_node != org_node)
1556 { 1529 {
1557 ok = re_node_set_insert (dfa->edests + clone_node, org_dest); 1530 ok = re_node_set_insert (dfa->edests + clone_node, org_dest);
1558 if (BE (! ok, 0)) 1531 if (__glibc_unlikely (! ok))
1559 return REG_ESPACE; 1532 return REG_ESPACE;
1560 break; 1533 break;
1561 } 1534 }
1562 /* In case the node has another constraint, append it. */ 1535 /* In case the node has another constraint, append it. */
1563 constraint |= dfa->nodes[org_node].constraint; 1536 constraint |= dfa->nodes[org_node].constraint;
1564 clone_dest = duplicate_node (dfa, org_dest, constraint); 1537 clone_dest = duplicate_node (dfa, org_dest, constraint);
1565 if (BE (clone_dest == REG_MISSING, 0)) 1538 if (__glibc_unlikely (clone_dest == -1))
1566 return REG_ESPACE; 1539 return REG_ESPACE;
1567 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1540 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1568 if (BE (! ok, 0)) 1541 if (__glibc_unlikely (! ok))
1569 return REG_ESPACE; 1542 return REG_ESPACE;
1570 } 1543 }
1571 else /* dfa->edests[org_node].nelem == 2 */ 1544 else /* dfa->edests[org_node].nelem == 2 */
@@ -1576,19 +1549,19 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
1576 re_node_set_empty (dfa->edests + clone_node); 1549 re_node_set_empty (dfa->edests + clone_node);
1577 /* Search for a duplicated node which satisfies the constraint. */ 1550 /* Search for a duplicated node which satisfies the constraint. */
1578 clone_dest = search_duplicated_node (dfa, org_dest, constraint); 1551 clone_dest = search_duplicated_node (dfa, org_dest, constraint);
1579 if (clone_dest == REG_MISSING) 1552 if (clone_dest == -1)
1580 { 1553 {
1581 /* There is no such duplicated node, create a new one. */ 1554 /* There is no such duplicated node, create a new one. */
1582 reg_errcode_t err; 1555 reg_errcode_t err;
1583 clone_dest = duplicate_node (dfa, org_dest, constraint); 1556 clone_dest = duplicate_node (dfa, org_dest, constraint);
1584 if (BE (clone_dest == REG_MISSING, 0)) 1557 if (__glibc_unlikely (clone_dest == -1))
1585 return REG_ESPACE; 1558 return REG_ESPACE;
1586 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1559 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1587 if (BE (! ok, 0)) 1560 if (__glibc_unlikely (! ok))
1588 return REG_ESPACE; 1561 return REG_ESPACE;
1589 err = duplicate_node_closure (dfa, org_dest, clone_dest, 1562 err = duplicate_node_closure (dfa, org_dest, clone_dest,
1590 root_node, constraint); 1563 root_node, constraint);
1591 if (BE (err != REG_NOERROR, 0)) 1564 if (__glibc_unlikely (err != REG_NOERROR))
1592 return err; 1565 return err;
1593 } 1566 }
1594 else 1567 else
@@ -1596,16 +1569,16 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
1596 /* There is a duplicated node which satisfies the constraint, 1569 /* There is a duplicated node which satisfies the constraint,
1597 use it to avoid infinite loop. */ 1570 use it to avoid infinite loop. */
1598 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1571 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1599 if (BE (! ok, 0)) 1572 if (__glibc_unlikely (! ok))
1600 return REG_ESPACE; 1573 return REG_ESPACE;
1601 } 1574 }
1602 1575
1603 org_dest = dfa->edests[org_node].elems[1]; 1576 org_dest = dfa->edests[org_node].elems[1];
1604 clone_dest = duplicate_node (dfa, org_dest, constraint); 1577 clone_dest = duplicate_node (dfa, org_dest, constraint);
1605 if (BE (clone_dest == REG_MISSING, 0)) 1578 if (__glibc_unlikely (clone_dest == -1))
1606 return REG_ESPACE; 1579 return REG_ESPACE;
1607 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1580 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1608 if (BE (! ok, 0)) 1581 if (__glibc_unlikely (! ok))
1609 return REG_ESPACE; 1582 return REG_ESPACE;
1610 } 1583 }
1611 org_node = org_dest; 1584 org_node = org_dest;
@@ -1628,18 +1601,18 @@ search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
1628 && constraint == dfa->nodes[idx].constraint) 1601 && constraint == dfa->nodes[idx].constraint)
1629 return idx; /* Found. */ 1602 return idx; /* Found. */
1630 } 1603 }
1631 return REG_MISSING; /* Not found. */ 1604 return -1; /* Not found. */
1632} 1605}
1633 1606
1634/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. 1607/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
1635 Return the index of the new node, or REG_MISSING if insufficient storage is 1608 Return the index of the new node, or -1 if insufficient storage is
1636 available. */ 1609 available. */
1637 1610
1638static Idx 1611static Idx
1639duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint) 1612duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint)
1640{ 1613{
1641 Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); 1614 Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
1642 if (BE (dup_idx != REG_MISSING, 1)) 1615 if (__glibc_likely (dup_idx != -1))
1643 { 1616 {
1644 dfa->nodes[dup_idx].constraint = constraint; 1617 dfa->nodes[dup_idx].constraint = constraint;
1645 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint; 1618 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint;
@@ -1665,7 +1638,7 @@ calc_inveclosure (re_dfa_t *dfa)
1665 for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) 1638 for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
1666 { 1639 {
1667 ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src); 1640 ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
1668 if (BE (! ok, 0)) 1641 if (__glibc_unlikely (! ok))
1669 return REG_ESPACE; 1642 return REG_ESPACE;
1670 } 1643 }
1671 } 1644 }
@@ -1680,9 +1653,7 @@ calc_eclosure (re_dfa_t *dfa)
1680{ 1653{
1681 Idx node_idx; 1654 Idx node_idx;
1682 bool incomplete; 1655 bool incomplete;
1683#ifdef DEBUG 1656 DEBUG_ASSERT (dfa->nodes_len > 0);
1684 assert (dfa->nodes_len > 0);
1685#endif
1686 incomplete = false; 1657 incomplete = false;
1687 /* For each nodes, calculate epsilon closure. */ 1658 /* For each nodes, calculate epsilon closure. */
1688 for (node_idx = 0; ; ++node_idx) 1659 for (node_idx = 0; ; ++node_idx)
@@ -1697,16 +1668,14 @@ calc_eclosure (re_dfa_t *dfa)
1697 node_idx = 0; 1668 node_idx = 0;
1698 } 1669 }
1699 1670
1700#ifdef DEBUG 1671 DEBUG_ASSERT (dfa->eclosures[node_idx].nelem != -1);
1701 assert (dfa->eclosures[node_idx].nelem != REG_MISSING);
1702#endif
1703 1672
1704 /* If we have already calculated, skip it. */ 1673 /* If we have already calculated, skip it. */
1705 if (dfa->eclosures[node_idx].nelem != 0) 1674 if (dfa->eclosures[node_idx].nelem != 0)
1706 continue; 1675 continue;
1707 /* Calculate epsilon closure of 'node_idx'. */ 1676 /* Calculate epsilon closure of 'node_idx'. */
1708 err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true); 1677 err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true);
1709 if (BE (err != REG_NOERROR, 0)) 1678 if (__glibc_unlikely (err != REG_NOERROR))
1710 return err; 1679 return err;
1711 1680
1712 if (dfa->eclosures[node_idx].nelem == 0) 1681 if (dfa->eclosures[node_idx].nelem == 0)
@@ -1726,15 +1695,17 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
1726 reg_errcode_t err; 1695 reg_errcode_t err;
1727 Idx i; 1696 Idx i;
1728 re_node_set eclosure; 1697 re_node_set eclosure;
1729 bool ok;
1730 bool incomplete = false; 1698 bool incomplete = false;
1731 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); 1699 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
1732 if (BE (err != REG_NOERROR, 0)) 1700 if (__glibc_unlikely (err != REG_NOERROR))
1733 return err; 1701 return err;
1734 1702
1703 /* An epsilon closure includes itself. */
1704 eclosure.elems[eclosure.nelem++] = node;
1705
1735 /* This indicates that we are calculating this node now. 1706 /* This indicates that we are calculating this node now.
1736 We reference this value to avoid infinite loop. */ 1707 We reference this value to avoid infinite loop. */
1737 dfa->eclosures[node].nelem = REG_MISSING; 1708 dfa->eclosures[node].nelem = -1;
1738 1709
1739 /* If the current node has constraints, duplicate all nodes 1710 /* If the current node has constraints, duplicate all nodes
1740 since they must inherit the constraints. */ 1711 since they must inherit the constraints. */
@@ -1744,7 +1715,7 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
1744 { 1715 {
1745 err = duplicate_node_closure (dfa, node, node, node, 1716 err = duplicate_node_closure (dfa, node, node, node,
1746 dfa->nodes[node].constraint); 1717 dfa->nodes[node].constraint);
1747 if (BE (err != REG_NOERROR, 0)) 1718 if (__glibc_unlikely (err != REG_NOERROR))
1748 return err; 1719 return err;
1749 } 1720 }
1750 1721
@@ -1756,7 +1727,7 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
1756 Idx edest = dfa->edests[node].elems[i]; 1727 Idx edest = dfa->edests[node].elems[i];
1757 /* If calculating the epsilon closure of 'edest' is in progress, 1728 /* If calculating the epsilon closure of 'edest' is in progress,
1758 return intermediate result. */ 1729 return intermediate result. */
1759 if (dfa->eclosures[edest].nelem == REG_MISSING) 1730 if (dfa->eclosures[edest].nelem == -1)
1760 { 1731 {
1761 incomplete = true; 1732 incomplete = true;
1762 continue; 1733 continue;
@@ -1766,14 +1737,14 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
1766 if (dfa->eclosures[edest].nelem == 0) 1737 if (dfa->eclosures[edest].nelem == 0)
1767 { 1738 {
1768 err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false); 1739 err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false);
1769 if (BE (err != REG_NOERROR, 0)) 1740 if (__glibc_unlikely (err != REG_NOERROR))
1770 return err; 1741 return err;
1771 } 1742 }
1772 else 1743 else
1773 eclosure_elem = dfa->eclosures[edest]; 1744 eclosure_elem = dfa->eclosures[edest];
1774 /* Merge the epsilon closure of 'edest'. */ 1745 /* Merge the epsilon closure of 'edest'. */
1775 err = re_node_set_merge (&eclosure, &eclosure_elem); 1746 err = re_node_set_merge (&eclosure, &eclosure_elem);
1776 if (BE (err != REG_NOERROR, 0)) 1747 if (__glibc_unlikely (err != REG_NOERROR))
1777 return err; 1748 return err;
1778 /* If the epsilon closure of 'edest' is incomplete, 1749 /* If the epsilon closure of 'edest' is incomplete,
1779 the epsilon closure of this node is also incomplete. */ 1750 the epsilon closure of this node is also incomplete. */
@@ -1784,10 +1755,6 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
1784 } 1755 }
1785 } 1756 }
1786 1757
1787 /* An epsilon closure includes itself. */
1788 ok = re_node_set_insert (&eclosure, node);
1789 if (BE (! ok, 0))
1790 return REG_ESPACE;
1791 if (incomplete && !root) 1758 if (incomplete && !root)
1792 dfa->eclosures[node].nelem = 0; 1759 dfa->eclosures[node].nelem = 0;
1793 else 1760 else
@@ -1802,7 +1769,6 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
1802 We must not use this function inside bracket expressions. */ 1769 We must not use this function inside bracket expressions. */
1803 1770
1804static void 1771static void
1805internal_function
1806fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax) 1772fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
1807{ 1773{
1808 re_string_skip_bytes (input, peek_token (result, input, syntax)); 1774 re_string_skip_bytes (input, peek_token (result, input, syntax));
@@ -1812,7 +1778,6 @@ fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
1812 We must not use this function inside bracket expressions. */ 1778 We must not use this function inside bracket expressions. */
1813 1779
1814static int 1780static int
1815internal_function
1816peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) 1781peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1817{ 1782{
1818 unsigned char c; 1783 unsigned char c;
@@ -1829,8 +1794,8 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1829 token->word_char = 0; 1794 token->word_char = 0;
1830#ifdef RE_ENABLE_I18N 1795#ifdef RE_ENABLE_I18N
1831 token->mb_partial = 0; 1796 token->mb_partial = 0;
1832 if (input->mb_cur_max > 1 && 1797 if (input->mb_cur_max > 1
1833 !re_string_first_byte (input, re_string_cur_idx (input))) 1798 && !re_string_first_byte (input, re_string_cur_idx (input)))
1834 { 1799 {
1835 token->type = CHARACTER; 1800 token->type = CHARACTER;
1836 token->mb_partial = 1; 1801 token->mb_partial = 1;
@@ -2017,8 +1982,8 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
2017 token->type = OP_PERIOD; 1982 token->type = OP_PERIOD;
2018 break; 1983 break;
2019 case '^': 1984 case '^':
2020 if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) && 1985 if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE))
2021 re_string_cur_idx (input) != 0) 1986 && re_string_cur_idx (input) != 0)
2022 { 1987 {
2023 char prev = re_string_peek_byte (input, -1); 1988 char prev = re_string_peek_byte (input, -1);
2024 if (!(syntax & RE_NEWLINE_ALT) || prev != '\n') 1989 if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
@@ -2028,8 +1993,8 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
2028 token->opr.ctx_type = LINE_FIRST; 1993 token->opr.ctx_type = LINE_FIRST;
2029 break; 1994 break;
2030 case '$': 1995 case '$':
2031 if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && 1996 if (!(syntax & RE_CONTEXT_INDEP_ANCHORS)
2032 re_string_cur_idx (input) + 1 != re_string_length (input)) 1997 && re_string_cur_idx (input) + 1 != re_string_length (input))
2033 { 1998 {
2034 re_token_t next; 1999 re_token_t next;
2035 re_string_skip_bytes (input, 1); 2000 re_string_skip_bytes (input, 1);
@@ -2051,7 +2016,6 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
2051 We must not use this function out of bracket expressions. */ 2016 We must not use this function out of bracket expressions. */
2052 2017
2053static int 2018static int
2054internal_function
2055peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) 2019peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
2056{ 2020{
2057 unsigned char c; 2021 unsigned char c;
@@ -2064,8 +2028,8 @@ peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
2064 token->opr.c = c; 2028 token->opr.c = c;
2065 2029
2066#ifdef RE_ENABLE_I18N 2030#ifdef RE_ENABLE_I18N
2067 if (input->mb_cur_max > 1 && 2031 if (input->mb_cur_max > 1
2068 !re_string_first_byte (input, re_string_cur_idx (input))) 2032 && !re_string_first_byte (input, re_string_cur_idx (input)))
2069 { 2033 {
2070 token->type = CHARACTER; 2034 token->type = CHARACTER;
2071 return 1; 2035 return 1;
@@ -2098,16 +2062,18 @@ peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
2098 case '.': 2062 case '.':
2099 token->type = OP_OPEN_COLL_ELEM; 2063 token->type = OP_OPEN_COLL_ELEM;
2100 break; 2064 break;
2065
2101 case '=': 2066 case '=':
2102 token->type = OP_OPEN_EQUIV_CLASS; 2067 token->type = OP_OPEN_EQUIV_CLASS;
2103 break; 2068 break;
2069
2104 case ':': 2070 case ':':
2105 if (syntax & RE_CHAR_CLASSES) 2071 if (syntax & RE_CHAR_CLASSES)
2106 { 2072 {
2107 token->type = OP_OPEN_CHAR_CLASS; 2073 token->type = OP_OPEN_CHAR_CLASS;
2108 break; 2074 break;
2109 } 2075 }
2110 /* else fall through. */ 2076 FALLTHROUGH;
2111 default: 2077 default:
2112 token->type = CHARACTER; 2078 token->type = CHARACTER;
2113 token->opr.c = c; 2079 token->opr.c = c;
@@ -2157,14 +2123,14 @@ parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
2157 dfa->syntax = syntax; 2123 dfa->syntax = syntax;
2158 fetch_token (&current_token, regexp, syntax | RE_CARET_ANCHORS_HERE); 2124 fetch_token (&current_token, regexp, syntax | RE_CARET_ANCHORS_HERE);
2159 tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err); 2125 tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
2160 if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2126 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL))
2161 return NULL; 2127 return NULL;
2162 eor = create_tree (dfa, NULL, NULL, END_OF_RE); 2128 eor = create_tree (dfa, NULL, NULL, END_OF_RE);
2163 if (tree != NULL) 2129 if (tree != NULL)
2164 root = create_tree (dfa, tree, eor, CONCAT); 2130 root = create_tree (dfa, tree, eor, CONCAT);
2165 else 2131 else
2166 root = eor; 2132 root = eor;
2167 if (BE (eor == NULL || root == NULL, 0)) 2133 if (__glibc_unlikely (eor == NULL || root == NULL))
2168 { 2134 {
2169 *err = REG_ESPACE; 2135 *err = REG_ESPACE;
2170 return NULL; 2136 return NULL;
@@ -2187,8 +2153,9 @@ parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2187{ 2153{
2188 re_dfa_t *dfa = preg->buffer; 2154 re_dfa_t *dfa = preg->buffer;
2189 bin_tree_t *tree, *branch = NULL; 2155 bin_tree_t *tree, *branch = NULL;
2156 bitset_word_t initial_bkref_map = dfa->completed_bkref_map;
2190 tree = parse_branch (regexp, preg, token, syntax, nest, err); 2157 tree = parse_branch (regexp, preg, token, syntax, nest, err);
2191 if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2158 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL))
2192 return NULL; 2159 return NULL;
2193 2160
2194 while (token->type == OP_ALT) 2161 while (token->type == OP_ALT)
@@ -2197,14 +2164,21 @@ parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2197 if (token->type != OP_ALT && token->type != END_OF_RE 2164 if (token->type != OP_ALT && token->type != END_OF_RE
2198 && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) 2165 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
2199 { 2166 {
2167 bitset_word_t accumulated_bkref_map = dfa->completed_bkref_map;
2168 dfa->completed_bkref_map = initial_bkref_map;
2200 branch = parse_branch (regexp, preg, token, syntax, nest, err); 2169 branch = parse_branch (regexp, preg, token, syntax, nest, err);
2201 if (BE (*err != REG_NOERROR && branch == NULL, 0)) 2170 if (__glibc_unlikely (*err != REG_NOERROR && branch == NULL))
2202 return NULL; 2171 {
2172 if (tree != NULL)
2173 postorder (tree, free_tree, NULL);
2174 return NULL;
2175 }
2176 dfa->completed_bkref_map |= accumulated_bkref_map;
2203 } 2177 }
2204 else 2178 else
2205 branch = NULL; 2179 branch = NULL;
2206 tree = create_tree (dfa, tree, branch, OP_ALT); 2180 tree = create_tree (dfa, tree, branch, OP_ALT);
2207 if (BE (tree == NULL, 0)) 2181 if (__glibc_unlikely (tree == NULL))
2208 { 2182 {
2209 *err = REG_ESPACE; 2183 *err = REG_ESPACE;
2210 return NULL; 2184 return NULL;
@@ -2229,14 +2203,14 @@ parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
2229 bin_tree_t *tree, *expr; 2203 bin_tree_t *tree, *expr;
2230 re_dfa_t *dfa = preg->buffer; 2204 re_dfa_t *dfa = preg->buffer;
2231 tree = parse_expression (regexp, preg, token, syntax, nest, err); 2205 tree = parse_expression (regexp, preg, token, syntax, nest, err);
2232 if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2206 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL))
2233 return NULL; 2207 return NULL;
2234 2208
2235 while (token->type != OP_ALT && token->type != END_OF_RE 2209 while (token->type != OP_ALT && token->type != END_OF_RE
2236 && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) 2210 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
2237 { 2211 {
2238 expr = parse_expression (regexp, preg, token, syntax, nest, err); 2212 expr = parse_expression (regexp, preg, token, syntax, nest, err);
2239 if (BE (*err != REG_NOERROR && expr == NULL, 0)) 2213 if (__glibc_unlikely (*err != REG_NOERROR && expr == NULL))
2240 { 2214 {
2241 if (tree != NULL) 2215 if (tree != NULL)
2242 postorder (tree, free_tree, NULL); 2216 postorder (tree, free_tree, NULL);
@@ -2277,7 +2251,7 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2277 { 2251 {
2278 case CHARACTER: 2252 case CHARACTER:
2279 tree = create_token_tree (dfa, NULL, NULL, token); 2253 tree = create_token_tree (dfa, NULL, NULL, token);
2280 if (BE (tree == NULL, 0)) 2254 if (__glibc_unlikely (tree == NULL))
2281 { 2255 {
2282 *err = REG_ESPACE; 2256 *err = REG_ESPACE;
2283 return NULL; 2257 return NULL;
@@ -2292,7 +2266,7 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2292 fetch_token (token, regexp, syntax); 2266 fetch_token (token, regexp, syntax);
2293 mbc_remain = create_token_tree (dfa, NULL, NULL, token); 2267 mbc_remain = create_token_tree (dfa, NULL, NULL, token);
2294 tree = create_tree (dfa, tree, mbc_remain, CONCAT); 2268 tree = create_tree (dfa, tree, mbc_remain, CONCAT);
2295 if (BE (mbc_remain == NULL || tree == NULL, 0)) 2269 if (__glibc_unlikely (mbc_remain == NULL || tree == NULL))
2296 { 2270 {
2297 *err = REG_ESPACE; 2271 *err = REG_ESPACE;
2298 return NULL; 2272 return NULL;
@@ -2301,25 +2275,28 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2301 } 2275 }
2302#endif 2276#endif
2303 break; 2277 break;
2278
2304 case OP_OPEN_SUBEXP: 2279 case OP_OPEN_SUBEXP:
2305 tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); 2280 tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
2306 if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2281 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL))
2307 return NULL; 2282 return NULL;
2308 break; 2283 break;
2284
2309 case OP_OPEN_BRACKET: 2285 case OP_OPEN_BRACKET:
2310 tree = parse_bracket_exp (regexp, dfa, token, syntax, err); 2286 tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
2311 if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2287 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL))
2312 return NULL; 2288 return NULL;
2313 break; 2289 break;
2290
2314 case OP_BACK_REF: 2291 case OP_BACK_REF:
2315 if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1)) 2292 if (!__glibc_likely (dfa->completed_bkref_map & (1 << token->opr.idx)))
2316 { 2293 {
2317 *err = REG_ESUBREG; 2294 *err = REG_ESUBREG;
2318 return NULL; 2295 return NULL;
2319 } 2296 }
2320 dfa->used_bkref_map |= 1 << token->opr.idx; 2297 dfa->used_bkref_map |= 1 << token->opr.idx;
2321 tree = create_token_tree (dfa, NULL, NULL, token); 2298 tree = create_token_tree (dfa, NULL, NULL, token);
2322 if (BE (tree == NULL, 0)) 2299 if (__glibc_unlikely (tree == NULL))
2323 { 2300 {
2324 *err = REG_ESPACE; 2301 *err = REG_ESPACE;
2325 return NULL; 2302 return NULL;
@@ -2327,13 +2304,14 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2327 ++dfa->nbackref; 2304 ++dfa->nbackref;
2328 dfa->has_mb_node = 1; 2305 dfa->has_mb_node = 1;
2329 break; 2306 break;
2307
2330 case OP_OPEN_DUP_NUM: 2308 case OP_OPEN_DUP_NUM:
2331 if (syntax & RE_CONTEXT_INVALID_DUP) 2309 if (syntax & RE_CONTEXT_INVALID_DUP)
2332 { 2310 {
2333 *err = REG_BADRPT; 2311 *err = REG_BADRPT;
2334 return NULL; 2312 return NULL;
2335 } 2313 }
2336 /* FALLTHROUGH */ 2314 FALLTHROUGH;
2337 case OP_DUP_ASTERISK: 2315 case OP_DUP_ASTERISK:
2338 case OP_DUP_PLUS: 2316 case OP_DUP_PLUS:
2339 case OP_DUP_QUESTION: 2317 case OP_DUP_QUESTION:
@@ -2347,15 +2325,15 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2347 fetch_token (token, regexp, syntax); 2325 fetch_token (token, regexp, syntax);
2348 return parse_expression (regexp, preg, token, syntax, nest, err); 2326 return parse_expression (regexp, preg, token, syntax, nest, err);
2349 } 2327 }
2350 /* else fall through */ 2328 FALLTHROUGH;
2351 case OP_CLOSE_SUBEXP: 2329 case OP_CLOSE_SUBEXP:
2352 if ((token->type == OP_CLOSE_SUBEXP) && 2330 if ((token->type == OP_CLOSE_SUBEXP)
2353 !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) 2331 && !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
2354 { 2332 {
2355 *err = REG_ERPAREN; 2333 *err = REG_ERPAREN;
2356 return NULL; 2334 return NULL;
2357 } 2335 }
2358 /* else fall through */ 2336 FALLTHROUGH;
2359 case OP_CLOSE_DUP_NUM: 2337 case OP_CLOSE_DUP_NUM:
2360 /* We treat it as a normal character. */ 2338 /* We treat it as a normal character. */
2361 2339
@@ -2364,12 +2342,13 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2364 /* mb_partial and word_char bits should be initialized already 2342 /* mb_partial and word_char bits should be initialized already
2365 by peek_token. */ 2343 by peek_token. */
2366 tree = create_token_tree (dfa, NULL, NULL, token); 2344 tree = create_token_tree (dfa, NULL, NULL, token);
2367 if (BE (tree == NULL, 0)) 2345 if (__glibc_unlikely (tree == NULL))
2368 { 2346 {
2369 *err = REG_ESPACE; 2347 *err = REG_ESPACE;
2370 return NULL; 2348 return NULL;
2371 } 2349 }
2372 break; 2350 break;
2351
2373 case ANCHOR: 2352 case ANCHOR:
2374 if ((token->opr.ctx_type 2353 if ((token->opr.ctx_type
2375 & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST)) 2354 & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
@@ -2393,7 +2372,8 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2393 } 2372 }
2394 tree_last = create_token_tree (dfa, NULL, NULL, token); 2373 tree_last = create_token_tree (dfa, NULL, NULL, token);
2395 tree = create_tree (dfa, tree_first, tree_last, OP_ALT); 2374 tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
2396 if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)) 2375 if (__glibc_unlikely (tree_first == NULL || tree_last == NULL
2376 || tree == NULL))
2397 { 2377 {
2398 *err = REG_ESPACE; 2378 *err = REG_ESPACE;
2399 return NULL; 2379 return NULL;
@@ -2402,7 +2382,7 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2402 else 2382 else
2403 { 2383 {
2404 tree = create_token_tree (dfa, NULL, NULL, token); 2384 tree = create_token_tree (dfa, NULL, NULL, token);
2405 if (BE (tree == NULL, 0)) 2385 if (__glibc_unlikely (tree == NULL))
2406 { 2386 {
2407 *err = REG_ESPACE; 2387 *err = REG_ESPACE;
2408 return NULL; 2388 return NULL;
@@ -2414,9 +2394,10 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2414 it must not be "<ANCHOR(^)><REPEAT(*)>". */ 2394 it must not be "<ANCHOR(^)><REPEAT(*)>". */
2415 fetch_token (token, regexp, syntax); 2395 fetch_token (token, regexp, syntax);
2416 return tree; 2396 return tree;
2397
2417 case OP_PERIOD: 2398 case OP_PERIOD:
2418 tree = create_token_tree (dfa, NULL, NULL, token); 2399 tree = create_token_tree (dfa, NULL, NULL, token);
2419 if (BE (tree == NULL, 0)) 2400 if (__glibc_unlikely (tree == NULL))
2420 { 2401 {
2421 *err = REG_ESPACE; 2402 *err = REG_ESPACE;
2422 return NULL; 2403 return NULL;
@@ -2424,35 +2405,38 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2424 if (dfa->mb_cur_max > 1) 2405 if (dfa->mb_cur_max > 1)
2425 dfa->has_mb_node = 1; 2406 dfa->has_mb_node = 1;
2426 break; 2407 break;
2408
2427 case OP_WORD: 2409 case OP_WORD:
2428 case OP_NOTWORD: 2410 case OP_NOTWORD:
2429 tree = build_charclass_op (dfa, regexp->trans, 2411 tree = build_charclass_op (dfa, regexp->trans,
2430 "alnum", 2412 "alnum",
2431 "_", 2413 "_",
2432 token->type == OP_NOTWORD, err); 2414 token->type == OP_NOTWORD, err);
2433 if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2415 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL))
2434 return NULL; 2416 return NULL;
2435 break; 2417 break;
2418
2436 case OP_SPACE: 2419 case OP_SPACE:
2437 case OP_NOTSPACE: 2420 case OP_NOTSPACE:
2438 tree = build_charclass_op (dfa, regexp->trans, 2421 tree = build_charclass_op (dfa, regexp->trans,
2439 "space", 2422 "space",
2440 "", 2423 "",
2441 token->type == OP_NOTSPACE, err); 2424 token->type == OP_NOTSPACE, err);
2442 if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2425 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL))
2443 return NULL; 2426 return NULL;
2444 break; 2427 break;
2428
2445 case OP_ALT: 2429 case OP_ALT:
2446 case END_OF_RE: 2430 case END_OF_RE:
2447 return NULL; 2431 return NULL;
2432
2448 case BACK_SLASH: 2433 case BACK_SLASH:
2449 *err = REG_EESCAPE; 2434 *err = REG_EESCAPE;
2450 return NULL; 2435 return NULL;
2436
2451 default: 2437 default:
2452 /* Must not happen? */ 2438 /* Must not happen? */
2453#ifdef DEBUG 2439 DEBUG_ASSERT (false);
2454 assert (0);
2455#endif
2456 return NULL; 2440 return NULL;
2457 } 2441 }
2458 fetch_token (token, regexp, syntax); 2442 fetch_token (token, regexp, syntax);
@@ -2460,14 +2444,22 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2460 while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS 2444 while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
2461 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) 2445 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
2462 { 2446 {
2463 tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); 2447 bin_tree_t *dup_tree = parse_dup_op (tree, regexp, dfa, token,
2464 if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2448 syntax, err);
2465 return NULL; 2449 if (__glibc_unlikely (*err != REG_NOERROR && dup_tree == NULL))
2450 {
2451 if (tree != NULL)
2452 postorder (tree, free_tree, NULL);
2453 return NULL;
2454 }
2455 tree = dup_tree;
2466 /* In BRE consecutive duplications are not allowed. */ 2456 /* In BRE consecutive duplications are not allowed. */
2467 if ((syntax & RE_CONTEXT_INVALID_DUP) 2457 if ((syntax & RE_CONTEXT_INVALID_DUP)
2468 && (token->type == OP_DUP_ASTERISK 2458 && (token->type == OP_DUP_ASTERISK
2469 || token->type == OP_OPEN_DUP_NUM)) 2459 || token->type == OP_OPEN_DUP_NUM))
2470 { 2460 {
2461 if (tree != NULL)
2462 postorder (tree, free_tree, NULL);
2471 *err = REG_BADRPT; 2463 *err = REG_BADRPT;
2472 return NULL; 2464 return NULL;
2473 } 2465 }
@@ -2500,13 +2492,14 @@ parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2500 else 2492 else
2501 { 2493 {
2502 tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); 2494 tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
2503 if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)) 2495 if (__glibc_unlikely (*err == REG_NOERROR
2496 && token->type != OP_CLOSE_SUBEXP))
2504 { 2497 {
2505 if (tree != NULL) 2498 if (tree != NULL)
2506 postorder (tree, free_tree, NULL); 2499 postorder (tree, free_tree, NULL);
2507 *err = REG_EPAREN; 2500 *err = REG_EPAREN;
2508 } 2501 }
2509 if (BE (*err != REG_NOERROR, 0)) 2502 if (__glibc_unlikely (*err != REG_NOERROR))
2510 return NULL; 2503 return NULL;
2511 } 2504 }
2512 2505
@@ -2514,7 +2507,7 @@ parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2514 dfa->completed_bkref_map |= 1 << cur_nsub; 2507 dfa->completed_bkref_map |= 1 << cur_nsub;
2515 2508
2516 tree = create_tree (dfa, tree, NULL, SUBEXP); 2509 tree = create_tree (dfa, tree, NULL, SUBEXP);
2517 if (BE (tree == NULL, 0)) 2510 if (__glibc_unlikely (tree == NULL))
2518 { 2511 {
2519 *err = REG_ESPACE; 2512 *err = REG_ESPACE;
2520 return NULL; 2513 return NULL;
@@ -2537,7 +2530,7 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2537 { 2530 {
2538 end = 0; 2531 end = 0;
2539 start = fetch_number (regexp, token, syntax); 2532 start = fetch_number (regexp, token, syntax);
2540 if (start == REG_MISSING) 2533 if (start == -1)
2541 { 2534 {
2542 if (token->type == CHARACTER && token->opr.c == ',') 2535 if (token->type == CHARACTER && token->opr.c == ',')
2543 start = 0; /* We treat "{,m}" as "{0,m}". */ 2536 start = 0; /* We treat "{,m}" as "{0,m}". */
@@ -2547,17 +2540,17 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2547 return NULL; 2540 return NULL;
2548 } 2541 }
2549 } 2542 }
2550 if (BE (start != REG_ERROR, 1)) 2543 if (__glibc_likely (start != -2))
2551 { 2544 {
2552 /* We treat "{n}" as "{n,n}". */ 2545 /* We treat "{n}" as "{n,n}". */
2553 end = ((token->type == OP_CLOSE_DUP_NUM) ? start 2546 end = ((token->type == OP_CLOSE_DUP_NUM) ? start
2554 : ((token->type == CHARACTER && token->opr.c == ',') 2547 : ((token->type == CHARACTER && token->opr.c == ',')
2555 ? fetch_number (regexp, token, syntax) : REG_ERROR)); 2548 ? fetch_number (regexp, token, syntax) : -2));
2556 } 2549 }
2557 if (BE (start == REG_ERROR || end == REG_ERROR, 0)) 2550 if (__glibc_unlikely (start == -2 || end == -2))
2558 { 2551 {
2559 /* Invalid sequence. */ 2552 /* Invalid sequence. */
2560 if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0)) 2553 if (__glibc_unlikely (!(syntax & RE_INVALID_INTERVAL_ORD)))
2561 { 2554 {
2562 if (token->type == END_OF_RE) 2555 if (token->type == END_OF_RE)
2563 *err = REG_EBRACE; 2556 *err = REG_EBRACE;
@@ -2576,15 +2569,15 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2576 return elem; 2569 return elem;
2577 } 2570 }
2578 2571
2579 if (BE ((end != REG_MISSING && start > end) 2572 if (__glibc_unlikely ((end != -1 && start > end)
2580 || token->type != OP_CLOSE_DUP_NUM, 0)) 2573 || token->type != OP_CLOSE_DUP_NUM))
2581 { 2574 {
2582 /* First number greater than second. */ 2575 /* First number greater than second. */
2583 *err = REG_BADBR; 2576 *err = REG_BADBR;
2584 return NULL; 2577 return NULL;
2585 } 2578 }
2586 2579
2587 if (BE (RE_DUP_MAX < (end == REG_MISSING ? start : end), 0)) 2580 if (__glibc_unlikely (RE_DUP_MAX < (end == -1 ? start : end)))
2588 { 2581 {
2589 *err = REG_ESIZE; 2582 *err = REG_ESIZE;
2590 return NULL; 2583 return NULL;
@@ -2593,28 +2586,28 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2593 else 2586 else
2594 { 2587 {
2595 start = (token->type == OP_DUP_PLUS) ? 1 : 0; 2588 start = (token->type == OP_DUP_PLUS) ? 1 : 0;
2596 end = (token->type == OP_DUP_QUESTION) ? 1 : REG_MISSING; 2589 end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
2597 } 2590 }
2598 2591
2599 fetch_token (token, regexp, syntax); 2592 fetch_token (token, regexp, syntax);
2600 2593
2601 if (BE (elem == NULL, 0)) 2594 if (__glibc_unlikely (elem == NULL))
2602 return NULL; 2595 return NULL;
2603 if (BE (start == 0 && end == 0, 0)) 2596 if (__glibc_unlikely (start == 0 && end == 0))
2604 { 2597 {
2605 postorder (elem, free_tree, NULL); 2598 postorder (elem, free_tree, NULL);
2606 return NULL; 2599 return NULL;
2607 } 2600 }
2608 2601
2609 /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */ 2602 /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
2610 if (BE (start > 0, 0)) 2603 if (__glibc_unlikely (start > 0))
2611 { 2604 {
2612 tree = elem; 2605 tree = elem;
2613 for (i = 2; i <= start; ++i) 2606 for (i = 2; i <= start; ++i)
2614 { 2607 {
2615 elem = duplicate_tree (elem, dfa); 2608 elem = duplicate_tree (elem, dfa);
2616 tree = create_tree (dfa, tree, elem, CONCAT); 2609 tree = create_tree (dfa, tree, elem, CONCAT);
2617 if (BE (elem == NULL || tree == NULL, 0)) 2610 if (__glibc_unlikely (elem == NULL || tree == NULL))
2618 goto parse_dup_op_espace; 2611 goto parse_dup_op_espace;
2619 } 2612 }
2620 2613
@@ -2623,6 +2616,8 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2623 2616
2624 /* Duplicate ELEM before it is marked optional. */ 2617 /* Duplicate ELEM before it is marked optional. */
2625 elem = duplicate_tree (elem, dfa); 2618 elem = duplicate_tree (elem, dfa);
2619 if (__glibc_unlikely (elem == NULL))
2620 goto parse_dup_op_espace;
2626 old_tree = tree; 2621 old_tree = tree;
2627 } 2622 }
2628 else 2623 else
@@ -2635,27 +2630,23 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2635 } 2630 }
2636 2631
2637 tree = create_tree (dfa, elem, NULL, 2632 tree = create_tree (dfa, elem, NULL,
2638 (end == REG_MISSING ? OP_DUP_ASTERISK : OP_ALT)); 2633 (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
2639 if (BE (tree == NULL, 0)) 2634 if (__glibc_unlikely (tree == NULL))
2640 goto parse_dup_op_espace; 2635 goto parse_dup_op_espace;
2641 2636
2642/* From gnulib's "intprops.h": 2637 /* This loop is actually executed only when end != -1,
2643 True if the arithmetic type T is signed. */
2644#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))
2645
2646 /* This loop is actually executed only when end != REG_MISSING,
2647 to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have 2638 to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
2648 already created the start+1-th copy. */ 2639 already created the start+1-th copy. */
2649 if (TYPE_SIGNED (Idx) || end != REG_MISSING) 2640 if (TYPE_SIGNED (Idx) || end != -1)
2650 for (i = start + 2; i <= end; ++i) 2641 for (i = start + 2; i <= end; ++i)
2651 { 2642 {
2652 elem = duplicate_tree (elem, dfa); 2643 elem = duplicate_tree (elem, dfa);
2653 tree = create_tree (dfa, tree, elem, CONCAT); 2644 tree = create_tree (dfa, tree, elem, CONCAT);
2654 if (BE (elem == NULL || tree == NULL, 0)) 2645 if (__glibc_unlikely (elem == NULL || tree == NULL))
2655 goto parse_dup_op_espace; 2646 goto parse_dup_op_espace;
2656 2647
2657 tree = create_tree (dfa, tree, NULL, OP_ALT); 2648 tree = create_tree (dfa, tree, NULL, OP_ALT);
2658 if (BE (tree == NULL, 0)) 2649 if (__glibc_unlikely (tree == NULL))
2659 goto parse_dup_op_espace; 2650 goto parse_dup_op_espace;
2660 } 2651 }
2661 2652
@@ -2674,6 +2665,18 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2674#define BRACKET_NAME_BUF_SIZE 32 2665#define BRACKET_NAME_BUF_SIZE 32
2675 2666
2676#ifndef _LIBC 2667#ifndef _LIBC
2668
2669# ifdef RE_ENABLE_I18N
2670/* Convert the byte B to the corresponding wide character. In a
2671 unibyte locale, treat B as itself. In a multibyte locale, return
2672 WEOF if B is an encoding error. */
2673static wint_t
2674parse_byte (unsigned char b, re_charset_t *mbcset)
2675{
2676 return mbcset == NULL ? b : __btowc (b);
2677}
2678# endif
2679
2677 /* Local function for parse_bracket_exp only used in case of NOT _LIBC. 2680 /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
2678 Build the range expression which starts from START_ELEM, and ends 2681 Build the range expression which starts from START_ELEM, and ends
2679 at END_ELEM. The result are written to MBCSET and SBCSET. 2682 at END_ELEM. The result are written to MBCSET and SBCSET.
@@ -2682,7 +2685,6 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2682 update it. */ 2685 update it. */
2683 2686
2684static reg_errcode_t 2687static reg_errcode_t
2685internal_function
2686# ifdef RE_ENABLE_I18N 2688# ifdef RE_ENABLE_I18N
2687build_range_exp (const reg_syntax_t syntax, 2689build_range_exp (const reg_syntax_t syntax,
2688 bitset_t sbcset, 2690 bitset_t sbcset,
@@ -2699,17 +2701,18 @@ build_range_exp (const reg_syntax_t syntax,
2699{ 2701{
2700 unsigned int start_ch, end_ch; 2702 unsigned int start_ch, end_ch;
2701 /* Equivalence Classes and Character Classes can't be a range start/end. */ 2703 /* Equivalence Classes and Character Classes can't be a range start/end. */
2702 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS 2704 if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
2703 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, 2705 || start_elem->type == CHAR_CLASS
2704 0)) 2706 || end_elem->type == EQUIV_CLASS
2707 || end_elem->type == CHAR_CLASS))
2705 return REG_ERANGE; 2708 return REG_ERANGE;
2706 2709
2707 /* We can handle no multi character collating elements without libc 2710 /* We can handle no multi character collating elements without libc
2708 support. */ 2711 support. */
2709 if (BE ((start_elem->type == COLL_SYM 2712 if (__glibc_unlikely ((start_elem->type == COLL_SYM
2710 && strlen ((char *) start_elem->opr.name) > 1) 2713 && strlen ((char *) start_elem->opr.name) > 1)
2711 || (end_elem->type == COLL_SYM 2714 || (end_elem->type == COLL_SYM
2712 && strlen ((char *) end_elem->opr.name) > 1), 0)) 2715 && strlen ((char *) end_elem->opr.name) > 1)))
2713 return REG_ECOLLATE; 2716 return REG_ECOLLATE;
2714 2717
2715# ifdef RE_ENABLE_I18N 2718# ifdef RE_ENABLE_I18N
@@ -2725,12 +2728,13 @@ build_range_exp (const reg_syntax_t syntax,
2725 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] 2728 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2726 : 0)); 2729 : 0));
2727 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) 2730 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
2728 ? __btowc (start_ch) : start_elem->opr.wch); 2731 ? parse_byte (start_ch, mbcset) : start_elem->opr.wch);
2729 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) 2732 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
2730 ? __btowc (end_ch) : end_elem->opr.wch); 2733 ? parse_byte (end_ch, mbcset) : end_elem->opr.wch);
2731 if (start_wc == WEOF || end_wc == WEOF) 2734 if (start_wc == WEOF || end_wc == WEOF)
2732 return REG_ECOLLATE; 2735 return REG_ECOLLATE;
2733 else if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc, 0)) 2736 else if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
2737 && start_wc > end_wc))
2734 return REG_ERANGE; 2738 return REG_ERANGE;
2735 2739
2736 /* Got valid collation sequence values, add them as a new entry. 2740 /* Got valid collation sequence values, add them as a new entry.
@@ -2741,7 +2745,7 @@ build_range_exp (const reg_syntax_t syntax,
2741 if (mbcset) 2745 if (mbcset)
2742 { 2746 {
2743 /* Check the space of the arrays. */ 2747 /* Check the space of the arrays. */
2744 if (BE (*range_alloc == mbcset->nranges, 0)) 2748 if (__glibc_unlikely (*range_alloc == mbcset->nranges))
2745 { 2749 {
2746 /* There is not enough space, need realloc. */ 2750 /* There is not enough space, need realloc. */
2747 wchar_t *new_array_start, *new_array_end; 2751 wchar_t *new_array_start, *new_array_end;
@@ -2756,8 +2760,13 @@ build_range_exp (const reg_syntax_t syntax,
2756 new_array_end = re_realloc (mbcset->range_ends, wchar_t, 2760 new_array_end = re_realloc (mbcset->range_ends, wchar_t,
2757 new_nranges); 2761 new_nranges);
2758 2762
2759 if (BE (new_array_start == NULL || new_array_end == NULL, 0)) 2763 if (__glibc_unlikely (new_array_start == NULL
2760 return REG_ESPACE; 2764 || new_array_end == NULL))
2765 {
2766 re_free (new_array_start);
2767 re_free (new_array_end);
2768 return REG_ESPACE;
2769 }
2761 2770
2762 mbcset->range_starts = new_array_start; 2771 mbcset->range_starts = new_array_start;
2763 mbcset->range_ends = new_array_end; 2772 mbcset->range_ends = new_array_end;
@@ -2804,7 +2813,6 @@ build_range_exp (const reg_syntax_t syntax,
2804 pointer argument since we may update it. */ 2813 pointer argument since we may update it. */
2805 2814
2806static reg_errcode_t 2815static reg_errcode_t
2807internal_function
2808# ifdef RE_ENABLE_I18N 2816# ifdef RE_ENABLE_I18N
2809build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, 2817build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
2810 Idx *coll_sym_alloc, const unsigned char *name) 2818 Idx *coll_sym_alloc, const unsigned char *name)
@@ -2813,7 +2821,7 @@ build_collating_symbol (bitset_t sbcset, const unsigned char *name)
2813# endif /* not RE_ENABLE_I18N */ 2821# endif /* not RE_ENABLE_I18N */
2814{ 2822{
2815 size_t name_len = strlen ((const char *) name); 2823 size_t name_len = strlen ((const char *) name);
2816 if (BE (name_len != 1, 0)) 2824 if (__glibc_unlikely (name_len != 1))
2817 return REG_ECOLLATE; 2825 return REG_ECOLLATE;
2818 else 2826 else
2819 { 2827 {
@@ -2948,18 +2956,21 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
2948 2956
2949 /* Equivalence Classes and Character Classes can't be a range 2957 /* Equivalence Classes and Character Classes can't be a range
2950 start/end. */ 2958 start/end. */
2951 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS 2959 if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
2952 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, 2960 || start_elem->type == CHAR_CLASS
2953 0)) 2961 || end_elem->type == EQUIV_CLASS
2962 || end_elem->type == CHAR_CLASS))
2954 return REG_ERANGE; 2963 return REG_ERANGE;
2955 2964
2956 /* FIXME: Implement rational ranges here, too. */ 2965 /* FIXME: Implement rational ranges here, too. */
2957 start_collseq = lookup_collation_sequence_value (start_elem); 2966 start_collseq = lookup_collation_sequence_value (start_elem);
2958 end_collseq = lookup_collation_sequence_value (end_elem); 2967 end_collseq = lookup_collation_sequence_value (end_elem);
2959 /* Check start/end collation sequence values. */ 2968 /* Check start/end collation sequence values. */
2960 if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)) 2969 if (__glibc_unlikely (start_collseq == UINT_MAX
2970 || end_collseq == UINT_MAX))
2961 return REG_ECOLLATE; 2971 return REG_ECOLLATE;
2962 if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) 2972 if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
2973 && start_collseq > end_collseq))
2963 return REG_ERANGE; 2974 return REG_ERANGE;
2964 2975
2965 /* Got valid collation sequence values, add them as a new entry. 2976 /* Got valid collation sequence values, add them as a new entry.
@@ -2969,7 +2980,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
2969 if (nrules > 0 || dfa->mb_cur_max > 1) 2980 if (nrules > 0 || dfa->mb_cur_max > 1)
2970 { 2981 {
2971 /* Check the space of the arrays. */ 2982 /* Check the space of the arrays. */
2972 if (BE (*range_alloc == mbcset->nranges, 0)) 2983 if (__glibc_unlikely (*range_alloc == mbcset->nranges))
2973 { 2984 {
2974 /* There is not enough space, need realloc. */ 2985 /* There is not enough space, need realloc. */
2975 uint32_t *new_array_start; 2986 uint32_t *new_array_start;
@@ -2983,7 +2994,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
2983 new_array_end = re_realloc (mbcset->range_ends, uint32_t, 2994 new_array_end = re_realloc (mbcset->range_ends, uint32_t,
2984 new_nranges); 2995 new_nranges);
2985 2996
2986 if (BE (new_array_start == NULL || new_array_end == NULL, 0)) 2997 if (__glibc_unlikely (new_array_start == NULL
2998 || new_array_end == NULL))
2987 return REG_ESPACE; 2999 return REG_ESPACE;
2988 3000
2989 mbcset->range_starts = new_array_start; 3001 mbcset->range_starts = new_array_start;
@@ -3047,7 +3059,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3047 3059
3048 /* Got valid collation sequence, add it as a new entry. */ 3060 /* Got valid collation sequence, add it as a new entry. */
3049 /* Check the space of the arrays. */ 3061 /* Check the space of the arrays. */
3050 if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0)) 3062 if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
3051 { 3063 {
3052 /* Not enough, realloc it. */ 3064 /* Not enough, realloc it. */
3053 /* +1 in case of mbcset->ncoll_syms is 0. */ 3065 /* +1 in case of mbcset->ncoll_syms is 0. */
@@ -3056,7 +3068,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3056 if *alloc == 0. */ 3068 if *alloc == 0. */
3057 int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, 3069 int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
3058 new_coll_sym_alloc); 3070 new_coll_sym_alloc);
3059 if (BE (new_coll_syms == NULL, 0)) 3071 if (__glibc_unlikely (new_coll_syms == NULL))
3060 return REG_ESPACE; 3072 return REG_ESPACE;
3061 mbcset->coll_syms = new_coll_syms; 3073 mbcset->coll_syms = new_coll_syms;
3062 *coll_sym_alloc = new_coll_sym_alloc; 3074 *coll_sym_alloc = new_coll_sym_alloc;
@@ -3066,7 +3078,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3066 } 3078 }
3067 else 3079 else
3068 { 3080 {
3069 if (BE (name_len != 1, 0)) 3081 if (__glibc_unlikely (name_len != 1))
3070 return REG_ECOLLATE; 3082 return REG_ECOLLATE;
3071 else 3083 else
3072 { 3084 {
@@ -3110,9 +3122,9 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3110 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); 3122 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3111#endif /* RE_ENABLE_I18N */ 3123#endif /* RE_ENABLE_I18N */
3112#ifdef RE_ENABLE_I18N 3124#ifdef RE_ENABLE_I18N
3113 if (BE (sbcset == NULL || mbcset == NULL, 0)) 3125 if (__glibc_unlikely (sbcset == NULL || mbcset == NULL))
3114#else 3126#else
3115 if (BE (sbcset == NULL, 0)) 3127 if (__glibc_unlikely (sbcset == NULL))
3116#endif /* RE_ENABLE_I18N */ 3128#endif /* RE_ENABLE_I18N */
3117 { 3129 {
3118 re_free (sbcset); 3130 re_free (sbcset);
@@ -3124,7 +3136,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3124 } 3136 }
3125 3137
3126 token_len = peek_token_bracket (token, regexp, syntax); 3138 token_len = peek_token_bracket (token, regexp, syntax);
3127 if (BE (token->type == END_OF_RE, 0)) 3139 if (__glibc_unlikely (token->type == END_OF_RE))
3128 { 3140 {
3129 *err = REG_BADPAT; 3141 *err = REG_BADPAT;
3130 goto parse_bracket_exp_free_return; 3142 goto parse_bracket_exp_free_return;
@@ -3139,7 +3151,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3139 bitset_set (sbcset, '\n'); 3151 bitset_set (sbcset, '\n');
3140 re_string_skip_bytes (regexp, token_len); /* Skip a token. */ 3152 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3141 token_len = peek_token_bracket (token, regexp, syntax); 3153 token_len = peek_token_bracket (token, regexp, syntax);
3142 if (BE (token->type == END_OF_RE, 0)) 3154 if (__glibc_unlikely (token->type == END_OF_RE))
3143 { 3155 {
3144 *err = REG_BADPAT; 3156 *err = REG_BADPAT;
3145 goto parse_bracket_exp_free_return; 3157 goto parse_bracket_exp_free_return;
@@ -3161,9 +3173,10 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3161 re_token_t token2; 3173 re_token_t token2;
3162 3174
3163 start_elem.opr.name = start_name_buf; 3175 start_elem.opr.name = start_name_buf;
3176 start_elem.type = COLL_SYM;
3164 ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa, 3177 ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
3165 syntax, first_round); 3178 syntax, first_round);
3166 if (BE (ret != REG_NOERROR, 0)) 3179 if (__glibc_unlikely (ret != REG_NOERROR))
3167 { 3180 {
3168 *err = ret; 3181 *err = ret;
3169 goto parse_bracket_exp_free_return; 3182 goto parse_bracket_exp_free_return;
@@ -3176,7 +3189,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3176 /* Do not check for ranges if we know they are not allowed. */ 3189 /* Do not check for ranges if we know they are not allowed. */
3177 if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS) 3190 if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
3178 { 3191 {
3179 if (BE (token->type == END_OF_RE, 0)) 3192 if (__glibc_unlikely (token->type == END_OF_RE))
3180 { 3193 {
3181 *err = REG_EBRACK; 3194 *err = REG_EBRACK;
3182 goto parse_bracket_exp_free_return; 3195 goto parse_bracket_exp_free_return;
@@ -3185,7 +3198,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3185 { 3198 {
3186 re_string_skip_bytes (regexp, token_len); /* Skip '-'. */ 3199 re_string_skip_bytes (regexp, token_len); /* Skip '-'. */
3187 token_len2 = peek_token_bracket (&token2, regexp, syntax); 3200 token_len2 = peek_token_bracket (&token2, regexp, syntax);
3188 if (BE (token2.type == END_OF_RE, 0)) 3201 if (__glibc_unlikely (token2.type == END_OF_RE))
3189 { 3202 {
3190 *err = REG_EBRACK; 3203 *err = REG_EBRACK;
3191 goto parse_bracket_exp_free_return; 3204 goto parse_bracket_exp_free_return;
@@ -3204,9 +3217,10 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3204 if (is_range_exp == true) 3217 if (is_range_exp == true)
3205 { 3218 {
3206 end_elem.opr.name = end_name_buf; 3219 end_elem.opr.name = end_name_buf;
3220 end_elem.type = COLL_SYM;
3207 ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2, 3221 ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
3208 dfa, syntax, true); 3222 dfa, syntax, true);
3209 if (BE (ret != REG_NOERROR, 0)) 3223 if (__glibc_unlikely (ret != REG_NOERROR))
3210 { 3224 {
3211 *err = ret; 3225 *err = ret;
3212 goto parse_bracket_exp_free_return; 3226 goto parse_bracket_exp_free_return;
@@ -3226,7 +3240,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3226 *err = build_range_exp (syntax, sbcset, &start_elem, &end_elem); 3240 *err = build_range_exp (syntax, sbcset, &start_elem, &end_elem);
3227# endif 3241# endif
3228#endif /* RE_ENABLE_I18N */ 3242#endif /* RE_ENABLE_I18N */
3229 if (BE (*err != REG_NOERROR, 0)) 3243 if (__glibc_unlikely (*err != REG_NOERROR))
3230 goto parse_bracket_exp_free_return; 3244 goto parse_bracket_exp_free_return;
3231 } 3245 }
3232 else 3246 else
@@ -3239,7 +3253,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3239#ifdef RE_ENABLE_I18N 3253#ifdef RE_ENABLE_I18N
3240 case MB_CHAR: 3254 case MB_CHAR:
3241 /* Check whether the array has enough space. */ 3255 /* Check whether the array has enough space. */
3242 if (BE (mbchar_alloc == mbcset->nmbchars, 0)) 3256 if (__glibc_unlikely (mbchar_alloc == mbcset->nmbchars))
3243 { 3257 {
3244 wchar_t *new_mbchars; 3258 wchar_t *new_mbchars;
3245 /* Not enough, realloc it. */ 3259 /* Not enough, realloc it. */
@@ -3248,7 +3262,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3248 /* Use realloc since array is NULL if *alloc == 0. */ 3262 /* Use realloc since array is NULL if *alloc == 0. */
3249 new_mbchars = re_realloc (mbcset->mbchars, wchar_t, 3263 new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
3250 mbchar_alloc); 3264 mbchar_alloc);
3251 if (BE (new_mbchars == NULL, 0)) 3265 if (__glibc_unlikely (new_mbchars == NULL))
3252 goto parse_bracket_exp_espace; 3266 goto parse_bracket_exp_espace;
3253 mbcset->mbchars = new_mbchars; 3267 mbcset->mbchars = new_mbchars;
3254 } 3268 }
@@ -3261,7 +3275,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3261 mbcset, &equiv_class_alloc, 3275 mbcset, &equiv_class_alloc,
3262#endif /* RE_ENABLE_I18N */ 3276#endif /* RE_ENABLE_I18N */
3263 start_elem.opr.name); 3277 start_elem.opr.name);
3264 if (BE (*err != REG_NOERROR, 0)) 3278 if (__glibc_unlikely (*err != REG_NOERROR))
3265 goto parse_bracket_exp_free_return; 3279 goto parse_bracket_exp_free_return;
3266 break; 3280 break;
3267 case COLL_SYM: 3281 case COLL_SYM:
@@ -3270,7 +3284,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3270 mbcset, &coll_sym_alloc, 3284 mbcset, &coll_sym_alloc,
3271#endif /* RE_ENABLE_I18N */ 3285#endif /* RE_ENABLE_I18N */
3272 start_elem.opr.name); 3286 start_elem.opr.name);
3273 if (BE (*err != REG_NOERROR, 0)) 3287 if (__glibc_unlikely (*err != REG_NOERROR))
3274 goto parse_bracket_exp_free_return; 3288 goto parse_bracket_exp_free_return;
3275 break; 3289 break;
3276 case CHAR_CLASS: 3290 case CHAR_CLASS:
@@ -3280,15 +3294,15 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3280#endif /* RE_ENABLE_I18N */ 3294#endif /* RE_ENABLE_I18N */
3281 (const char *) start_elem.opr.name, 3295 (const char *) start_elem.opr.name,
3282 syntax); 3296 syntax);
3283 if (BE (*err != REG_NOERROR, 0)) 3297 if (__glibc_unlikely (*err != REG_NOERROR))
3284 goto parse_bracket_exp_free_return; 3298 goto parse_bracket_exp_free_return;
3285 break; 3299 break;
3286 default: 3300 default:
3287 assert (0); 3301 DEBUG_ASSERT (false);
3288 break; 3302 break;
3289 } 3303 }
3290 } 3304 }
3291 if (BE (token->type == END_OF_RE, 0)) 3305 if (__glibc_unlikely (token->type == END_OF_RE))
3292 { 3306 {
3293 *err = REG_EBRACK; 3307 *err = REG_EBRACK;
3294 goto parse_bracket_exp_free_return; 3308 goto parse_bracket_exp_free_return;
@@ -3319,7 +3333,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3319 br_token.type = COMPLEX_BRACKET; 3333 br_token.type = COMPLEX_BRACKET;
3320 br_token.opr.mbcset = mbcset; 3334 br_token.opr.mbcset = mbcset;
3321 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); 3335 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3322 if (BE (mbc_tree == NULL, 0)) 3336 if (__glibc_unlikely (mbc_tree == NULL))
3323 goto parse_bracket_exp_espace; 3337 goto parse_bracket_exp_espace;
3324 for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx) 3338 for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
3325 if (sbcset[sbc_idx]) 3339 if (sbcset[sbc_idx])
@@ -3332,12 +3346,12 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3332 br_token.type = SIMPLE_BRACKET; 3346 br_token.type = SIMPLE_BRACKET;
3333 br_token.opr.sbcset = sbcset; 3347 br_token.opr.sbcset = sbcset;
3334 work_tree = create_token_tree (dfa, NULL, NULL, &br_token); 3348 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3335 if (BE (work_tree == NULL, 0)) 3349 if (__glibc_unlikely (work_tree == NULL))
3336 goto parse_bracket_exp_espace; 3350 goto parse_bracket_exp_espace;
3337 3351
3338 /* Then join them by ALT node. */ 3352 /* Then join them by ALT node. */
3339 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT); 3353 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
3340 if (BE (work_tree == NULL, 0)) 3354 if (__glibc_unlikely (work_tree == NULL))
3341 goto parse_bracket_exp_espace; 3355 goto parse_bracket_exp_espace;
3342 } 3356 }
3343 else 3357 else
@@ -3356,7 +3370,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3356 br_token.type = SIMPLE_BRACKET; 3370 br_token.type = SIMPLE_BRACKET;
3357 br_token.opr.sbcset = sbcset; 3371 br_token.opr.sbcset = sbcset;
3358 work_tree = create_token_tree (dfa, NULL, NULL, &br_token); 3372 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3359 if (BE (work_tree == NULL, 0)) 3373 if (__glibc_unlikely (work_tree == NULL))
3360 goto parse_bracket_exp_espace; 3374 goto parse_bracket_exp_espace;
3361 } 3375 }
3362 return work_tree; 3376 return work_tree;
@@ -3393,7 +3407,7 @@ parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
3393 if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS 3407 if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
3394 || token->type == OP_OPEN_EQUIV_CLASS) 3408 || token->type == OP_OPEN_EQUIV_CLASS)
3395 return parse_bracket_symbol (elem, regexp, token); 3409 return parse_bracket_symbol (elem, regexp, token);
3396 if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen) 3410 if (__glibc_unlikely (token->type == OP_CHARSET_RANGE) && !accept_hyphen)
3397 { 3411 {
3398 /* A '-' must only appear as anything but a range indicator before 3412 /* A '-' must only appear as anything but a range indicator before
3399 the closing bracket. Everything else is an error. */ 3413 the closing bracket. Everything else is an error. */
@@ -3478,8 +3492,6 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
3478 int32_t idx1, idx2; 3492 int32_t idx1, idx2;
3479 unsigned int ch; 3493 unsigned int ch;
3480 size_t len; 3494 size_t len;
3481 /* This #include defines a local function! */
3482# include <locale/weight.h>
3483 /* Calculate the index for equivalence class. */ 3495 /* Calculate the index for equivalence class. */
3484 cp = name; 3496 cp = name;
3485 table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); 3497 table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
@@ -3489,8 +3501,8 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
3489 _NL_COLLATE_EXTRAMB); 3501 _NL_COLLATE_EXTRAMB);
3490 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, 3502 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3491 _NL_COLLATE_INDIRECTMB); 3503 _NL_COLLATE_INDIRECTMB);
3492 idx1 = findidx (&cp, -1); 3504 idx1 = findidx (table, indirect, extra, &cp, -1);
3493 if (BE (idx1 == 0 || *cp != '\0', 0)) 3505 if (__glibc_unlikely (idx1 == 0 || *cp != '\0'))
3494 /* This isn't a valid character. */ 3506 /* This isn't a valid character. */
3495 return REG_ECOLLATE; 3507 return REG_ECOLLATE;
3496 3508
@@ -3500,7 +3512,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
3500 { 3512 {
3501 char_buf[0] = ch; 3513 char_buf[0] = ch;
3502 cp = char_buf; 3514 cp = char_buf;
3503 idx2 = findidx (&cp, 1); 3515 idx2 = findidx (table, indirect, extra, &cp, 1);
3504/* 3516/*
3505 idx2 = table[ch]; 3517 idx2 = table[ch];
3506*/ 3518*/
@@ -3509,21 +3521,13 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
3509 continue; 3521 continue;
3510 /* Compare only if the length matches and the collation rule 3522 /* Compare only if the length matches and the collation rule
3511 index is the same. */ 3523 index is the same. */
3512 if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24)) 3524 if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24)
3513 { 3525 && memcmp (weights + (idx1 & 0xffffff) + 1,
3514 int cnt = 0; 3526 weights + (idx2 & 0xffffff) + 1, len) == 0)
3515 3527 bitset_set (sbcset, ch);
3516 while (cnt <= len &&
3517 weights[(idx1 & 0xffffff) + 1 + cnt]
3518 == weights[(idx2 & 0xffffff) + 1 + cnt])
3519 ++cnt;
3520
3521 if (cnt > len)
3522 bitset_set (sbcset, ch);
3523 }
3524 } 3528 }
3525 /* Check whether the array has enough space. */ 3529 /* Check whether the array has enough space. */
3526 if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0)) 3530 if (__glibc_unlikely (*equiv_class_alloc == mbcset->nequiv_classes))
3527 { 3531 {
3528 /* Not enough, realloc it. */ 3532 /* Not enough, realloc it. */
3529 /* +1 in case of mbcset->nequiv_classes is 0. */ 3533 /* +1 in case of mbcset->nequiv_classes is 0. */
@@ -3532,7 +3536,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
3532 int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes, 3536 int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
3533 int32_t, 3537 int32_t,
3534 new_equiv_class_alloc); 3538 new_equiv_class_alloc);
3535 if (BE (new_equiv_classes == NULL, 0)) 3539 if (__glibc_unlikely (new_equiv_classes == NULL))
3536 return REG_ESPACE; 3540 return REG_ESPACE;
3537 mbcset->equiv_classes = new_equiv_classes; 3541 mbcset->equiv_classes = new_equiv_classes;
3538 *equiv_class_alloc = new_equiv_class_alloc; 3542 *equiv_class_alloc = new_equiv_class_alloc;
@@ -3542,7 +3546,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
3542 else 3546 else
3543#endif /* _LIBC */ 3547#endif /* _LIBC */
3544 { 3548 {
3545 if (BE (strlen ((const char *) name) != 1, 0)) 3549 if (__glibc_unlikely (strlen ((const char *) name) != 1))
3546 return REG_ECOLLATE; 3550 return REG_ECOLLATE;
3547 bitset_set (sbcset, *name); 3551 bitset_set (sbcset, *name);
3548 } 3552 }
@@ -3576,7 +3580,7 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3576 3580
3577#ifdef RE_ENABLE_I18N 3581#ifdef RE_ENABLE_I18N
3578 /* Check the space of the arrays. */ 3582 /* Check the space of the arrays. */
3579 if (BE (*char_class_alloc == mbcset->nchar_classes, 0)) 3583 if (__glibc_unlikely (*char_class_alloc == mbcset->nchar_classes))
3580 { 3584 {
3581 /* Not enough, realloc it. */ 3585 /* Not enough, realloc it. */
3582 /* +1 in case of mbcset->nchar_classes is 0. */ 3586 /* +1 in case of mbcset->nchar_classes is 0. */
@@ -3584,7 +3588,7 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3584 /* Use realloc since array is NULL if *alloc == 0. */ 3588 /* Use realloc since array is NULL if *alloc == 0. */
3585 wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t, 3589 wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
3586 new_char_class_alloc); 3590 new_char_class_alloc);
3587 if (BE (new_char_classes == NULL, 0)) 3591 if (__glibc_unlikely (new_char_classes == NULL))
3588 return REG_ESPACE; 3592 return REG_ESPACE;
3589 mbcset->char_classes = new_char_classes; 3593 mbcset->char_classes = new_char_classes;
3590 *char_class_alloc = new_char_class_alloc; 3594 *char_class_alloc = new_char_class_alloc;
@@ -3594,7 +3598,7 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3594 3598
3595#define BUILD_CHARCLASS_LOOP(ctype_func) \ 3599#define BUILD_CHARCLASS_LOOP(ctype_func) \
3596 do { \ 3600 do { \
3597 if (BE (trans != NULL, 0)) \ 3601 if (__glibc_unlikely (trans != NULL)) \
3598 { \ 3602 { \
3599 for (i = 0; i < SBC_MAX; ++i) \ 3603 for (i = 0; i < SBC_MAX; ++i) \
3600 if (ctype_func (i)) \ 3604 if (ctype_func (i)) \
@@ -3650,30 +3654,24 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3650 Idx alloc = 0; 3654 Idx alloc = 0;
3651#endif /* not RE_ENABLE_I18N */ 3655#endif /* not RE_ENABLE_I18N */
3652 reg_errcode_t ret; 3656 reg_errcode_t ret;
3653 re_token_t br_token;
3654 bin_tree_t *tree; 3657 bin_tree_t *tree;
3655 3658
3656 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); 3659 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3657#ifdef RE_ENABLE_I18N 3660 if (__glibc_unlikely (sbcset == NULL))
3658 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3659#endif /* RE_ENABLE_I18N */
3660
3661#ifdef RE_ENABLE_I18N
3662 if (BE (sbcset == NULL || mbcset == NULL, 0))
3663#else /* not RE_ENABLE_I18N */
3664 if (BE (sbcset == NULL, 0))
3665#endif /* not RE_ENABLE_I18N */
3666 { 3661 {
3667 *err = REG_ESPACE; 3662 *err = REG_ESPACE;
3668 return NULL; 3663 return NULL;
3669 } 3664 }
3670
3671 if (non_match)
3672 {
3673#ifdef RE_ENABLE_I18N 3665#ifdef RE_ENABLE_I18N
3674 mbcset->non_match = 1; 3666 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3675#endif /* not RE_ENABLE_I18N */ 3667 if (__glibc_unlikely (mbcset == NULL))
3668 {
3669 re_free (sbcset);
3670 *err = REG_ESPACE;
3671 return NULL;
3676 } 3672 }
3673 mbcset->non_match = non_match;
3674#endif /* RE_ENABLE_I18N */
3677 3675
3678 /* We don't care the syntax in this case. */ 3676 /* We don't care the syntax in this case. */
3679 ret = build_charclass (trans, sbcset, 3677 ret = build_charclass (trans, sbcset,
@@ -3682,7 +3680,7 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3682#endif /* RE_ENABLE_I18N */ 3680#endif /* RE_ENABLE_I18N */
3683 class_name, 0); 3681 class_name, 0);
3684 3682
3685 if (BE (ret != REG_NOERROR, 0)) 3683 if (__glibc_unlikely (ret != REG_NOERROR))
3686 { 3684 {
3687 re_free (sbcset); 3685 re_free (sbcset);
3688#ifdef RE_ENABLE_I18N 3686#ifdef RE_ENABLE_I18N
@@ -3706,10 +3704,9 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3706#endif 3704#endif
3707 3705
3708 /* Build a tree for simple bracket. */ 3706 /* Build a tree for simple bracket. */
3709 br_token.type = SIMPLE_BRACKET; 3707 re_token_t br_token = { .type = SIMPLE_BRACKET, .opr.sbcset = sbcset };
3710 br_token.opr.sbcset = sbcset;
3711 tree = create_token_tree (dfa, NULL, NULL, &br_token); 3708 tree = create_token_tree (dfa, NULL, NULL, &br_token);
3712 if (BE (tree == NULL, 0)) 3709 if (__glibc_unlikely (tree == NULL))
3713 goto build_word_op_espace; 3710 goto build_word_op_espace;
3714 3711
3715#ifdef RE_ENABLE_I18N 3712#ifdef RE_ENABLE_I18N
@@ -3721,11 +3718,11 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3721 br_token.opr.mbcset = mbcset; 3718 br_token.opr.mbcset = mbcset;
3722 dfa->has_mb_node = 1; 3719 dfa->has_mb_node = 1;
3723 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); 3720 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3724 if (BE (mbc_tree == NULL, 0)) 3721 if (__glibc_unlikely (mbc_tree == NULL))
3725 goto build_word_op_espace; 3722 goto build_word_op_espace;
3726 /* Then join them by ALT node. */ 3723 /* Then join them by ALT node. */
3727 tree = create_tree (dfa, tree, mbc_tree, OP_ALT); 3724 tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
3728 if (BE (mbc_tree != NULL, 1)) 3725 if (__glibc_likely (mbc_tree != NULL))
3729 return tree; 3726 return tree;
3730 } 3727 }
3731 else 3728 else
@@ -3748,27 +3745,26 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3748 3745
3749/* This is intended for the expressions like "a{1,3}". 3746/* This is intended for the expressions like "a{1,3}".
3750 Fetch a number from 'input', and return the number. 3747 Fetch a number from 'input', and return the number.
3751 Return REG_MISSING if the number field is empty like "{,1}". 3748 Return -1 if the number field is empty like "{,1}".
3752 Return RE_DUP_MAX + 1 if the number field is too large. 3749 Return RE_DUP_MAX + 1 if the number field is too large.
3753 Return REG_ERROR if an error occurred. */ 3750 Return -2 if an error occurred. */
3754 3751
3755static Idx 3752static Idx
3756fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax) 3753fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
3757{ 3754{
3758 Idx num = REG_MISSING; 3755 Idx num = -1;
3759 unsigned char c; 3756 unsigned char c;
3760 while (1) 3757 while (1)
3761 { 3758 {
3762 fetch_token (token, input, syntax); 3759 fetch_token (token, input, syntax);
3763 c = token->opr.c; 3760 c = token->opr.c;
3764 if (BE (token->type == END_OF_RE, 0)) 3761 if (__glibc_unlikely (token->type == END_OF_RE))
3765 return REG_ERROR; 3762 return -2;
3766 if (token->type == OP_CLOSE_DUP_NUM || c == ',') 3763 if (token->type == OP_CLOSE_DUP_NUM || c == ',')
3767 break; 3764 break;
3768 num = ((token->type != CHARACTER || c < '0' || '9' < c 3765 num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2)
3769 || num == REG_ERROR) 3766 ? -2
3770 ? REG_ERROR 3767 : num == -1
3771 : num == REG_MISSING
3772 ? c - '0' 3768 ? c - '0'
3773 : MIN (RE_DUP_MAX + 1, num * 10 + c - '0')); 3769 : MIN (RE_DUP_MAX + 1, num * 10 + c - '0'));
3774 } 3770 }
@@ -3783,9 +3779,9 @@ free_charset (re_charset_t *cset)
3783# ifdef _LIBC 3779# ifdef _LIBC
3784 re_free (cset->coll_syms); 3780 re_free (cset->coll_syms);
3785 re_free (cset->equiv_classes); 3781 re_free (cset->equiv_classes);
3782# endif
3786 re_free (cset->range_starts); 3783 re_free (cset->range_starts);
3787 re_free (cset->range_ends); 3784 re_free (cset->range_ends);
3788# endif
3789 re_free (cset->char_classes); 3785 re_free (cset->char_classes);
3790 re_free (cset); 3786 re_free (cset);
3791} 3787}
@@ -3799,8 +3795,7 @@ static bin_tree_t *
3799create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, 3795create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3800 re_token_type_t type) 3796 re_token_type_t type)
3801{ 3797{
3802 re_token_t t; 3798 re_token_t t = { .type = type };
3803 t.type = type;
3804 return create_token_tree (dfa, left, right, &t); 3799 return create_token_tree (dfa, left, right, &t);
3805} 3800}
3806 3801
@@ -3809,7 +3804,7 @@ create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3809 const re_token_t *token) 3804 const re_token_t *token)
3810{ 3805{
3811 bin_tree_t *tree; 3806 bin_tree_t *tree;
3812 if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0)) 3807 if (__glibc_unlikely (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE))
3813 { 3808 {
3814 bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1); 3809 bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
3815 3810
@@ -3829,7 +3824,7 @@ create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3829 tree->token.opt_subexp = 0; 3824 tree->token.opt_subexp = 0;
3830 tree->first = NULL; 3825 tree->first = NULL;
3831 tree->next = NULL; 3826 tree->next = NULL;
3832 tree->node_idx = REG_MISSING; 3827 tree->node_idx = -1;
3833 3828
3834 if (left != NULL) 3829 if (left != NULL)
3835 left->parent = tree; 3830 left->parent = tree;