diff options
Diffstat (limited to 'gl/regcomp.c')
| -rw-r--r-- | gl/regcomp.c | 831 |
1 file changed, 351 insertions, 480 deletions
diff --git a/gl/regcomp.c b/gl/regcomp.c
index 887e5b50..122c3de5 100644
--- a/gl/regcomp.c
+++ b/gl/regcomp.c
| @@ -1,5 +1,5 @@ | |||
| 1 | /* Extended regular expression matching and search library. | 1 | /* Extended regular expression matching and search library. |
| 2 | Copyright (C) 2002-2021 Free Software Foundation, Inc. | 2 | Copyright (C) 2002-2022 Free Software Foundation, Inc. |
| 3 | This file is part of the GNU C Library. | 3 | This file is part of the GNU C Library. |
| 4 | Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. | 4 | Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. |
| 5 | 5 | ||
| @@ -27,14 +27,10 @@ static void re_compile_fastmap_iter (regex_t *bufp, | |||
| 27 | const re_dfastate_t *init_state, | 27 | const re_dfastate_t *init_state, |
| 28 | char *fastmap); | 28 | char *fastmap); |
| 29 | static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len); | 29 | static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len); |
| 30 | #ifdef RE_ENABLE_I18N | ||
| 31 | static void free_charset (re_charset_t *cset); | 30 | static void free_charset (re_charset_t *cset); |
| 32 | #endif /* RE_ENABLE_I18N */ | ||
| 33 | static void free_workarea_compile (regex_t *preg); | 31 | static void free_workarea_compile (regex_t *preg); |
| 34 | static reg_errcode_t create_initial_state (re_dfa_t *dfa); | 32 | static reg_errcode_t create_initial_state (re_dfa_t *dfa); |
| 35 | #ifdef RE_ENABLE_I18N | ||
| 36 | static void optimize_utf8 (re_dfa_t *dfa); | 33 | static void optimize_utf8 (re_dfa_t *dfa); |
| 37 | #endif | ||
| 38 | static reg_errcode_t analyze (regex_t *preg); | 34 | static reg_errcode_t analyze (regex_t *preg); |
| 39 | static reg_errcode_t preorder (bin_tree_t *root, | 35 | static reg_errcode_t preorder (bin_tree_t *root, |
| 40 | reg_errcode_t (fn (void *, bin_tree_t *)), | 36 | reg_errcode_t (fn (void *, bin_tree_t *)), |
| @@ -89,7 +85,6 @@ static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, | |||
| 89 | static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, | 85 | static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, |
| 90 | re_string_t *regexp, | 86 | re_string_t *regexp, |
| 91 | re_token_t *token); | 87 | re_token_t *token); |
| 92 | #ifdef RE_ENABLE_I18N | ||
| 93 | static reg_errcode_t build_equiv_class (bitset_t sbcset, | 88 | static reg_errcode_t build_equiv_class (bitset_t sbcset, |
| 94 | re_charset_t *mbcset, | 89 | re_charset_t *mbcset, |
| 95 | Idx *equiv_class_alloc, | 90 | Idx *equiv_class_alloc, |
| @@ -100,14 +95,6 @@ static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, | |||
| 100 | Idx *char_class_alloc, | 95 | Idx *char_class_alloc, |
| 101 | const char *class_name, | 96 | const char *class_name, |
| 102 | reg_syntax_t syntax); | 97 | reg_syntax_t syntax); |
| 103 | #else /* not RE_ENABLE_I18N */ | ||
| 104 | static reg_errcode_t build_equiv_class (bitset_t sbcset, | ||
| 105 | const unsigned char *name); | ||
| 106 | static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, | ||
| 107 | bitset_t sbcset, | ||
| 108 | const char *class_name, | ||
| 109 | reg_syntax_t syntax); | ||
| 110 | #endif /* not RE_ENABLE_I18N */ | ||
| 111 | static bin_tree_t *build_charclass_op (re_dfa_t *dfa, | 98 | static bin_tree_t *build_charclass_op (re_dfa_t *dfa, |
| 112 | RE_TRANSLATE_TYPE trans, | 99 | RE_TRANSLATE_TYPE trans, |
| 113 | const char *class_name, | 100 | const char *class_name, |
| @@ -279,8 +266,7 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) | |||
| 279 | } | 266 | } |
| 280 | weak_alias (__re_compile_fastmap, re_compile_fastmap) | 267 | weak_alias (__re_compile_fastmap, re_compile_fastmap) |
| 281 | 268 | ||
| 282 | static inline void | 269 | static __always_inline void |
| 283 | __attribute__ ((always_inline)) | ||
| 284 | re_set_fastmap (char *fastmap, bool icase, int ch) | 270 | re_set_fastmap (char *fastmap, bool icase, int ch) |
| 285 | { | 271 | { |
| 286 | fastmap[ch] = 1; | 272 | fastmap[ch] = 1; |
| @@ -306,7 +292,6 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, | |||
| 306 | if (type == CHARACTER) | 292 | if (type == CHARACTER) |
| 307 | { | 293 | { |
| 308 | re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); | 294 | re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); |
| 309 | #ifdef RE_ENABLE_I18N | ||
| 310 | if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) | 295 | if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) |
| 311 | { | 296 | { |
| 312 | unsigned char buf[MB_LEN_MAX]; | 297 | unsigned char buf[MB_LEN_MAX]; |
| @@ -327,7 +312,6 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, | |||
| 327 | != (size_t) -1)) | 312 | != (size_t) -1)) |
| 328 | re_set_fastmap (fastmap, false, buf[0]); | 313 | re_set_fastmap (fastmap, false, buf[0]); |
| 329 | } | 314 | } |
| 330 | #endif | ||
| 331 | } | 315 | } |
| 332 | else if (type == SIMPLE_BRACKET) | 316 | else if (type == SIMPLE_BRACKET) |
| 333 | { | 317 | { |
| @@ -341,13 +325,12 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, | |||
| 341 | re_set_fastmap (fastmap, icase, ch); | 325 | re_set_fastmap (fastmap, icase, ch); |
| 342 | } | 326 | } |
| 343 | } | 327 | } |
| 344 | #ifdef RE_ENABLE_I18N | ||
| 345 | else if (type == COMPLEX_BRACKET) | 328 | else if (type == COMPLEX_BRACKET) |
| 346 | { | 329 | { |
| 347 | re_charset_t *cset = dfa->nodes[node].opr.mbcset; | 330 | re_charset_t *cset = dfa->nodes[node].opr.mbcset; |
| 348 | Idx i; | 331 | Idx i; |
| 349 | 332 | ||
| 350 | # ifdef _LIBC | 333 | #ifdef _LIBC |
| 351 | /* See if we have to try all bytes which start multiple collation | 334 | /* See if we have to try all bytes which start multiple collation |
| 352 | elements. | 335 | elements. |
| 353 | e.g. In da_DK, we want to catch 'a' since "aa" is a valid | 336 | e.g. In da_DK, we want to catch 'a' since "aa" is a valid |
| @@ -363,7 +346,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, | |||
| 363 | if (table[i] < 0) | 346 | if (table[i] < 0) |
| 364 | re_set_fastmap (fastmap, icase, i); | 347 | re_set_fastmap (fastmap, icase, i); |
| 365 | } | 348 | } |
| 366 | # endif /* _LIBC */ | 349 | #endif /* _LIBC */ |
| 367 | 350 | ||
| 368 | /* See if we have to start the match at all multibyte characters, | 351 | /* See if we have to start the match at all multibyte characters, |
| 369 | i.e. where we would not find an invalid sequence. This only | 352 | i.e. where we would not find an invalid sequence. This only |
| @@ -371,9 +354,9 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, | |||
| 371 | sets, the SIMPLE_BRACKET again suffices. */ | 354 | sets, the SIMPLE_BRACKET again suffices. */ |
| 372 | if (dfa->mb_cur_max > 1 | 355 | if (dfa->mb_cur_max > 1 |
| 373 | && (cset->nchar_classes || cset->non_match || cset->nranges | 356 | && (cset->nchar_classes || cset->non_match || cset->nranges |
| 374 | # ifdef _LIBC | 357 | #ifdef _LIBC |
| 375 | || cset->nequiv_classes | 358 | || cset->nequiv_classes |
| 376 | # endif /* _LIBC */ | 359 | #endif /* _LIBC */ |
| 377 | )) | 360 | )) |
| 378 | { | 361 | { |
| 379 | unsigned char c = 0; | 362 | unsigned char c = 0; |
| @@ -406,12 +389,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, | |||
| 406 | } | 389 | } |
| 407 | } | 390 | } |
| 408 | } | 391 | } |
| 409 | #endif /* RE_ENABLE_I18N */ | 392 | else if (type == OP_PERIOD || type == OP_UTF8_PERIOD || type == END_OF_RE) |
| 410 | else if (type == OP_PERIOD | ||
| 411 | #ifdef RE_ENABLE_I18N | ||
| 412 | || type == OP_UTF8_PERIOD | ||
| 413 | #endif /* RE_ENABLE_I18N */ | ||
| 414 | || type == END_OF_RE) | ||
| 415 | { | 393 | { |
| 416 | memset (fastmap, '\1', sizeof (char) * SBC_MAX); | 394 | memset (fastmap, '\1', sizeof (char) * SBC_MAX); |
| 417 | if (type == END_OF_RE) | 395 | if (type == END_OF_RE) |
| @@ -550,7 +528,6 @@ regerror (int errcode, const regex_t *__restrict preg, char *__restrict errbuf, | |||
| 550 | weak_alias (__regerror, regerror) | 528 | weak_alias (__regerror, regerror) |
| 551 | 529 | ||
| 552 | 530 | ||
| 553 | #ifdef RE_ENABLE_I18N | ||
| 554 | /* This static array is used for the map to single-byte characters when | 531 | /* This static array is used for the map to single-byte characters when |
| 555 | UTF-8 is used. Otherwise we would allocate memory just to initialize | 532 | UTF-8 is used. Otherwise we would allocate memory just to initialize |
| 556 | it the same all the time. UTF-8 is the preferred encoding so this is | 533 | it the same all the time. UTF-8 is the preferred encoding so this is |
| @@ -558,25 +535,24 @@ weak_alias (__regerror, regerror) | |||
| 558 | static const bitset_t utf8_sb_map = | 535 | static const bitset_t utf8_sb_map = |
| 559 | { | 536 | { |
| 560 | /* Set the first 128 bits. */ | 537 | /* Set the first 128 bits. */ |
| 561 | # if (defined __GNUC__ || __clang_major__ >= 4) && !defined __STRICT_ANSI__ | 538 | #if (defined __GNUC__ || __clang_major__ >= 4) && !defined __STRICT_ANSI__ |
| 562 | [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX | 539 | [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX |
| 563 | # else | 540 | #else |
| 564 | # if 4 * BITSET_WORD_BITS < ASCII_CHARS | 541 | # if 4 * BITSET_WORD_BITS < ASCII_CHARS |
| 565 | # error "bitset_word_t is narrower than 32 bits" | 542 | # error "bitset_word_t is narrower than 32 bits" |
| 566 | # elif 3 * BITSET_WORD_BITS < ASCII_CHARS | 543 | # elif 3 * BITSET_WORD_BITS < ASCII_CHARS |
| 567 | BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX, | 544 | BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX, |
| 568 | # elif 2 * BITSET_WORD_BITS < ASCII_CHARS | 545 | # elif 2 * BITSET_WORD_BITS < ASCII_CHARS |
| 569 | BITSET_WORD_MAX, BITSET_WORD_MAX, | 546 | BITSET_WORD_MAX, BITSET_WORD_MAX, |
| 570 | # elif 1 * BITSET_WORD_BITS < ASCII_CHARS | 547 | # elif 1 * BITSET_WORD_BITS < ASCII_CHARS |
| 571 | BITSET_WORD_MAX, | 548 | BITSET_WORD_MAX, |
| 572 | # endif | 549 | # endif |
| 573 | (BITSET_WORD_MAX | 550 | (BITSET_WORD_MAX |
| 574 | >> (SBC_MAX % BITSET_WORD_BITS == 0 | 551 | >> (SBC_MAX % BITSET_WORD_BITS == 0 |
| 575 | ? 0 | 552 | ? 0 |
| 576 | : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS)) | 553 | : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS)) |
| 577 | # endif | ||
| 578 | }; | ||
| 579 | #endif | 554 | #endif |
| 555 | }; | ||
| 580 | 556 | ||
| 581 | 557 | ||
| 582 | static void | 558 | static void |
| @@ -614,10 +590,8 @@ free_dfa_content (re_dfa_t *dfa) | |||
| 614 | re_free (entry->array); | 590 | re_free (entry->array); |
| 615 | } | 591 | } |
| 616 | re_free (dfa->state_table); | 592 | re_free (dfa->state_table); |
| 617 | #ifdef RE_ENABLE_I18N | ||
| 618 | if (dfa->sb_char != utf8_sb_map) | 593 | if (dfa->sb_char != utf8_sb_map) |
| 619 | re_free (dfa->sb_char); | 594 | re_free (dfa->sb_char); |
| 620 | #endif | ||
| 621 | re_free (dfa->subexp_map); | 595 | re_free (dfa->subexp_map); |
| 622 | #ifdef DEBUG | 596 | #ifdef DEBUG |
| 623 | re_free (dfa->re_str); | 597 | re_free (dfa->re_str); |
| @@ -796,11 +770,9 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length, | |||
| 796 | if (__glibc_unlikely (err != REG_NOERROR)) | 770 | if (__glibc_unlikely (err != REG_NOERROR)) |
| 797 | goto re_compile_internal_free_return; | 771 | goto re_compile_internal_free_return; |
| 798 | 772 | ||
| 799 | #ifdef RE_ENABLE_I18N | ||
| 800 | /* If possible, do searching in single byte encoding to speed things up. */ | 773 | /* If possible, do searching in single byte encoding to speed things up. */ |
| 801 | if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL) | 774 | if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL) |
| 802 | optimize_utf8 (dfa); | 775 | optimize_utf8 (dfa); |
| 803 | #endif | ||
| 804 | 776 | ||
| 805 | /* Then create the initial state of the dfa. */ | 777 | /* Then create the initial state of the dfa. */ |
| 806 | err = create_initial_state (dfa); | 778 | err = create_initial_state (dfa); |
| @@ -830,11 +802,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len) | |||
| 830 | #ifndef _LIBC | 802 | #ifndef _LIBC |
| 831 | const char *codeset_name; | 803 | const char *codeset_name; |
| 832 | #endif | 804 | #endif |
| 833 | #ifdef RE_ENABLE_I18N | ||
| 834 | size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t)); | 805 | size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t)); |
| 835 | #else | ||
| 836 | size_t max_i18n_object_size = 0; | ||
| 837 | #endif | ||
| 838 | size_t max_object_size = | 806 | size_t max_object_size = |
| 839 | MAX (sizeof (struct re_state_table_entry), | 807 | MAX (sizeof (struct re_state_table_entry), |
| 840 | MAX (sizeof (re_token_t), | 808 | MAX (sizeof (re_token_t), |
| @@ -886,7 +854,6 @@ init_dfa (re_dfa_t *dfa, size_t pat_len) | |||
| 886 | dfa->map_notascii = 0; | 854 | dfa->map_notascii = 0; |
| 887 | #endif | 855 | #endif |
| 888 | 856 | ||
| 889 | #ifdef RE_ENABLE_I18N | ||
| 890 | if (dfa->mb_cur_max > 1) | 857 | if (dfa->mb_cur_max > 1) |
| 891 | { | 858 | { |
| 892 | if (dfa->is_utf8) | 859 | if (dfa->is_utf8) |
| @@ -906,14 +873,13 @@ init_dfa (re_dfa_t *dfa, size_t pat_len) | |||
| 906 | wint_t wch = __btowc (ch); | 873 | wint_t wch = __btowc (ch); |
| 907 | if (wch != WEOF) | 874 | if (wch != WEOF) |
| 908 | dfa->sb_char[i] |= (bitset_word_t) 1 << j; | 875 | dfa->sb_char[i] |= (bitset_word_t) 1 << j; |
| 909 | # ifndef _LIBC | 876 | #ifndef _LIBC |
| 910 | if (isascii (ch) && wch != ch) | 877 | if (isascii (ch) && wch != ch) |
| 911 | dfa->map_notascii = 1; | 878 | dfa->map_notascii = 1; |
| 912 | # endif | 879 | #endif |
| 913 | } | 880 | } |
| 914 | } | 881 | } |
| 915 | } | 882 | } |
| 916 | #endif | ||
| 917 | 883 | ||
| 918 | if (__glibc_unlikely (dfa->nodes == NULL || dfa->state_table == NULL)) | 884 | if (__glibc_unlikely (dfa->nodes == NULL || dfa->state_table == NULL)) |
| 919 | return REG_ESPACE; | 885 | return REG_ESPACE; |
| @@ -933,8 +899,6 @@ init_word_char (re_dfa_t *dfa) | |||
| 933 | dfa->word_ops_used = 1; | 899 | dfa->word_ops_used = 1; |
| 934 | if (__glibc_likely (dfa->map_notascii == 0)) | 900 | if (__glibc_likely (dfa->map_notascii == 0)) |
| 935 | { | 901 | { |
| 936 | /* Avoid uint32_t and uint64_t as some non-GCC platforms lack | ||
| 937 | them, an issue when this code is used in Gnulib. */ | ||
| 938 | bitset_word_t bits0 = 0x00000000; | 902 | bitset_word_t bits0 = 0x00000000; |
| 939 | bitset_word_t bits1 = 0x03ff0000; | 903 | bitset_word_t bits1 = 0x03ff0000; |
| 940 | bitset_word_t bits2 = 0x87fffffe; | 904 | bitset_word_t bits2 = 0x87fffffe; |
| @@ -1074,7 +1038,6 @@ create_initial_state (re_dfa_t *dfa) | |||
| 1074 | return REG_NOERROR; | 1038 | return REG_NOERROR; |
| 1075 | } | 1039 | } |
| 1076 | 1040 | ||
| 1077 | #ifdef RE_ENABLE_I18N | ||
| 1078 | /* If it is possible to do searching in single byte encoding instead of UTF-8 | 1041 | /* If it is possible to do searching in single byte encoding instead of UTF-8 |
| 1079 | to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change | 1042 | to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change |
| 1080 | DFA nodes where needed. */ | 1043 | DFA nodes where needed. */ |
| @@ -1154,7 +1117,6 @@ optimize_utf8 (re_dfa_t *dfa) | |||
| 1154 | dfa->is_utf8 = 0; | 1117 | dfa->is_utf8 = 0; |
| 1155 | dfa->has_mb_node = dfa->nbackref > 0 || has_period; | 1118 | dfa->has_mb_node = dfa->nbackref > 0 || has_period; |
| 1156 | } | 1119 | } |
| 1157 | #endif | ||
| 1158 | 1120 | ||
| 1159 | /* Analyze the structure tree, and calculate "first", "next", "edest", | 1121 | /* Analyze the structure tree, and calculate "first", "next", "edest", |
| 1160 | "eclosure", and "inveclosure". */ | 1122 | "eclosure", and "inveclosure". */ |
| @@ -1792,7 +1754,6 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) | |||
| 1792 | token->opr.c = c; | 1754 | token->opr.c = c; |
| 1793 | 1755 | ||
| 1794 | token->word_char = 0; | 1756 | token->word_char = 0; |
| 1795 | #ifdef RE_ENABLE_I18N | ||
| 1796 | token->mb_partial = 0; | 1757 | token->mb_partial = 0; |
| 1797 | if (input->mb_cur_max > 1 | 1758 | if (input->mb_cur_max > 1 |
| 1798 | && !re_string_first_byte (input, re_string_cur_idx (input))) | 1759 | && !re_string_first_byte (input, re_string_cur_idx (input))) |
| @@ -1801,7 +1762,6 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) | |||
| 1801 | token->mb_partial = 1; | 1762 | token->mb_partial = 1; |
| 1802 | return 1; | 1763 | return 1; |
| 1803 | } | 1764 | } |
| 1804 | #endif | ||
| 1805 | if (c == '\\') | 1765 | if (c == '\\') |
| 1806 | { | 1766 | { |
| 1807 | unsigned char c2; | 1767 | unsigned char c2; |
| @@ -1814,7 +1774,6 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) | |||
| 1814 | c2 = re_string_peek_byte_case (input, 1); | 1774 | c2 = re_string_peek_byte_case (input, 1); |
| 1815 | token->opr.c = c2; | 1775 | token->opr.c = c2; |
| 1816 | token->type = CHARACTER; | 1776 | token->type = CHARACTER; |
| 1817 | #ifdef RE_ENABLE_I18N | ||
| 1818 | if (input->mb_cur_max > 1) | 1777 | if (input->mb_cur_max > 1) |
| 1819 | { | 1778 | { |
| 1820 | wint_t wc = re_string_wchar_at (input, | 1779 | wint_t wc = re_string_wchar_at (input, |
| @@ -1822,7 +1781,6 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) | |||
| 1822 | token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; | 1781 | token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; |
| 1823 | } | 1782 | } |
| 1824 | else | 1783 | else |
| 1825 | #endif | ||
| 1826 | token->word_char = IS_WORD_CHAR (c2) != 0; | 1784 | token->word_char = IS_WORD_CHAR (c2) != 0; |
| 1827 | 1785 | ||
| 1828 | switch (c2) | 1786 | switch (c2) |
| @@ -1928,14 +1886,12 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) | |||
| 1928 | } | 1886 | } |
| 1929 | 1887 | ||
| 1930 | token->type = CHARACTER; | 1888 | token->type = CHARACTER; |
| 1931 | #ifdef RE_ENABLE_I18N | ||
| 1932 | if (input->mb_cur_max > 1) | 1889 | if (input->mb_cur_max > 1) |
| 1933 | { | 1890 | { |
| 1934 | wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)); | 1891 | wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)); |
| 1935 | token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; | 1892 | token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; |
| 1936 | } | 1893 | } |
| 1937 | else | 1894 | else |
| 1938 | #endif | ||
| 1939 | token->word_char = IS_WORD_CHAR (token->opr.c); | 1895 | token->word_char = IS_WORD_CHAR (token->opr.c); |
| 1940 | 1896 | ||
| 1941 | switch (c) | 1897 | switch (c) |
| @@ -2027,14 +1983,12 @@ peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) | |||
| 2027 | c = re_string_peek_byte (input, 0); | 1983 | c = re_string_peek_byte (input, 0); |
| 2028 | token->opr.c = c; | 1984 | token->opr.c = c; |
| 2029 | 1985 | ||
| 2030 | #ifdef RE_ENABLE_I18N | ||
| 2031 | if (input->mb_cur_max > 1 | 1986 | if (input->mb_cur_max > 1 |
| 2032 | && !re_string_first_byte (input, re_string_cur_idx (input))) | 1987 | && !re_string_first_byte (input, re_string_cur_idx (input))) |
| 2033 | { | 1988 | { |
| 2034 | token->type = CHARACTER; | 1989 | token->type = CHARACTER; |
| 2035 | return 1; | 1990 | return 1; |
| 2036 | } | 1991 | } |
| 2037 | #endif /* RE_ENABLE_I18N */ | ||
| 2038 | 1992 | ||
| 2039 | if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) | 1993 | if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) |
| 2040 | && re_string_cur_idx (input) + 1 < re_string_length (input)) | 1994 | && re_string_cur_idx (input) + 1 < re_string_length (input)) |
| @@ -2084,15 +2038,25 @@ peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) | |||
| 2084 | } | 2038 | } |
| 2085 | switch (c) | 2039 | switch (c) |
| 2086 | { | 2040 | { |
| 2087 | case '-': | ||
| 2088 | token->type = OP_CHARSET_RANGE; | ||
| 2089 | break; | ||
| 2090 | case ']': | 2041 | case ']': |
| 2091 | token->type = OP_CLOSE_BRACKET; | 2042 | token->type = OP_CLOSE_BRACKET; |
| 2092 | break; | 2043 | break; |
| 2093 | case '^': | 2044 | case '^': |
| 2094 | token->type = OP_NON_MATCH_LIST; | 2045 | token->type = OP_NON_MATCH_LIST; |
| 2095 | break; | 2046 | break; |
| 2047 | case '-': | ||
| 2048 | /* In V7 Unix grep and Unix awk and mawk, [...---...] | ||
| 2049 | (3 adjacent minus signs) stands for a single minus sign. | ||
| 2050 | Support that without breaking anything else. */ | ||
| 2051 | if (! (re_string_cur_idx (input) + 2 < re_string_length (input) | ||
| 2052 | && re_string_peek_byte (input, 1) == '-' | ||
| 2053 | && re_string_peek_byte (input, 2) == '-')) | ||
| 2054 | { | ||
| 2055 | token->type = OP_CHARSET_RANGE; | ||
| 2056 | break; | ||
| 2057 | } | ||
| 2058 | re_string_skip_bytes (input, 2); | ||
| 2059 | FALLTHROUGH; | ||
| 2096 | default: | 2060 | default: |
| 2097 | token->type = CHARACTER; | 2061 | token->type = CHARACTER; |
| 2098 | } | 2062 | } |
| @@ -2256,7 +2220,6 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, | |||
| 2256 | *err = REG_ESPACE; | 2220 | *err = REG_ESPACE; |
| 2257 | return NULL; | 2221 | return NULL; |
| 2258 | } | 2222 | } |
| 2259 | #ifdef RE_ENABLE_I18N | ||
| 2260 | if (dfa->mb_cur_max > 1) | 2223 | if (dfa->mb_cur_max > 1) |
| 2261 | { | 2224 | { |
| 2262 | while (!re_string_eoi (regexp) | 2225 | while (!re_string_eoi (regexp) |
| @@ -2273,7 +2236,6 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, | |||
| 2273 | } | 2236 | } |
| 2274 | } | 2237 | } |
| 2275 | } | 2238 | } |
| 2276 | #endif | ||
| 2277 | break; | 2239 | break; |
| 2278 | 2240 | ||
| 2279 | case OP_OPEN_SUBEXP: | 2241 | case OP_OPEN_SUBEXP: |
| @@ -2666,40 +2628,30 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, | |||
| 2666 | 2628 | ||
| 2667 | #ifndef _LIBC | 2629 | #ifndef _LIBC |
| 2668 | 2630 | ||
| 2669 | # ifdef RE_ENABLE_I18N | ||
| 2670 | /* Convert the byte B to the corresponding wide character. In a | 2631 | /* Convert the byte B to the corresponding wide character. In a |
| 2671 | unibyte locale, treat B as itself. In a multibyte locale, return | 2632 | unibyte locale, treat B as itself. In a multibyte locale, return |
| 2672 | WEOF if B is an encoding error. */ | 2633 | WEOF if B is an encoding error. */ |
| 2673 | static wint_t | 2634 | static wint_t |
| 2674 | parse_byte (unsigned char b, re_charset_t *mbcset) | 2635 | parse_byte (unsigned char b, re_dfa_t const *dfa) |
| 2675 | { | 2636 | { |
| 2676 | return mbcset == NULL ? b : __btowc (b); | 2637 | return dfa->mb_cur_max > 1 ? __btowc (b) : b; |
| 2677 | } | 2638 | } |
| 2678 | # endif | ||
| 2679 | 2639 | ||
| 2680 | /* Local function for parse_bracket_exp only used in case of NOT _LIBC. | 2640 | /* Local function for parse_bracket_exp used in _LIBC environment. |
| 2681 | Build the range expression which starts from START_ELEM, and ends | 2641 | Build the range expression which starts from START_ELEM, and ends |
| 2682 | at END_ELEM. The results are written to MBCSET and SBCSET. | 2642 | at END_ELEM. The results are written to MBCSET and SBCSET. |
| 2683 | RANGE_ALLOC is the allocated size of mbcset->range_starts and | 2643 | RANGE_ALLOC is the allocated size of mbcset->range_starts and |
| 2684 | mbcset->range_ends; it is a pointer argument since we may | 2644 | mbcset->range_ends; it is a pointer argument since we may |
| 2685 | update it. */ | 2645 | update it. */ |
| 2686 | 2646 | ||
| 2687 | static reg_errcode_t | 2647 | static reg_errcode_t |
| 2688 | # ifdef RE_ENABLE_I18N | 2648 | build_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc, |
| 2689 | build_range_exp (const reg_syntax_t syntax, | 2649 | bracket_elem_t *start_elem, bracket_elem_t *end_elem, |
| 2690 | bitset_t sbcset, | 2650 | re_dfa_t *dfa, reg_syntax_t syntax, uint_fast32_t nrules, |
| 2691 | re_charset_t *mbcset, | 2651 | const unsigned char *collseqmb, const char *collseqwc, |
| 2692 | Idx *range_alloc, | 2652 | int_fast32_t table_size, const void *symb_table, |
| 2693 | const bracket_elem_t *start_elem, | 2653 | const unsigned char *extra) |
| 2694 | const bracket_elem_t *end_elem) | ||
| 2695 | # else /* not RE_ENABLE_I18N */ | ||
| 2696 | build_range_exp (const reg_syntax_t syntax, | ||
| 2697 | bitset_t sbcset, | ||
| 2698 | const bracket_elem_t *start_elem, | ||
| 2699 | const bracket_elem_t *end_elem) | ||
| 2700 | # endif /* not RE_ENABLE_I18N */ | ||
| 2701 | { | 2654 | { |
| 2702 | unsigned int start_ch, end_ch; | ||
| 2703 | /* Equivalence Classes and Character Classes can't be a range start/end. */ | 2655 | /* Equivalence Classes and Character Classes can't be a range start/end. */ |
| 2704 | if (__glibc_unlikely (start_elem->type == EQUIV_CLASS | 2656 | if (__glibc_unlikely (start_elem->type == EQUIV_CLASS |
| 2705 | || start_elem->type == CHAR_CLASS | 2657 | || start_elem->type == CHAR_CLASS |
| @@ -2715,110 +2667,88 @@ build_range_exp (const reg_syntax_t syntax, | |||
| 2715 | && strlen ((char *) end_elem->opr.name) > 1))) | 2667 | && strlen ((char *) end_elem->opr.name) > 1))) |
| 2716 | return REG_ECOLLATE; | 2668 | return REG_ECOLLATE; |
| 2717 | 2669 | ||
| 2718 | # ifdef RE_ENABLE_I18N | 2670 | unsigned int |
| 2719 | { | ||
| 2720 | wchar_t wc; | ||
| 2721 | wint_t start_wc; | ||
| 2722 | wint_t end_wc; | ||
| 2723 | |||
| 2724 | start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch | 2671 | start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch |
| 2725 | : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] | 2672 | : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] |
| 2726 | : 0)); | 2673 | : 0)), |
| 2727 | end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch | 2674 | end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch |
| 2728 | : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] | 2675 | : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] |
| 2729 | : 0)); | 2676 | : 0)); |
| 2677 | wint_t | ||
| 2730 | start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) | 2678 | start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) |
| 2731 | ? parse_byte (start_ch, mbcset) : start_elem->opr.wch); | 2679 | ? parse_byte (start_ch, dfa) : start_elem->opr.wch), |
| 2732 | end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) | 2680 | end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) |
| 2733 | ? parse_byte (end_ch, mbcset) : end_elem->opr.wch); | 2681 | ? parse_byte (end_ch, dfa) : end_elem->opr.wch); |
| 2734 | if (start_wc == WEOF || end_wc == WEOF) | ||
| 2735 | return REG_ECOLLATE; | ||
| 2736 | else if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES) | ||
| 2737 | && start_wc > end_wc)) | ||
| 2738 | return REG_ERANGE; | ||
| 2739 | |||
| 2740 | /* Got valid collation sequence values, add them as a new entry. | ||
| 2741 | However, for !_LIBC we have no collation elements: if the | ||
| 2742 | character set is single byte, the single byte character set | ||
| 2743 | that we build below suffices. parse_bracket_exp passes | ||
| 2744 | no MBCSET if dfa->mb_cur_max == 1. */ | ||
| 2745 | if (mbcset) | ||
| 2746 | { | ||
| 2747 | /* Check the space of the arrays. */ | ||
| 2748 | if (__glibc_unlikely (*range_alloc == mbcset->nranges)) | ||
| 2749 | { | ||
| 2750 | /* There is not enough space, need realloc. */ | ||
| 2751 | wchar_t *new_array_start, *new_array_end; | ||
| 2752 | Idx new_nranges; | ||
| 2753 | |||
| 2754 | /* +1 in case of mbcset->nranges is 0. */ | ||
| 2755 | new_nranges = 2 * mbcset->nranges + 1; | ||
| 2756 | /* Use realloc since mbcset->range_starts and mbcset->range_ends | ||
| 2757 | are NULL if *range_alloc == 0. */ | ||
| 2758 | new_array_start = re_realloc (mbcset->range_starts, wchar_t, | ||
| 2759 | new_nranges); | ||
| 2760 | new_array_end = re_realloc (mbcset->range_ends, wchar_t, | ||
| 2761 | new_nranges); | ||
| 2762 | 2682 | ||
| 2763 | if (__glibc_unlikely (new_array_start == NULL | 2683 | if (start_wc == WEOF || end_wc == WEOF) |
| 2764 | || new_array_end == NULL)) | 2684 | return REG_ECOLLATE; |
| 2765 | { | 2685 | else if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES) |
| 2766 | re_free (new_array_start); | 2686 | && start_wc > end_wc)) |
| 2767 | re_free (new_array_end); | 2687 | return REG_ERANGE; |
| 2768 | return REG_ESPACE; | ||
| 2769 | } | ||
| 2770 | 2688 | ||
| 2771 | mbcset->range_starts = new_array_start; | 2689 | /* Got valid collation sequence values, add them as a new entry. |
| 2772 | mbcset->range_ends = new_array_end; | 2690 | However, for !_LIBC we have no collation elements: if the |
| 2773 | *range_alloc = new_nranges; | 2691 | character set is single byte, the single byte character set |
| 2774 | } | 2692 | that we build below suffices. parse_bracket_exp passes |
| 2693 | no MBCSET if dfa->mb_cur_max == 1. */ | ||
| 2694 | if (dfa->mb_cur_max > 1) | ||
| 2695 | { | ||
| 2696 | /* Check the space of the arrays. */ | ||
| 2697 | if (__glibc_unlikely (*range_alloc == mbcset->nranges)) | ||
| 2698 | { | ||
| 2699 | /* There is not enough space, need realloc. */ | ||
| 2700 | wchar_t *new_array_start, *new_array_end; | ||
| 2701 | Idx new_nranges; | ||
| 2775 | 2702 | ||
| 2776 | mbcset->range_starts[mbcset->nranges] = start_wc; | 2703 | /* +1 in case of mbcset->nranges is 0. */ |
| 2777 | mbcset->range_ends[mbcset->nranges++] = end_wc; | 2704 | new_nranges = 2 * mbcset->nranges + 1; |
| 2778 | } | 2705 | /* Use realloc since mbcset->range_starts and mbcset->range_ends |
| 2706 | are NULL if *range_alloc == 0. */ | ||
| 2707 | new_array_start = re_realloc (mbcset->range_starts, wchar_t, | ||
| 2708 | new_nranges); | ||
| 2709 | new_array_end = re_realloc (mbcset->range_ends, wchar_t, | ||
| 2710 | new_nranges); | ||
| 2711 | |||
| 2712 | if (__glibc_unlikely (new_array_start == NULL | ||
| 2713 | || new_array_end == NULL)) | ||
| 2714 | { | ||
| 2715 | re_free (new_array_start); | ||
| 2716 | re_free (new_array_end); | ||
| 2717 | return REG_ESPACE; | ||
| 2718 | } | ||
| 2719 | |||
| 2720 | mbcset->range_starts = new_array_start; | ||
| 2721 | mbcset->range_ends = new_array_end; | ||
| 2722 | *range_alloc = new_nranges; | ||
| 2723 | } | ||
| 2724 | |||
| 2725 | mbcset->range_starts[mbcset->nranges] = start_wc; | ||
| 2726 | mbcset->range_ends[mbcset->nranges++] = end_wc; | ||
| 2727 | } | ||
| 2728 | |||
| 2729 | /* Build the table for single byte characters. */ | ||
| 2730 | for (wchar_t wc = 0; wc < SBC_MAX; ++wc) | ||
| 2731 | { | ||
| 2732 | if (start_wc <= wc && wc <= end_wc) | ||
| 2733 | bitset_set (sbcset, wc); | ||
| 2734 | } | ||
| 2779 | 2735 | ||
| 2780 | /* Build the table for single byte characters. */ | ||
| 2781 | for (wc = 0; wc < SBC_MAX; ++wc) | ||
| 2782 | { | ||
| 2783 | if (start_wc <= wc && wc <= end_wc) | ||
| 2784 | bitset_set (sbcset, wc); | ||
| 2785 | } | ||
| 2786 | } | ||
| 2787 | # else /* not RE_ENABLE_I18N */ | ||
| 2788 | { | ||
| 2789 | unsigned int ch; | ||
| 2790 | start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch | ||
| 2791 | : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] | ||
| 2792 | : 0)); | ||
| 2793 | end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch | ||
| 2794 | : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] | ||
| 2795 | : 0)); | ||
| 2796 | if (start_ch > end_ch) | ||
| 2797 | return REG_ERANGE; | ||
| 2798 | /* Build the table for single byte characters. */ | ||
| 2799 | for (ch = 0; ch < SBC_MAX; ++ch) | ||
| 2800 | if (start_ch <= ch && ch <= end_ch) | ||
| 2801 | bitset_set (sbcset, ch); | ||
| 2802 | } | ||
| 2803 | # endif /* not RE_ENABLE_I18N */ | ||
| 2804 | return REG_NOERROR; | 2736 | return REG_NOERROR; |
| 2805 | } | 2737 | } |
| 2806 | #endif /* not _LIBC */ | 2738 | #endif /* not _LIBC */ |
| 2807 | 2739 | ||
| 2808 | #ifndef _LIBC | 2740 | #ifndef _LIBC |
| 2809 | /* Helper function for parse_bracket_exp only used in case of NOT _LIBC.. | 2741 | /* Helper function for parse_bracket_exp only used in case of NOT _LIBC. |
| 2810 | Build the collating element which is represented by NAME. | 2742 | Build the collating element which is represented by NAME. |
| 2811 | The result are written to MBCSET and SBCSET. | 2743 | The result are written to MBCSET and SBCSET. |
| 2812 | COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a | 2744 | COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a |
| 2813 | pointer argument since we may update it. */ | 2745 | pointer argument since we may update it. */ |
| 2814 | 2746 | ||
| 2815 | static reg_errcode_t | 2747 | static reg_errcode_t |
| 2816 | # ifdef RE_ENABLE_I18N | ||
| 2817 | build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, | 2748 | build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, |
| 2818 | Idx *coll_sym_alloc, const unsigned char *name) | 2749 | Idx *coll_sym_alloc, const unsigned char *name, |
| 2819 | # else /* not RE_ENABLE_I18N */ | 2750 | uint_fast32_t nrules, int_fast32_t table_size, |
| 2820 | build_collating_symbol (bitset_t sbcset, const unsigned char *name) | 2751 | const void *symb_table, const unsigned char *extra) |
| 2821 | # endif /* not RE_ENABLE_I18N */ | ||
| 2822 | { | 2752 | { |
| 2823 | size_t name_len = strlen ((const char *) name); | 2753 | size_t name_len = strlen ((const char *) name); |
| 2824 | if (__glibc_unlikely (name_len != 1)) | 2754 | if (__glibc_unlikely (name_len != 1)) |
| @@ -2831,271 +2761,280 @@ build_collating_symbol (bitset_t sbcset, const unsigned char *name) | |||
| 2831 | } | 2761 | } |
| 2832 | #endif /* not _LIBC */ | 2762 | #endif /* not _LIBC */ |
| 2833 | 2763 | ||
| 2834 | /* This function parse bracket expression like "[abc]", "[a-c]", | ||
| 2835 | "[[.a-a.]]" etc. */ | ||
| 2836 | |||
| 2837 | static bin_tree_t * | ||
| 2838 | parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, | ||
| 2839 | reg_syntax_t syntax, reg_errcode_t *err) | ||
| 2840 | { | ||
| 2841 | #ifdef _LIBC | 2764 | #ifdef _LIBC |
| 2842 | const unsigned char *collseqmb; | 2765 | /* Local function for parse_bracket_exp used in _LIBC environment. |
| 2843 | const char *collseqwc; | 2766 | Seek the collating symbol entry corresponding to NAME. |
| 2844 | uint32_t nrules; | 2767 | Return the index of the symbol in the SYMB_TABLE, |
| 2845 | int32_t table_size; | 2768 | or -1 if not found. */ |
| 2846 | const int32_t *symb_table; | 2769 | |
| 2847 | const unsigned char *extra; | 2770 | static __always_inline int32_t |
| 2848 | 2771 | seek_collating_symbol_entry (const unsigned char *name, size_t name_len, | |
| 2849 | /* Local function for parse_bracket_exp used in _LIBC environment. | 2772 | const int32_t *symb_table, |
| 2850 | Seek the collating symbol entry corresponding to NAME. | 2773 | int_fast32_t table_size, |
| 2851 | Return the index of the symbol in the SYMB_TABLE, | 2774 | const unsigned char *extra) |
| 2852 | or -1 if not found. */ | 2775 | { |
| 2853 | 2776 | int_fast32_t elem; | |
| 2854 | auto inline int32_t | ||
| 2855 | __attribute__ ((always_inline)) | ||
| 2856 | seek_collating_symbol_entry (const unsigned char *name, size_t name_len) | ||
| 2857 | { | ||
| 2858 | int32_t elem; | ||
| 2859 | |||
| 2860 | for (elem = 0; elem < table_size; elem++) | ||
| 2861 | if (symb_table[2 * elem] != 0) | ||
| 2862 | { | ||
| 2863 | int32_t idx = symb_table[2 * elem + 1]; | ||
| 2864 | /* Skip the name of collating element name. */ | ||
| 2865 | idx += 1 + extra[idx]; | ||
| 2866 | if (/* Compare the length of the name. */ | ||
| 2867 | name_len == extra[idx] | ||
| 2868 | /* Compare the name. */ | ||
| 2869 | && memcmp (name, &extra[idx + 1], name_len) == 0) | ||
| 2870 | /* Yep, this is the entry. */ | ||
| 2871 | return elem; | ||
| 2872 | } | ||
| 2873 | return -1; | ||
| 2874 | } | ||
| 2875 | 2777 | ||
| 2876 | /* Local function for parse_bracket_exp used in _LIBC environment. | 2778 | for (elem = 0; elem < table_size; elem++) |
| 2877 | Look up the collation sequence value of BR_ELEM. | 2779 | if (symb_table[2 * elem] != 0) |
| 2878 | Return the value if succeeded, UINT_MAX otherwise. */ | 2780 | { |
| 2781 | int32_t idx = symb_table[2 * elem + 1]; | ||
| 2782 | /* Skip the name of collating element name. */ | ||
| 2783 | idx += 1 + extra[idx]; | ||
| 2784 | if (/* Compare the length of the name. */ | ||
| 2785 | name_len == extra[idx] | ||
| 2786 | /* Compare the name. */ | ||
| 2787 | && memcmp (name, &extra[idx + 1], name_len) == 0) | ||
| 2788 | /* Yep, this is the entry. */ | ||
| 2789 | return elem; | ||
| 2790 | } | ||
| 2791 | return -1; | ||
| 2792 | } | ||
| 2879 | 2793 | ||
| 2880 | auto inline unsigned int | 2794 | /* Local function for parse_bracket_exp used in _LIBC environment. |
| 2881 | __attribute__ ((always_inline)) | 2795 | Look up the collation sequence value of BR_ELEM. |
| 2882 | lookup_collation_sequence_value (bracket_elem_t *br_elem) | 2796 | Return the value if succeeded, UINT_MAX otherwise. */ |
| 2797 | |||
| 2798 | static __always_inline unsigned int | ||
| 2799 | lookup_collation_sequence_value (bracket_elem_t *br_elem, uint32_t nrules, | ||
| 2800 | const unsigned char *collseqmb, | ||
| 2801 | const char *collseqwc, | ||
| 2802 | int_fast32_t table_size, | ||
| 2803 | const int32_t *symb_table, | ||
| 2804 | const unsigned char *extra) | ||
| 2805 | { | ||
| 2806 | if (br_elem->type == SB_CHAR) | ||
| 2883 | { | 2807 | { |
| 2884 | if (br_elem->type == SB_CHAR) | 2808 | /* if (MB_CUR_MAX == 1) */ |
| 2885 | { | 2809 | if (nrules == 0) |
| 2886 | /* | 2810 | return collseqmb[br_elem->opr.ch]; |
| 2887 | if (MB_CUR_MAX == 1) | 2811 | else |
| 2888 | */ | ||
| 2889 | if (nrules == 0) | ||
| 2890 | return collseqmb[br_elem->opr.ch]; | ||
| 2891 | else | ||
| 2892 | { | ||
| 2893 | wint_t wc = __btowc (br_elem->opr.ch); | ||
| 2894 | return __collseq_table_lookup (collseqwc, wc); | ||
| 2895 | } | ||
| 2896 | } | ||
| 2897 | else if (br_elem->type == MB_CHAR) | ||
| 2898 | { | 2812 | { |
| 2899 | if (nrules != 0) | 2813 | wint_t wc = __btowc (br_elem->opr.ch); |
| 2900 | return __collseq_table_lookup (collseqwc, br_elem->opr.wch); | 2814 | return __collseq_table_lookup (collseqwc, wc); |
| 2901 | } | 2815 | } |
| 2902 | else if (br_elem->type == COLL_SYM) | 2816 | } |
| 2817 | else if (br_elem->type == MB_CHAR) | ||
| 2818 | { | ||
| 2819 | if (nrules != 0) | ||
| 2820 | return __collseq_table_lookup (collseqwc, br_elem->opr.wch); | ||
| 2821 | } | ||
| 2822 | else if (br_elem->type == COLL_SYM) | ||
| 2823 | { | ||
| 2824 | size_t sym_name_len = strlen ((char *) br_elem->opr.name); | ||
| 2825 | if (nrules != 0) | ||
| 2903 | { | 2826 | { |
| 2904 | size_t sym_name_len = strlen ((char *) br_elem->opr.name); | 2827 | int32_t elem, idx; |
| 2905 | if (nrules != 0) | 2828 | elem = seek_collating_symbol_entry (br_elem->opr.name, |
| 2829 | sym_name_len, | ||
| 2830 | symb_table, table_size, | ||
| 2831 | extra); | ||
| 2832 | if (elem != -1) | ||
| 2906 | { | 2833 | { |
| 2907 | int32_t elem, idx; | 2834 | /* We found the entry. */ |
| 2908 | elem = seek_collating_symbol_entry (br_elem->opr.name, | 2835 | idx = symb_table[2 * elem + 1]; |
| 2909 | sym_name_len); | 2836 | /* Skip the name of collating element name. */ |
| 2910 | if (elem != -1) | 2837 | idx += 1 + extra[idx]; |
| 2911 | { | 2838 | /* Skip the byte sequence of the collating element. */ |
| 2912 | /* We found the entry. */ | 2839 | idx += 1 + extra[idx]; |
| 2913 | idx = symb_table[2 * elem + 1]; | 2840 | /* Adjust for the alignment. */ |
| 2914 | /* Skip the name of collating element name. */ | 2841 | idx = (idx + 3) & ~3; |
| 2915 | idx += 1 + extra[idx]; | 2842 | /* Skip the multibyte collation sequence value. */ |
| 2916 | /* Skip the byte sequence of the collating element. */ | 2843 | idx += sizeof (unsigned int); |
| 2917 | idx += 1 + extra[idx]; | 2844 | /* Skip the wide char sequence of the collating element. */ |
| 2918 | /* Adjust for the alignment. */ | 2845 | idx += sizeof (unsigned int) * |
| 2919 | idx = (idx + 3) & ~3; | 2846 | (1 + *(unsigned int *) (extra + idx)); |
| 2920 | /* Skip the multibyte collation sequence value. */ | 2847 | /* Return the collation sequence value. */ |
| 2921 | idx += sizeof (unsigned int); | 2848 | return *(unsigned int *) (extra + idx); |
| 2922 | /* Skip the wide char sequence of the collating element. */ | ||
| 2923 | idx += sizeof (unsigned int) * | ||
| 2924 | (1 + *(unsigned int *) (extra + idx)); | ||
| 2925 | /* Return the collation sequence value. */ | ||
| 2926 | return *(unsigned int *) (extra + idx); | ||
| 2927 | } | ||
| 2928 | else if (sym_name_len == 1) | ||
| 2929 | { | ||
| 2930 | /* No valid character. Match it as a single byte | ||
| 2931 | character. */ | ||
| 2932 | return collseqmb[br_elem->opr.name[0]]; | ||
| 2933 | } | ||
| 2934 | } | 2849 | } |
| 2935 | else if (sym_name_len == 1) | 2850 | else if (sym_name_len == 1) |
| 2936 | return collseqmb[br_elem->opr.name[0]]; | 2851 | { |
| 2852 | /* No valid character. Match it as a single byte | ||
| 2853 | character. */ | ||
| 2854 | return collseqmb[br_elem->opr.name[0]]; | ||
| 2855 | } | ||
| 2937 | } | 2856 | } |
| 2938 | return UINT_MAX; | 2857 | else if (sym_name_len == 1) |
| 2858 | return collseqmb[br_elem->opr.name[0]]; | ||
| 2939 | } | 2859 | } |
| 2860 | return UINT_MAX; | ||
| 2861 | } | ||
| 2940 | 2862 | ||
| 2941 | /* Local function for parse_bracket_exp used in _LIBC environment. | 2863 | /* Local function for parse_bracket_exp used in _LIBC environment. |
| 2942 | Build the range expression which starts from START_ELEM, and ends | 2864 | Build the range expression which starts from START_ELEM, and ends |
| 2943 | at END_ELEM. The result are written to MBCSET and SBCSET. | 2865 | at END_ELEM. The result are written to MBCSET and SBCSET. |
| 2944 | RANGE_ALLOC is the allocated size of mbcset->range_starts, and | 2866 | RANGE_ALLOC is the allocated size of mbcset->range_starts, and |
| 2945 | mbcset->range_ends, is a pointer argument since we may | 2867 | mbcset->range_ends, is a pointer argument since we may |
| 2946 | update it. */ | 2868 | update it. */ |
| 2869 | |||
| 2870 | static __always_inline reg_errcode_t | ||
| 2871 | build_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc, | ||
| 2872 | bracket_elem_t *start_elem, bracket_elem_t *end_elem, | ||
| 2873 | re_dfa_t *dfa, reg_syntax_t syntax, uint32_t nrules, | ||
| 2874 | const unsigned char *collseqmb, const char *collseqwc, | ||
| 2875 | int_fast32_t table_size, const int32_t *symb_table, | ||
| 2876 | const unsigned char *extra) | ||
| 2877 | { | ||
| 2878 | unsigned int ch; | ||
| 2879 | uint32_t start_collseq; | ||
| 2880 | uint32_t end_collseq; | ||
| 2947 | 2881 | ||
| 2948 | auto inline reg_errcode_t | 2882 | /* Equivalence Classes and Character Classes can't be a range |
| 2949 | __attribute__ ((always_inline)) | 2883 | start/end. */ |
| 2950 | build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, | 2884 | if (__glibc_unlikely (start_elem->type == EQUIV_CLASS |
| 2951 | bracket_elem_t *start_elem, bracket_elem_t *end_elem) | 2885 | || start_elem->type == CHAR_CLASS |
| 2952 | { | 2886 | || end_elem->type == EQUIV_CLASS |
| 2953 | unsigned int ch; | 2887 | || end_elem->type == CHAR_CLASS)) |
| 2954 | uint32_t start_collseq; | 2888 | return REG_ERANGE; |
| 2955 | uint32_t end_collseq; | ||
| 2956 | |||
| 2957 | /* Equivalence Classes and Character Classes can't be a range | ||
| 2958 | start/end. */ | ||
| 2959 | if (__glibc_unlikely (start_elem->type == EQUIV_CLASS | ||
| 2960 | || start_elem->type == CHAR_CLASS | ||
| 2961 | || end_elem->type == EQUIV_CLASS | ||
| 2962 | || end_elem->type == CHAR_CLASS)) | ||
| 2963 | return REG_ERANGE; | ||
| 2964 | 2889 | ||
| 2965 | /* FIXME: Implement rational ranges here, too. */ | 2890 | /* FIXME: Implement rational ranges here, too. */ |
| 2966 | start_collseq = lookup_collation_sequence_value (start_elem); | 2891 | start_collseq = lookup_collation_sequence_value (start_elem, nrules, collseqmb, collseqwc, |
| 2967 | end_collseq = lookup_collation_sequence_value (end_elem); | 2892 | table_size, symb_table, extra); |
| 2968 | /* Check start/end collation sequence values. */ | 2893 | end_collseq = lookup_collation_sequence_value (end_elem, nrules, collseqmb, collseqwc, |
| 2969 | if (__glibc_unlikely (start_collseq == UINT_MAX | 2894 | table_size, symb_table, extra); |
| 2970 | || end_collseq == UINT_MAX)) | 2895 | /* Check start/end collation sequence values. */ |
| 2971 | return REG_ECOLLATE; | 2896 | if (__glibc_unlikely (start_collseq == UINT_MAX |
| 2972 | if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES) | 2897 | || end_collseq == UINT_MAX)) |
| 2973 | && start_collseq > end_collseq)) | 2898 | return REG_ECOLLATE; |
| 2974 | return REG_ERANGE; | 2899 | if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES) |
| 2900 | && start_collseq > end_collseq)) | ||
| 2901 | return REG_ERANGE; | ||
| 2975 | 2902 | ||
| 2976 | /* Got valid collation sequence values, add them as a new entry. | 2903 | /* Got valid collation sequence values, add them as a new entry. |
| 2977 | However, if we have no collation elements, and the character set | 2904 | However, if we have no collation elements, and the character set |
| 2978 | is single byte, the single byte character set that we | 2905 | is single byte, the single byte character set that we |
| 2979 | build below suffices. */ | 2906 | build below suffices. */ |
| 2980 | if (nrules > 0 || dfa->mb_cur_max > 1) | 2907 | if (nrules > 0 || dfa->mb_cur_max > 1) |
| 2908 | { | ||
| 2909 | /* Check the space of the arrays. */ | ||
| 2910 | if (__glibc_unlikely (*range_alloc == mbcset->nranges)) | ||
| 2981 | { | 2911 | { |
| 2982 | /* Check the space of the arrays. */ | 2912 | /* There is not enough space, need realloc. */ |
| 2983 | if (__glibc_unlikely (*range_alloc == mbcset->nranges)) | 2913 | uint32_t *new_array_start; |
| 2984 | { | 2914 | uint32_t *new_array_end; |
| 2985 | /* There is not enough space, need realloc. */ | 2915 | int new_nranges; |
| 2986 | uint32_t *new_array_start; | ||
| 2987 | uint32_t *new_array_end; | ||
| 2988 | Idx new_nranges; | ||
| 2989 | |||
| 2990 | /* +1 in case of mbcset->nranges is 0. */ | ||
| 2991 | new_nranges = 2 * mbcset->nranges + 1; | ||
| 2992 | new_array_start = re_realloc (mbcset->range_starts, uint32_t, | ||
| 2993 | new_nranges); | ||
| 2994 | new_array_end = re_realloc (mbcset->range_ends, uint32_t, | ||
| 2995 | new_nranges); | ||
| 2996 | |||
| 2997 | if (__glibc_unlikely (new_array_start == NULL | ||
| 2998 | || new_array_end == NULL)) | ||
| 2999 | return REG_ESPACE; | ||
| 3000 | 2916 | ||
| 3001 | mbcset->range_starts = new_array_start; | 2917 | /* +1 in case of mbcset->nranges is 0. */ |
| 3002 | mbcset->range_ends = new_array_end; | 2918 | new_nranges = 2 * mbcset->nranges + 1; |
| 3003 | *range_alloc = new_nranges; | 2919 | new_array_start = re_realloc (mbcset->range_starts, uint32_t, |
| 3004 | } | 2920 | new_nranges); |
| 2921 | new_array_end = re_realloc (mbcset->range_ends, uint32_t, | ||
| 2922 | new_nranges); | ||
| 3005 | 2923 | ||
| 3006 | mbcset->range_starts[mbcset->nranges] = start_collseq; | 2924 | if (__glibc_unlikely (new_array_start == NULL |
| 3007 | mbcset->range_ends[mbcset->nranges++] = end_collseq; | 2925 | || new_array_end == NULL)) |
| 3008 | } | 2926 | return REG_ESPACE; |
| 3009 | 2927 | ||
| 3010 | /* Build the table for single byte characters. */ | 2928 | mbcset->range_starts = new_array_start; |
| 3011 | for (ch = 0; ch < SBC_MAX; ch++) | 2929 | mbcset->range_ends = new_array_end; |
| 3012 | { | 2930 | *range_alloc = new_nranges; |
| 3013 | uint32_t ch_collseq; | ||
| 3014 | /* | ||
| 3015 | if (MB_CUR_MAX == 1) | ||
| 3016 | */ | ||
| 3017 | if (nrules == 0) | ||
| 3018 | ch_collseq = collseqmb[ch]; | ||
| 3019 | else | ||
| 3020 | ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch)); | ||
| 3021 | if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) | ||
| 3022 | bitset_set (sbcset, ch); | ||
| 3023 | } | 2931 | } |
| 3024 | return REG_NOERROR; | 2932 | |
| 2933 | mbcset->range_starts[mbcset->nranges] = start_collseq; | ||
| 2934 | mbcset->range_ends[mbcset->nranges++] = end_collseq; | ||
| 3025 | } | 2935 | } |
| 3026 | 2936 | ||
| 3027 | /* Local function for parse_bracket_exp used in _LIBC environment. | 2937 | /* Build the table for single byte characters. */ |
| 3028 | Build the collating element which is represented by NAME. | 2938 | for (ch = 0; ch < SBC_MAX; ch++) |
| 3029 | The result are written to MBCSET and SBCSET. | 2939 | { |
| 3030 | COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a | 2940 | uint32_t ch_collseq; |
| 3031 | pointer argument since we may update it. */ | 2941 | /* if (MB_CUR_MAX == 1) */ |
| 2942 | if (nrules == 0) | ||
| 2943 | ch_collseq = collseqmb[ch]; | ||
| 2944 | else | ||
| 2945 | ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch)); | ||
| 2946 | if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) | ||
| 2947 | bitset_set (sbcset, ch); | ||
| 2948 | } | ||
| 2949 | return REG_NOERROR; | ||
| 2950 | } | ||
| 3032 | 2951 | ||
| 3033 | auto inline reg_errcode_t | 2952 | /* Local function for parse_bracket_exp used in _LIBC environment. |
| 3034 | __attribute__ ((always_inline)) | 2953 | Build the collating element which is represented by NAME. |
| 3035 | build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, | 2954 | The result are written to MBCSET and SBCSET. |
| 3036 | Idx *coll_sym_alloc, const unsigned char *name) | 2955 | COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a |
| 2956 | pointer argument since we may update it. */ | ||
| 2957 | |||
| 2958 | static __always_inline reg_errcode_t | ||
| 2959 | build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, | ||
| 2960 | Idx *coll_sym_alloc, const unsigned char *name, | ||
| 2961 | uint_fast32_t nrules, int_fast32_t table_size, | ||
| 2962 | const int32_t *symb_table, const unsigned char *extra) | ||
| 2963 | { | ||
| 2964 | int32_t elem, idx; | ||
| 2965 | size_t name_len = strlen ((const char *) name); | ||
| 2966 | if (nrules != 0) | ||
| 3037 | { | 2967 | { |
| 3038 | int32_t elem, idx; | 2968 | elem = seek_collating_symbol_entry (name, name_len, symb_table, |
| 3039 | size_t name_len = strlen ((const char *) name); | 2969 | table_size, extra); |
| 3040 | if (nrules != 0) | 2970 | if (elem != -1) |
| 3041 | { | 2971 | { |
| 3042 | elem = seek_collating_symbol_entry (name, name_len); | 2972 | /* We found the entry. */ |
| 3043 | if (elem != -1) | 2973 | idx = symb_table[2 * elem + 1]; |
| 3044 | { | 2974 | /* Skip the name of collating element name. */ |
| 3045 | /* We found the entry. */ | 2975 | idx += 1 + extra[idx]; |
| 3046 | idx = symb_table[2 * elem + 1]; | 2976 | } |
| 3047 | /* Skip the name of collating element name. */ | 2977 | else if (name_len == 1) |
| 3048 | idx += 1 + extra[idx]; | 2978 | { |
| 3049 | } | 2979 | /* No valid character, treat it as a normal |
| 3050 | else if (name_len == 1) | 2980 | character. */ |
| 3051 | { | 2981 | bitset_set (sbcset, name[0]); |
| 3052 | /* No valid character, treat it as a normal | ||
| 3053 | character. */ | ||
| 3054 | bitset_set (sbcset, name[0]); | ||
| 3055 | return REG_NOERROR; | ||
| 3056 | } | ||
| 3057 | else | ||
| 3058 | return REG_ECOLLATE; | ||
| 3059 | |||
| 3060 | /* Got valid collation sequence, add it as a new entry. */ | ||
| 3061 | /* Check the space of the arrays. */ | ||
| 3062 | if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms)) | ||
| 3063 | { | ||
| 3064 | /* Not enough, realloc it. */ | ||
| 3065 | /* +1 in case of mbcset->ncoll_syms is 0. */ | ||
| 3066 | Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; | ||
| 3067 | /* Use realloc since mbcset->coll_syms is NULL | ||
| 3068 | if *alloc == 0. */ | ||
| 3069 | int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, | ||
| 3070 | new_coll_sym_alloc); | ||
| 3071 | if (__glibc_unlikely (new_coll_syms == NULL)) | ||
| 3072 | return REG_ESPACE; | ||
| 3073 | mbcset->coll_syms = new_coll_syms; | ||
| 3074 | *coll_sym_alloc = new_coll_sym_alloc; | ||
| 3075 | } | ||
| 3076 | mbcset->coll_syms[mbcset->ncoll_syms++] = idx; | ||
| 3077 | return REG_NOERROR; | 2982 | return REG_NOERROR; |
| 3078 | } | 2983 | } |
| 3079 | else | 2984 | else |
| 2985 | return REG_ECOLLATE; | ||
| 2986 | |||
| 2987 | /* Got valid collation sequence, add it as a new entry. */ | ||
| 2988 | /* Check the space of the arrays. */ | ||
| 2989 | if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms)) | ||
| 3080 | { | 2990 | { |
| 3081 | if (__glibc_unlikely (name_len != 1)) | 2991 | /* Not enough, realloc it. */ |
| 3082 | return REG_ECOLLATE; | 2992 | /* +1 in case of mbcset->ncoll_syms is 0. */ |
| 3083 | else | 2993 | int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; |
| 3084 | { | 2994 | /* Use realloc since mbcset->coll_syms is NULL |
| 3085 | bitset_set (sbcset, name[0]); | 2995 | if *alloc == 0. */ |
| 3086 | return REG_NOERROR; | 2996 | int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, |
| 3087 | } | 2997 | new_coll_sym_alloc); |
| 2998 | if (__glibc_unlikely (new_coll_syms == NULL)) | ||
| 2999 | return REG_ESPACE; | ||
| 3000 | mbcset->coll_syms = new_coll_syms; | ||
| 3001 | *coll_sym_alloc = new_coll_sym_alloc; | ||
| 3088 | } | 3002 | } |
| 3003 | mbcset->coll_syms[mbcset->ncoll_syms++] = idx; | ||
| 3004 | return REG_NOERROR; | ||
| 3089 | } | 3005 | } |
| 3090 | #endif | 3006 | else |
| 3007 | { | ||
| 3008 | if (__glibc_unlikely (name_len != 1)) | ||
| 3009 | return REG_ECOLLATE; | ||
| 3010 | else | ||
| 3011 | { | ||
| 3012 | bitset_set (sbcset, name[0]); | ||
| 3013 | return REG_NOERROR; | ||
| 3014 | } | ||
| 3015 | } | ||
| 3016 | } | ||
| 3017 | #endif /* _LIBC */ | ||
| 3018 | |||
| 3019 | /* This function parse bracket expression like "[abc]", "[a-c]", | ||
| 3020 | "[[.a-a.]]" etc. */ | ||
| 3021 | |||
| 3022 | static bin_tree_t * | ||
| 3023 | parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, | ||
| 3024 | reg_syntax_t syntax, reg_errcode_t *err) | ||
| 3025 | { | ||
| 3026 | const unsigned char *collseqmb = NULL; | ||
| 3027 | const char *collseqwc = NULL; | ||
| 3028 | uint_fast32_t nrules = 0; | ||
| 3029 | int_fast32_t table_size = 0; | ||
| 3030 | const void *symb_table = NULL; | ||
| 3031 | const unsigned char *extra = NULL; | ||
| 3091 | 3032 | ||
| 3092 | re_token_t br_token; | 3033 | re_token_t br_token; |
| 3093 | re_bitset_ptr_t sbcset; | 3034 | re_bitset_ptr_t sbcset; |
| 3094 | #ifdef RE_ENABLE_I18N | ||
| 3095 | re_charset_t *mbcset; | 3035 | re_charset_t *mbcset; |
| 3096 | Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; | 3036 | Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; |
| 3097 | Idx equiv_class_alloc = 0, char_class_alloc = 0; | 3037 | Idx equiv_class_alloc = 0, char_class_alloc = 0; |
| 3098 | #endif /* not RE_ENABLE_I18N */ | ||
| 3099 | bool non_match = false; | 3038 | bool non_match = false; |
| 3100 | bin_tree_t *work_tree; | 3039 | bin_tree_t *work_tree; |
| 3101 | int token_len; | 3040 | int token_len; |
| @@ -3111,26 +3050,17 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, | |||
| 3111 | */ | 3050 | */ |
| 3112 | collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); | 3051 | collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); |
| 3113 | table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); | 3052 | table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); |
| 3114 | symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, | 3053 | symb_table = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_TABLEMB); |
| 3115 | _NL_COLLATE_SYMB_TABLEMB); | ||
| 3116 | extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, | 3054 | extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, |
| 3117 | _NL_COLLATE_SYMB_EXTRAMB); | 3055 | _NL_COLLATE_SYMB_EXTRAMB); |
| 3118 | } | 3056 | } |
| 3119 | #endif | 3057 | #endif |
| 3120 | sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); | 3058 | sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); |
| 3121 | #ifdef RE_ENABLE_I18N | ||
| 3122 | mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); | 3059 | mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); |
| 3123 | #endif /* RE_ENABLE_I18N */ | ||
| 3124 | #ifdef RE_ENABLE_I18N | ||
| 3125 | if (__glibc_unlikely (sbcset == NULL || mbcset == NULL)) | 3060 | if (__glibc_unlikely (sbcset == NULL || mbcset == NULL)) |
| 3126 | #else | ||
| 3127 | if (__glibc_unlikely (sbcset == NULL)) | ||
| 3128 | #endif /* RE_ENABLE_I18N */ | ||
| 3129 | { | 3061 | { |
| 3130 | re_free (sbcset); | 3062 | re_free (sbcset); |
| 3131 | #ifdef RE_ENABLE_I18N | ||
| 3132 | re_free (mbcset); | 3063 | re_free (mbcset); |
| 3133 | #endif | ||
| 3134 | *err = REG_ESPACE; | 3064 | *err = REG_ESPACE; |
| 3135 | return NULL; | 3065 | return NULL; |
| 3136 | } | 3066 | } |
| @@ -3143,9 +3073,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, | |||
| 3143 | } | 3073 | } |
| 3144 | if (token->type == OP_NON_MATCH_LIST) | 3074 | if (token->type == OP_NON_MATCH_LIST) |
| 3145 | { | 3075 | { |
| 3146 | #ifdef RE_ENABLE_I18N | ||
| 3147 | mbcset->non_match = 1; | 3076 | mbcset->non_match = 1; |
| 3148 | #endif /* not RE_ENABLE_I18N */ | ||
| 3149 | non_match = true; | 3077 | non_match = true; |
| 3150 | if (syntax & RE_HAT_LISTS_NOT_NEWLINE) | 3078 | if (syntax & RE_HAT_LISTS_NOT_NEWLINE) |
| 3151 | bitset_set (sbcset, '\n'); | 3079 | bitset_set (sbcset, '\n'); |
| @@ -3228,18 +3156,10 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, | |||
| 3228 | 3156 | ||
| 3229 | token_len = peek_token_bracket (token, regexp, syntax); | 3157 | token_len = peek_token_bracket (token, regexp, syntax); |
| 3230 | 3158 | ||
| 3231 | #ifdef _LIBC | ||
| 3232 | *err = build_range_exp (sbcset, mbcset, &range_alloc, | 3159 | *err = build_range_exp (sbcset, mbcset, &range_alloc, |
| 3233 | &start_elem, &end_elem); | 3160 | &start_elem, &end_elem, |
| 3234 | #else | 3161 | dfa, syntax, nrules, collseqmb, collseqwc, |
| 3235 | # ifdef RE_ENABLE_I18N | 3162 | table_size, symb_table, extra); |
| 3236 | *err = build_range_exp (syntax, sbcset, | ||
| 3237 | dfa->mb_cur_max > 1 ? mbcset : NULL, | ||
| 3238 | &range_alloc, &start_elem, &end_elem); | ||
| 3239 | # else | ||
| 3240 | *err = build_range_exp (syntax, sbcset, &start_elem, &end_elem); | ||
| 3241 | # endif | ||
| 3242 | #endif /* RE_ENABLE_I18N */ | ||
| 3243 | if (__glibc_unlikely (*err != REG_NOERROR)) | 3163 | if (__glibc_unlikely (*err != REG_NOERROR)) |
| 3244 | goto parse_bracket_exp_free_return; | 3164 | goto parse_bracket_exp_free_return; |
| 3245 | } | 3165 | } |
| @@ -3250,7 +3170,6 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, | |||
| 3250 | case SB_CHAR: | 3170 | case SB_CHAR: |
| 3251 | bitset_set (sbcset, start_elem.opr.ch); | 3171 | bitset_set (sbcset, start_elem.opr.ch); |
| 3252 | break; | 3172 | break; |
| 3253 | #ifdef RE_ENABLE_I18N | ||
| 3254 | case MB_CHAR: | 3173 | case MB_CHAR: |
| 3255 | /* Check whether the array has enough space. */ | 3174 | /* Check whether the array has enough space. */ |
| 3256 | if (__glibc_unlikely (mbchar_alloc == mbcset->nmbchars)) | 3175 | if (__glibc_unlikely (mbchar_alloc == mbcset->nmbchars)) |
| @@ -3268,30 +3187,24 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, | |||
| 3268 | } | 3187 | } |
| 3269 | mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; | 3188 | mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; |
| 3270 | break; | 3189 | break; |
| 3271 | #endif /* RE_ENABLE_I18N */ | ||
| 3272 | case EQUIV_CLASS: | 3190 | case EQUIV_CLASS: |
| 3273 | *err = build_equiv_class (sbcset, | 3191 | *err = build_equiv_class (sbcset, |
| 3274 | #ifdef RE_ENABLE_I18N | ||
| 3275 | mbcset, &equiv_class_alloc, | 3192 | mbcset, &equiv_class_alloc, |
| 3276 | #endif /* RE_ENABLE_I18N */ | ||
| 3277 | start_elem.opr.name); | 3193 | start_elem.opr.name); |
| 3278 | if (__glibc_unlikely (*err != REG_NOERROR)) | 3194 | if (__glibc_unlikely (*err != REG_NOERROR)) |
| 3279 | goto parse_bracket_exp_free_return; | 3195 | goto parse_bracket_exp_free_return; |
| 3280 | break; | 3196 | break; |
| 3281 | case COLL_SYM: | 3197 | case COLL_SYM: |
| 3282 | *err = build_collating_symbol (sbcset, | 3198 | *err = build_collating_symbol (sbcset, |
| 3283 | #ifdef RE_ENABLE_I18N | ||
| 3284 | mbcset, &coll_sym_alloc, | 3199 | mbcset, &coll_sym_alloc, |
| 3285 | #endif /* RE_ENABLE_I18N */ | 3200 | start_elem.opr.name, |
| 3286 | start_elem.opr.name); | 3201 | nrules, table_size, symb_table, extra); |
| 3287 | if (__glibc_unlikely (*err != REG_NOERROR)) | 3202 | if (__glibc_unlikely (*err != REG_NOERROR)) |
| 3288 | goto parse_bracket_exp_free_return; | 3203 | goto parse_bracket_exp_free_return; |
| 3289 | break; | 3204 | break; |
| 3290 | case CHAR_CLASS: | 3205 | case CHAR_CLASS: |
| 3291 | *err = build_charclass (regexp->trans, sbcset, | 3206 | *err = build_charclass (regexp->trans, sbcset, |
| 3292 | #ifdef RE_ENABLE_I18N | ||
| 3293 | mbcset, &char_class_alloc, | 3207 | mbcset, &char_class_alloc, |
| 3294 | #endif /* RE_ENABLE_I18N */ | ||
| 3295 | (const char *) start_elem.opr.name, | 3208 | (const char *) start_elem.opr.name, |
| 3296 | syntax); | 3209 | syntax); |
| 3297 | if (__glibc_unlikely (*err != REG_NOERROR)) | 3210 | if (__glibc_unlikely (*err != REG_NOERROR)) |
| @@ -3317,7 +3230,6 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, | |||
| 3317 | if (non_match) | 3230 | if (non_match) |
| 3318 | bitset_not (sbcset); | 3231 | bitset_not (sbcset); |
| 3319 | 3232 | ||
| 3320 | #ifdef RE_ENABLE_I18N | ||
| 3321 | /* Ensure only single byte characters are set. */ | 3233 | /* Ensure only single byte characters are set. */ |
| 3322 | if (dfa->mb_cur_max > 1) | 3234 | if (dfa->mb_cur_max > 1) |
| 3323 | bitset_mask (sbcset, dfa->sb_char); | 3235 | bitset_mask (sbcset, dfa->sb_char); |
| @@ -3361,11 +3273,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, | |||
| 3361 | } | 3273 | } |
| 3362 | } | 3274 | } |
| 3363 | else | 3275 | else |
| 3364 | #endif /* not RE_ENABLE_I18N */ | ||
| 3365 | { | 3276 | { |
| 3366 | #ifdef RE_ENABLE_I18N | ||
| 3367 | free_charset (mbcset); | 3277 | free_charset (mbcset); |
| 3368 | #endif | ||
| 3369 | /* Build a tree for simple bracket. */ | 3278 | /* Build a tree for simple bracket. */ |
| 3370 | br_token.type = SIMPLE_BRACKET; | 3279 | br_token.type = SIMPLE_BRACKET; |
| 3371 | br_token.opr.sbcset = sbcset; | 3280 | br_token.opr.sbcset = sbcset; |
| @@ -3379,9 +3288,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, | |||
| 3379 | *err = REG_ESPACE; | 3288 | *err = REG_ESPACE; |
| 3380 | parse_bracket_exp_free_return: | 3289 | parse_bracket_exp_free_return: |
| 3381 | re_free (sbcset); | 3290 | re_free (sbcset); |
| 3382 | #ifdef RE_ENABLE_I18N | ||
| 3383 | free_charset (mbcset); | 3291 | free_charset (mbcset); |
| 3384 | #endif /* RE_ENABLE_I18N */ | ||
| 3385 | return NULL; | 3292 | return NULL; |
| 3386 | } | 3293 | } |
| 3387 | 3294 | ||
| @@ -3392,7 +3299,6 @@ parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, | |||
| 3392 | re_token_t *token, int token_len, re_dfa_t *dfa, | 3299 | re_token_t *token, int token_len, re_dfa_t *dfa, |
| 3393 | reg_syntax_t syntax, bool accept_hyphen) | 3300 | reg_syntax_t syntax, bool accept_hyphen) |
| 3394 | { | 3301 | { |
| 3395 | #ifdef RE_ENABLE_I18N | ||
| 3396 | int cur_char_size; | 3302 | int cur_char_size; |
| 3397 | cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)); | 3303 | cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)); |
| 3398 | if (cur_char_size > 1) | 3304 | if (cur_char_size > 1) |
| @@ -3402,7 +3308,6 @@ parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, | |||
| 3402 | re_string_skip_bytes (regexp, cur_char_size); | 3308 | re_string_skip_bytes (regexp, cur_char_size); |
| 3403 | return REG_NOERROR; | 3309 | return REG_NOERROR; |
| 3404 | } | 3310 | } |
| 3405 | #endif /* RE_ENABLE_I18N */ | ||
| 3406 | re_string_skip_bytes (regexp, token_len); /* Skip a token. */ | 3311 | re_string_skip_bytes (regexp, token_len); /* Skip a token. */ |
| 3407 | if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS | 3312 | if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS |
| 3408 | || token->type == OP_OPEN_EQUIV_CLASS) | 3313 | || token->type == OP_OPEN_EQUIV_CLASS) |
| @@ -3475,12 +3380,8 @@ parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, | |||
| 3475 | is a pointer argument since we may update it. */ | 3380 | is a pointer argument since we may update it. */ |
| 3476 | 3381 | ||
| 3477 | static reg_errcode_t | 3382 | static reg_errcode_t |
| 3478 | #ifdef RE_ENABLE_I18N | ||
| 3479 | build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, | 3383 | build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, |
| 3480 | Idx *equiv_class_alloc, const unsigned char *name) | 3384 | Idx *equiv_class_alloc, const unsigned char *name) |
| 3481 | #else /* not RE_ENABLE_I18N */ | ||
| 3482 | build_equiv_class (bitset_t sbcset, const unsigned char *name) | ||
| 3483 | #endif /* not RE_ENABLE_I18N */ | ||
| 3484 | { | 3385 | { |
| 3485 | #ifdef _LIBC | 3386 | #ifdef _LIBC |
| 3486 | uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); | 3387 | uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
| @@ -3560,14 +3461,9 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name) | |||
| 3560 | is a pointer argument since we may update it. */ | 3461 | is a pointer argument since we may update it. */ |
| 3561 | 3462 | ||
| 3562 | static reg_errcode_t | 3463 | static reg_errcode_t |
| 3563 | #ifdef RE_ENABLE_I18N | ||
| 3564 | build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, | 3464 | build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, |
| 3565 | re_charset_t *mbcset, Idx *char_class_alloc, | 3465 | re_charset_t *mbcset, Idx *char_class_alloc, |
| 3566 | const char *class_name, reg_syntax_t syntax) | 3466 | const char *class_name, reg_syntax_t syntax) |
| 3567 | #else /* not RE_ENABLE_I18N */ | ||
| 3568 | build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, | ||
| 3569 | const char *class_name, reg_syntax_t syntax) | ||
| 3570 | #endif /* not RE_ENABLE_I18N */ | ||
| 3571 | { | 3467 | { |
| 3572 | int i; | 3468 | int i; |
| 3573 | const char *name = class_name; | 3469 | const char *name = class_name; |
| @@ -3578,7 +3474,6 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, | |||
| 3578 | && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0)) | 3474 | && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0)) |
| 3579 | name = "alpha"; | 3475 | name = "alpha"; |
| 3580 | 3476 | ||
| 3581 | #ifdef RE_ENABLE_I18N | ||
| 3582 | /* Check the space of the arrays. */ | 3477 | /* Check the space of the arrays. */ |
| 3583 | if (__glibc_unlikely (*char_class_alloc == mbcset->nchar_classes)) | 3478 | if (__glibc_unlikely (*char_class_alloc == mbcset->nchar_classes)) |
| 3584 | { | 3479 | { |
| @@ -3594,7 +3489,6 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, | |||
| 3594 | *char_class_alloc = new_char_class_alloc; | 3489 | *char_class_alloc = new_char_class_alloc; |
| 3595 | } | 3490 | } |
| 3596 | mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); | 3491 | mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); |
| 3597 | #endif /* RE_ENABLE_I18N */ | ||
| 3598 | 3492 | ||
| 3599 | #define BUILD_CHARCLASS_LOOP(ctype_func) \ | 3493 | #define BUILD_CHARCLASS_LOOP(ctype_func) \ |
| 3600 | do { \ | 3494 | do { \ |
| @@ -3649,10 +3543,8 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, | |||
| 3649 | reg_errcode_t *err) | 3543 | reg_errcode_t *err) |
| 3650 | { | 3544 | { |
| 3651 | re_bitset_ptr_t sbcset; | 3545 | re_bitset_ptr_t sbcset; |
| 3652 | #ifdef RE_ENABLE_I18N | ||
| 3653 | re_charset_t *mbcset; | 3546 | re_charset_t *mbcset; |
| 3654 | Idx alloc = 0; | 3547 | Idx alloc = 0; |
| 3655 | #endif /* not RE_ENABLE_I18N */ | ||
| 3656 | reg_errcode_t ret; | 3548 | reg_errcode_t ret; |
| 3657 | bin_tree_t *tree; | 3549 | bin_tree_t *tree; |
| 3658 | 3550 | ||
| @@ -3662,7 +3554,6 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, | |||
| 3662 | *err = REG_ESPACE; | 3554 | *err = REG_ESPACE; |
| 3663 | return NULL; | 3555 | return NULL; |
| 3664 | } | 3556 | } |
| 3665 | #ifdef RE_ENABLE_I18N | ||
| 3666 | mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); | 3557 | mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); |
| 3667 | if (__glibc_unlikely (mbcset == NULL)) | 3558 | if (__glibc_unlikely (mbcset == NULL)) |
| 3668 | { | 3559 | { |
| @@ -3671,21 +3562,14 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, | |||
| 3671 | return NULL; | 3562 | return NULL; |
| 3672 | } | 3563 | } |
| 3673 | mbcset->non_match = non_match; | 3564 | mbcset->non_match = non_match; |
| 3674 | #endif /* RE_ENABLE_I18N */ | ||
| 3675 | 3565 | ||
| 3676 | /* We don't care the syntax in this case. */ | 3566 | /* We don't care the syntax in this case. */ |
| 3677 | ret = build_charclass (trans, sbcset, | 3567 | ret = build_charclass (trans, sbcset, mbcset, &alloc, class_name, 0); |
| 3678 | #ifdef RE_ENABLE_I18N | ||
| 3679 | mbcset, &alloc, | ||
| 3680 | #endif /* RE_ENABLE_I18N */ | ||
| 3681 | class_name, 0); | ||
| 3682 | 3568 | ||
| 3683 | if (__glibc_unlikely (ret != REG_NOERROR)) | 3569 | if (__glibc_unlikely (ret != REG_NOERROR)) |
| 3684 | { | 3570 | { |
| 3685 | re_free (sbcset); | 3571 | re_free (sbcset); |
| 3686 | #ifdef RE_ENABLE_I18N | ||
| 3687 | free_charset (mbcset); | 3572 | free_charset (mbcset); |
| 3688 | #endif /* RE_ENABLE_I18N */ | ||
| 3689 | *err = ret; | 3573 | *err = ret; |
| 3690 | return NULL; | 3574 | return NULL; |
| 3691 | } | 3575 | } |
| @@ -3697,11 +3581,9 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, | |||
| 3697 | if (non_match) | 3581 | if (non_match) |
| 3698 | bitset_not (sbcset); | 3582 | bitset_not (sbcset); |
| 3699 | 3583 | ||
| 3700 | #ifdef RE_ENABLE_I18N | ||
| 3701 | /* Ensure only single byte characters are set. */ | 3584 | /* Ensure only single byte characters are set. */ |
| 3702 | if (dfa->mb_cur_max > 1) | 3585 | if (dfa->mb_cur_max > 1) |
| 3703 | bitset_mask (sbcset, dfa->sb_char); | 3586 | bitset_mask (sbcset, dfa->sb_char); |
| 3704 | #endif | ||
| 3705 | 3587 | ||
| 3706 | /* Build a tree for simple bracket. */ | 3588 | /* Build a tree for simple bracket. */ |
| 3707 | re_token_t br_token = { .type = SIMPLE_BRACKET, .opr.sbcset = sbcset }; | 3589 | re_token_t br_token = { .type = SIMPLE_BRACKET, .opr.sbcset = sbcset }; |
| @@ -3709,7 +3591,6 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, | |||
| 3709 | if (__glibc_unlikely (tree == NULL)) | 3591 | if (__glibc_unlikely (tree == NULL)) |
| 3710 | goto build_word_op_espace; | 3592 | goto build_word_op_espace; |
| 3711 | 3593 | ||
| 3712 | #ifdef RE_ENABLE_I18N | ||
| 3713 | if (dfa->mb_cur_max > 1) | 3594 | if (dfa->mb_cur_max > 1) |
| 3714 | { | 3595 | { |
| 3715 | bin_tree_t *mbc_tree; | 3596 | bin_tree_t *mbc_tree; |
| @@ -3730,15 +3611,10 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, | |||
| 3730 | free_charset (mbcset); | 3611 | free_charset (mbcset); |
| 3731 | return tree; | 3612 | return tree; |
| 3732 | } | 3613 | } |
| 3733 | #else /* not RE_ENABLE_I18N */ | ||
| 3734 | return tree; | ||
| 3735 | #endif /* not RE_ENABLE_I18N */ | ||
| 3736 | 3614 | ||
| 3737 | build_word_op_espace: | 3615 | build_word_op_espace: |
| 3738 | re_free (sbcset); | 3616 | re_free (sbcset); |
| 3739 | #ifdef RE_ENABLE_I18N | ||
| 3740 | free_charset (mbcset); | 3617 | free_charset (mbcset); |
| 3741 | #endif /* RE_ENABLE_I18N */ | ||
| 3742 | *err = REG_ESPACE; | 3618 | *err = REG_ESPACE; |
| 3743 | return NULL; | 3619 | return NULL; |
| 3744 | } | 3620 | } |
| @@ -3771,21 +3647,19 @@ fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax) | |||
| 3771 | return num; | 3647 | return num; |
| 3772 | } | 3648 | } |
| 3773 | 3649 | ||
| 3774 | #ifdef RE_ENABLE_I18N | ||
| 3775 | static void | 3650 | static void |
| 3776 | free_charset (re_charset_t *cset) | 3651 | free_charset (re_charset_t *cset) |
| 3777 | { | 3652 | { |
| 3778 | re_free (cset->mbchars); | 3653 | re_free (cset->mbchars); |
| 3779 | # ifdef _LIBC | 3654 | #ifdef _LIBC |
| 3780 | re_free (cset->coll_syms); | 3655 | re_free (cset->coll_syms); |
| 3781 | re_free (cset->equiv_classes); | 3656 | re_free (cset->equiv_classes); |
| 3782 | # endif | 3657 | #endif |
| 3783 | re_free (cset->range_starts); | 3658 | re_free (cset->range_starts); |
| 3784 | re_free (cset->range_ends); | 3659 | re_free (cset->range_ends); |
| 3785 | re_free (cset->char_classes); | 3660 | re_free (cset->char_classes); |
| 3786 | re_free (cset); | 3661 | re_free (cset); |
| 3787 | } | 3662 | } |
| 3788 | #endif /* RE_ENABLE_I18N */ | ||
| 3789 | 3663 | ||
| 3790 | /* Functions for binary tree operation. */ | 3664 | /* Functions for binary tree operation. */ |
| 3791 | 3665 | ||
| @@ -3851,13 +3725,10 @@ mark_opt_subexp (void *extra, bin_tree_t *node) | |||
| 3851 | static void | 3725 | static void |
| 3852 | free_token (re_token_t *node) | 3726 | free_token (re_token_t *node) |
| 3853 | { | 3727 | { |
| 3854 | #ifdef RE_ENABLE_I18N | ||
| 3855 | if (node->type == COMPLEX_BRACKET && node->duplicated == 0) | 3728 | if (node->type == COMPLEX_BRACKET && node->duplicated == 0) |
| 3856 | free_charset (node->opr.mbcset); | 3729 | free_charset (node->opr.mbcset); |
| 3857 | else | 3730 | else if (node->type == SIMPLE_BRACKET && node->duplicated == 0) |
| 3858 | #endif /* RE_ENABLE_I18N */ | 3731 | re_free (node->opr.sbcset); |
| 3859 | if (node->type == SIMPLE_BRACKET && node->duplicated == 0) | ||
| 3860 | re_free (node->opr.sbcset); | ||
| 3861 | } | 3732 | } |
| 3862 | 3733 | ||
| 3863 | /* Worker function for tree walking. Free the allocated memory inside NODE | 3734 | /* Worker function for tree walking. Free the allocated memory inside NODE |
