diff options
Diffstat (limited to 'gl/regexec.c')
-rw-r--r-- | gl/regexec.c | 84 |
1 files changed, 21 insertions, 63 deletions
diff --git a/gl/regexec.c b/gl/regexec.c index 6aeba3c..521cb02 100644 --- a/gl/regexec.c +++ b/gl/regexec.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* Extended regular expression matching and search library. | 1 | /* Extended regular expression matching and search library. |
2 | Copyright (C) 2002-2021 Free Software Foundation, Inc. | 2 | Copyright (C) 2002-2022 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. | 3 | This file is part of the GNU C Library. |
4 | Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. | 4 | Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. |
5 | 5 | ||
@@ -67,11 +67,9 @@ static reg_errcode_t set_regs (const regex_t *preg, | |||
67 | bool fl_backtrack); | 67 | bool fl_backtrack); |
68 | static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs); | 68 | static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs); |
69 | 69 | ||
70 | #ifdef RE_ENABLE_I18N | ||
71 | static int sift_states_iter_mb (const re_match_context_t *mctx, | 70 | static int sift_states_iter_mb (const re_match_context_t *mctx, |
72 | re_sift_context_t *sctx, | 71 | re_sift_context_t *sctx, |
73 | Idx node_idx, Idx str_idx, Idx max_str_idx); | 72 | Idx node_idx, Idx str_idx, Idx max_str_idx); |
74 | #endif /* RE_ENABLE_I18N */ | ||
75 | static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, | 73 | static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, |
76 | re_sift_context_t *sctx); | 74 | re_sift_context_t *sctx); |
77 | static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, | 75 | static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, |
@@ -123,10 +121,8 @@ static re_dfastate_t *transit_state_sb (reg_errcode_t *err, | |||
123 | re_match_context_t *mctx, | 121 | re_match_context_t *mctx, |
124 | re_dfastate_t *pstate); | 122 | re_dfastate_t *pstate); |
125 | #endif | 123 | #endif |
126 | #ifdef RE_ENABLE_I18N | ||
127 | static reg_errcode_t transit_state_mb (re_match_context_t *mctx, | 124 | static reg_errcode_t transit_state_mb (re_match_context_t *mctx, |
128 | re_dfastate_t *pstate); | 125 | re_dfastate_t *pstate); |
129 | #endif /* RE_ENABLE_I18N */ | ||
130 | static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, | 126 | static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, |
131 | const re_node_set *nodes); | 127 | const re_node_set *nodes); |
132 | static reg_errcode_t get_subexp (re_match_context_t *mctx, | 128 | static reg_errcode_t get_subexp (re_match_context_t *mctx, |
@@ -156,14 +152,12 @@ static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, | |||
156 | re_node_set *cur_nodes, Idx cur_str, | 152 | re_node_set *cur_nodes, Idx cur_str, |
157 | Idx subexp_num, int type); | 153 | Idx subexp_num, int type); |
158 | static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state); | 154 | static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state); |
159 | #ifdef RE_ENABLE_I18N | ||
160 | static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, | 155 | static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, |
161 | const re_string_t *input, Idx idx); | 156 | const re_string_t *input, Idx idx); |
162 | # ifdef _LIBC | 157 | #ifdef _LIBC |
163 | static unsigned int find_collation_sequence_value (const unsigned char *mbs, | 158 | static unsigned int find_collation_sequence_value (const unsigned char *mbs, |
164 | size_t name_len); | 159 | size_t name_len); |
165 | # endif /* _LIBC */ | 160 | #endif |
166 | #endif /* RE_ENABLE_I18N */ | ||
167 | static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa, | 161 | static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa, |
168 | const re_dfastate_t *state, | 162 | const re_dfastate_t *state, |
169 | re_node_set *states_node, | 163 | re_node_set *states_node, |
@@ -779,12 +773,10 @@ re_search_internal (const regex_t *preg, const char *string, Idx length, | |||
779 | if (__glibc_unlikely (err != REG_NOERROR)) | 773 | if (__glibc_unlikely (err != REG_NOERROR)) |
780 | goto free_return; | 774 | goto free_return; |
781 | 775 | ||
782 | #ifdef RE_ENABLE_I18N | 776 | /* Don't consider this char as a possible match start if it is part, |
783 | /* Don't consider this char as a possible match start if it is part, | 777 | yet isn't the head, of a multibyte character. */ |
784 | yet isn't the head, of a multibyte character. */ | ||
785 | if (!sb && !re_string_first_byte (&mctx.input, 0)) | 778 | if (!sb && !re_string_first_byte (&mctx.input, 0)) |
786 | continue; | 779 | continue; |
787 | #endif | ||
788 | 780 | ||
789 | /* This seems to be an appropriate one, so use the matcher. */ | 781 | /* This seems to be an appropriate one, so use the matcher. */ |
790 | /* We assume that the matching starts from 0. */ | 782 | /* We assume that the matching starts from 0. */ |
@@ -858,7 +850,6 @@ re_search_internal (const regex_t *preg, const char *string, Idx length, | |||
858 | for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) | 850 | for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) |
859 | if (pmatch[reg_idx].rm_so != -1) | 851 | if (pmatch[reg_idx].rm_so != -1) |
860 | { | 852 | { |
861 | #ifdef RE_ENABLE_I18N | ||
862 | if (__glibc_unlikely (mctx.input.offsets_needed != 0)) | 853 | if (__glibc_unlikely (mctx.input.offsets_needed != 0)) |
863 | { | 854 | { |
864 | pmatch[reg_idx].rm_so = | 855 | pmatch[reg_idx].rm_so = |
@@ -870,9 +861,6 @@ re_search_internal (const regex_t *preg, const char *string, Idx length, | |||
870 | ? mctx.input.valid_raw_len | 861 | ? mctx.input.valid_raw_len |
871 | : mctx.input.offsets[pmatch[reg_idx].rm_eo]); | 862 | : mctx.input.offsets[pmatch[reg_idx].rm_eo]); |
872 | } | 863 | } |
873 | #else | ||
874 | DEBUG_ASSERT (mctx.input.offsets_needed == 0); | ||
875 | #endif | ||
876 | pmatch[reg_idx].rm_so += match_first; | 864 | pmatch[reg_idx].rm_so += match_first; |
877 | pmatch[reg_idx].rm_eo += match_first; | 865 | pmatch[reg_idx].rm_eo += match_first; |
878 | } | 866 | } |
@@ -996,8 +984,7 @@ prune_impossible_nodes (re_match_context_t *mctx) | |||
996 | We must select the appropriate initial state depending on the context, | 984 | We must select the appropriate initial state depending on the context, |
997 | since initial states may have constraints like "\<", "^", etc. */ | 985 | since initial states may have constraints like "\<", "^", etc. */ |
998 | 986 | ||
999 | static inline re_dfastate_t * | 987 | static __always_inline re_dfastate_t * |
1000 | __attribute__ ((always_inline)) | ||
1001 | acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, | 988 | acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, |
1002 | Idx idx) | 989 | Idx idx) |
1003 | { | 990 | { |
@@ -1261,12 +1248,9 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, | |||
1261 | Idx naccepted = 0; | 1248 | Idx naccepted = 0; |
1262 | re_token_type_t type = dfa->nodes[node].type; | 1249 | re_token_type_t type = dfa->nodes[node].type; |
1263 | 1250 | ||
1264 | #ifdef RE_ENABLE_I18N | ||
1265 | if (dfa->nodes[node].accept_mb) | 1251 | if (dfa->nodes[node].accept_mb) |
1266 | naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); | 1252 | naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); |
1267 | else | 1253 | else if (type == OP_BACK_REF) |
1268 | #endif /* RE_ENABLE_I18N */ | ||
1269 | if (type == OP_BACK_REF) | ||
1270 | { | 1254 | { |
1271 | Idx subexp_idx = dfa->nodes[node].opr.idx + 1; | 1255 | Idx subexp_idx = dfa->nodes[node].opr.idx + 1; |
1272 | if (subexp_idx < nregs) | 1256 | if (subexp_idx < nregs) |
@@ -1324,8 +1308,8 @@ push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, | |||
1324 | re_node_set *eps_via_nodes) | 1308 | re_node_set *eps_via_nodes) |
1325 | { | 1309 | { |
1326 | reg_errcode_t err; | 1310 | reg_errcode_t err; |
1327 | Idx num = fs->num++; | 1311 | Idx num = fs->num; |
1328 | if (fs->num == fs->alloc) | 1312 | if (num == fs->alloc) |
1329 | { | 1313 | { |
1330 | struct re_fail_stack_ent_t *new_array; | 1314 | struct re_fail_stack_ent_t *new_array; |
1331 | new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t, | 1315 | new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t, |
@@ -1340,6 +1324,7 @@ push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, | |||
1340 | fs->stack[num].regs = re_malloc (regmatch_t, 2 * nregs); | 1324 | fs->stack[num].regs = re_malloc (regmatch_t, 2 * nregs); |
1341 | if (fs->stack[num].regs == NULL) | 1325 | if (fs->stack[num].regs == NULL) |
1342 | return REG_ESPACE; | 1326 | return REG_ESPACE; |
1327 | fs->num = num + 1; | ||
1343 | memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); | 1328 | memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); |
1344 | memcpy (fs->stack[num].regs + nregs, prevregs, sizeof (regmatch_t) * nregs); | 1329 | memcpy (fs->stack[num].regs + nregs, prevregs, sizeof (regmatch_t) * nregs); |
1345 | err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); | 1330 | err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); |
@@ -1634,12 +1619,10 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, | |||
1634 | bool ok; | 1619 | bool ok; |
1635 | DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type)); | 1620 | DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type)); |
1636 | 1621 | ||
1637 | #ifdef RE_ENABLE_I18N | ||
1638 | /* If the node may accept "multi byte". */ | 1622 | /* If the node may accept "multi byte". */ |
1639 | if (dfa->nodes[prev_node].accept_mb) | 1623 | if (dfa->nodes[prev_node].accept_mb) |
1640 | naccepted = sift_states_iter_mb (mctx, sctx, prev_node, | 1624 | naccepted = sift_states_iter_mb (mctx, sctx, prev_node, |
1641 | str_idx, sctx->last_str_idx); | 1625 | str_idx, sctx->last_str_idx); |
1642 | #endif /* RE_ENABLE_I18N */ | ||
1643 | 1626 | ||
1644 | /* We don't check backreferences here. | 1627 | /* We don't check backreferences here. |
1645 | See update_cur_sifted_state(). */ | 1628 | See update_cur_sifted_state(). */ |
@@ -1688,6 +1671,7 @@ clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx) | |||
1688 | 1671 | ||
1689 | if (top < next_state_log_idx) | 1672 | if (top < next_state_log_idx) |
1690 | { | 1673 | { |
1674 | DEBUG_ASSERT (mctx->state_log != NULL); | ||
1691 | memset (mctx->state_log + top + 1, '\0', | 1675 | memset (mctx->state_log + top + 1, '\0', |
1692 | sizeof (re_dfastate_t *) * (next_state_log_idx - top)); | 1676 | sizeof (re_dfastate_t *) * (next_state_log_idx - top)); |
1693 | mctx->state_log_top = next_state_log_idx; | 1677 | mctx->state_log_top = next_state_log_idx; |
@@ -2176,7 +2160,6 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, | |||
2176 | } | 2160 | } |
2177 | 2161 | ||
2178 | 2162 | ||
2179 | #ifdef RE_ENABLE_I18N | ||
2180 | static int | 2163 | static int |
2181 | sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, | 2164 | sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, |
2182 | Idx node_idx, Idx str_idx, Idx max_str_idx) | 2165 | Idx node_idx, Idx str_idx, Idx max_str_idx) |
@@ -2196,8 +2179,6 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, | |||
2196 | 'naccepted' bytes input. */ | 2179 | 'naccepted' bytes input. */ |
2197 | return naccepted; | 2180 | return naccepted; |
2198 | } | 2181 | } |
2199 | #endif /* RE_ENABLE_I18N */ | ||
2200 | |||
2201 | 2182 | ||
2202 | /* Functions for state transition. */ | 2183 | /* Functions for state transition. */ |
2203 | 2184 | ||
@@ -2215,7 +2196,6 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx, | |||
2215 | re_dfastate_t **trtable; | 2196 | re_dfastate_t **trtable; |
2216 | unsigned char ch; | 2197 | unsigned char ch; |
2217 | 2198 | ||
2218 | #ifdef RE_ENABLE_I18N | ||
2219 | /* If the current state can accept multibyte. */ | 2199 | /* If the current state can accept multibyte. */ |
2220 | if (__glibc_unlikely (state->accept_mb)) | 2200 | if (__glibc_unlikely (state->accept_mb)) |
2221 | { | 2201 | { |
@@ -2223,7 +2203,6 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx, | |||
2223 | if (__glibc_unlikely (*err != REG_NOERROR)) | 2203 | if (__glibc_unlikely (*err != REG_NOERROR)) |
2224 | return NULL; | 2204 | return NULL; |
2225 | } | 2205 | } |
2226 | #endif /* RE_ENABLE_I18N */ | ||
2227 | 2206 | ||
2228 | /* Then decide the next state with the single byte. */ | 2207 | /* Then decide the next state with the single byte. */ |
2229 | #if 0 | 2208 | #if 0 |
@@ -2444,7 +2423,6 @@ transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, | |||
2444 | } | 2423 | } |
2445 | #endif | 2424 | #endif |
2446 | 2425 | ||
2447 | #ifdef RE_ENABLE_I18N | ||
2448 | static reg_errcode_t | 2426 | static reg_errcode_t |
2449 | transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) | 2427 | transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) |
2450 | { | 2428 | { |
@@ -2512,7 +2490,6 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) | |||
2512 | } | 2490 | } |
2513 | return REG_NOERROR; | 2491 | return REG_NOERROR; |
2514 | } | 2492 | } |
2515 | #endif /* RE_ENABLE_I18N */ | ||
2516 | 2493 | ||
2517 | static reg_errcode_t | 2494 | static reg_errcode_t |
2518 | transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) | 2495 | transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) |
@@ -3002,9 +2979,7 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, | |||
3002 | const re_dfa_t *const dfa = mctx->dfa; | 2979 | const re_dfa_t *const dfa = mctx->dfa; |
3003 | bool ok; | 2980 | bool ok; |
3004 | Idx cur_idx; | 2981 | Idx cur_idx; |
3005 | #ifdef RE_ENABLE_I18N | ||
3006 | reg_errcode_t err = REG_NOERROR; | 2982 | reg_errcode_t err = REG_NOERROR; |
3007 | #endif | ||
3008 | re_node_set union_set; | 2983 | re_node_set union_set; |
3009 | re_node_set_init_empty (&union_set); | 2984 | re_node_set_init_empty (&union_set); |
3010 | for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) | 2985 | for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) |
@@ -3013,7 +2988,6 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, | |||
3013 | Idx cur_node = cur_nodes->elems[cur_idx]; | 2988 | Idx cur_node = cur_nodes->elems[cur_idx]; |
3014 | DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type)); | 2989 | DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type)); |
3015 | 2990 | ||
3016 | #ifdef RE_ENABLE_I18N | ||
3017 | /* If the node may accept "multi byte". */ | 2991 | /* If the node may accept "multi byte". */ |
3018 | if (dfa->nodes[cur_node].accept_mb) | 2992 | if (dfa->nodes[cur_node].accept_mb) |
3019 | { | 2993 | { |
@@ -3051,7 +3025,7 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, | |||
3051 | } | 3025 | } |
3052 | } | 3026 | } |
3053 | } | 3027 | } |
3054 | #endif /* RE_ENABLE_I18N */ | 3028 | |
3055 | if (naccepted | 3029 | if (naccepted |
3056 | || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) | 3030 | || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) |
3057 | { | 3031 | { |
@@ -3475,18 +3449,15 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
3475 | } | 3449 | } |
3476 | else if (type == OP_PERIOD) | 3450 | else if (type == OP_PERIOD) |
3477 | { | 3451 | { |
3478 | #ifdef RE_ENABLE_I18N | ||
3479 | if (dfa->mb_cur_max > 1) | 3452 | if (dfa->mb_cur_max > 1) |
3480 | bitset_merge (accepts, dfa->sb_char); | 3453 | bitset_merge (accepts, dfa->sb_char); |
3481 | else | 3454 | else |
3482 | #endif | ||
3483 | bitset_set_all (accepts); | 3455 | bitset_set_all (accepts); |
3484 | if (!(dfa->syntax & RE_DOT_NEWLINE)) | 3456 | if (!(dfa->syntax & RE_DOT_NEWLINE)) |
3485 | bitset_clear (accepts, '\n'); | 3457 | bitset_clear (accepts, '\n'); |
3486 | if (dfa->syntax & RE_DOT_NOT_NULL) | 3458 | if (dfa->syntax & RE_DOT_NOT_NULL) |
3487 | bitset_clear (accepts, '\0'); | 3459 | bitset_clear (accepts, '\0'); |
3488 | } | 3460 | } |
3489 | #ifdef RE_ENABLE_I18N | ||
3490 | else if (type == OP_UTF8_PERIOD) | 3461 | else if (type == OP_UTF8_PERIOD) |
3491 | { | 3462 | { |
3492 | if (ASCII_CHARS % BITSET_WORD_BITS == 0) | 3463 | if (ASCII_CHARS % BITSET_WORD_BITS == 0) |
@@ -3498,7 +3469,6 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
3498 | if (dfa->syntax & RE_DOT_NOT_NULL) | 3469 | if (dfa->syntax & RE_DOT_NOT_NULL) |
3499 | bitset_clear (accepts, '\0'); | 3470 | bitset_clear (accepts, '\0'); |
3500 | } | 3471 | } |
3501 | #endif | ||
3502 | else | 3472 | else |
3503 | continue; | 3473 | continue; |
3504 | 3474 | ||
@@ -3529,12 +3499,10 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
3529 | bitset_empty (accepts); | 3499 | bitset_empty (accepts); |
3530 | continue; | 3500 | continue; |
3531 | } | 3501 | } |
3532 | #ifdef RE_ENABLE_I18N | ||
3533 | if (dfa->mb_cur_max > 1) | 3502 | if (dfa->mb_cur_max > 1) |
3534 | for (j = 0; j < BITSET_WORDS; ++j) | 3503 | for (j = 0; j < BITSET_WORDS; ++j) |
3535 | any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); | 3504 | any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); |
3536 | else | 3505 | else |
3537 | #endif | ||
3538 | for (j = 0; j < BITSET_WORDS; ++j) | 3506 | for (j = 0; j < BITSET_WORDS; ++j) |
3539 | any_set |= (accepts[j] &= dfa->word_char[j]); | 3507 | any_set |= (accepts[j] &= dfa->word_char[j]); |
3540 | if (!any_set) | 3508 | if (!any_set) |
@@ -3548,12 +3516,10 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
3548 | bitset_empty (accepts); | 3516 | bitset_empty (accepts); |
3549 | continue; | 3517 | continue; |
3550 | } | 3518 | } |
3551 | #ifdef RE_ENABLE_I18N | ||
3552 | if (dfa->mb_cur_max > 1) | 3519 | if (dfa->mb_cur_max > 1) |
3553 | for (j = 0; j < BITSET_WORDS; ++j) | 3520 | for (j = 0; j < BITSET_WORDS; ++j) |
3554 | any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); | 3521 | any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); |
3555 | else | 3522 | else |
3556 | #endif | ||
3557 | for (j = 0; j < BITSET_WORDS; ++j) | 3523 | for (j = 0; j < BITSET_WORDS; ++j) |
3558 | any_set |= (accepts[j] &= ~dfa->word_char[j]); | 3524 | any_set |= (accepts[j] &= ~dfa->word_char[j]); |
3559 | if (!any_set) | 3525 | if (!any_set) |
@@ -3630,7 +3596,6 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
3630 | return -1; | 3596 | return -1; |
3631 | } | 3597 | } |
3632 | 3598 | ||
3633 | #ifdef RE_ENABLE_I18N | ||
3634 | /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts. | 3599 | /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts. |
3635 | Return the number of the bytes the node accepts. | 3600 | Return the number of the bytes the node accepts. |
3636 | STR_IDX is the current index of the input string. | 3601 | STR_IDX is the current index of the input string. |
@@ -3639,9 +3604,9 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
3639 | one collating element like '.', '[a-z]', opposite to the other nodes | 3604 | one collating element like '.', '[a-z]', opposite to the other nodes |
3640 | can only accept one byte. */ | 3605 | can only accept one byte. */ |
3641 | 3606 | ||
3642 | # ifdef _LIBC | 3607 | #ifdef _LIBC |
3643 | # include <locale/weight.h> | 3608 | # include <locale/weight.h> |
3644 | # endif | 3609 | #endif |
3645 | 3610 | ||
3646 | static int | 3611 | static int |
3647 | check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, | 3612 | check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, |
@@ -3725,12 +3690,12 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, | |||
3725 | if (node->type == COMPLEX_BRACKET) | 3690 | if (node->type == COMPLEX_BRACKET) |
3726 | { | 3691 | { |
3727 | const re_charset_t *cset = node->opr.mbcset; | 3692 | const re_charset_t *cset = node->opr.mbcset; |
3728 | # ifdef _LIBC | 3693 | #ifdef _LIBC |
3729 | const unsigned char *pin | 3694 | const unsigned char *pin |
3730 | = ((const unsigned char *) re_string_get_buffer (input) + str_idx); | 3695 | = ((const unsigned char *) re_string_get_buffer (input) + str_idx); |
3731 | Idx j; | 3696 | Idx j; |
3732 | uint32_t nrules; | 3697 | uint32_t nrules; |
3733 | # endif /* _LIBC */ | 3698 | #endif |
3734 | int match_len = 0; | 3699 | int match_len = 0; |
3735 | wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) | 3700 | wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) |
3736 | ? re_string_wchar_at (input, str_idx) : 0); | 3701 | ? re_string_wchar_at (input, str_idx) : 0); |
@@ -3753,7 +3718,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, | |||
3753 | } | 3718 | } |
3754 | } | 3719 | } |
3755 | 3720 | ||
3756 | # ifdef _LIBC | 3721 | #ifdef _LIBC |
3757 | nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); | 3722 | nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
3758 | if (nrules != 0) | 3723 | if (nrules != 0) |
3759 | { | 3724 | { |
@@ -3842,7 +3807,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, | |||
3842 | } | 3807 | } |
3843 | } | 3808 | } |
3844 | else | 3809 | else |
3845 | # endif /* _LIBC */ | 3810 | #endif /* _LIBC */ |
3846 | { | 3811 | { |
3847 | /* match with range expression? */ | 3812 | /* match with range expression? */ |
3848 | for (i = 0; i < cset->nranges; ++i) | 3813 | for (i = 0; i < cset->nranges; ++i) |
@@ -3868,7 +3833,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, | |||
3868 | return 0; | 3833 | return 0; |
3869 | } | 3834 | } |
3870 | 3835 | ||
3871 | # ifdef _LIBC | 3836 | #ifdef _LIBC |
3872 | static unsigned int | 3837 | static unsigned int |
3873 | find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) | 3838 | find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) |
3874 | { | 3839 | { |
@@ -3926,8 +3891,7 @@ find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) | |||
3926 | return UINT_MAX; | 3891 | return UINT_MAX; |
3927 | } | 3892 | } |
3928 | } | 3893 | } |
3929 | # endif /* _LIBC */ | 3894 | #endif /* _LIBC */ |
3930 | #endif /* RE_ENABLE_I18N */ | ||
3931 | 3895 | ||
3932 | /* Check whether the node accepts the byte which is IDX-th | 3896 | /* Check whether the node accepts the byte which is IDX-th |
3933 | byte of the INPUT. */ | 3897 | byte of the INPUT. */ |
@@ -3950,12 +3914,10 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node, | |||
3950 | return false; | 3914 | return false; |
3951 | break; | 3915 | break; |
3952 | 3916 | ||
3953 | #ifdef RE_ENABLE_I18N | ||
3954 | case OP_UTF8_PERIOD: | 3917 | case OP_UTF8_PERIOD: |
3955 | if (ch >= ASCII_CHARS) | 3918 | if (ch >= ASCII_CHARS) |
3956 | return false; | 3919 | return false; |
3957 | FALLTHROUGH; | 3920 | FALLTHROUGH; |
3958 | #endif | ||
3959 | case OP_PERIOD: | 3921 | case OP_PERIOD: |
3960 | if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) | 3922 | if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) |
3961 | || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) | 3923 | || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) |
@@ -4016,7 +3978,6 @@ extend_buffers (re_match_context_t *mctx, int min_len) | |||
4016 | /* Then reconstruct the buffers. */ | 3978 | /* Then reconstruct the buffers. */ |
4017 | if (pstr->icase) | 3979 | if (pstr->icase) |
4018 | { | 3980 | { |
4019 | #ifdef RE_ENABLE_I18N | ||
4020 | if (pstr->mb_cur_max > 1) | 3981 | if (pstr->mb_cur_max > 1) |
4021 | { | 3982 | { |
4022 | ret = build_wcs_upper_buffer (pstr); | 3983 | ret = build_wcs_upper_buffer (pstr); |
@@ -4024,16 +3985,13 @@ extend_buffers (re_match_context_t *mctx, int min_len) | |||
4024 | return ret; | 3985 | return ret; |
4025 | } | 3986 | } |
4026 | else | 3987 | else |
4027 | #endif /* RE_ENABLE_I18N */ | ||
4028 | build_upper_buffer (pstr); | 3988 | build_upper_buffer (pstr); |
4029 | } | 3989 | } |
4030 | else | 3990 | else |
4031 | { | 3991 | { |
4032 | #ifdef RE_ENABLE_I18N | ||
4033 | if (pstr->mb_cur_max > 1) | 3992 | if (pstr->mb_cur_max > 1) |
4034 | build_wcs_buffer (pstr); | 3993 | build_wcs_buffer (pstr); |
4035 | else | 3994 | else |
4036 | #endif /* RE_ENABLE_I18N */ | ||
4037 | { | 3995 | { |
4038 | if (pstr->trans != NULL) | 3996 | if (pstr->trans != NULL) |
4039 | re_string_translate_buffer (pstr); | 3997 | re_string_translate_buffer (pstr); |