diff options
Diffstat (limited to 'gl/regexec.c')
-rw-r--r-- | gl/regexec.c | 161 |
1 files changed, 85 insertions, 76 deletions
diff --git a/gl/regexec.c b/gl/regexec.c index 5452ef78..dc449ce5 100644 --- a/gl/regexec.c +++ b/gl/regexec.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* Extended regular expression matching and search library. | 1 | /* Extended regular expression matching and search library. |
2 | Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 | 2 | Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free |
3 | Free Software Foundation, Inc. | 3 | Software Foundation, Inc. |
4 | This file is part of the GNU C Library. | 4 | This file is part of the GNU C Library. |
5 | Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. | 5 | Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. |
6 | 6 | ||
@@ -637,7 +637,7 @@ re_exec (s) | |||
637 | (0 <= LAST_START && LAST_START <= LENGTH) */ | 637 | (0 <= LAST_START && LAST_START <= LENGTH) */ |
638 | 638 | ||
639 | static reg_errcode_t | 639 | static reg_errcode_t |
640 | internal_function | 640 | internal_function __attribute_warn_unused_result__ |
641 | re_search_internal (const regex_t *preg, | 641 | re_search_internal (const regex_t *preg, |
642 | const char *string, Idx length, | 642 | const char *string, Idx length, |
643 | Idx start, Idx last_start, Idx stop, | 643 | Idx start, Idx last_start, Idx stop, |
@@ -833,10 +833,10 @@ re_search_internal (const regex_t *preg, | |||
833 | break; | 833 | break; |
834 | match_first += incr; | 834 | match_first += incr; |
835 | if (match_first < left_lim || match_first > right_lim) | 835 | if (match_first < left_lim || match_first > right_lim) |
836 | { | 836 | { |
837 | err = REG_NOMATCH; | 837 | err = REG_NOMATCH; |
838 | goto free_return; | 838 | goto free_return; |
839 | } | 839 | } |
840 | } | 840 | } |
841 | break; | 841 | break; |
842 | } | 842 | } |
@@ -953,14 +953,14 @@ re_search_internal (const regex_t *preg, | |||
953 | } | 953 | } |
954 | 954 | ||
955 | if (dfa->subexp_map) | 955 | if (dfa->subexp_map) |
956 | for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) | 956 | for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) |
957 | if (dfa->subexp_map[reg_idx] != reg_idx) | 957 | if (dfa->subexp_map[reg_idx] != reg_idx) |
958 | { | 958 | { |
959 | pmatch[reg_idx + 1].rm_so | 959 | pmatch[reg_idx + 1].rm_so |
960 | = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; | 960 | = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; |
961 | pmatch[reg_idx + 1].rm_eo | 961 | pmatch[reg_idx + 1].rm_eo |
962 | = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; | 962 | = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; |
963 | } | 963 | } |
964 | } | 964 | } |
965 | 965 | ||
966 | free_return: | 966 | free_return: |
@@ -972,7 +972,7 @@ re_search_internal (const regex_t *preg, | |||
972 | } | 972 | } |
973 | 973 | ||
974 | static reg_errcode_t | 974 | static reg_errcode_t |
975 | internal_function | 975 | internal_function __attribute_warn_unused_result__ |
976 | prune_impossible_nodes (re_match_context_t *mctx) | 976 | prune_impossible_nodes (re_match_context_t *mctx) |
977 | { | 977 | { |
978 | const re_dfa_t *const dfa = mctx->dfa; | 978 | const re_dfa_t *const dfa = mctx->dfa; |
@@ -1110,7 +1110,7 @@ acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, | |||
1110 | index of the buffer. */ | 1110 | index of the buffer. */ |
1111 | 1111 | ||
1112 | static Idx | 1112 | static Idx |
1113 | internal_function | 1113 | internal_function __attribute_warn_unused_result__ |
1114 | check_matching (re_match_context_t *mctx, bool fl_longest_match, | 1114 | check_matching (re_match_context_t *mctx, bool fl_longest_match, |
1115 | Idx *p_match_first) | 1115 | Idx *p_match_first) |
1116 | { | 1116 | { |
@@ -1149,7 +1149,7 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match, | |||
1149 | { | 1149 | { |
1150 | err = transit_state_bkref (mctx, &cur_state->nodes); | 1150 | err = transit_state_bkref (mctx, &cur_state->nodes); |
1151 | if (BE (err != REG_NOERROR, 0)) | 1151 | if (BE (err != REG_NOERROR, 0)) |
1152 | return err; | 1152 | return err; |
1153 | } | 1153 | } |
1154 | } | 1154 | } |
1155 | } | 1155 | } |
@@ -1176,16 +1176,16 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match, | |||
1176 | Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1; | 1176 | Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1; |
1177 | 1177 | ||
1178 | if (BE (next_char_idx >= mctx->input.bufs_len, 0) | 1178 | if (BE (next_char_idx >= mctx->input.bufs_len, 0) |
1179 | || (BE (next_char_idx >= mctx->input.valid_len, 0) | 1179 | || (BE (next_char_idx >= mctx->input.valid_len, 0) |
1180 | && mctx->input.valid_len < mctx->input.len)) | 1180 | && mctx->input.valid_len < mctx->input.len)) |
1181 | { | 1181 | { |
1182 | err = extend_buffers (mctx); | 1182 | err = extend_buffers (mctx); |
1183 | if (BE (err != REG_NOERROR, 0)) | 1183 | if (BE (err != REG_NOERROR, 0)) |
1184 | { | 1184 | { |
1185 | assert (err == REG_ESPACE); | 1185 | assert (err == REG_ESPACE); |
1186 | return REG_ERROR; | 1186 | return REG_ERROR; |
1187 | } | 1187 | } |
1188 | } | 1188 | } |
1189 | 1189 | ||
1190 | cur_state = transit_state (&err, mctx, cur_state); | 1190 | cur_state = transit_state (&err, mctx, cur_state); |
1191 | if (mctx->state_log != NULL) | 1191 | if (mctx->state_log != NULL) |
@@ -1309,17 +1309,17 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, | |||
1309 | if (dest_node == REG_MISSING) | 1309 | if (dest_node == REG_MISSING) |
1310 | dest_node = candidate; | 1310 | dest_node = candidate; |
1311 | 1311 | ||
1312 | else | 1312 | else |
1313 | { | 1313 | { |
1314 | /* In order to avoid infinite loop like "(a*)*", return the second | 1314 | /* In order to avoid infinite loop like "(a*)*", return the second |
1315 | epsilon-transition if the first was already considered. */ | 1315 | epsilon-transition if the first was already considered. */ |
1316 | if (re_node_set_contains (eps_via_nodes, dest_node)) | 1316 | if (re_node_set_contains (eps_via_nodes, dest_node)) |
1317 | return candidate; | 1317 | return candidate; |
1318 | 1318 | ||
1319 | /* Otherwise, push the second epsilon-transition on the fail stack. */ | 1319 | /* Otherwise, push the second epsilon-transition on the fail stack. */ |
1320 | else if (fs != NULL | 1320 | else if (fs != NULL |
1321 | && push_fail_stack (fs, *pidx, candidate, nregs, regs, | 1321 | && push_fail_stack (fs, *pidx, candidate, nregs, regs, |
1322 | eps_via_nodes)) | 1322 | eps_via_nodes)) |
1323 | return REG_ERROR; | 1323 | return REG_ERROR; |
1324 | 1324 | ||
1325 | /* We know we are going to exit. */ | 1325 | /* We know we are going to exit. */ |
@@ -1385,7 +1385,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, | |||
1385 | } | 1385 | } |
1386 | 1386 | ||
1387 | static reg_errcode_t | 1387 | static reg_errcode_t |
1388 | internal_function | 1388 | internal_function __attribute_warn_unused_result__ |
1389 | push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, | 1389 | push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, |
1390 | Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes) | 1390 | Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes) |
1391 | { | 1391 | { |
@@ -1432,7 +1432,7 @@ pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs, | |||
1432 | pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */ | 1432 | pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */ |
1433 | 1433 | ||
1434 | static reg_errcode_t | 1434 | static reg_errcode_t |
1435 | internal_function | 1435 | internal_function __attribute_warn_unused_result__ |
1436 | set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, | 1436 | set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, |
1437 | regmatch_t *pmatch, bool fl_backtrack) | 1437 | regmatch_t *pmatch, bool fl_backtrack) |
1438 | { | 1438 | { |
@@ -1667,7 +1667,7 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) | |||
1667 | if (mctx->state_log[str_idx]) | 1667 | if (mctx->state_log[str_idx]) |
1668 | { | 1668 | { |
1669 | err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); | 1669 | err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); |
1670 | if (BE (err != REG_NOERROR, 0)) | 1670 | if (BE (err != REG_NOERROR, 0)) |
1671 | goto free_return; | 1671 | goto free_return; |
1672 | } | 1672 | } |
1673 | 1673 | ||
@@ -1686,7 +1686,7 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) | |||
1686 | } | 1686 | } |
1687 | 1687 | ||
1688 | static reg_errcode_t | 1688 | static reg_errcode_t |
1689 | internal_function | 1689 | internal_function __attribute_warn_unused_result__ |
1690 | build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, | 1690 | build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, |
1691 | Idx str_idx, re_node_set *cur_dest) | 1691 | Idx str_idx, re_node_set *cur_dest) |
1692 | { | 1692 | { |
@@ -1848,7 +1848,7 @@ update_cur_sifted_state (const re_match_context_t *mctx, | |||
1848 | } | 1848 | } |
1849 | 1849 | ||
1850 | static reg_errcode_t | 1850 | static reg_errcode_t |
1851 | internal_function | 1851 | internal_function __attribute_warn_unused_result__ |
1852 | add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, | 1852 | add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, |
1853 | const re_node_set *candidates) | 1853 | const re_node_set *candidates) |
1854 | { | 1854 | { |
@@ -1863,10 +1863,14 @@ add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, | |||
1863 | { | 1863 | { |
1864 | err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); | 1864 | err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); |
1865 | if (BE (err != REG_NOERROR, 0)) | 1865 | if (BE (err != REG_NOERROR, 0)) |
1866 | return REG_ESPACE; | 1866 | return REG_ESPACE; |
1867 | for (i = 0; i < dest_nodes->nelem; i++) | 1867 | for (i = 0; i < dest_nodes->nelem; i++) |
1868 | re_node_set_merge (&state->inveclosure, | 1868 | { |
1869 | dfa->inveclosures + dest_nodes->elems[i]); | 1869 | err = re_node_set_merge (&state->inveclosure, |
1870 | dfa->inveclosures + dest_nodes->elems[i]); | ||
1871 | if (BE (err != REG_NOERROR, 0)) | ||
1872 | return REG_ESPACE; | ||
1873 | } | ||
1870 | } | 1874 | } |
1871 | return re_node_set_add_intersect (dest_nodes, candidates, | 1875 | return re_node_set_add_intersect (dest_nodes, candidates, |
1872 | &state->inveclosure); | 1876 | &state->inveclosure); |
@@ -1978,7 +1982,7 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, | |||
1978 | { | 1982 | { |
1979 | struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; | 1983 | struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; |
1980 | do | 1984 | do |
1981 | { | 1985 | { |
1982 | Idx dst; | 1986 | Idx dst; |
1983 | int cpos; | 1987 | int cpos; |
1984 | 1988 | ||
@@ -2000,9 +2004,9 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, | |||
2000 | if (dst == from_node) | 2004 | if (dst == from_node) |
2001 | { | 2005 | { |
2002 | if (boundaries & 1) | 2006 | if (boundaries & 1) |
2003 | return -1; | 2007 | return -1; |
2004 | else /* if (boundaries & 2) */ | 2008 | else /* if (boundaries & 2) */ |
2005 | return 0; | 2009 | return 0; |
2006 | } | 2010 | } |
2007 | 2011 | ||
2008 | cpos = | 2012 | cpos = |
@@ -2016,7 +2020,7 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, | |||
2016 | if (subexp_idx < BITSET_WORD_BITS) | 2020 | if (subexp_idx < BITSET_WORD_BITS) |
2017 | ent->eps_reachable_subexps_map | 2021 | ent->eps_reachable_subexps_map |
2018 | &= ~((bitset_word_t) 1 << subexp_idx); | 2022 | &= ~((bitset_word_t) 1 << subexp_idx); |
2019 | } | 2023 | } |
2020 | while (ent++->more); | 2024 | while (ent++->more); |
2021 | } | 2025 | } |
2022 | break; | 2026 | break; |
@@ -2158,7 +2162,7 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, | |||
2158 | } | 2162 | } |
2159 | 2163 | ||
2160 | static reg_errcode_t | 2164 | static reg_errcode_t |
2161 | internal_function | 2165 | internal_function __attribute_warn_unused_result__ |
2162 | sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, | 2166 | sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, |
2163 | Idx str_idx, const re_node_set *candidates) | 2167 | Idx str_idx, const re_node_set *candidates) |
2164 | { | 2168 | { |
@@ -2241,7 +2245,7 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, | |||
2241 | re_node_set_remove (&local_sctx.limits, enabled_idx); | 2245 | re_node_set_remove (&local_sctx.limits, enabled_idx); |
2242 | 2246 | ||
2243 | /* mctx->bkref_ents may have changed, reload the pointer. */ | 2247 | /* mctx->bkref_ents may have changed, reload the pointer. */ |
2244 | entry = mctx->bkref_ents + enabled_idx; | 2248 | entry = mctx->bkref_ents + enabled_idx; |
2245 | } | 2249 | } |
2246 | while (enabled_idx++, entry++->more); | 2250 | while (enabled_idx++, entry++->more); |
2247 | } | 2251 | } |
@@ -2288,7 +2292,7 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, | |||
2288 | update the destination of STATE_LOG. */ | 2292 | update the destination of STATE_LOG. */ |
2289 | 2293 | ||
2290 | static re_dfastate_t * | 2294 | static re_dfastate_t * |
2291 | internal_function | 2295 | internal_function __attribute_warn_unused_result__ |
2292 | transit_state (reg_errcode_t *err, re_match_context_t *mctx, | 2296 | transit_state (reg_errcode_t *err, re_match_context_t *mctx, |
2293 | re_dfastate_t *state) | 2297 | re_dfastate_t *state) |
2294 | { | 2298 | { |
@@ -2322,7 +2326,7 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx, | |||
2322 | 2326 | ||
2323 | trtable = state->word_trtable; | 2327 | trtable = state->word_trtable; |
2324 | if (BE (trtable != NULL, 1)) | 2328 | if (BE (trtable != NULL, 1)) |
2325 | { | 2329 | { |
2326 | unsigned int context; | 2330 | unsigned int context; |
2327 | context | 2331 | context |
2328 | = re_string_context_at (&mctx->input, | 2332 | = re_string_context_at (&mctx->input, |
@@ -2368,21 +2372,21 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, | |||
2368 | unsigned int context; | 2372 | unsigned int context; |
2369 | re_node_set next_nodes, *log_nodes, *table_nodes = NULL; | 2373 | re_node_set next_nodes, *log_nodes, *table_nodes = NULL; |
2370 | /* If (state_log[cur_idx] != 0), it implies that cur_idx is | 2374 | /* If (state_log[cur_idx] != 0), it implies that cur_idx is |
2371 | the destination of a multibyte char/collating element/ | 2375 | the destination of a multibyte char/collating element/ |
2372 | back reference. Then the next state is the union set of | 2376 | back reference. Then the next state is the union set of |
2373 | these destinations and the results of the transition table. */ | 2377 | these destinations and the results of the transition table. */ |
2374 | pstate = mctx->state_log[cur_idx]; | 2378 | pstate = mctx->state_log[cur_idx]; |
2375 | log_nodes = pstate->entrance_nodes; | 2379 | log_nodes = pstate->entrance_nodes; |
2376 | if (next_state != NULL) | 2380 | if (next_state != NULL) |
2377 | { | 2381 | { |
2378 | table_nodes = next_state->entrance_nodes; | 2382 | table_nodes = next_state->entrance_nodes; |
2379 | *err = re_node_set_init_union (&next_nodes, table_nodes, | 2383 | *err = re_node_set_init_union (&next_nodes, table_nodes, |
2380 | log_nodes); | 2384 | log_nodes); |
2381 | if (BE (*err != REG_NOERROR, 0)) | 2385 | if (BE (*err != REG_NOERROR, 0)) |
2382 | return NULL; | 2386 | return NULL; |
2383 | } | 2387 | } |
2384 | else | 2388 | else |
2385 | next_nodes = *log_nodes; | 2389 | next_nodes = *log_nodes; |
2386 | /* Note: We already add the nodes of the initial state, | 2390 | /* Note: We already add the nodes of the initial state, |
2387 | then we don't need to add them here. */ | 2391 | then we don't need to add them here. */ |
2388 | 2392 | ||
@@ -2390,12 +2394,12 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, | |||
2390 | re_string_cur_idx (&mctx->input) - 1, | 2394 | re_string_cur_idx (&mctx->input) - 1, |
2391 | mctx->eflags); | 2395 | mctx->eflags); |
2392 | next_state = mctx->state_log[cur_idx] | 2396 | next_state = mctx->state_log[cur_idx] |
2393 | = re_acquire_state_context (err, dfa, &next_nodes, context); | 2397 | = re_acquire_state_context (err, dfa, &next_nodes, context); |
2394 | /* We don't need to check errors here, since the return value of | 2398 | /* We don't need to check errors here, since the return value of |
2395 | this function is next_state and ERR is already set. */ | 2399 | this function is next_state and ERR is already set. */ |
2396 | 2400 | ||
2397 | if (table_nodes != NULL) | 2401 | if (table_nodes != NULL) |
2398 | re_node_set_free (&next_nodes); | 2402 | re_node_set_free (&next_nodes); |
2399 | } | 2403 | } |
2400 | 2404 | ||
2401 | if (BE (dfa->nbackref, 0) && next_state != NULL) | 2405 | if (BE (dfa->nbackref, 0) && next_state != NULL) |
@@ -2436,9 +2440,9 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) | |||
2436 | 2440 | ||
2437 | do | 2441 | do |
2438 | { | 2442 | { |
2439 | if (++cur_str_idx > max) | 2443 | if (++cur_str_idx > max) |
2440 | return NULL; | 2444 | return NULL; |
2441 | re_string_skip_bytes (&mctx->input, 1); | 2445 | re_string_skip_bytes (&mctx->input, 1); |
2442 | } | 2446 | } |
2443 | while (mctx->state_log[cur_str_idx] == NULL); | 2447 | while (mctx->state_log[cur_str_idx] == NULL); |
2444 | 2448 | ||
@@ -2546,7 +2550,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) | |||
2546 | re_dfastate_t *dest_state; | 2550 | re_dfastate_t *dest_state; |
2547 | 2551 | ||
2548 | if (!dfa->nodes[cur_node_idx].accept_mb) | 2552 | if (!dfa->nodes[cur_node_idx].accept_mb) |
2549 | continue; | 2553 | continue; |
2550 | 2554 | ||
2551 | if (dfa->nodes[cur_node_idx].constraint) | 2555 | if (dfa->nodes[cur_node_idx].constraint) |
2552 | { | 2556 | { |
@@ -2714,7 +2718,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) | |||
2714 | delay these checking for prune_impossible_nodes(). */ | 2718 | delay these checking for prune_impossible_nodes(). */ |
2715 | 2719 | ||
2716 | static reg_errcode_t | 2720 | static reg_errcode_t |
2717 | internal_function | 2721 | internal_function __attribute_warn_unused_result__ |
2718 | get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) | 2722 | get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) |
2719 | { | 2723 | { |
2720 | const re_dfa_t *const dfa = mctx->dfa; | 2724 | const re_dfa_t *const dfa = mctx->dfa; |
@@ -2727,7 +2731,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) | |||
2727 | const struct re_backref_cache_entry *entry | 2731 | const struct re_backref_cache_entry *entry |
2728 | = mctx->bkref_ents + cache_idx; | 2732 | = mctx->bkref_ents + cache_idx; |
2729 | do | 2733 | do |
2730 | if (entry->node == bkref_node) | 2734 | if (entry->node == bkref_node) |
2731 | return REG_NOERROR; /* We already checked it. */ | 2735 | return REG_NOERROR; /* We already checked it. */ |
2732 | while (entry++->more); | 2736 | while (entry++->more); |
2733 | } | 2737 | } |
@@ -2915,7 +2919,7 @@ find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, | |||
2915 | Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ | 2919 | Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ |
2916 | 2920 | ||
2917 | static reg_errcode_t | 2921 | static reg_errcode_t |
2918 | internal_function | 2922 | internal_function __attribute_warn_unused_result__ |
2919 | check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, | 2923 | check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, |
2920 | Idx top_str, Idx last_node, Idx last_str, int type) | 2924 | Idx top_str, Idx last_node, Idx last_str, int type) |
2921 | { | 2925 | { |
@@ -3077,7 +3081,7 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, | |||
3077 | Can't we unify them? */ | 3081 | Can't we unify them? */ |
3078 | 3082 | ||
3079 | static reg_errcode_t | 3083 | static reg_errcode_t |
3080 | internal_function | 3084 | internal_function __attribute_warn_unused_result__ |
3081 | check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, | 3085 | check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, |
3082 | re_node_set *cur_nodes, re_node_set *next_nodes) | 3086 | re_node_set *cur_nodes, re_node_set *next_nodes) |
3083 | { | 3087 | { |
@@ -3211,7 +3215,7 @@ check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, | |||
3211 | problematic append it to DST_NODES. */ | 3215 | problematic append it to DST_NODES. */ |
3212 | 3216 | ||
3213 | static reg_errcode_t | 3217 | static reg_errcode_t |
3214 | internal_function | 3218 | internal_function __attribute_warn_unused_result__ |
3215 | check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, | 3219 | check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, |
3216 | Idx target, Idx ex_subexp, int type) | 3220 | Idx target, Idx ex_subexp, int type) |
3217 | { | 3221 | { |
@@ -3256,7 +3260,7 @@ check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, | |||
3256 | in MCTX->BKREF_ENTS. */ | 3260 | in MCTX->BKREF_ENTS. */ |
3257 | 3261 | ||
3258 | static reg_errcode_t | 3262 | static reg_errcode_t |
3259 | internal_function | 3263 | internal_function __attribute_warn_unused_result__ |
3260 | expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, | 3264 | expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, |
3261 | Idx cur_str, Idx subexp_num, int type) | 3265 | Idx cur_str, Idx subexp_num, int type) |
3262 | { | 3266 | { |
@@ -3622,7 +3626,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
3622 | } | 3626 | } |
3623 | #ifdef RE_ENABLE_I18N | 3627 | #ifdef RE_ENABLE_I18N |
3624 | else if (type == OP_UTF8_PERIOD) | 3628 | else if (type == OP_UTF8_PERIOD) |
3625 | { | 3629 | { |
3626 | if (ASCII_CHARS % BITSET_WORD_BITS == 0) | 3630 | if (ASCII_CHARS % BITSET_WORD_BITS == 0) |
3627 | memset (accepts, -1, ASCII_CHARS / CHAR_BIT); | 3631 | memset (accepts, -1, ASCII_CHARS / CHAR_BIT); |
3628 | else | 3632 | else |
@@ -3631,7 +3635,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
3631 | bitset_clear (accepts, '\n'); | 3635 | bitset_clear (accepts, '\n'); |
3632 | if (dfa->syntax & RE_DOT_NOT_NULL) | 3636 | if (dfa->syntax & RE_DOT_NOT_NULL) |
3633 | bitset_clear (accepts, '\0'); | 3637 | bitset_clear (accepts, '\0'); |
3634 | } | 3638 | } |
3635 | #endif | 3639 | #endif |
3636 | else | 3640 | else |
3637 | continue; | 3641 | continue; |
@@ -3836,7 +3840,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, | |||
3836 | if (node->type == OP_PERIOD) | 3840 | if (node->type == OP_PERIOD) |
3837 | { | 3841 | { |
3838 | if (char_len <= 1) | 3842 | if (char_len <= 1) |
3839 | return 0; | 3843 | return 0; |
3840 | /* FIXME: I don't think this if is needed, as both '\n' | 3844 | /* FIXME: I don't think this if is needed, as both '\n' |
3841 | and '\0' are char_len == 1. */ | 3845 | and '\0' are char_len == 1. */ |
3842 | /* '.' accepts any one character except the following two cases. */ | 3846 | /* '.' accepts any one character except the following two cases. */ |
@@ -3949,15 +3953,20 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, | |||
3949 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); | 3953 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); |
3950 | indirect = (const int32_t *) | 3954 | indirect = (const int32_t *) |
3951 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); | 3955 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); |
3952 | idx = findidx (&cp); | 3956 | int32_t idx = findidx (&cp); |
3953 | if (idx > 0) | 3957 | if (idx > 0) |
3954 | for (i = 0; i < cset->nequiv_classes; ++i) | 3958 | for (i = 0; i < cset->nequiv_classes; ++i) |
3955 | { | 3959 | { |
3956 | int32_t equiv_class_idx = cset->equiv_classes[i]; | 3960 | int32_t equiv_class_idx = cset->equiv_classes[i]; |
3957 | size_t weight_len = weights[idx]; | 3961 | size_t weight_len = weights[idx & 0xffffff]; |
3958 | if (weight_len == weights[equiv_class_idx]) | 3962 | if (weight_len == weights[equiv_class_idx & 0xffffff] |
3963 | && (idx >> 24) == (equiv_class_idx >> 24)) | ||
3959 | { | 3964 | { |
3960 | Idx cnt = 0; | 3965 | Idx cnt = 0; |
3966 | |||
3967 | idx &= 0xffffff; | ||
3968 | equiv_class_idx &= 0xffffff; | ||
3969 | |||
3961 | while (cnt <= weight_len | 3970 | while (cnt <= weight_len |
3962 | && (weights[equiv_class_idx + 1 + cnt] | 3971 | && (weights[equiv_class_idx + 1 + cnt] |
3963 | == weights[idx + 1 + cnt])) | 3972 | == weights[idx + 1 + cnt])) |
@@ -4123,7 +4132,7 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node, | |||
4123 | /* Extend the buffers, if the buffers have run out. */ | 4132 | /* Extend the buffers, if the buffers have run out. */ |
4124 | 4133 | ||
4125 | static reg_errcode_t | 4134 | static reg_errcode_t |
4126 | internal_function | 4135 | internal_function __attribute_warn_unused_result__ |
4127 | extend_buffers (re_match_context_t *mctx) | 4136 | extend_buffers (re_match_context_t *mctx) |
4128 | { | 4137 | { |
4129 | reg_errcode_t ret; | 4138 | reg_errcode_t ret; |
@@ -4186,7 +4195,7 @@ extend_buffers (re_match_context_t *mctx) | |||
4186 | /* Initialize MCTX. */ | 4195 | /* Initialize MCTX. */ |
4187 | 4196 | ||
4188 | static reg_errcode_t | 4197 | static reg_errcode_t |
4189 | internal_function | 4198 | internal_function __attribute_warn_unused_result__ |
4190 | match_ctx_init (re_match_context_t *mctx, int eflags, Idx n) | 4199 | match_ctx_init (re_match_context_t *mctx, int eflags, Idx n) |
4191 | { | 4200 | { |
4192 | mctx->eflags = eflags; | 4201 | mctx->eflags = eflags; |
@@ -4266,7 +4275,7 @@ match_ctx_free (re_match_context_t *mctx) | |||
4266 | */ | 4275 | */ |
4267 | 4276 | ||
4268 | static reg_errcode_t | 4277 | static reg_errcode_t |
4269 | internal_function | 4278 | internal_function __attribute_warn_unused_result__ |
4270 | match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from, | 4279 | match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from, |
4271 | Idx to) | 4280 | Idx to) |
4272 | { | 4281 | { |
@@ -4338,7 +4347,7 @@ search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx) | |||
4338 | at STR_IDX. */ | 4347 | at STR_IDX. */ |
4339 | 4348 | ||
4340 | static reg_errcode_t | 4349 | static reg_errcode_t |
4341 | internal_function | 4350 | internal_function __attribute_warn_unused_result__ |
4342 | match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx) | 4351 | match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx) |
4343 | { | 4352 | { |
4344 | #ifdef DEBUG | 4353 | #ifdef DEBUG |