diff options
author | Holger Weiss <holger@zedat.fu-berlin.de> | 2013-08-19 23:27:12 +0200 |
---|---|---|
committer | Holger Weiss <holger@zedat.fu-berlin.de> | 2013-08-19 23:27:12 +0200 |
commit | 26fbe7f1e68bb0c96da32491efcf3696fe6c299b (patch) | |
tree | c4d95289187a64e9c7517bf73d8208026c3d2fb3 /gl/regexec.c | |
parent | 5f79e3e9f62ca5487d9881973149136ba1d19d3e (diff) | |
download | monitoring-plugins-26fbe7f1e68bb0c96da32491efcf3696fe6c299b.tar.gz |
Sync with the latest Gnulib code (6f2d632)
Diffstat (limited to 'gl/regexec.c')
-rw-r--r-- | gl/regexec.c | 214 |
1 files changed, 104 insertions, 110 deletions
diff --git a/gl/regexec.c b/gl/regexec.c index dc449ce5..d29d442b 100644 --- a/gl/regexec.c +++ b/gl/regexec.c | |||
@@ -1,22 +1,21 @@ | |||
1 | /* Extended regular expression matching and search library. | 1 | /* Extended regular expression matching and search library. |
2 | Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free | 2 | Copyright (C) 2002-2013 Free Software Foundation, Inc. |
3 | Software Foundation, Inc. | ||
4 | This file is part of the GNU C Library. | 3 | This file is part of the GNU C Library. |
5 | Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. | 4 | Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. |
6 | 5 | ||
7 | This program is free software; you can redistribute it and/or modify | 6 | The GNU C Library is free software; you can redistribute it and/or |
8 | it under the terms of the GNU General Public License as published by | 7 | modify it under the terms of the GNU General Public |
9 | the Free Software Foundation; either version 3, or (at your option) | 8 | License as published by the Free Software Foundation; either |
10 | any later version. | 9 | version 3 of the License, or (at your option) any later version. |
11 | 10 | ||
12 | This program is distributed in the hope that it will be useful, | 11 | The GNU C Library is distributed in the hope that it will be useful, |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | GNU General Public License for more details. | 14 | General Public License for more details. |
16 | 15 | ||
17 | You should have received a copy of the GNU General Public License along | 16 | You should have received a copy of the GNU General Public |
18 | with this program; if not, write to the Free Software Foundation, | 17 | License along with the GNU C Library; if not, see |
19 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ | 18 | <http://www.gnu.org/licenses/>. */ |
20 | 19 | ||
21 | static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, | 20 | static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, |
22 | Idx n) internal_function; | 21 | Idx n) internal_function; |
@@ -52,9 +51,8 @@ static regoff_t re_search_stub (struct re_pattern_buffer *bufp, | |||
52 | regoff_t range, Idx stop, | 51 | regoff_t range, Idx stop, |
53 | struct re_registers *regs, | 52 | struct re_registers *regs, |
54 | bool ret_len) internal_function; | 53 | bool ret_len) internal_function; |
55 | static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, | 54 | static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, |
56 | Idx nregs, int regs_allocated) | 55 | Idx nregs, int regs_allocated) internal_function; |
57 | internal_function; | ||
58 | static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) | 56 | static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) |
59 | internal_function; | 57 | internal_function; |
60 | static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match, | 58 | static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match, |
@@ -201,7 +199,7 @@ static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa, | |||
201 | static bool check_node_accept (const re_match_context_t *mctx, | 199 | static bool check_node_accept (const re_match_context_t *mctx, |
202 | const re_token_t *node, Idx idx) | 200 | const re_token_t *node, Idx idx) |
203 | internal_function; | 201 | internal_function; |
204 | static reg_errcode_t extend_buffers (re_match_context_t *mctx) | 202 | static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len) |
205 | internal_function; | 203 | internal_function; |
206 | 204 | ||
207 | /* Entry point for POSIX code. */ | 205 | /* Entry point for POSIX code. */ |
@@ -210,11 +208,11 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx) | |||
210 | string STRING. | 208 | string STRING. |
211 | 209 | ||
212 | If NMATCH is zero or REG_NOSUB was set in the cflags argument to | 210 | If NMATCH is zero or REG_NOSUB was set in the cflags argument to |
213 | `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at | 211 | 'regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at |
214 | least NMATCH elements, and we set them to the offsets of the | 212 | least NMATCH elements, and we set them to the offsets of the |
215 | corresponding matched substrings. | 213 | corresponding matched substrings. |
216 | 214 | ||
217 | EFLAGS specifies `execution flags' which affect matching: if | 215 | EFLAGS specifies "execution flags" which affect matching: if |
218 | REG_NOTBOL is set, then ^ does not match at the beginning of the | 216 | REG_NOTBOL is set, then ^ does not match at the beginning of the |
219 | string; if REG_NOTEOL is set, then $ does not match at the end. | 217 | string; if REG_NOTEOL is set, then $ does not match at the end. |
220 | 218 | ||
@@ -230,9 +228,7 @@ regexec (preg, string, nmatch, pmatch, eflags) | |||
230 | { | 228 | { |
231 | reg_errcode_t err; | 229 | reg_errcode_t err; |
232 | Idx start, length; | 230 | Idx start, length; |
233 | #ifdef _LIBC | 231 | re_dfa_t *dfa = preg->buffer; |
234 | re_dfa_t *dfa = (re_dfa_t *) preg->buffer; | ||
235 | #endif | ||
236 | 232 | ||
237 | if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) | 233 | if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) |
238 | return REG_BADPAT; | 234 | return REG_BADPAT; |
@@ -248,14 +244,14 @@ regexec (preg, string, nmatch, pmatch, eflags) | |||
248 | length = strlen (string); | 244 | length = strlen (string); |
249 | } | 245 | } |
250 | 246 | ||
251 | __libc_lock_lock (dfa->lock); | 247 | lock_lock (dfa->lock); |
252 | if (preg->no_sub) | 248 | if (preg->no_sub) |
253 | err = re_search_internal (preg, string, length, start, length, | 249 | err = re_search_internal (preg, string, length, start, length, |
254 | length, 0, NULL, eflags); | 250 | length, 0, NULL, eflags); |
255 | else | 251 | else |
256 | err = re_search_internal (preg, string, length, start, length, | 252 | err = re_search_internal (preg, string, length, start, length, |
257 | length, nmatch, pmatch, eflags); | 253 | length, nmatch, pmatch, eflags); |
258 | __libc_lock_unlock (dfa->lock); | 254 | lock_unlock (dfa->lock); |
259 | return err != REG_NOERROR; | 255 | return err != REG_NOERROR; |
260 | } | 256 | } |
261 | 257 | ||
@@ -366,7 +362,6 @@ weak_alias (__re_search_2, re_search_2) | |||
366 | #endif | 362 | #endif |
367 | 363 | ||
368 | static regoff_t | 364 | static regoff_t |
369 | internal_function | ||
370 | re_search_2_stub (struct re_pattern_buffer *bufp, | 365 | re_search_2_stub (struct re_pattern_buffer *bufp, |
371 | const char *string1, Idx length1, | 366 | const char *string1, Idx length1, |
372 | const char *string2, Idx length2, | 367 | const char *string2, Idx length2, |
@@ -414,7 +409,6 @@ re_search_2_stub (struct re_pattern_buffer *bufp, | |||
414 | otherwise the position of the match is returned. */ | 409 | otherwise the position of the match is returned. */ |
415 | 410 | ||
416 | static regoff_t | 411 | static regoff_t |
417 | internal_function | ||
418 | re_search_stub (struct re_pattern_buffer *bufp, | 412 | re_search_stub (struct re_pattern_buffer *bufp, |
419 | const char *string, Idx length, | 413 | const char *string, Idx length, |
420 | Idx start, regoff_t range, Idx stop, struct re_registers *regs, | 414 | Idx start, regoff_t range, Idx stop, struct re_registers *regs, |
@@ -425,9 +419,7 @@ re_search_stub (struct re_pattern_buffer *bufp, | |||
425 | Idx nregs; | 419 | Idx nregs; |
426 | regoff_t rval; | 420 | regoff_t rval; |
427 | int eflags = 0; | 421 | int eflags = 0; |
428 | #ifdef _LIBC | 422 | re_dfa_t *dfa = bufp->buffer; |
429 | re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; | ||
430 | #endif | ||
431 | Idx last_start = start + range; | 423 | Idx last_start = start + range; |
432 | 424 | ||
433 | /* Check for out-of-range. */ | 425 | /* Check for out-of-range. */ |
@@ -438,7 +430,7 @@ re_search_stub (struct re_pattern_buffer *bufp, | |||
438 | else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0)) | 430 | else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0)) |
439 | last_start = 0; | 431 | last_start = 0; |
440 | 432 | ||
441 | __libc_lock_lock (dfa->lock); | 433 | lock_lock (dfa->lock); |
442 | 434 | ||
443 | eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; | 435 | eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; |
444 | eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; | 436 | eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; |
@@ -478,9 +470,9 @@ re_search_stub (struct re_pattern_buffer *bufp, | |||
478 | 470 | ||
479 | rval = 0; | 471 | rval = 0; |
480 | 472 | ||
481 | /* I hope we needn't fill ther regs with -1's when no match was found. */ | 473 | /* I hope we needn't fill their regs with -1's when no match was found. */ |
482 | if (result != REG_NOERROR) | 474 | if (result != REG_NOERROR) |
483 | rval = -1; | 475 | rval = result == REG_NOMATCH ? -1 : -2; |
484 | else if (regs != NULL) | 476 | else if (regs != NULL) |
485 | { | 477 | { |
486 | /* If caller wants register contents data back, copy them. */ | 478 | /* If caller wants register contents data back, copy them. */ |
@@ -502,19 +494,18 @@ re_search_stub (struct re_pattern_buffer *bufp, | |||
502 | } | 494 | } |
503 | re_free (pmatch); | 495 | re_free (pmatch); |
504 | out: | 496 | out: |
505 | __libc_lock_unlock (dfa->lock); | 497 | lock_unlock (dfa->lock); |
506 | return rval; | 498 | return rval; |
507 | } | 499 | } |
508 | 500 | ||
509 | static unsigned int | 501 | static unsigned |
510 | internal_function | ||
511 | re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, | 502 | re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, |
512 | int regs_allocated) | 503 | int regs_allocated) |
513 | { | 504 | { |
514 | int rval = REGS_REALLOCATE; | 505 | int rval = REGS_REALLOCATE; |
515 | Idx i; | 506 | Idx i; |
516 | Idx need_regs = nregs + 1; | 507 | Idx need_regs = nregs + 1; |
517 | /* We need one extra element beyond `num_regs' for the `-1' marker GNU code | 508 | /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code |
518 | uses. */ | 509 | uses. */ |
519 | 510 | ||
520 | /* Have the register data arrays been allocated? */ | 511 | /* Have the register data arrays been allocated? */ |
@@ -637,7 +628,7 @@ re_exec (s) | |||
637 | (0 <= LAST_START && LAST_START <= LENGTH) */ | 628 | (0 <= LAST_START && LAST_START <= LENGTH) */ |
638 | 629 | ||
639 | static reg_errcode_t | 630 | static reg_errcode_t |
640 | internal_function __attribute_warn_unused_result__ | 631 | __attribute_warn_unused_result__ |
641 | re_search_internal (const regex_t *preg, | 632 | re_search_internal (const regex_t *preg, |
642 | const char *string, Idx length, | 633 | const char *string, Idx length, |
643 | Idx start, Idx last_start, Idx stop, | 634 | Idx start, Idx last_start, Idx stop, |
@@ -645,7 +636,7 @@ re_search_internal (const regex_t *preg, | |||
645 | int eflags) | 636 | int eflags) |
646 | { | 637 | { |
647 | reg_errcode_t err; | 638 | reg_errcode_t err; |
648 | const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; | 639 | const re_dfa_t *dfa = preg->buffer; |
649 | Idx left_lim, right_lim; | 640 | Idx left_lim, right_lim; |
650 | int incr; | 641 | int incr; |
651 | bool fl_longest_match; | 642 | bool fl_longest_match; |
@@ -720,7 +711,8 @@ re_search_internal (const regex_t *preg, | |||
720 | if (nmatch > 1 || dfa->has_mb_node) | 711 | if (nmatch > 1 || dfa->has_mb_node) |
721 | { | 712 | { |
722 | /* Avoid overflow. */ | 713 | /* Avoid overflow. */ |
723 | if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0)) | 714 | if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) |
715 | <= mctx.input.bufs_len), 0)) | ||
724 | { | 716 | { |
725 | err = REG_ESPACE; | 717 | err = REG_ESPACE; |
726 | goto free_return; | 718 | goto free_return; |
@@ -740,7 +732,7 @@ re_search_internal (const regex_t *preg, | |||
740 | mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF | 732 | mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF |
741 | : CONTEXT_NEWLINE | CONTEXT_BEGBUF; | 733 | : CONTEXT_NEWLINE | CONTEXT_BEGBUF; |
742 | 734 | ||
743 | /* Check incrementally whether of not the input string match. */ | 735 | /* Check incrementally whether the input string matches. */ |
744 | incr = (last_start < start) ? -1 : 1; | 736 | incr = (last_start < start) ? -1 : 1; |
745 | left_lim = (last_start < start) ? last_start : start; | 737 | left_lim = (last_start < start) ? last_start : start; |
746 | right_lim = (last_start < start) ? start : last_start; | 738 | right_lim = (last_start < start) ? start : last_start; |
@@ -922,7 +914,7 @@ re_search_internal (const regex_t *preg, | |||
922 | goto free_return; | 914 | goto free_return; |
923 | } | 915 | } |
924 | 916 | ||
925 | /* At last, add the offset to the each registers, since we slided | 917 | /* At last, add the offset to each register, since we slid |
926 | the buffers so that we could assume that the matching starts | 918 | the buffers so that we could assume that the matching starts |
927 | from 0. */ | 919 | from 0. */ |
928 | for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) | 920 | for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) |
@@ -972,7 +964,7 @@ re_search_internal (const regex_t *preg, | |||
972 | } | 964 | } |
973 | 965 | ||
974 | static reg_errcode_t | 966 | static reg_errcode_t |
975 | internal_function __attribute_warn_unused_result__ | 967 | __attribute_warn_unused_result__ |
976 | prune_impossible_nodes (re_match_context_t *mctx) | 968 | prune_impossible_nodes (re_match_context_t *mctx) |
977 | { | 969 | { |
978 | const re_dfa_t *const dfa = mctx->dfa; | 970 | const re_dfa_t *const dfa = mctx->dfa; |
@@ -988,7 +980,7 @@ prune_impossible_nodes (re_match_context_t *mctx) | |||
988 | halt_node = mctx->last_node; | 980 | halt_node = mctx->last_node; |
989 | 981 | ||
990 | /* Avoid overflow. */ | 982 | /* Avoid overflow. */ |
991 | if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0)) | 983 | if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, 0)) |
992 | return REG_ESPACE; | 984 | return REG_ESPACE; |
993 | 985 | ||
994 | sifted_states = re_malloc (re_dfastate_t *, match_last + 1); | 986 | sifted_states = re_malloc (re_dfastate_t *, match_last + 1); |
@@ -1068,7 +1060,7 @@ prune_impossible_nodes (re_match_context_t *mctx) | |||
1068 | since initial states may have constraints like "\<", "^", etc.. */ | 1060 | since initial states may have constraints like "\<", "^", etc.. */ |
1069 | 1061 | ||
1070 | static inline re_dfastate_t * | 1062 | static inline re_dfastate_t * |
1071 | __attribute ((always_inline)) internal_function | 1063 | __attribute__ ((always_inline)) internal_function |
1072 | acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, | 1064 | acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, |
1073 | Idx idx) | 1065 | Idx idx) |
1074 | { | 1066 | { |
@@ -1106,7 +1098,7 @@ acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, | |||
1106 | FL_LONGEST_MATCH means we want the POSIX longest matching. | 1098 | FL_LONGEST_MATCH means we want the POSIX longest matching. |
1107 | If P_MATCH_FIRST is not NULL, and the match fails, it is set to the | 1099 | If P_MATCH_FIRST is not NULL, and the match fails, it is set to the |
1108 | next place where we may want to try matching. | 1100 | next place where we may want to try matching. |
1109 | Note that the matcher assume that the maching starts from the current | 1101 | Note that the matcher assumes that the matching starts from the current |
1110 | index of the buffer. */ | 1102 | index of the buffer. */ |
1111 | 1103 | ||
1112 | static Idx | 1104 | static Idx |
@@ -1175,11 +1167,12 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match, | |||
1175 | re_dfastate_t *old_state = cur_state; | 1167 | re_dfastate_t *old_state = cur_state; |
1176 | Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1; | 1168 | Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1; |
1177 | 1169 | ||
1178 | if (BE (next_char_idx >= mctx->input.bufs_len, 0) | 1170 | if ((BE (next_char_idx >= mctx->input.bufs_len, 0) |
1171 | && mctx->input.bufs_len < mctx->input.len) | ||
1179 | || (BE (next_char_idx >= mctx->input.valid_len, 0) | 1172 | || (BE (next_char_idx >= mctx->input.valid_len, 0) |
1180 | && mctx->input.valid_len < mctx->input.len)) | 1173 | && mctx->input.valid_len < mctx->input.len)) |
1181 | { | 1174 | { |
1182 | err = extend_buffers (mctx); | 1175 | err = extend_buffers (mctx, next_char_idx + 1); |
1183 | if (BE (err != REG_NOERROR, 0)) | 1176 | if (BE (err != REG_NOERROR, 0)) |
1184 | { | 1177 | { |
1185 | assert (err == REG_ESPACE); | 1178 | assert (err == REG_ESPACE); |
@@ -1436,7 +1429,7 @@ internal_function __attribute_warn_unused_result__ | |||
1436 | set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, | 1429 | set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, |
1437 | regmatch_t *pmatch, bool fl_backtrack) | 1430 | regmatch_t *pmatch, bool fl_backtrack) |
1438 | { | 1431 | { |
1439 | const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; | 1432 | const re_dfa_t *dfa = preg->buffer; |
1440 | Idx idx, cur_node; | 1433 | Idx idx, cur_node; |
1441 | re_node_set eps_via_nodes; | 1434 | re_node_set eps_via_nodes; |
1442 | struct re_fail_stack_t *fs; | 1435 | struct re_fail_stack_t *fs; |
@@ -1608,21 +1601,21 @@ update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, | |||
1608 | and sift the nodes in each states according to the following rules. | 1601 | and sift the nodes in each states according to the following rules. |
1609 | Updated state_log will be wrote to STATE_LOG. | 1602 | Updated state_log will be wrote to STATE_LOG. |
1610 | 1603 | ||
1611 | Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... | 1604 | Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if... |
1612 | 1. When STR_IDX == MATCH_LAST(the last index in the state_log): | 1605 | 1. When STR_IDX == MATCH_LAST(the last index in the state_log): |
1613 | If `a' isn't the LAST_NODE and `a' can't epsilon transit to | 1606 | If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to |
1614 | the LAST_NODE, we throw away the node `a'. | 1607 | the LAST_NODE, we throw away the node 'a'. |
1615 | 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts | 1608 | 2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts |
1616 | string `s' and transit to `b': | 1609 | string 's' and transit to 'b': |
1617 | i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw | 1610 | i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw |
1618 | away the node `a'. | 1611 | away the node 'a'. |
1619 | ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is | 1612 | ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is |
1620 | thrown away, we throw away the node `a'. | 1613 | thrown away, we throw away the node 'a'. |
1621 | 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': | 1614 | 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': |
1622 | i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the | 1615 | i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the |
1623 | node `a'. | 1616 | node 'a'. |
1624 | ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, | 1617 | ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, |
1625 | we throw away the node `a'. */ | 1618 | we throw away the node 'a'. */ |
1626 | 1619 | ||
1627 | #define STATE_NODE_CONTAINS(state,node) \ | 1620 | #define STATE_NODE_CONTAINS(state,node) \ |
1628 | ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) | 1621 | ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) |
@@ -1695,11 +1688,11 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, | |||
1695 | Idx i; | 1688 | Idx i; |
1696 | 1689 | ||
1697 | /* Then build the next sifted state. | 1690 | /* Then build the next sifted state. |
1698 | We build the next sifted state on `cur_dest', and update | 1691 | We build the next sifted state on 'cur_dest', and update |
1699 | `sifted_states[str_idx]' with `cur_dest'. | 1692 | 'sifted_states[str_idx]' with 'cur_dest'. |
1700 | Note: | 1693 | Note: |
1701 | `cur_dest' is the sifted state from `state_log[str_idx + 1]'. | 1694 | 'cur_dest' is the sifted state from 'state_log[str_idx + 1]'. |
1702 | `cur_src' points the node_set of the old `state_log[str_idx]' | 1695 | 'cur_src' points the node_set of the old 'state_log[str_idx]' |
1703 | (with the epsilon nodes pre-filtered out). */ | 1696 | (with the epsilon nodes pre-filtered out). */ |
1704 | for (i = 0; i < cur_src->nelem; i++) | 1697 | for (i = 0; i < cur_src->nelem; i++) |
1705 | { | 1698 | { |
@@ -1712,7 +1705,7 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, | |||
1712 | assert (!IS_EPSILON_NODE (type)); | 1705 | assert (!IS_EPSILON_NODE (type)); |
1713 | #endif | 1706 | #endif |
1714 | #ifdef RE_ENABLE_I18N | 1707 | #ifdef RE_ENABLE_I18N |
1715 | /* If the node may accept `multi byte'. */ | 1708 | /* If the node may accept "multi byte". */ |
1716 | if (dfa->nodes[prev_node].accept_mb) | 1709 | if (dfa->nodes[prev_node].accept_mb) |
1717 | naccepted = sift_states_iter_mb (mctx, sctx, prev_node, | 1710 | naccepted = sift_states_iter_mb (mctx, sctx, prev_node, |
1718 | str_idx, sctx->last_str_idx); | 1711 | str_idx, sctx->last_str_idx); |
@@ -1753,12 +1746,13 @@ clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx) | |||
1753 | { | 1746 | { |
1754 | Idx top = mctx->state_log_top; | 1747 | Idx top = mctx->state_log_top; |
1755 | 1748 | ||
1756 | if (next_state_log_idx >= mctx->input.bufs_len | 1749 | if ((next_state_log_idx >= mctx->input.bufs_len |
1750 | && mctx->input.bufs_len < mctx->input.len) | ||
1757 | || (next_state_log_idx >= mctx->input.valid_len | 1751 | || (next_state_log_idx >= mctx->input.valid_len |
1758 | && mctx->input.valid_len < mctx->input.len)) | 1752 | && mctx->input.valid_len < mctx->input.len)) |
1759 | { | 1753 | { |
1760 | reg_errcode_t err; | 1754 | reg_errcode_t err; |
1761 | err = extend_buffers (mctx); | 1755 | err = extend_buffers (mctx, next_state_log_idx + 1); |
1762 | if (BE (err != REG_NOERROR, 0)) | 1756 | if (BE (err != REG_NOERROR, 0)) |
1763 | return err; | 1757 | return err; |
1764 | } | 1758 | } |
@@ -2268,17 +2262,17 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, | |||
2268 | { | 2262 | { |
2269 | const re_dfa_t *const dfa = mctx->dfa; | 2263 | const re_dfa_t *const dfa = mctx->dfa; |
2270 | int naccepted; | 2264 | int naccepted; |
2271 | /* Check the node can accept `multi byte'. */ | 2265 | /* Check the node can accept "multi byte". */ |
2272 | naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); | 2266 | naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); |
2273 | if (naccepted > 0 && str_idx + naccepted <= max_str_idx && | 2267 | if (naccepted > 0 && str_idx + naccepted <= max_str_idx && |
2274 | !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], | 2268 | !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], |
2275 | dfa->nexts[node_idx])) | 2269 | dfa->nexts[node_idx])) |
2276 | /* The node can't accept the `multi byte', or the | 2270 | /* The node can't accept the "multi byte", or the |
2277 | destination was already thrown away, then the node | 2271 | destination was already thrown away, then the node |
2278 | could't accept the current input `multi byte'. */ | 2272 | could't accept the current input "multi byte". */ |
2279 | naccepted = 0; | 2273 | naccepted = 0; |
2280 | /* Otherwise, it is sure that the node could accept | 2274 | /* Otherwise, it is sure that the node could accept |
2281 | `naccepted' bytes input. */ | 2275 | 'naccepted' bytes input. */ |
2282 | return naccepted; | 2276 | return naccepted; |
2283 | } | 2277 | } |
2284 | #endif /* RE_ENABLE_I18N */ | 2278 | #endif /* RE_ENABLE_I18N */ |
@@ -2457,7 +2451,7 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) | |||
2457 | /* From the node set CUR_NODES, pick up the nodes whose types are | 2451 | /* From the node set CUR_NODES, pick up the nodes whose types are |
2458 | OP_OPEN_SUBEXP and which have corresponding back references in the regular | 2452 | OP_OPEN_SUBEXP and which have corresponding back references in the regular |
2459 | expression. And register them to use them later for evaluating the | 2453 | expression. And register them to use them later for evaluating the |
2460 | correspoding back references. */ | 2454 | corresponding back references. */ |
2461 | 2455 | ||
2462 | static reg_errcode_t | 2456 | static reg_errcode_t |
2463 | internal_function | 2457 | internal_function |
@@ -2568,7 +2562,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) | |||
2568 | if (naccepted == 0) | 2562 | if (naccepted == 0) |
2569 | continue; | 2563 | continue; |
2570 | 2564 | ||
2571 | /* The node can accepts `naccepted' bytes. */ | 2565 | /* The node can accepts 'naccepted' bytes. */ |
2572 | dest_idx = re_string_cur_idx (&mctx->input) + naccepted; | 2566 | dest_idx = re_string_cur_idx (&mctx->input) + naccepted; |
2573 | mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted | 2567 | mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted |
2574 | : mctx->max_mb_elem_len); | 2568 | : mctx->max_mb_elem_len); |
@@ -2620,7 +2614,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) | |||
2620 | const re_token_t *node = dfa->nodes + node_idx; | 2614 | const re_token_t *node = dfa->nodes + node_idx; |
2621 | re_node_set *new_dest_nodes; | 2615 | re_node_set *new_dest_nodes; |
2622 | 2616 | ||
2623 | /* Check whether `node' is a backreference or not. */ | 2617 | /* Check whether 'node' is a backreference or not. */ |
2624 | if (node->type != OP_BACK_REF) | 2618 | if (node->type != OP_BACK_REF) |
2625 | continue; | 2619 | continue; |
2626 | 2620 | ||
@@ -2632,14 +2626,14 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) | |||
2632 | continue; | 2626 | continue; |
2633 | } | 2627 | } |
2634 | 2628 | ||
2635 | /* `node' is a backreference. | 2629 | /* 'node' is a backreference. |
2636 | Check the substring which the substring matched. */ | 2630 | Check the substring which the substring matched. */ |
2637 | bkc_idx = mctx->nbkref_ents; | 2631 | bkc_idx = mctx->nbkref_ents; |
2638 | err = get_subexp (mctx, node_idx, cur_str_idx); | 2632 | err = get_subexp (mctx, node_idx, cur_str_idx); |
2639 | if (BE (err != REG_NOERROR, 0)) | 2633 | if (BE (err != REG_NOERROR, 0)) |
2640 | goto free_return; | 2634 | goto free_return; |
2641 | 2635 | ||
2642 | /* And add the epsilon closures (which is `new_dest_nodes') of | 2636 | /* And add the epsilon closures (which is 'new_dest_nodes') of |
2643 | the backreference to appropriate state_log. */ | 2637 | the backreference to appropriate state_log. */ |
2644 | #ifdef DEBUG | 2638 | #ifdef DEBUG |
2645 | assert (dfa->nexts[node_idx] != REG_MISSING); | 2639 | assert (dfa->nexts[node_idx] != REG_MISSING); |
@@ -2663,7 +2657,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) | |||
2663 | dest_state = mctx->state_log[dest_str_idx]; | 2657 | dest_state = mctx->state_log[dest_str_idx]; |
2664 | prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 | 2658 | prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 |
2665 | : mctx->state_log[cur_str_idx]->nodes.nelem); | 2659 | : mctx->state_log[cur_str_idx]->nodes.nelem); |
2666 | /* Add `new_dest_node' to state_log. */ | 2660 | /* Add 'new_dest_node' to state_log. */ |
2667 | if (dest_state == NULL) | 2661 | if (dest_state == NULL) |
2668 | { | 2662 | { |
2669 | mctx->state_log[dest_str_idx] | 2663 | mctx->state_log[dest_str_idx] |
@@ -2815,7 +2809,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) | |||
2815 | if (bkref_str_off >= mctx->input.len) | 2809 | if (bkref_str_off >= mctx->input.len) |
2816 | break; | 2810 | break; |
2817 | 2811 | ||
2818 | err = extend_buffers (mctx); | 2812 | err = extend_buffers (mctx, bkref_str_off + 1); |
2819 | if (BE (err != REG_NOERROR, 0)) | 2813 | if (BE (err != REG_NOERROR, 0)) |
2820 | return err; | 2814 | return err; |
2821 | 2815 | ||
@@ -2937,9 +2931,12 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, | |||
2937 | { | 2931 | { |
2938 | re_dfastate_t **new_array; | 2932 | re_dfastate_t **new_array; |
2939 | Idx old_alloc = path->alloc; | 2933 | Idx old_alloc = path->alloc; |
2940 | Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1; | 2934 | Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1; |
2941 | if (BE (new_alloc < old_alloc, 0) | 2935 | Idx new_alloc; |
2942 | || BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0)) | 2936 | if (BE (IDX_MAX - old_alloc < incr_alloc, 0)) |
2937 | return REG_ESPACE; | ||
2938 | new_alloc = old_alloc + incr_alloc; | ||
2939 | if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0)) | ||
2943 | return REG_ESPACE; | 2940 | return REG_ESPACE; |
2944 | new_array = re_realloc (path->array, re_dfastate_t *, new_alloc); | 2941 | new_array = re_realloc (path->array, re_dfastate_t *, new_alloc); |
2945 | if (BE (new_array == NULL, 0)) | 2942 | if (BE (new_array == NULL, 0)) |
@@ -3102,7 +3099,7 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, | |||
3102 | assert (!IS_EPSILON_NODE (type)); | 3099 | assert (!IS_EPSILON_NODE (type)); |
3103 | #endif | 3100 | #endif |
3104 | #ifdef RE_ENABLE_I18N | 3101 | #ifdef RE_ENABLE_I18N |
3105 | /* If the node may accept `multi byte'. */ | 3102 | /* If the node may accept "multi byte". */ |
3106 | if (dfa->nodes[cur_node].accept_mb) | 3103 | if (dfa->nodes[cur_node].accept_mb) |
3107 | { | 3104 | { |
3108 | naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, | 3105 | naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, |
@@ -3359,7 +3356,7 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) | |||
3359 | bitset_word_t elem, mask; | 3356 | bitset_word_t elem, mask; |
3360 | bool dests_node_malloced = false; | 3357 | bool dests_node_malloced = false; |
3361 | bool dest_states_malloced = false; | 3358 | bool dest_states_malloced = false; |
3362 | Idx ndests; /* Number of the destination states from `state'. */ | 3359 | Idx ndests; /* Number of the destination states from 'state'. */ |
3363 | re_dfastate_t **trtable; | 3360 | re_dfastate_t **trtable; |
3364 | re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; | 3361 | re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; |
3365 | re_node_set follows, *dests_node; | 3362 | re_node_set follows, *dests_node; |
@@ -3373,8 +3370,8 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) | |||
3373 | } *dests_alloc; | 3370 | } *dests_alloc; |
3374 | 3371 | ||
3375 | /* We build DFA states which corresponds to the destination nodes | 3372 | /* We build DFA states which corresponds to the destination nodes |
3376 | from `state'. `dests_node[i]' represents the nodes which i-th | 3373 | from 'state'. 'dests_node[i]' represents the nodes which i-th |
3377 | destination state contains, and `dests_ch[i]' represents the | 3374 | destination state contains, and 'dests_ch[i]' represents the |
3378 | characters which i-th destination state accepts. */ | 3375 | characters which i-th destination state accepts. */ |
3379 | if (__libc_use_alloca (sizeof (struct dests_alloc))) | 3376 | if (__libc_use_alloca (sizeof (struct dests_alloc))) |
3380 | dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); | 3377 | dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); |
@@ -3388,20 +3385,23 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) | |||
3388 | dests_node = dests_alloc->dests_node; | 3385 | dests_node = dests_alloc->dests_node; |
3389 | dests_ch = dests_alloc->dests_ch; | 3386 | dests_ch = dests_alloc->dests_ch; |
3390 | 3387 | ||
3391 | /* Initialize transiton table. */ | 3388 | /* Initialize transition table. */ |
3392 | state->word_trtable = state->trtable = NULL; | 3389 | state->word_trtable = state->trtable = NULL; |
3393 | 3390 | ||
3394 | /* At first, group all nodes belonging to `state' into several | 3391 | /* At first, group all nodes belonging to 'state' into several |
3395 | destinations. */ | 3392 | destinations. */ |
3396 | ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); | 3393 | ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); |
3397 | if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0)) | 3394 | if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0)) |
3398 | { | 3395 | { |
3399 | if (dests_node_malloced) | 3396 | if (dests_node_malloced) |
3400 | free (dests_alloc); | 3397 | free (dests_alloc); |
3398 | /* Return false in case of an error, true otherwise. */ | ||
3401 | if (ndests == 0) | 3399 | if (ndests == 0) |
3402 | { | 3400 | { |
3403 | state->trtable = (re_dfastate_t **) | 3401 | state->trtable = (re_dfastate_t **) |
3404 | calloc (sizeof (re_dfastate_t *), SBC_MAX); | 3402 | calloc (sizeof (re_dfastate_t *), SBC_MAX); |
3403 | if (BE (state->trtable == NULL, 0)) | ||
3404 | return false; | ||
3405 | return true; | 3405 | return true; |
3406 | } | 3406 | } |
3407 | return false; | 3407 | return false; |
@@ -3591,13 +3591,13 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
3591 | reg_errcode_t err; | 3591 | reg_errcode_t err; |
3592 | bool ok; | 3592 | bool ok; |
3593 | Idx i, j, k; | 3593 | Idx i, j, k; |
3594 | Idx ndests; /* Number of the destinations from `state'. */ | 3594 | Idx ndests; /* Number of the destinations from 'state'. */ |
3595 | bitset_t accepts; /* Characters a node can accept. */ | 3595 | bitset_t accepts; /* Characters a node can accept. */ |
3596 | const re_node_set *cur_nodes = &state->nodes; | 3596 | const re_node_set *cur_nodes = &state->nodes; |
3597 | bitset_empty (accepts); | 3597 | bitset_empty (accepts); |
3598 | ndests = 0; | 3598 | ndests = 0; |
3599 | 3599 | ||
3600 | /* For all the nodes belonging to `state', */ | 3600 | /* For all the nodes belonging to 'state', */ |
3601 | for (i = 0; i < cur_nodes->nelem; ++i) | 3601 | for (i = 0; i < cur_nodes->nelem; ++i) |
3602 | { | 3602 | { |
3603 | re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; | 3603 | re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; |
@@ -3640,7 +3640,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
3640 | else | 3640 | else |
3641 | continue; | 3641 | continue; |
3642 | 3642 | ||
3643 | /* Check the `accepts' and sift the characters which are not | 3643 | /* Check the 'accepts' and sift the characters which are not |
3644 | match it the context. */ | 3644 | match it the context. */ |
3645 | if (constraint) | 3645 | if (constraint) |
3646 | { | 3646 | { |
@@ -3699,7 +3699,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
3699 | } | 3699 | } |
3700 | } | 3700 | } |
3701 | 3701 | ||
3702 | /* Then divide `accepts' into DFA states, or create a new | 3702 | /* Then divide 'accepts' into DFA states, or create a new |
3703 | state. Above, we make sure that accepts is not empty. */ | 3703 | state. Above, we make sure that accepts is not empty. */ |
3704 | for (j = 0; j < ndests; ++j) | 3704 | for (j = 0; j < ndests; ++j) |
3705 | { | 3705 | { |
@@ -3712,7 +3712,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
3712 | if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) | 3712 | if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) |
3713 | continue; | 3713 | continue; |
3714 | 3714 | ||
3715 | /* Enumerate the intersection set of this state and `accepts'. */ | 3715 | /* Enumerate the intersection set of this state and 'accepts'. */ |
3716 | has_intersec = 0; | 3716 | has_intersec = 0; |
3717 | for (k = 0; k < BITSET_WORDS; ++k) | 3717 | for (k = 0; k < BITSET_WORDS; ++k) |
3718 | has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; | 3718 | has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; |
@@ -3720,7 +3720,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
3720 | if (!has_intersec) | 3720 | if (!has_intersec) |
3721 | continue; | 3721 | continue; |
3722 | 3722 | ||
3723 | /* Then check if this state is a subset of `accepts'. */ | 3723 | /* Then check if this state is a subset of 'accepts'. */ |
3724 | not_subset = not_consumed = 0; | 3724 | not_subset = not_consumed = 0; |
3725 | for (k = 0; k < BITSET_WORDS; ++k) | 3725 | for (k = 0; k < BITSET_WORDS; ++k) |
3726 | { | 3726 | { |
@@ -3728,8 +3728,8 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
3728 | not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; | 3728 | not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; |
3729 | } | 3729 | } |
3730 | 3730 | ||
3731 | /* If this state isn't a subset of `accepts', create a | 3731 | /* If this state isn't a subset of 'accepts', create a |
3732 | new group state, which has the `remains'. */ | 3732 | new group state, which has the 'remains'. */ |
3733 | if (not_subset) | 3733 | if (not_subset) |
3734 | { | 3734 | { |
3735 | bitset_copy (dests_ch[ndests], remains); | 3735 | bitset_copy (dests_ch[ndests], remains); |
@@ -3768,7 +3768,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |||
3768 | } | 3768 | } |
3769 | 3769 | ||
3770 | #ifdef RE_ENABLE_I18N | 3770 | #ifdef RE_ENABLE_I18N |
3771 | /* Check how many bytes the node `dfa->nodes[node_idx]' accepts. | 3771 | /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts. |
3772 | Return the number of the bytes the node accepts. | 3772 | Return the number of the bytes the node accepts. |
3773 | STR_IDX is the current index of the input string. | 3773 | STR_IDX is the current index of the input string. |
3774 | 3774 | ||
@@ -3895,7 +3895,6 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, | |||
3895 | const int32_t *table, *indirect; | 3895 | const int32_t *table, *indirect; |
3896 | const unsigned char *weights, *extra; | 3896 | const unsigned char *weights, *extra; |
3897 | const char *collseqwc; | 3897 | const char *collseqwc; |
3898 | int32_t idx; | ||
3899 | /* This #include defines a local function! */ | 3898 | /* This #include defines a local function! */ |
3900 | # include <locale/weight.h> | 3899 | # include <locale/weight.h> |
3901 | 3900 | ||
@@ -3933,6 +3932,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, | |||
3933 | in_collseq = find_collation_sequence_value (pin, elem_len); | 3932 | in_collseq = find_collation_sequence_value (pin, elem_len); |
3934 | } | 3933 | } |
3935 | /* match with range expression? */ | 3934 | /* match with range expression? */ |
3935 | /* FIXME: Implement rational ranges here, too. */ | ||
3936 | for (i = 0; i < cset->nranges; ++i) | 3936 | for (i = 0; i < cset->nranges; ++i) |
3937 | if (cset->range_starts[i] <= in_collseq | 3937 | if (cset->range_starts[i] <= in_collseq |
3938 | && in_collseq <= cset->range_ends[i]) | 3938 | && in_collseq <= cset->range_ends[i]) |
@@ -3953,7 +3953,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, | |||
3953 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); | 3953 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); |
3954 | indirect = (const int32_t *) | 3954 | indirect = (const int32_t *) |
3955 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); | 3955 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); |
3956 | int32_t idx = findidx (&cp); | 3956 | int32_t idx = findidx (&cp, elem_len); |
3957 | if (idx > 0) | 3957 | if (idx > 0) |
3958 | for (i = 0; i < cset->nequiv_classes; ++i) | 3958 | for (i = 0; i < cset->nequiv_classes; ++i) |
3959 | { | 3959 | { |
@@ -3984,18 +3984,9 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, | |||
3984 | # endif /* _LIBC */ | 3984 | # endif /* _LIBC */ |
3985 | { | 3985 | { |
3986 | /* match with range expression? */ | 3986 | /* match with range expression? */ |
3987 | #if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__) | ||
3988 | wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; | ||
3989 | #else | ||
3990 | wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; | ||
3991 | cmp_buf[2] = wc; | ||
3992 | #endif | ||
3993 | for (i = 0; i < cset->nranges; ++i) | 3987 | for (i = 0; i < cset->nranges; ++i) |
3994 | { | 3988 | { |
3995 | cmp_buf[0] = cset->range_starts[i]; | 3989 | if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i]) |
3996 | cmp_buf[4] = cset->range_ends[i]; | ||
3997 | if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 | ||
3998 | && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) | ||
3999 | { | 3990 | { |
4000 | match_len = char_len; | 3991 | match_len = char_len; |
4001 | goto check_node_accept_bytes_match; | 3992 | goto check_node_accept_bytes_match; |
@@ -4065,7 +4056,7 @@ find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) | |||
4065 | /* Skip the collation sequence value. */ | 4056 | /* Skip the collation sequence value. */ |
4066 | idx += sizeof (uint32_t); | 4057 | idx += sizeof (uint32_t); |
4067 | /* Skip the wide char sequence of the collating element. */ | 4058 | /* Skip the wide char sequence of the collating element. */ |
4068 | idx = idx + sizeof (uint32_t) * (extra[idx] + 1); | 4059 | idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1); |
4069 | /* If we found the entry, return the sequence value. */ | 4060 | /* If we found the entry, return the sequence value. */ |
4070 | if (found) | 4061 | if (found) |
4071 | return *(uint32_t *) (extra + idx); | 4062 | return *(uint32_t *) (extra + idx); |
@@ -4133,17 +4124,20 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node, | |||
4133 | 4124 | ||
4134 | static reg_errcode_t | 4125 | static reg_errcode_t |
4135 | internal_function __attribute_warn_unused_result__ | 4126 | internal_function __attribute_warn_unused_result__ |
4136 | extend_buffers (re_match_context_t *mctx) | 4127 | extend_buffers (re_match_context_t *mctx, int min_len) |
4137 | { | 4128 | { |
4138 | reg_errcode_t ret; | 4129 | reg_errcode_t ret; |
4139 | re_string_t *pstr = &mctx->input; | 4130 | re_string_t *pstr = &mctx->input; |
4140 | 4131 | ||
4141 | /* Avoid overflow. */ | 4132 | /* Avoid overflow. */ |
4142 | if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0)) | 4133 | if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2 |
4134 | <= pstr->bufs_len, 0)) | ||
4143 | return REG_ESPACE; | 4135 | return REG_ESPACE; |
4144 | 4136 | ||
4145 | /* Double the lengthes of the buffers. */ | 4137 | /* Double the lengths of the buffers, but allocate at least MIN_LEN. */ |
4146 | ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); | 4138 | ret = re_string_realloc_buffers (pstr, |
4139 | MAX (min_len, | ||
4140 | MIN (pstr->len, pstr->bufs_len * 2))); | ||
4147 | if (BE (ret != REG_NOERROR, 0)) | 4141 | if (BE (ret != REG_NOERROR, 0)) |
4148 | return ret; | 4142 | return ret; |
4149 | 4143 | ||
@@ -4206,7 +4200,7 @@ match_ctx_init (re_match_context_t *mctx, int eflags, Idx n) | |||
4206 | size_t max_object_size = | 4200 | size_t max_object_size = |
4207 | MAX (sizeof (struct re_backref_cache_entry), | 4201 | MAX (sizeof (struct re_backref_cache_entry), |
4208 | sizeof (re_sub_match_top_t *)); | 4202 | sizeof (re_sub_match_top_t *)); |
4209 | if (BE (SIZE_MAX / max_object_size < n, 0)) | 4203 | if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0)) |
4210 | return REG_ESPACE; | 4204 | return REG_ESPACE; |
4211 | 4205 | ||
4212 | mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); | 4206 | mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); |