diff options
Diffstat (limited to 'gl/regex_internal.c')
-rw-r--r-- | gl/regex_internal.c | 64 |
1 files changed, 11 insertions, 53 deletions
diff --git a/gl/regex_internal.c b/gl/regex_internal.c index aefcfa2..0e6919f 100644 --- a/gl/regex_internal.c +++ b/gl/regex_internal.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* Extended regular expression matching and search library. | 1 | /* Extended regular expression matching and search library. |
2 | Copyright (C) 2002-2021 Free Software Foundation, Inc. | 2 | Copyright (C) 2002-2022 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. | 3 | This file is part of the GNU C Library. |
4 | Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. | 4 | Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. |
5 | 5 | ||
@@ -30,10 +30,8 @@ static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa, | |||
30 | re_hashval_t hash); | 30 | re_hashval_t hash); |
31 | static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, | 31 | static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, |
32 | Idx new_buf_len); | 32 | Idx new_buf_len); |
33 | #ifdef RE_ENABLE_I18N | ||
34 | static void build_wcs_buffer (re_string_t *pstr); | 33 | static void build_wcs_buffer (re_string_t *pstr); |
35 | static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr); | 34 | static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr); |
36 | #endif /* RE_ENABLE_I18N */ | ||
37 | static void build_upper_buffer (re_string_t *pstr); | 35 | static void build_upper_buffer (re_string_t *pstr); |
38 | static void re_string_translate_buffer (re_string_t *pstr); | 36 | static void re_string_translate_buffer (re_string_t *pstr); |
39 | static unsigned int re_string_context_at (const re_string_t *input, Idx idx, | 37 | static unsigned int re_string_context_at (const re_string_t *input, Idx idx, |
@@ -91,7 +89,6 @@ re_string_construct (re_string_t *pstr, const char *str, Idx len, | |||
91 | 89 | ||
92 | if (icase) | 90 | if (icase) |
93 | { | 91 | { |
94 | #ifdef RE_ENABLE_I18N | ||
95 | if (dfa->mb_cur_max > 1) | 92 | if (dfa->mb_cur_max > 1) |
96 | { | 93 | { |
97 | while (1) | 94 | while (1) |
@@ -109,16 +106,13 @@ re_string_construct (re_string_t *pstr, const char *str, Idx len, | |||
109 | } | 106 | } |
110 | } | 107 | } |
111 | else | 108 | else |
112 | #endif /* RE_ENABLE_I18N */ | ||
113 | build_upper_buffer (pstr); | 109 | build_upper_buffer (pstr); |
114 | } | 110 | } |
115 | else | 111 | else |
116 | { | 112 | { |
117 | #ifdef RE_ENABLE_I18N | ||
118 | if (dfa->mb_cur_max > 1) | 113 | if (dfa->mb_cur_max > 1) |
119 | build_wcs_buffer (pstr); | 114 | build_wcs_buffer (pstr); |
120 | else | 115 | else |
121 | #endif /* RE_ENABLE_I18N */ | ||
122 | { | 116 | { |
123 | if (trans != NULL) | 117 | if (trans != NULL) |
124 | re_string_translate_buffer (pstr); | 118 | re_string_translate_buffer (pstr); |
@@ -139,7 +133,6 @@ static reg_errcode_t | |||
139 | __attribute_warn_unused_result__ | 133 | __attribute_warn_unused_result__ |
140 | re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len) | 134 | re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len) |
141 | { | 135 | { |
142 | #ifdef RE_ENABLE_I18N | ||
143 | if (pstr->mb_cur_max > 1) | 136 | if (pstr->mb_cur_max > 1) |
144 | { | 137 | { |
145 | wint_t *new_wcs; | 138 | wint_t *new_wcs; |
@@ -162,7 +155,6 @@ re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len) | |||
162 | pstr->offsets = new_offsets; | 155 | pstr->offsets = new_offsets; |
163 | } | 156 | } |
164 | } | 157 | } |
165 | #endif /* RE_ENABLE_I18N */ | ||
166 | if (pstr->mbs_allocated) | 158 | if (pstr->mbs_allocated) |
167 | { | 159 | { |
168 | unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char, | 160 | unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char, |
@@ -194,7 +186,6 @@ re_string_construct_common (const char *str, Idx len, re_string_t *pstr, | |||
194 | pstr->raw_stop = pstr->stop; | 186 | pstr->raw_stop = pstr->stop; |
195 | } | 187 | } |
196 | 188 | ||
197 | #ifdef RE_ENABLE_I18N | ||
198 | 189 | ||
199 | /* Build wide character buffer PSTR->WCS. | 190 | /* Build wide character buffer PSTR->WCS. |
200 | If the byte sequence of the string are: | 191 | If the byte sequence of the string are: |
@@ -530,7 +521,6 @@ re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc) | |||
530 | *last_wc = wc; | 521 | *last_wc = wc; |
531 | return rawbuf_idx; | 522 | return rawbuf_idx; |
532 | } | 523 | } |
533 | #endif /* RE_ENABLE_I18N */ | ||
534 | 524 | ||
535 | /* Build the buffer PSTR->MBS, and apply the translation if we need. | 525 | /* Build the buffer PSTR->MBS, and apply the translation if we need. |
536 | This function is used in case of REG_ICASE. */ | 526 | This function is used in case of REG_ICASE. */ |
@@ -585,10 +575,8 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) | |||
585 | else | 575 | else |
586 | { | 576 | { |
587 | /* Reset buffer. */ | 577 | /* Reset buffer. */ |
588 | #ifdef RE_ENABLE_I18N | ||
589 | if (pstr->mb_cur_max > 1) | 578 | if (pstr->mb_cur_max > 1) |
590 | memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); | 579 | memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); |
591 | #endif /* RE_ENABLE_I18N */ | ||
592 | pstr->len = pstr->raw_len; | 580 | pstr->len = pstr->raw_len; |
593 | pstr->stop = pstr->raw_stop; | 581 | pstr->stop = pstr->raw_stop; |
594 | pstr->valid_len = 0; | 582 | pstr->valid_len = 0; |
@@ -608,7 +596,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) | |||
608 | if (__glibc_likely (offset < pstr->valid_raw_len)) | 596 | if (__glibc_likely (offset < pstr->valid_raw_len)) |
609 | { | 597 | { |
610 | /* Yes, move them to the front of the buffer. */ | 598 | /* Yes, move them to the front of the buffer. */ |
611 | #ifdef RE_ENABLE_I18N | ||
612 | if (__glibc_unlikely (pstr->offsets_needed)) | 599 | if (__glibc_unlikely (pstr->offsets_needed)) |
613 | { | 600 | { |
614 | Idx low = 0, high = pstr->valid_len, mid; | 601 | Idx low = 0, high = pstr->valid_len, mid; |
@@ -672,15 +659,12 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) | |||
672 | } | 659 | } |
673 | } | 660 | } |
674 | else | 661 | else |
675 | #endif | ||
676 | { | 662 | { |
677 | pstr->tip_context = re_string_context_at (pstr, offset - 1, | 663 | pstr->tip_context = re_string_context_at (pstr, offset - 1, |
678 | eflags); | 664 | eflags); |
679 | #ifdef RE_ENABLE_I18N | ||
680 | if (pstr->mb_cur_max > 1) | 665 | if (pstr->mb_cur_max > 1) |
681 | memmove (pstr->wcs, pstr->wcs + offset, | 666 | memmove (pstr->wcs, pstr->wcs + offset, |
682 | (pstr->valid_len - offset) * sizeof (wint_t)); | 667 | (pstr->valid_len - offset) * sizeof (wint_t)); |
683 | #endif /* RE_ENABLE_I18N */ | ||
684 | if (__glibc_unlikely (pstr->mbs_allocated)) | 668 | if (__glibc_unlikely (pstr->mbs_allocated)) |
685 | memmove (pstr->mbs, pstr->mbs + offset, | 669 | memmove (pstr->mbs, pstr->mbs + offset, |
686 | pstr->valid_len - offset); | 670 | pstr->valid_len - offset); |
@@ -691,7 +675,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) | |||
691 | } | 675 | } |
692 | else | 676 | else |
693 | { | 677 | { |
694 | #ifdef RE_ENABLE_I18N | ||
695 | /* No, skip all characters until IDX. */ | 678 | /* No, skip all characters until IDX. */ |
696 | Idx prev_valid_len = pstr->valid_len; | 679 | Idx prev_valid_len = pstr->valid_len; |
697 | 680 | ||
@@ -701,9 +684,7 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) | |||
701 | pstr->stop = pstr->raw_stop - idx + offset; | 684 | pstr->stop = pstr->raw_stop - idx + offset; |
702 | pstr->offsets_needed = 0; | 685 | pstr->offsets_needed = 0; |
703 | } | 686 | } |
704 | #endif | ||
705 | pstr->valid_len = 0; | 687 | pstr->valid_len = 0; |
706 | #ifdef RE_ENABLE_I18N | ||
707 | if (pstr->mb_cur_max > 1) | 688 | if (pstr->mb_cur_max > 1) |
708 | { | 689 | { |
709 | Idx wcs_idx; | 690 | Idx wcs_idx; |
@@ -787,7 +768,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) | |||
787 | pstr->valid_raw_len = pstr->valid_len; | 768 | pstr->valid_raw_len = pstr->valid_len; |
788 | } | 769 | } |
789 | else | 770 | else |
790 | #endif /* RE_ENABLE_I18N */ | ||
791 | { | 771 | { |
792 | int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; | 772 | int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; |
793 | pstr->valid_raw_len = 0; | 773 | pstr->valid_raw_len = 0; |
@@ -807,7 +787,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) | |||
807 | pstr->stop -= offset; | 787 | pstr->stop -= offset; |
808 | 788 | ||
809 | /* Then build the buffers. */ | 789 | /* Then build the buffers. */ |
810 | #ifdef RE_ENABLE_I18N | ||
811 | if (pstr->mb_cur_max > 1) | 790 | if (pstr->mb_cur_max > 1) |
812 | { | 791 | { |
813 | if (pstr->icase) | 792 | if (pstr->icase) |
@@ -820,7 +799,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) | |||
820 | build_wcs_buffer (pstr); | 799 | build_wcs_buffer (pstr); |
821 | } | 800 | } |
822 | else | 801 | else |
823 | #endif /* RE_ENABLE_I18N */ | ||
824 | if (__glibc_unlikely (pstr->mbs_allocated)) | 802 | if (__glibc_unlikely (pstr->mbs_allocated)) |
825 | { | 803 | { |
826 | if (pstr->icase) | 804 | if (pstr->icase) |
@@ -846,28 +824,22 @@ re_string_peek_byte_case (const re_string_t *pstr, Idx idx) | |||
846 | if (__glibc_likely (!pstr->mbs_allocated)) | 824 | if (__glibc_likely (!pstr->mbs_allocated)) |
847 | return re_string_peek_byte (pstr, idx); | 825 | return re_string_peek_byte (pstr, idx); |
848 | 826 | ||
849 | #ifdef RE_ENABLE_I18N | ||
850 | if (pstr->mb_cur_max > 1 | 827 | if (pstr->mb_cur_max > 1 |
851 | && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx)) | 828 | && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx)) |
852 | return re_string_peek_byte (pstr, idx); | 829 | return re_string_peek_byte (pstr, idx); |
853 | #endif | ||
854 | 830 | ||
855 | off = pstr->cur_idx + idx; | 831 | off = pstr->cur_idx + idx; |
856 | #ifdef RE_ENABLE_I18N | ||
857 | if (pstr->offsets_needed) | 832 | if (pstr->offsets_needed) |
858 | off = pstr->offsets[off]; | 833 | off = pstr->offsets[off]; |
859 | #endif | ||
860 | 834 | ||
861 | ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; | 835 | ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; |
862 | 836 | ||
863 | #ifdef RE_ENABLE_I18N | ||
864 | /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I | 837 | /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I |
865 | this function returns CAPITAL LETTER I instead of first byte of | 838 | this function returns CAPITAL LETTER I instead of first byte of |
866 | DOTLESS SMALL LETTER I. The latter would confuse the parser, | 839 | DOTLESS SMALL LETTER I. The latter would confuse the parser, |
867 | since peek_byte_case doesn't advance cur_idx in any way. */ | 840 | since peek_byte_case doesn't advance cur_idx in any way. */ |
868 | if (pstr->offsets_needed && !isascii (ch)) | 841 | if (pstr->offsets_needed && !isascii (ch)) |
869 | return re_string_peek_byte (pstr, idx); | 842 | return re_string_peek_byte (pstr, idx); |
870 | #endif | ||
871 | 843 | ||
872 | return ch; | 844 | return ch; |
873 | } | 845 | } |
@@ -878,7 +850,6 @@ re_string_fetch_byte_case (re_string_t *pstr) | |||
878 | if (__glibc_likely (!pstr->mbs_allocated)) | 850 | if (__glibc_likely (!pstr->mbs_allocated)) |
879 | return re_string_fetch_byte (pstr); | 851 | return re_string_fetch_byte (pstr); |
880 | 852 | ||
881 | #ifdef RE_ENABLE_I18N | ||
882 | if (pstr->offsets_needed) | 853 | if (pstr->offsets_needed) |
883 | { | 854 | { |
884 | Idx off; | 855 | Idx off; |
@@ -904,7 +875,6 @@ re_string_fetch_byte_case (re_string_t *pstr) | |||
904 | re_string_char_size_at (pstr, pstr->cur_idx)); | 875 | re_string_char_size_at (pstr, pstr->cur_idx)); |
905 | return ch; | 876 | return ch; |
906 | } | 877 | } |
907 | #endif | ||
908 | 878 | ||
909 | return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++]; | 879 | return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++]; |
910 | } | 880 | } |
@@ -912,10 +882,8 @@ re_string_fetch_byte_case (re_string_t *pstr) | |||
912 | static void | 882 | static void |
913 | re_string_destruct (re_string_t *pstr) | 883 | re_string_destruct (re_string_t *pstr) |
914 | { | 884 | { |
915 | #ifdef RE_ENABLE_I18N | ||
916 | re_free (pstr->wcs); | 885 | re_free (pstr->wcs); |
917 | re_free (pstr->offsets); | 886 | re_free (pstr->offsets); |
918 | #endif /* RE_ENABLE_I18N */ | ||
919 | if (pstr->mbs_allocated) | 887 | if (pstr->mbs_allocated) |
920 | re_free (pstr->mbs); | 888 | re_free (pstr->mbs); |
921 | } | 889 | } |
@@ -933,7 +901,6 @@ re_string_context_at (const re_string_t *input, Idx idx, int eflags) | |||
933 | if (__glibc_unlikely (idx == input->len)) | 901 | if (__glibc_unlikely (idx == input->len)) |
934 | return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF | 902 | return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF |
935 | : CONTEXT_NEWLINE | CONTEXT_ENDBUF); | 903 | : CONTEXT_NEWLINE | CONTEXT_ENDBUF); |
936 | #ifdef RE_ENABLE_I18N | ||
937 | if (input->mb_cur_max > 1) | 904 | if (input->mb_cur_max > 1) |
938 | { | 905 | { |
939 | wint_t wc; | 906 | wint_t wc; |
@@ -953,7 +920,6 @@ re_string_context_at (const re_string_t *input, Idx idx, int eflags) | |||
953 | ? CONTEXT_NEWLINE : 0); | 920 | ? CONTEXT_NEWLINE : 0); |
954 | } | 921 | } |
955 | else | 922 | else |
956 | #endif | ||
957 | { | 923 | { |
958 | c = re_string_byte_at (input, idx); | 924 | c = re_string_byte_at (input, idx); |
959 | if (bitset_contain (input->word_char, c)) | 925 | if (bitset_contain (input->word_char, c)) |
@@ -1430,32 +1396,28 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token) | |||
1430 | if (__glibc_unlikely (new_nodes == NULL)) | 1396 | if (__glibc_unlikely (new_nodes == NULL)) |
1431 | return -1; | 1397 | return -1; |
1432 | dfa->nodes = new_nodes; | 1398 | dfa->nodes = new_nodes; |
1399 | dfa->nodes_alloc = new_nodes_alloc; | ||
1433 | new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc); | 1400 | new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc); |
1401 | if (new_nexts != NULL) | ||
1402 | dfa->nexts = new_nexts; | ||
1434 | new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc); | 1403 | new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc); |
1404 | if (new_indices != NULL) | ||
1405 | dfa->org_indices = new_indices; | ||
1435 | new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc); | 1406 | new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc); |
1407 | if (new_edests != NULL) | ||
1408 | dfa->edests = new_edests; | ||
1436 | new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); | 1409 | new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); |
1410 | if (new_eclosures != NULL) | ||
1411 | dfa->eclosures = new_eclosures; | ||
1437 | if (__glibc_unlikely (new_nexts == NULL || new_indices == NULL | 1412 | if (__glibc_unlikely (new_nexts == NULL || new_indices == NULL |
1438 | || new_edests == NULL || new_eclosures == NULL)) | 1413 | || new_edests == NULL || new_eclosures == NULL)) |
1439 | { | 1414 | return -1; |
1440 | re_free (new_nexts); | ||
1441 | re_free (new_indices); | ||
1442 | re_free (new_edests); | ||
1443 | re_free (new_eclosures); | ||
1444 | return -1; | ||
1445 | } | ||
1446 | dfa->nexts = new_nexts; | ||
1447 | dfa->org_indices = new_indices; | ||
1448 | dfa->edests = new_edests; | ||
1449 | dfa->eclosures = new_eclosures; | ||
1450 | dfa->nodes_alloc = new_nodes_alloc; | ||
1451 | } | 1415 | } |
1452 | dfa->nodes[dfa->nodes_len] = token; | 1416 | dfa->nodes[dfa->nodes_len] = token; |
1453 | dfa->nodes[dfa->nodes_len].constraint = 0; | 1417 | dfa->nodes[dfa->nodes_len].constraint = 0; |
1454 | #ifdef RE_ENABLE_I18N | ||
1455 | dfa->nodes[dfa->nodes_len].accept_mb = | 1418 | dfa->nodes[dfa->nodes_len].accept_mb = |
1456 | ((token.type == OP_PERIOD && dfa->mb_cur_max > 1) | 1419 | ((token.type == OP_PERIOD && dfa->mb_cur_max > 1) |
1457 | || token.type == COMPLEX_BRACKET); | 1420 | || token.type == COMPLEX_BRACKET); |
1458 | #endif | ||
1459 | dfa->nexts[dfa->nodes_len] = -1; | 1421 | dfa->nexts[dfa->nodes_len] = -1; |
1460 | re_node_set_init_empty (dfa->edests + dfa->nodes_len); | 1422 | re_node_set_init_empty (dfa->edests + dfa->nodes_len); |
1461 | re_node_set_init_empty (dfa->eclosures + dfa->nodes_len); | 1423 | re_node_set_init_empty (dfa->eclosures + dfa->nodes_len); |
@@ -1651,9 +1613,7 @@ create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, | |||
1651 | re_token_type_t type = node->type; | 1613 | re_token_type_t type = node->type; |
1652 | if (type == CHARACTER && !node->constraint) | 1614 | if (type == CHARACTER && !node->constraint) |
1653 | continue; | 1615 | continue; |
1654 | #ifdef RE_ENABLE_I18N | ||
1655 | newstate->accept_mb |= node->accept_mb; | 1616 | newstate->accept_mb |= node->accept_mb; |
1656 | #endif /* RE_ENABLE_I18N */ | ||
1657 | 1617 | ||
1658 | /* If the state has the halt node, the state is a halt state. */ | 1618 | /* If the state has the halt node, the state is a halt state. */ |
1659 | if (type == END_OF_RE) | 1619 | if (type == END_OF_RE) |
@@ -1705,9 +1665,7 @@ create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, | |||
1705 | 1665 | ||
1706 | if (type == CHARACTER && !constraint) | 1666 | if (type == CHARACTER && !constraint) |
1707 | continue; | 1667 | continue; |
1708 | #ifdef RE_ENABLE_I18N | ||
1709 | newstate->accept_mb |= node->accept_mb; | 1668 | newstate->accept_mb |= node->accept_mb; |
1710 | #endif /* RE_ENABLE_I18N */ | ||
1711 | 1669 | ||
1712 | /* If the state has the halt node, the state is a halt state. */ | 1670 | /* If the state has the halt node, the state is a halt state. */ |
1713 | if (type == END_OF_RE) | 1671 | if (type == END_OF_RE) |