summary refs log tree commit diff stats
path: root/gl/regex.h
diff options
context:
space:
mode:
Diffstat (limited to 'gl/regex.h')
-rw-r--r-- gl/regex.h 259
1 files changed, 125 insertions, 134 deletions
diff --git a/gl/regex.h b/gl/regex.h
index d7426c76..854c6eda 100644
--- a/gl/regex.h
+++ b/gl/regex.h
@@ -1,23 +1,22 @@
1/* Definitions for data structures and routines for the regular 1/* Definitions for data structures and routines for the regular
2 expression library. 2 expression library.
3 Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993, 1995, 1996, 1997, 1998, 3 Copyright (C) 1985, 1989-1993, 1995-1998, 2000-2003, 2005-2013 Free Software
4 2000, 2001, 2002, 2003, 2005, 2006, 2009, 2010 Free Software Foundation, 4 Foundation, Inc.
5 Inc.
6 This file is part of the GNU C Library. 5 This file is part of the GNU C Library.
7 6
8 This program is free software; you can redistribute it and/or modify 7 The GNU C Library is free software; you can redistribute it and/or
9 it under the terms of the GNU General Public License as published by 8 modify it under the terms of the GNU General Public
10 the Free Software Foundation; either version 3, or (at your option) 9 License as published by the Free Software Foundation; either
11 any later version. 10 version 3 of the License, or (at your option) any later version.
12 11
13 This program is distributed in the hope that it will be useful, 12 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 GNU General Public License for more details. 15 General Public License for more details.
17 16
18 You should have received a copy of the GNU General Public License along 17 You should have received a copy of the GNU General Public
19 with this program; if not, write to the Free Software Foundation, 18 License along with the GNU C Library; if not, see
20 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 19 <http://www.gnu.org/licenses/>. */
21 20
22#ifndef _REGEX_H 21#ifndef _REGEX_H
23#define _REGEX_H 1 22#define _REGEX_H 1
@@ -29,13 +28,10 @@
29extern "C" { 28extern "C" {
30#endif 29#endif
31 30
32/* Define __USE_GNU_REGEX to declare GNU extensions that violate the 31/* Define __USE_GNU to declare GNU extensions that violate the
33 POSIX name space rules. */ 32 POSIX name space rules. */
34#undef __USE_GNU_REGEX 33#ifdef _GNU_SOURCE
35#if (defined _GNU_SOURCE \ 34# define __USE_GNU 1
36 || (!defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE \
37 && !defined _XOPEN_SOURCE))
38# define __USE_GNU_REGEX 1
39#endif 35#endif
40 36
41#ifdef _REGEX_LARGE_OFFSETS 37#ifdef _REGEX_LARGE_OFFSETS
@@ -46,16 +42,6 @@ extern "C" {
46 supported within glibc itself, and glibc users should not define 42 supported within glibc itself, and glibc users should not define
47 _REGEX_LARGE_OFFSETS. */ 43 _REGEX_LARGE_OFFSETS. */
48 44
49/* The type of the offset of a byte within a string.
50 For historical reasons POSIX 1003.1-2004 requires that regoff_t be
51 at least as wide as off_t. However, many common POSIX platforms set
52 regoff_t to the more-sensible ssize_t and the Open Group has
53 signalled its intention to change the requirement to be that
54 regoff_t be at least as wide as ptrdiff_t and ssize_t; see XBD ERN
55 60 (2005-08-25). We don't know of any hosts where ssize_t or
56 ptrdiff_t is wider than ssize_t, so ssize_t is safe. */
57typedef ssize_t regoff_t;
58
59/* The type of nonnegative object indexes. Traditionally, GNU regex 45/* The type of nonnegative object indexes. Traditionally, GNU regex
60 uses 'int' for these. Code that uses __re_idx_t should work 46 uses 'int' for these. Code that uses __re_idx_t should work
61 regardless of whether the type is signed. */ 47 regardless of whether the type is signed. */
@@ -70,10 +56,8 @@ typedef size_t __re_long_size_t;
70 56
71#else 57#else
72 58
73/* Use types that are binary-compatible with the traditional GNU regex 59/* The traditional GNU regex implementation mishandles strings longer
74 implementation, which mishandles strings longer than INT_MAX. */ 60 than INT_MAX. */
75
76typedef int regoff_t;
77typedef int __re_idx_t; 61typedef int __re_idx_t;
78typedef unsigned int __re_size_t; 62typedef unsigned int __re_size_t;
79typedef unsigned long int __re_long_size_t; 63typedef unsigned long int __re_long_size_t;
@@ -94,8 +78,7 @@ typedef unsigned long int active_reg_t;
94 add or remove a bit, only one other definition need change. */ 78 add or remove a bit, only one other definition need change. */
95typedef unsigned long int reg_syntax_t; 79typedef unsigned long int reg_syntax_t;
96 80
97#ifdef __USE_GNU_REGEX 81#ifdef __USE_GNU
98
99/* If this bit is not set, then \ inside a bracket expression is literal. 82/* If this bit is not set, then \ inside a bracket expression is literal.
100 If set, then such a \ quotes the following character. */ 83 If set, then such a \ quotes the following character. */
101# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) 84# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
@@ -114,10 +97,10 @@ typedef unsigned long int reg_syntax_t;
114/* If this bit is set, then ^ and $ are always anchors (outside bracket 97/* If this bit is set, then ^ and $ are always anchors (outside bracket
115 expressions, of course). 98 expressions, of course).
116 If this bit is not set, then it depends: 99 If this bit is not set, then it depends:
117 ^ is an anchor if it is at the beginning of a regular 100 ^ is an anchor if it is at the beginning of a regular
118 expression or after an open-group or an alternation operator; 101 expression or after an open-group or an alternation operator;
119 $ is an anchor if it is at the end of a regular expression, or 102 $ is an anchor if it is at the end of a regular expression, or
120 before a close-group or an alternation operator. 103 before a close-group or an alternation operator.
121 104
122 This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because 105 This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
123 POSIX draft 11.2 says that * etc. in leading positions is undefined. 106 POSIX draft 11.2 says that * etc. in leading positions is undefined.
@@ -162,9 +145,9 @@ typedef unsigned long int reg_syntax_t;
162 If not set, newline is literal. */ 145 If not set, newline is literal. */
163# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) 146# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
164 147
165/* If this bit is set, then `{...}' defines an interval, and \{ and \} 148/* If this bit is set, then '{...}' defines an interval, and \{ and \}
166 are literals. 149 are literals.
167 If not set, then `\{...\}' defines an interval. */ 150 If not set, then '\{...\}' defines an interval. */
168# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) 151# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
169 152
170/* If this bit is set, (...) defines a group, and \( and \) are literals. 153/* If this bit is set, (...) defines a group, and \( and \) are literals.
@@ -219,15 +202,14 @@ typedef unsigned long int reg_syntax_t;
219 whether ^ should be special. */ 202 whether ^ should be special. */
220# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1) 203# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
221 204
222/* If this bit is set, then \{ cannot be first in an bre or 205/* If this bit is set, then \{ cannot be first in a regex or
223 immediately after an alternation or begin-group operator. */ 206 immediately after an alternation, open-group or \} operator. */
224# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1) 207# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
225 208
226/* If this bit is set, then no_sub will be set to 1 during 209/* If this bit is set, then no_sub will be set to 1 during
227 re_compile_pattern. */ 210 re_compile_pattern. */
228# define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1) 211# define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
229 212#endif
230#endif /* defined __USE_GNU_REGEX */
231 213
232/* This global variable defines the particular regexp syntax to use (for 214/* This global variable defines the particular regexp syntax to use (for
233 some interfaces). When a regexp is compiled, the syntax used is 215 some interfaces). When a regexp is compiled, the syntax used is
@@ -235,7 +217,7 @@ typedef unsigned long int reg_syntax_t;
235 already-compiled regexps. */ 217 already-compiled regexps. */
236extern reg_syntax_t re_syntax_options; 218extern reg_syntax_t re_syntax_options;
237 219
238#ifdef __USE_GNU_REGEX 220#ifdef __USE_GNU
239/* Define combinations of the above bits for the standard possibilities. 221/* Define combinations of the above bits for the standard possibilities.
240 (The [[[ comments delimit what gets put into the Texinfo file, so 222 (The [[[ comments delimit what gets put into the Texinfo file, so
241 don't delete them!) */ 223 don't delete them!) */
@@ -247,16 +229,19 @@ extern reg_syntax_t re_syntax_options;
247 | RE_NO_BK_PARENS | RE_NO_BK_REFS \ 229 | RE_NO_BK_PARENS | RE_NO_BK_REFS \
248 | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ 230 | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
249 | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ 231 | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
232 | RE_CHAR_CLASSES \
250 | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) 233 | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
251 234
252# define RE_SYNTAX_GNU_AWK \ 235# define RE_SYNTAX_GNU_AWK \
253 ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ 236 ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
254 & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \ 237 | RE_INVALID_INTERVAL_ORD) \
255 | RE_CONTEXT_INVALID_OPS )) 238 & ~(RE_DOT_NOT_NULL | RE_CONTEXT_INDEP_OPS \
239 | RE_CONTEXT_INVALID_OPS ))
256 240
257# define RE_SYNTAX_POSIX_AWK \ 241# define RE_SYNTAX_POSIX_AWK \
258 (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ 242 (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
259 | RE_INTERVALS | RE_NO_GNU_OPS) 243 | RE_INTERVALS | RE_NO_GNU_OPS \
244 | RE_INVALID_INTERVAL_ORD)
260 245
261# define RE_SYNTAX_GREP \ 246# define RE_SYNTAX_GREP \
262 (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ 247 (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
@@ -307,13 +292,12 @@ extern reg_syntax_t re_syntax_options;
307 | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) 292 | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
308/* [[[end syntaxes]]] */ 293/* [[[end syntaxes]]] */
309 294
310#endif /* defined __USE_GNU_REGEX */
311
312#ifdef __USE_GNU_REGEX
313
314/* Maximum number of duplicates an interval can allow. POSIX-conforming 295/* Maximum number of duplicates an interval can allow. POSIX-conforming
315 systems might define this in <limits.h>, but we want our 296 systems might define this in <limits.h>, but we want our
316 value, so remove any previous define. */ 297 value, so remove any previous define. */
298# ifdef _REGEX_INCLUDE_LIMITS_H
299# include <limits.h>
300# endif
317# ifdef RE_DUP_MAX 301# ifdef RE_DUP_MAX
318# undef RE_DUP_MAX 302# undef RE_DUP_MAX
319# endif 303# endif
@@ -321,16 +305,15 @@ extern reg_syntax_t re_syntax_options;
321/* RE_DUP_MAX is 2**15 - 1 because an earlier implementation stored 305/* RE_DUP_MAX is 2**15 - 1 because an earlier implementation stored
322 the counter as a 2-byte signed integer. This is no longer true, so 306 the counter as a 2-byte signed integer. This is no longer true, so
323 RE_DUP_MAX could be increased to (INT_MAX / 10 - 1), or to 307 RE_DUP_MAX could be increased to (INT_MAX / 10 - 1), or to
324 ((SIZE_MAX - 2) / 10 - 1) if _REGEX_LARGE_OFFSETS is defined. 308 ((SIZE_MAX - 9) / 10) if _REGEX_LARGE_OFFSETS is defined.
325 However, there would be a huge performance problem if someone 309 However, there would be a huge performance problem if someone
326 actually used a pattern like a\{214748363\}, so RE_DUP_MAX retains 310 actually used a pattern like a\{214748363\}, so RE_DUP_MAX retains
327 its historical value. */ 311 its historical value. */
328# define RE_DUP_MAX (0x7fff) 312# define RE_DUP_MAX (0x7fff)
329 313#endif
330#endif /* defined __USE_GNU_REGEX */
331 314
332 315
333/* POSIX `cflags' bits (i.e., information for `regcomp'). */ 316/* POSIX 'cflags' bits (i.e., information for 'regcomp'). */
334 317
335/* If this bit is set, then use extended regular expression syntax. 318/* If this bit is set, then use extended regular expression syntax.
336 If not set, then use basic regular expression syntax. */ 319 If not set, then use basic regular expression syntax. */
@@ -350,7 +333,7 @@ extern reg_syntax_t re_syntax_options;
350#define REG_NOSUB (1 << 3) 333#define REG_NOSUB (1 << 3)
351 334
352 335
353/* POSIX `eflags' bits (i.e., information for regexec). */ 336/* POSIX 'eflags' bits (i.e., information for regexec). */
354 337
355/* If this bit is set, then the beginning-of-line operator doesn't match 338/* If this bit is set, then the beginning-of-line operator doesn't match
356 the beginning of the string (presumably because it's not the 339 the beginning of the string (presumably because it's not the
@@ -368,7 +351,7 @@ extern reg_syntax_t re_syntax_options;
368 351
369 352
370/* If any error codes are removed, changed, or added, update the 353/* If any error codes are removed, changed, or added, update the
371 `__re_error_msgid' table in regcomp.c. */ 354 '__re_error_msgid' table in regcomp.c. */
372 355
373typedef enum 356typedef enum
374{ 357{
@@ -393,11 +376,11 @@ typedef enum
393 376
394 /* Error codes we've added. */ 377 /* Error codes we've added. */
395 _REG_EEND, /* Premature end. */ 378 _REG_EEND, /* Premature end. */
396 _REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ 379 _REG_ESIZE, /* Too large (e.g., repeat count too large). */
397 _REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ 380 _REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
398} reg_errcode_t; 381} reg_errcode_t;
399 382
400#ifdef _XOPEN_SOURCE 383#if defined _XOPEN_SOURCE || defined __USE_XOPEN2K
401# define REG_ENOSYS _REG_ENOSYS 384# define REG_ENOSYS _REG_ENOSYS
402#endif 385#endif
403#define REG_NOERROR _REG_NOERROR 386#define REG_NOERROR _REG_NOERROR
@@ -418,126 +401,127 @@ typedef enum
418#define REG_ESIZE _REG_ESIZE 401#define REG_ESIZE _REG_ESIZE
419#define REG_ERPAREN _REG_ERPAREN 402#define REG_ERPAREN _REG_ERPAREN
420 403
421/* struct re_pattern_buffer normally uses member names like `buffer' 404/* This data structure represents a compiled pattern. Before calling
422 that POSIX does not allow. In POSIX mode these members have names 405 the pattern compiler, the fields 'buffer', 'allocated', 'fastmap',
423 with leading `re_' (e.g., `re_buffer'). */ 406 and 'translate' can be set. After the pattern has been compiled,
424#ifdef __USE_GNU_REGEX 407 the fields 're_nsub', 'not_bol' and 'not_eol' are available. All
425# define _REG_RE_NAME(id) id 408 other fields are private to the regex routines. */
426# define _REG_RM_NAME(id) id 409
427#else 410#ifndef RE_TRANSLATE_TYPE
428# define _REG_RE_NAME(id) re_##id 411# define __RE_TRANSLATE_TYPE unsigned char *
429# define _REG_RM_NAME(id) rm_##id 412# ifdef __USE_GNU
413# define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE
414# endif
430#endif 415#endif
431 416
432/* The user can specify the type of the re_translate member by 417#ifdef __USE_GNU
433 defining the macro RE_TRANSLATE_TYPE, which defaults to unsigned 418# define __REPB_PREFIX(name) name
434 char *. This pollutes the POSIX name space, so in POSIX mode just
435 use unsigned char *. */
436#ifdef __USE_GNU_REGEX
437# ifndef RE_TRANSLATE_TYPE
438# define RE_TRANSLATE_TYPE unsigned char *
439# endif
440# define REG_TRANSLATE_TYPE RE_TRANSLATE_TYPE
441#else 419#else
442# define REG_TRANSLATE_TYPE unsigned char * 420# define __REPB_PREFIX(name) __##name
443#endif 421#endif
444 422
445/* This data structure represents a compiled pattern. Before calling
446 the pattern compiler, the fields `buffer', `allocated', `fastmap',
447 `translate', and `no_sub' can be set. After the pattern has been
448 compiled, the `re_nsub' field is available. All other fields are
449 private to the regex routines. */
450
451struct re_pattern_buffer 423struct re_pattern_buffer
452{ 424{
453 /* Space that holds the compiled pattern. It is declared as 425 /* Space that holds the compiled pattern. The type
454 `unsigned char *' because its elements are sometimes used as 426 'struct re_dfa_t' is private and is not declared here. */
455 array indexes. */ 427 struct re_dfa_t *__REPB_PREFIX(buffer);
456 unsigned char *_REG_RE_NAME (buffer);
457 428
458 /* Number of bytes to which `buffer' points. */ 429 /* Number of bytes to which 'buffer' points. */
459 __re_long_size_t _REG_RE_NAME (allocated); 430 __re_long_size_t __REPB_PREFIX(allocated);
460 431
461 /* Number of bytes actually used in `buffer'. */ 432 /* Number of bytes actually used in 'buffer'. */
462 __re_long_size_t _REG_RE_NAME (used); 433 __re_long_size_t __REPB_PREFIX(used);
463 434
464 /* Syntax setting with which the pattern was compiled. */ 435 /* Syntax setting with which the pattern was compiled. */
465 reg_syntax_t _REG_RE_NAME (syntax); 436 reg_syntax_t __REPB_PREFIX(syntax);
466 437
467 /* Pointer to a fastmap, if any, otherwise zero. re_search uses the 438 /* Pointer to a fastmap, if any, otherwise zero. re_search uses the
468 fastmap, if there is one, to skip over impossible starting points 439 fastmap, if there is one, to skip over impossible starting points
469 for matches. */ 440 for matches. */
470 char *_REG_RE_NAME (fastmap); 441 char *__REPB_PREFIX(fastmap);
471 442
472 /* Either a translate table to apply to all characters before 443 /* Either a translate table to apply to all characters before
473 comparing them, or zero for no translation. The translation is 444 comparing them, or zero for no translation. The translation is
474 applied to a pattern when it is compiled and to a string when it 445 applied to a pattern when it is compiled and to a string when it
475 is matched. */ 446 is matched. */
476 REG_TRANSLATE_TYPE _REG_RE_NAME (translate); 447 __RE_TRANSLATE_TYPE __REPB_PREFIX(translate);
477 448
478 /* Number of subexpressions found by the compiler. */ 449 /* Number of subexpressions found by the compiler. */
479 size_t re_nsub; 450 size_t re_nsub;
480 451
481 /* Zero if this pattern cannot match the empty string, one else. 452 /* Zero if this pattern cannot match the empty string, one else.
482 Well, in truth it's used only in `re_search_2', to see whether or 453 Well, in truth it's used only in 're_search_2', to see whether or
483 not we should use the fastmap, so we don't set this absolutely 454 not we should use the fastmap, so we don't set this absolutely
484 perfectly; see `re_compile_fastmap' (the `duplicate' case). */ 455 perfectly; see 're_compile_fastmap' (the "duplicate" case). */
485 unsigned int _REG_RE_NAME (can_be_null) : 1; 456 unsigned __REPB_PREFIX(can_be_null) : 1;
486 457
487 /* If REGS_UNALLOCATED, allocate space in the `regs' structure 458 /* If REGS_UNALLOCATED, allocate space in the 'regs' structure
488 for `max (RE_NREGS, re_nsub + 1)' groups. 459 for 'max (RE_NREGS, re_nsub + 1)' groups.
489 If REGS_REALLOCATE, reallocate space if necessary. 460 If REGS_REALLOCATE, reallocate space if necessary.
490 If REGS_FIXED, use what's there. */ 461 If REGS_FIXED, use what's there. */
491#ifdef __USE_GNU_REGEX 462#ifdef __USE_GNU
492# define REGS_UNALLOCATED 0 463# define REGS_UNALLOCATED 0
493# define REGS_REALLOCATE 1 464# define REGS_REALLOCATE 1
494# define REGS_FIXED 2 465# define REGS_FIXED 2
495#endif 466#endif
496 unsigned int _REG_RE_NAME (regs_allocated) : 2; 467 unsigned __REPB_PREFIX(regs_allocated) : 2;
497 468
498 /* Set to zero when `regex_compile' compiles a pattern; set to one 469 /* Set to zero when 're_compile_pattern' compiles a pattern; set to
499 by `re_compile_fastmap' if it updates the fastmap. */ 470 one by 're_compile_fastmap' if it updates the fastmap. */
500 unsigned int _REG_RE_NAME (fastmap_accurate) : 1; 471 unsigned __REPB_PREFIX(fastmap_accurate) : 1;
501 472
502 /* If set, `re_match_2' does not return information about 473 /* If set, 're_match_2' does not return information about
503 subexpressions. */ 474 subexpressions. */
504 unsigned int _REG_RE_NAME (no_sub) : 1; 475 unsigned __REPB_PREFIX(no_sub) : 1;
505 476
506 /* If set, a beginning-of-line anchor doesn't match at the beginning 477 /* If set, a beginning-of-line anchor doesn't match at the beginning
507 of the string. */ 478 of the string. */
508 unsigned int _REG_RE_NAME (not_bol) : 1; 479 unsigned __REPB_PREFIX(not_bol) : 1;
509 480
510 /* Similarly for an end-of-line anchor. */ 481 /* Similarly for an end-of-line anchor. */
511 unsigned int _REG_RE_NAME (not_eol) : 1; 482 unsigned __REPB_PREFIX(not_eol) : 1;
512 483
513 /* If true, an anchor at a newline matches. */ 484 /* If true, an anchor at a newline matches. */
514 unsigned int _REG_RE_NAME (newline_anchor) : 1; 485 unsigned __REPB_PREFIX(newline_anchor) : 1;
515
516/* [[[end pattern_buffer]]] */
517}; 486};
518 487
519typedef struct re_pattern_buffer regex_t; 488typedef struct re_pattern_buffer regex_t;
520 489
490/* Type for byte offsets within the string. POSIX mandates this. */
491#ifdef _REGEX_LARGE_OFFSETS
492/* POSIX 1003.1-2008 requires that regoff_t be at least as wide as
493 ptrdiff_t and ssize_t. We don't know of any hosts where ptrdiff_t
494 is wider than ssize_t, so ssize_t is safe. */
495typedef ssize_t regoff_t;
496#else
497/* The traditional GNU regex implementation mishandles strings longer
498 than INT_MAX. */
499typedef int regoff_t;
500#endif
501
502
503#ifdef __USE_GNU
521/* This is the structure we store register match data in. See 504/* This is the structure we store register match data in. See
522 regex.texinfo for a full description of what registers match. */ 505 regex.texinfo for a full description of what registers match. */
523struct re_registers 506struct re_registers
524{ 507{
525 __re_size_t _REG_RM_NAME (num_regs); 508 __re_size_t num_regs;
526 regoff_t *_REG_RM_NAME (start); 509 regoff_t *start;
527 regoff_t *_REG_RM_NAME (end); 510 regoff_t *end;
528}; 511};
529 512
530 513
531/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, 514/* If 'regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
532 `re_match_2' returns information about at least this many registers 515 're_match_2' returns information about at least this many registers
533 the first time a `regs' structure is passed. */ 516 the first time a 'regs' structure is passed. */
534#if !defined RE_NREGS && defined __USE_GNU_REGEX 517# ifndef RE_NREGS
535# define RE_NREGS 30 518# define RE_NREGS 30
519# endif
536#endif 520#endif
537 521
538 522
539/* POSIX specification for registers. Aside from the different names than 523/* POSIX specification for registers. Aside from the different names than
540 `re_registers', POSIX uses an array of structures, instead of a 524 're_registers', POSIX uses an array of structures, instead of a
541 structure of arrays. */ 525 structure of arrays. */
542typedef struct 526typedef struct
543{ 527{
@@ -547,13 +531,19 @@ typedef struct
547 531
548/* Declarations for routines. */ 532/* Declarations for routines. */
549 533
534#ifdef __USE_GNU
550/* Sets the current default syntax to SYNTAX, and return the old syntax. 535/* Sets the current default syntax to SYNTAX, and return the old syntax.
551 You can also simply assign to the `re_syntax_options' variable. */ 536 You can also simply assign to the 're_syntax_options' variable. */
552extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); 537extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
553 538
554/* Compile the regular expression PATTERN, with length LENGTH 539/* Compile the regular expression PATTERN, with length LENGTH
555 and syntax given by the global `re_syntax_options', into the buffer 540 and syntax given by the global 're_syntax_options', into the buffer
556 BUFFER. Return NULL if successful, and an error string if not. */ 541 BUFFER. Return NULL if successful, and an error string if not.
542
543 To free the allocated storage, you must call 'regfree' on BUFFER.
544 Note that the translate table must either have been initialised by
545 'regcomp', with a malloc'ed value, or set to NULL before calling
546 'regfree'. */
557extern const char *re_compile_pattern (const char *__pattern, size_t __length, 547extern const char *re_compile_pattern (const char *__pattern, size_t __length,
558 struct re_pattern_buffer *__buffer); 548 struct re_pattern_buffer *__buffer);
559 549
@@ -575,7 +565,7 @@ extern regoff_t re_search (struct re_pattern_buffer *__buffer,
575 struct re_registers *__regs); 565 struct re_registers *__regs);
576 566
577 567
578/* Like `re_search', but search in the concatenation of STRING1 and 568/* Like 're_search', but search in the concatenation of STRING1 and
579 STRING2. Also, stop searching at index START + STOP. */ 569 STRING2. Also, stop searching at index START + STOP. */
580extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer, 570extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer,
581 const char *__string1, __re_idx_t __length1, 571 const char *__string1, __re_idx_t __length1,
@@ -585,14 +575,14 @@ extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer,
585 __re_idx_t __stop); 575 __re_idx_t __stop);
586 576
587 577
588/* Like `re_search', but return how many characters in STRING the regexp 578/* Like 're_search', but return how many characters in STRING the regexp
589 in BUFFER matched, starting at position START. */ 579 in BUFFER matched, starting at position START. */
590extern regoff_t re_match (struct re_pattern_buffer *__buffer, 580extern regoff_t re_match (struct re_pattern_buffer *__buffer,
591 const char *__string, __re_idx_t __length, 581 const char *__string, __re_idx_t __length,
592 __re_idx_t __start, struct re_registers *__regs); 582 __re_idx_t __start, struct re_registers *__regs);
593 583
594 584
595/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ 585/* Relates to 're_match' as 're_search_2' relates to 're_search'. */
596extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer, 586extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer,
597 const char *__string1, __re_idx_t __length1, 587 const char *__string1, __re_idx_t __length1,
598 const char *__string2, __re_idx_t __length2, 588 const char *__string2, __re_idx_t __length2,
@@ -603,21 +593,22 @@ extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer,
603/* Set REGS to hold NUM_REGS registers, storing them in STARTS and 593/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
604 ENDS. Subsequent matches using BUFFER and REGS will use this memory 594 ENDS. Subsequent matches using BUFFER and REGS will use this memory
605 for recording register information. STARTS and ENDS must be 595 for recording register information. STARTS and ENDS must be
606 allocated with malloc, and must each be at least `NUM_REGS * sizeof 596 allocated with malloc, and must each be at least 'NUM_REGS * sizeof
607 (regoff_t)' bytes long. 597 (regoff_t)' bytes long.
608 598
609 If NUM_REGS == 0, then subsequent matches should allocate their own 599 If NUM_REGS == 0, then subsequent matches should allocate their own
610 register data. 600 register data.
611 601
612 Unless this function is called, the first search or match using 602 Unless this function is called, the first search or match using
613 PATTERN_BUFFER will allocate its own register data, without 603 BUFFER will allocate its own register data, without
614 freeing the old data. */ 604 freeing the old data. */
615extern void re_set_registers (struct re_pattern_buffer *__buffer, 605extern void re_set_registers (struct re_pattern_buffer *__buffer,
616 struct re_registers *__regs, 606 struct re_registers *__regs,
617 __re_size_t __num_regs, 607 __re_size_t __num_regs,
618 regoff_t *__starts, regoff_t *__ends); 608 regoff_t *__starts, regoff_t *__ends);
609#endif /* Use GNU */
619 610
620#if defined _REGEX_RE_COMP || defined _LIBC 611#if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_BSD)
621# ifndef _CRAY 612# ifndef _CRAY
622/* 4.2 bsd compatibility. */ 613/* 4.2 bsd compatibility. */
623extern char *re_comp (const char *); 614extern char *re_comp (const char *);
@@ -645,7 +636,7 @@ extern int re_exec (const char *);
645#ifndef _Restrict_arr_ 636#ifndef _Restrict_arr_
646# if ((199901L <= __STDC_VERSION__ \ 637# if ((199901L <= __STDC_VERSION__ \
647 || ((3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) \ 638 || ((3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) \
648 && !__STRICT_ANSI__)) \ 639 && !defined __STRICT_ANSI__)) \
649 && !defined __GNUG__) 640 && !defined __GNUG__)
650# define _Restrict_arr_ _Restrict_ 641# define _Restrict_arr_ _Restrict_
651# else 642# else