summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorTon Voon <tonvoon@users.sourceforge.net>2007-01-24 22:47:25 +0000
committerTon Voon <tonvoon@users.sourceforge.net>2007-01-24 22:47:25 +0000
commitfe856aa957978504137c1d425815d4ed8a22be40 (patch)
treea5bb46ce0e64b2056f75700eadbf27aba7c39418 /lib
parent210f39bc84cfbb21cd72dc054e43f13815ee0616 (diff)
downloadmonitoring-plugins-fe856aa957978504137c1d425815d4ed8a22be40.tar.gz
Sync with gnulib - lots of extraneous code removed in preference to GNU code
git-svn-id: https://nagiosplug.svn.sourceforge.net/svnroot/nagiosplug/nagiosplug/trunk@1580 f882894a-f735-0410-b71e-b25c423dba1c
Diffstat (limited to 'lib')
-rw-r--r--lib/Makefile.am79
-rw-r--r--lib/alloca.c491
-rw-r--r--lib/alloca_.h52
-rw-r--r--lib/basename.c79
-rw-r--r--lib/c-strtod.c81
-rw-r--r--lib/c-strtod.h2
-rw-r--r--lib/c-strtold.c2
-rw-r--r--lib/cloexec.c60
-rw-r--r--lib/cloexec.h2
-rw-r--r--lib/creat-safer.c33
-rw-r--r--lib/dirname.h47
-rw-r--r--lib/dup-safer.c46
-rw-r--r--lib/error.c304
-rw-r--r--lib/error.h66
-rw-r--r--lib/exit.h32
-rw-r--r--lib/exitfail.c27
-rw-r--r--lib/exitfail.h20
-rw-r--r--lib/fcntl--.h28
-rw-r--r--lib/fcntl-safer.h24
-rw-r--r--lib/fd-safer.c59
-rw-r--r--lib/fsusage.c289
-rw-r--r--lib/fsusage.h40
-rw-r--r--lib/full-read.c19
-rw-r--r--lib/full-read.h24
-rw-r--r--lib/full-write.c83
-rw-r--r--lib/full-write.h35
-rw-r--r--lib/getloadavg.c1017
-rw-r--r--lib/getopt.c1241
-rw-r--r--lib/getopt1.c174
-rw-r--r--lib/getopt_.h225
-rw-r--r--lib/getopt_int.h131
-rw-r--r--lib/gettext.h78
-rw-r--r--lib/intprops.h65
-rw-r--r--lib/malloc.c36
-rw-r--r--lib/mountlist.c820
-rw-r--r--lib/mountlist.h41
-rw-r--r--lib/open-safer.c51
-rw-r--r--lib/pipe-safer.c50
-rw-r--r--lib/realloc.c46
-rw-r--r--lib/regcomp.c3779
-rw-r--r--lib/regex.c68
-rw-r--r--lib/regex.h769
-rw-r--r--lib/regex_internal.c1656
-rw-r--r--lib/regex_internal.h911
-rw-r--r--lib/regexec.c4333
-rw-r--r--lib/safe-read.c80
-rw-r--r--lib/safe-read.h25
-rw-r--r--lib/safe-write.c19
-rw-r--r--lib/safe-write.h25
-rw-r--r--lib/snprintf.c1023
-rw-r--r--lib/stdbool_.h105
-rw-r--r--lib/strcase.h48
-rw-r--r--lib/strtod.c189
-rw-r--r--lib/tests/Makefile.am2
-rw-r--r--lib/unistd--.h28
-rw-r--r--lib/unistd-safer.h23
-rw-r--r--lib/unlocked-io.h137
-rw-r--r--lib/xalloc-die.c45
-rw-r--r--lib/xalloc.h79
-rw-r--r--lib/xmalloc.c241
-rw-r--r--lib/xstrdup.c33
61 files changed, 5 insertions, 19612 deletions
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 7edac1f2..3909bb9b 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -2,85 +2,14 @@
2 2
3SUBDIRS = tests 3SUBDIRS = tests
4 4
5noinst_LIBRARIES = libcoreutils.a libnagiosplug.a 5noinst_LIBRARIES = libnagiosplug.a
6 6
7# Will auto pick up fsusage.c mountlist.c
8libcoreutils_a_SOURCES = \
9 cloexec.c cloexec.h \
10 exit.h \
11 full-read.c full-read.h \
12 full-write.c full-write.h \
13 gettext.h \
14 safe-read.c safe-read.h \
15 safe-write.c safe-write.h strcase.h xalloc-die.c
16 7
17# intprops.h required for getloadavg.c 8libnagiosplug_a_SOURCES = utils_base.c utils_disk.c
18# This needs to be an EXTRA_DIST because the m4s 9EXTRA_DIST = utils_base.h utils_disk.h
19# that have AC_LIBSOURCES for intprops.h are not necessary and
20# the getloadavg m4 is an autoconf one, so doesn't
21# know about intprops.h. Confused? You will be!
22EXTRA_DIST = intprops.h
23 10
24other_coreutils_files = \ 11INCLUDES = -I$(srcdir) -I$(top_srcdir)/gl -I$(top_srcdir)/intl -I$(top_srcdir)/plugins
25 error.c error.h \
26 fsusage.c \
27 getloadavg.c \
28 malloc.c \
29 mountlist.c \
30 realloc.c \
31 strtod.c \
32 exitfail.c exitfail.h \
33 fsusage.h \
34 getopt.c getopt1.c \
35 mountlist.h \
36 unlocked-io.h \
37 xalloc.h \
38 xmalloc.c
39
40
41libcoreutils_a_LIBADD = $(LIBOBJS) $(ALLOCA)
42libcoreutils_a_DEPENDENCIES = $(libcoreutils_a_LIBADD)
43lib_OBJECTS = $(libcoreutils_a_OBJECTS)
44
45libnagiosplug_a_SOURCES = snprintf.c utils_base.c utils_disk.c
46EXTRA_DIST += utils_base.h utils_disk.h
47
48INCLUDES = -I$(srcdir) -I$(top_srcdir)/intl -I$(top_srcdir)/plugins
49
50print_coreutil_files:
51 @echo $(libcoreutils_a_SOURCES) $(other_coreutils_files) $(EXTRA_DIST)
52 12
53test: 13test:
54 cd tests && make test 14 cd tests && make test
55 15
56# Below are from coreutil's lib/Makefile.am
57BUILT_SOURCES = $(STDBOOL_H)
58EXTRA_DIST += stdbool_.h
59MOSTLYCLEANFILES = stdbool.h stdbool.ht
60# Create stdbool.h on systems that lack a working one.
61stdbool.h: stdbool_.h
62 sed -e 's/@''HAVE__BOOL''@/$(HAVE__BOOL)/g' $(srcdir)/stdbool_.h > $@t
63 mv $@t $@
64
65BUILT_SOURCES += $(ALLOCA_H)
66EXTRA_DIST += alloca_.h
67
68# We need the following in order to create an <alloca.h> when the system
69# doesn't have one that works with the given compiler
70all-local $(lib_OBJECTS): $(ALLOCA_H)
71alloca.h: alloca_.h
72 cp $(srcdir)/alloca_.h $@-t
73 mv $@-t $@
74MOSTLYCLEANFILES += alloca.h alloca.h-t
75
76BUILT_SOURCES += $(GETOPT_H)
77EXTRA_DIST += getopt_.h getopt_int.h
78
79# We need the following in order to create an <getopt.h> when the system
80# doesn't have one that works with the given compiler.
81all-local $(lib_OBJECTS): $(GETOPT_H)
82getopt.h: getopt_.h
83 cp $(srcdir)/getopt_.h $@-t
84 mv $@-t $@
85MOSTLYCLEANFILES += getopt.h getopt.h-t
86
diff --git a/lib/alloca.c b/lib/alloca.c
deleted file mode 100644
index d1d54475..00000000
--- a/lib/alloca.c
+++ /dev/null
@@ -1,491 +0,0 @@
1/* alloca.c -- allocate automatically reclaimed memory
2 (Mostly) portable public-domain implementation -- D A Gwyn
3
4 This implementation of the PWB library alloca function,
5 which is used to allocate space off the run-time stack so
6 that it is automatically reclaimed upon procedure exit,
7 was inspired by discussions with J. Q. Johnson of Cornell.
8 J.Otto Tennant <jot@cray.com> contributed the Cray support.
9
10 There are some preprocessor constants that can
11 be defined when compiling for your specific system, for
12 improved efficiency; however, the defaults should be okay.
13
14 The general concept of this implementation is to keep
15 track of all alloca-allocated blocks, and reclaim any
16 that are found to be deeper in the stack than the current
17 invocation. This heuristic does not reclaim storage as
18 soon as it becomes invalid, but it will do so eventually.
19
20 As a special case, alloca(0) reclaims storage without
21 allocating any. It is a good idea to use alloca(0) in
22 your main control loop, etc. to force garbage collection. */
23
24#ifdef HAVE_CONFIG_H
25# include <config.h>
26#endif
27
28#include <alloca.h>
29
30#include <string.h>
31#include <stdlib.h>
32
33#ifdef emacs
34# include "lisp.h"
35# include "blockinput.h"
36# ifdef EMACS_FREE
37# undef free
38# define free EMACS_FREE
39# endif
40#else
41# define memory_full() abort ()
42#endif
43
44/* If compiling with GCC 2, this file's not needed. */
45#if !defined (__GNUC__) || __GNUC__ < 2
46
47/* If someone has defined alloca as a macro,
48 there must be some other way alloca is supposed to work. */
49# ifndef alloca
50
51# ifdef emacs
52# ifdef static
53/* actually, only want this if static is defined as ""
54 -- this is for usg, in which emacs must undefine static
55 in order to make unexec workable
56 */
57# ifndef STACK_DIRECTION
58you
59lose
60-- must know STACK_DIRECTION at compile-time
61/* Using #error here is not wise since this file should work for
62 old and obscure compilers. */
63# endif /* STACK_DIRECTION undefined */
64# endif /* static */
65# endif /* emacs */
66
67/* If your stack is a linked list of frames, you have to
68 provide an "address metric" ADDRESS_FUNCTION macro. */
69
70# if defined (CRAY) && defined (CRAY_STACKSEG_END)
71long i00afunc ();
72# define ADDRESS_FUNCTION(arg) (char *) i00afunc (&(arg))
73# else
74# define ADDRESS_FUNCTION(arg) &(arg)
75# endif
76
77/* Define STACK_DIRECTION if you know the direction of stack
78 growth for your system; otherwise it will be automatically
79 deduced at run-time.
80
81 STACK_DIRECTION > 0 => grows toward higher addresses
82 STACK_DIRECTION < 0 => grows toward lower addresses
83 STACK_DIRECTION = 0 => direction of growth unknown */
84
85# ifndef STACK_DIRECTION
86# define STACK_DIRECTION 0 /* Direction unknown. */
87# endif
88
89# if STACK_DIRECTION != 0
90
91# define STACK_DIR STACK_DIRECTION /* Known at compile-time. */
92
93# else /* STACK_DIRECTION == 0; need run-time code. */
94
95static int stack_dir; /* 1 or -1 once known. */
96# define STACK_DIR stack_dir
97
98static void
99find_stack_direction (void)
100{
101 static char *addr = NULL; /* Address of first `dummy', once known. */
102 auto char dummy; /* To get stack address. */
103
104 if (addr == NULL)
105 { /* Initial entry. */
106 addr = ADDRESS_FUNCTION (dummy);
107
108 find_stack_direction (); /* Recurse once. */
109 }
110 else
111 {
112 /* Second entry. */
113 if (ADDRESS_FUNCTION (dummy) > addr)
114 stack_dir = 1; /* Stack grew upward. */
115 else
116 stack_dir = -1; /* Stack grew downward. */
117 }
118}
119
120# endif /* STACK_DIRECTION == 0 */
121
122/* An "alloca header" is used to:
123 (a) chain together all alloca'ed blocks;
124 (b) keep track of stack depth.
125
126 It is very important that sizeof(header) agree with malloc
127 alignment chunk size. The following default should work okay. */
128
129# ifndef ALIGN_SIZE
130# define ALIGN_SIZE sizeof(double)
131# endif
132
133typedef union hdr
134{
135 char align[ALIGN_SIZE]; /* To force sizeof(header). */
136 struct
137 {
138 union hdr *next; /* For chaining headers. */
139 char *deep; /* For stack depth measure. */
140 } h;
141} header;
142
143static header *last_alloca_header = NULL; /* -> last alloca header. */
144
145/* Return a pointer to at least SIZE bytes of storage,
146 which will be automatically reclaimed upon exit from
147 the procedure that called alloca. Originally, this space
148 was supposed to be taken from the current stack frame of the
149 caller, but that method cannot be made to work for some
150 implementations of C, for example under Gould's UTX/32. */
151
152void *
153alloca (size_t size)
154{
155 auto char probe; /* Probes stack depth: */
156 register char *depth = ADDRESS_FUNCTION (probe);
157
158# if STACK_DIRECTION == 0
159 if (STACK_DIR == 0) /* Unknown growth direction. */
160 find_stack_direction ();
161# endif
162
163 /* Reclaim garbage, defined as all alloca'd storage that
164 was allocated from deeper in the stack than currently. */
165
166 {
167 register header *hp; /* Traverses linked list. */
168
169# ifdef emacs
170 BLOCK_INPUT;
171# endif
172
173 for (hp = last_alloca_header; hp != NULL;)
174 if ((STACK_DIR > 0 && hp->h.deep > depth)
175 || (STACK_DIR < 0 && hp->h.deep < depth))
176 {
177 register header *np = hp->h.next;
178
179 free (hp); /* Collect garbage. */
180
181 hp = np; /* -> next header. */
182 }
183 else
184 break; /* Rest are not deeper. */
185
186 last_alloca_header = hp; /* -> last valid storage. */
187
188# ifdef emacs
189 UNBLOCK_INPUT;
190# endif
191 }
192
193 if (size == 0)
194 return NULL; /* No allocation required. */
195
196 /* Allocate combined header + user data storage. */
197
198 {
199 /* Address of header. */
200 register header *new;
201
202 size_t combined_size = sizeof (header) + size;
203 if (combined_size < sizeof (header))
204 memory_full ();
205
206 new = malloc (combined_size);
207
208 if (! new)
209 memory_full ();
210
211 new->h.next = last_alloca_header;
212 new->h.deep = depth;
213
214 last_alloca_header = new;
215
216 /* User storage begins just after header. */
217
218 return (void *) (new + 1);
219 }
220}
221
222# if defined (CRAY) && defined (CRAY_STACKSEG_END)
223
224# ifdef DEBUG_I00AFUNC
225# include <stdio.h>
226# endif
227
228# ifndef CRAY_STACK
229# define CRAY_STACK
230# ifndef CRAY2
231/* Stack structures for CRAY-1, CRAY X-MP, and CRAY Y-MP */
232struct stack_control_header
233 {
234 long shgrow:32; /* Number of times stack has grown. */
235 long shaseg:32; /* Size of increments to stack. */
236 long shhwm:32; /* High water mark of stack. */
237 long shsize:32; /* Current size of stack (all segments). */
238 };
239
240/* The stack segment linkage control information occurs at
241 the high-address end of a stack segment. (The stack
242 grows from low addresses to high addresses.) The initial
243 part of the stack segment linkage control information is
244 0200 (octal) words. This provides for register storage
245 for the routine which overflows the stack. */
246
247struct stack_segment_linkage
248 {
249 long ss[0200]; /* 0200 overflow words. */
250 long sssize:32; /* Number of words in this segment. */
251 long ssbase:32; /* Offset to stack base. */
252 long:32;
253 long sspseg:32; /* Offset to linkage control of previous
254 segment of stack. */
255 long:32;
256 long sstcpt:32; /* Pointer to task common address block. */
257 long sscsnm; /* Private control structure number for
258 microtasking. */
259 long ssusr1; /* Reserved for user. */
260 long ssusr2; /* Reserved for user. */
261 long sstpid; /* Process ID for pid based multi-tasking. */
262 long ssgvup; /* Pointer to multitasking thread giveup. */
263 long sscray[7]; /* Reserved for Cray Research. */
264 long ssa0;
265 long ssa1;
266 long ssa2;
267 long ssa3;
268 long ssa4;
269 long ssa5;
270 long ssa6;
271 long ssa7;
272 long sss0;
273 long sss1;
274 long sss2;
275 long sss3;
276 long sss4;
277 long sss5;
278 long sss6;
279 long sss7;
280 };
281
282# else /* CRAY2 */
283/* The following structure defines the vector of words
284 returned by the STKSTAT library routine. */
285struct stk_stat
286 {
287 long now; /* Current total stack size. */
288 long maxc; /* Amount of contiguous space which would
289 be required to satisfy the maximum
290 stack demand to date. */
291 long high_water; /* Stack high-water mark. */
292 long overflows; /* Number of stack overflow ($STKOFEN) calls. */
293 long hits; /* Number of internal buffer hits. */
294 long extends; /* Number of block extensions. */
295 long stko_mallocs; /* Block allocations by $STKOFEN. */
296 long underflows; /* Number of stack underflow calls ($STKRETN). */
297 long stko_free; /* Number of deallocations by $STKRETN. */
298 long stkm_free; /* Number of deallocations by $STKMRET. */
299 long segments; /* Current number of stack segments. */
300 long maxs; /* Maximum number of stack segments so far. */
301 long pad_size; /* Stack pad size. */
302 long current_address; /* Current stack segment address. */
303 long current_size; /* Current stack segment size. This
304 number is actually corrupted by STKSTAT to
305 include the fifteen word trailer area. */
306 long initial_address; /* Address of initial segment. */
307 long initial_size; /* Size of initial segment. */
308 };
309
310/* The following structure describes the data structure which trails
311 any stack segment. I think that the description in 'asdef' is
312 out of date. I only describe the parts that I am sure about. */
313
314struct stk_trailer
315 {
316 long this_address; /* Address of this block. */
317 long this_size; /* Size of this block (does not include
318 this trailer). */
319 long unknown2;
320 long unknown3;
321 long link; /* Address of trailer block of previous
322 segment. */
323 long unknown5;
324 long unknown6;
325 long unknown7;
326 long unknown8;
327 long unknown9;
328 long unknown10;
329 long unknown11;
330 long unknown12;
331 long unknown13;
332 long unknown14;
333 };
334
335# endif /* CRAY2 */
336# endif /* not CRAY_STACK */
337
338# ifdef CRAY2
339/* Determine a "stack measure" for an arbitrary ADDRESS.
340 I doubt that "lint" will like this much. */
341
342static long
343i00afunc (long *address)
344{
345 struct stk_stat status;
346 struct stk_trailer *trailer;
347 long *block, size;
348 long result = 0;
349
350 /* We want to iterate through all of the segments. The first
351 step is to get the stack status structure. We could do this
352 more quickly and more directly, perhaps, by referencing the
353 $LM00 common block, but I know that this works. */
354
355 STKSTAT (&status);
356
357 /* Set up the iteration. */
358
359 trailer = (struct stk_trailer *) (status.current_address
360 + status.current_size
361 - 15);
362
363 /* There must be at least one stack segment. Therefore it is
364 a fatal error if "trailer" is null. */
365
366 if (trailer == 0)
367 abort ();
368
369 /* Discard segments that do not contain our argument address. */
370
371 while (trailer != 0)
372 {
373 block = (long *) trailer->this_address;
374 size = trailer->this_size;
375 if (block == 0 || size == 0)
376 abort ();
377 trailer = (struct stk_trailer *) trailer->link;
378 if ((block <= address) && (address < (block + size)))
379 break;
380 }
381
382 /* Set the result to the offset in this segment and add the sizes
383 of all predecessor segments. */
384
385 result = address - block;
386
387 if (trailer == 0)
388 {
389 return result;
390 }
391
392 do
393 {
394 if (trailer->this_size <= 0)
395 abort ();
396 result += trailer->this_size;
397 trailer = (struct stk_trailer *) trailer->link;
398 }
399 while (trailer != 0);
400
401 /* We are done. Note that if you present a bogus address (one
402 not in any segment), you will get a different number back, formed
403 from subtracting the address of the first block. This is probably
404 not what you want. */
405
406 return (result);
407}
408
409# else /* not CRAY2 */
410/* Stack address function for a CRAY-1, CRAY X-MP, or CRAY Y-MP.
411 Determine the number of the cell within the stack,
412 given the address of the cell. The purpose of this
413 routine is to linearize, in some sense, stack addresses
414 for alloca. */
415
416static long
417i00afunc (long address)
418{
419 long stkl = 0;
420
421 long size, pseg, this_segment, stack;
422 long result = 0;
423
424 struct stack_segment_linkage *ssptr;
425
426 /* Register B67 contains the address of the end of the
427 current stack segment. If you (as a subprogram) store
428 your registers on the stack and find that you are past
429 the contents of B67, you have overflowed the segment.
430
431 B67 also points to the stack segment linkage control
432 area, which is what we are really interested in. */
433
434 stkl = CRAY_STACKSEG_END ();
435 ssptr = (struct stack_segment_linkage *) stkl;
436
437 /* If one subtracts 'size' from the end of the segment,
438 one has the address of the first word of the segment.
439
440 If this is not the first segment, 'pseg' will be
441 nonzero. */
442
443 pseg = ssptr->sspseg;
444 size = ssptr->sssize;
445
446 this_segment = stkl - size;
447
448 /* It is possible that calling this routine itself caused
449 a stack overflow. Discard stack segments which do not
450 contain the target address. */
451
452 while (!(this_segment <= address && address <= stkl))
453 {
454# ifdef DEBUG_I00AFUNC
455 fprintf (stderr, "%011o %011o %011o\n", this_segment, address, stkl);
456# endif
457 if (pseg == 0)
458 break;
459 stkl = stkl - pseg;
460 ssptr = (struct stack_segment_linkage *) stkl;
461 size = ssptr->sssize;
462 pseg = ssptr->sspseg;
463 this_segment = stkl - size;
464 }
465
466 result = address - this_segment;
467
468 /* If you subtract pseg from the current end of the stack,
469 you get the address of the previous stack segment's end.
470 This seems a little convoluted to me, but I'll bet you save
471 a cycle somewhere. */
472
473 while (pseg != 0)
474 {
475# ifdef DEBUG_I00AFUNC
476 fprintf (stderr, "%011o %011o\n", pseg, size);
477# endif
478 stkl = stkl - pseg;
479 ssptr = (struct stack_segment_linkage *) stkl;
480 size = ssptr->sssize;
481 pseg = ssptr->sspseg;
482 result += size;
483 }
484 return (result);
485}
486
487# endif /* not CRAY2 */
488# endif /* CRAY */
489
490# endif /* no alloca */
491#endif /* not GCC version 2 */
diff --git a/lib/alloca_.h b/lib/alloca_.h
deleted file mode 100644
index 3e3fdf43..00000000
--- a/lib/alloca_.h
+++ /dev/null
@@ -1,52 +0,0 @@
1/* Memory allocation on the stack.
2
3 Copyright (C) 1995, 1999, 2001, 2002, 2003, 2004 Free Software
4 Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published
8 by the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
15
16 You should have received a copy of the GNU General Public
17 License along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
19 USA. */
20
21/* Avoid using the symbol _ALLOCA_H here, as Bison assumes _ALLOCA_H
22 means there is a real alloca function. */
23#ifndef _GNULIB_ALLOCA_H
24# define _GNULIB_ALLOCA_H
25
26/* alloca (N) returns a pointer to N bytes of memory
27 allocated on the stack, which will last until the function returns.
28 Use of alloca should be avoided:
29 - inside arguments of function calls - undefined behaviour,
30 - in inline functions - the allocation may actually last until the
31 calling function returns,
32 - for huge N (say, N >= 65536) - you never know how large (or small)
33 the stack is, and when the stack cannot fulfill the memory allocation
34 request, the program just crashes.
35 */
36
37#ifdef __GNUC__
38# define alloca __builtin_alloca
39#elif defined _AIX
40# define alloca __alloca
41#elif defined _MSC_VER
42# include <malloc.h>
43# define alloca _alloca
44#else
45# include <stddef.h>
46# ifdef __cplusplus
47extern "C"
48# endif
49void *alloca (size_t);
50#endif
51
52#endif /* _GNULIB_ALLOCA_H */
diff --git a/lib/basename.c b/lib/basename.c
deleted file mode 100644
index 5cc97cd4..00000000
--- a/lib/basename.c
+++ /dev/null
@@ -1,79 +0,0 @@
1/* basename.c -- return the last element in a file name
2
3 Copyright (C) 1990, 1998, 1999, 2000, 2001, 2003, 2004, 2005 Free
4 Software Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24#include "dirname.h"
25#include <string.h>
26
27/* In general, we can't use the builtin `basename' function if available,
28 since it has different meanings in different environments.
29 In some environments the builtin `basename' modifies its argument.
30
31 Return the address of the last file name component of NAME. If
32 NAME has no file name components because it is all slashes, return
33 NAME if it is empty, the address of its last slash otherwise. */
34
35char *
36base_name (char const *name)
37{
38 char const *base = name + FILE_SYSTEM_PREFIX_LEN (name);
39 char const *p;
40
41 for (p = base; *p; p++)
42 {
43 if (ISSLASH (*p))
44 {
45 /* Treat multiple adjacent slashes like a single slash. */
46 do p++;
47 while (ISSLASH (*p));
48
49 /* If the file name ends in slash, use the trailing slash as
50 the basename if no non-slashes have been found. */
51 if (! *p)
52 {
53 if (ISSLASH (*base))
54 base = p - 1;
55 break;
56 }
57
58 /* *P is a non-slash preceded by a slash. */
59 base = p;
60 }
61 }
62
63 return (char *) base;
64}
65
66/* Return the length of of the basename NAME. Typically NAME is the
67 value returned by base_name. Act like strlen (NAME), except omit
68 redundant trailing slashes. */
69
70size_t
71base_len (char const *name)
72{
73 size_t len;
74
75 for (len = strlen (name); 1 < len && ISSLASH (name[len - 1]); len--)
76 continue;
77
78 return len;
79}
diff --git a/lib/c-strtod.c b/lib/c-strtod.c
deleted file mode 100644
index 031f5f87..00000000
--- a/lib/c-strtod.c
+++ /dev/null
@@ -1,81 +0,0 @@
1/* Convert string to double, using the C locale.
2
3 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19/* Written by Paul Eggert. */
20
21#ifdef HAVE_CONFIG_H
22# include <config.h>
23#endif
24
25#include "c-strtod.h"
26
27#include <locale.h>
28#include <stdlib.h>
29
30#include "xalloc.h"
31
32#if LONG
33# define C_STRTOD c_strtold
34# define DOUBLE long double
35# define STRTOD_L strtold_l
36#else
37# define C_STRTOD c_strtod
38# define DOUBLE double
39# define STRTOD_L strtod_l
40#endif
41
42/* c_strtold falls back on strtod if strtold doesn't conform to C99. */
43#if LONG && HAVE_C99_STRTOLD
44# define STRTOD strtold
45#else
46# define STRTOD strtod
47#endif
48
49DOUBLE
50C_STRTOD (char const *nptr, char **endptr)
51{
52 DOUBLE r;
53
54#ifdef LC_ALL_MASK
55
56 locale_t c_locale = newlocale (LC_ALL_MASK, "C", 0);
57 r = STRTOD_L (nptr, endptr, c_locale);
58 freelocale (c_locale);
59
60#else
61
62 char *saved_locale = setlocale (LC_NUMERIC, NULL);
63
64 if (saved_locale)
65 {
66 saved_locale = xstrdup (saved_locale);
67 setlocale (LC_NUMERIC, "C");
68 }
69
70 r = STRTOD (nptr, endptr);
71
72 if (saved_locale)
73 {
74 setlocale (LC_NUMERIC, saved_locale);
75 free (saved_locale);
76 }
77
78#endif
79
80 return r;
81}
diff --git a/lib/c-strtod.h b/lib/c-strtod.h
deleted file mode 100644
index ca9a9e7c..00000000
--- a/lib/c-strtod.h
+++ /dev/null
@@ -1,2 +0,0 @@
1double c_strtod (char const *, char **);
2long double c_strtold (char const *, char **);
diff --git a/lib/c-strtold.c b/lib/c-strtold.c
deleted file mode 100644
index 5510e4a4..00000000
--- a/lib/c-strtold.c
+++ /dev/null
@@ -1,2 +0,0 @@
1#define LONG 1
2#include "c-strtod.c"
diff --git a/lib/cloexec.c b/lib/cloexec.c
deleted file mode 100644
index cf2308ca..00000000
--- a/lib/cloexec.c
+++ /dev/null
@@ -1,60 +0,0 @@
1/* closexec.c - set or clear the close-on-exec descriptor flag
2 Copyright (C) 1991, 2004, 2005 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17
18 The code is taken from glibc/manual/llio.texi */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24#include "cloexec.h"
25
26#include <unistd.h>
27#include <fcntl.h>
28
29#ifndef FD_CLOEXEC
30# define FD_CLOEXEC 1
31#endif
32
33/* Set the `FD_CLOEXEC' flag of DESC if VALUE is true,
34 or clear the flag if VALUE is false.
35 Return 0 on success, or -1 on error with `errno' set. */
36
37int
38set_cloexec_flag (int desc, bool value)
39{
40#if defined F_GETFD && defined F_SETFD
41
42 int flags = fcntl (desc, F_GETFD, 0);
43
44 if (0 <= flags)
45 {
46 int newflags = (value ? flags | FD_CLOEXEC : flags & ~FD_CLOEXEC);
47
48 if (flags == newflags
49 || fcntl (desc, F_SETFD, newflags) != -1)
50 return 0;
51 }
52
53 return -1;
54
55#else
56
57 return 0;
58
59#endif
60}
diff --git a/lib/cloexec.h b/lib/cloexec.h
deleted file mode 100644
index c25921d6..00000000
--- a/lib/cloexec.h
+++ /dev/null
@@ -1,2 +0,0 @@
1#include <stdbool.h>
2int set_cloexec_flag (int desc, bool value);
diff --git a/lib/creat-safer.c b/lib/creat-safer.c
deleted file mode 100644
index 4588de39..00000000
--- a/lib/creat-safer.c
+++ /dev/null
@@ -1,33 +0,0 @@
1/* Invoke creat, but avoid some glitches.
2 Copyright (C) 2005 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18/* Written by Jim Meyering. */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24#include "fcntl-safer.h"
25
26#include <fcntl.h>
27#include "unistd-safer.h"
28
29int
30creat_safer (char const *file, mode_t mode)
31{
32 return fd_safer (creat (file, mode));
33}
diff --git a/lib/dirname.h b/lib/dirname.h
deleted file mode 100644
index 1688ae81..00000000
--- a/lib/dirname.h
+++ /dev/null
@@ -1,47 +0,0 @@
1/* Take file names apart into directory and base names.
2
3 Copyright (C) 1998, 2001, 2003, 2004, 2005 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19#ifndef DIRNAME_H_
20# define DIRNAME_H_ 1
21
22# include <stdbool.h>
23# include <stddef.h>
24
25# ifndef DIRECTORY_SEPARATOR
26# define DIRECTORY_SEPARATOR '/'
27# endif
28
29# ifndef ISSLASH
30# define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR)
31# endif
32
33# ifndef FILE_SYSTEM_PREFIX_LEN
34# define FILE_SYSTEM_PREFIX_LEN(File_name) 0
35# endif
36
37# define IS_ABSOLUTE_FILE_NAME(F) ISSLASH ((F)[FILE_SYSTEM_PREFIX_LEN (F)])
38# define IS_RELATIVE_FILE_NAME(F) (! IS_ABSOLUTE_FILE_NAME (F))
39
40char *base_name (char const *file);
41char *dir_name (char const *file);
42size_t base_len (char const *file);
43size_t dir_len (char const *file);
44
45bool strip_trailing_slashes (char *file);
46
47#endif /* not DIRNAME_H_ */
diff --git a/lib/dup-safer.c b/lib/dup-safer.c
deleted file mode 100644
index 8cbee700..00000000
--- a/lib/dup-safer.c
+++ /dev/null
@@ -1,46 +0,0 @@
1/* Invoke dup, but avoid some glitches.
2 Copyright (C) 2001, 2004, 2005 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18/* Written by Paul Eggert. */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24#include "unistd-safer.h"
25
26#include <fcntl.h>
27
28#include <unistd.h>
29#ifndef STDERR_FILENO
30# define STDERR_FILENO 2
31#endif
32
/* Duplicate FD the way dup does, except that the duplicate is never
   STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO.  */

int
dup_safer (int fd)
{
  int copy;

#ifdef F_DUPFD
  /* Ask for a descriptor numbered STDERR_FILENO + 1 or above.  */
  copy = fcntl (fd, F_DUPFD, STDERR_FILENO + 1);
#else
  /* No F_DUPFD: go through fd_safer.  It calls back into this function,
     but the recursion eventually unwinds and does the right thing.  */
  copy = fd_safer (dup (fd));
#endif

  return copy;
}
diff --git a/lib/error.c b/lib/error.c
deleted file mode 100644
index 45698be8..00000000
--- a/lib/error.c
+++ /dev/null
@@ -1,304 +0,0 @@
1/* Error handler for noninteractive utilities
2 Copyright (C) 1990-1998, 2000-2003, 2004 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19/* Written by David MacKenzie <djm@gnu.ai.mit.edu>. */
20
21#ifdef HAVE_CONFIG_H
22# include <config.h>
23#endif
24
25#include "error.h"
26
27#include <stdarg.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31
32#if !_LIBC && ENABLE_NLS
33# include "gettext.h"
34#endif
35
36#ifdef _LIBC
37# include <wchar.h>
38# define mbsrtowcs __mbsrtowcs
39#endif
40
41#if USE_UNLOCKED_IO
42# include "unlocked-io.h"
43#endif
44
45#ifndef _
46# define _(String) String
47#endif
48
49/* If NULL, error will flush stdout, then print on stderr the program
50 name, a colon and a space. Otherwise, error will call this
51 function without parameters instead. */
52void (*error_print_progname) (void);
53
54/* This variable is incremented each time `error' is called. */
55unsigned int error_message_count;
56
57#ifdef _LIBC
58/* In the GNU C library, there is a predefined variable for this. */
59
60# define program_name program_invocation_name
61# include <errno.h>
62# include <libio/libioP.h>
63
64/* In GNU libc we do not want to use the common name `error' directly.
65 Instead make it a weak alias. */
66extern void __error (int status, int errnum, const char *message, ...)
67 __attribute__ ((__format__ (__printf__, 3, 4)));
68extern void __error_at_line (int status, int errnum, const char *file_name,
69 unsigned int line_number, const char *message,
70 ...)
71 __attribute__ ((__format__ (__printf__, 5, 6)));;
72# define error __error
73# define error_at_line __error_at_line
74
75# include <libio/iolibio.h>
76# define fflush(s) INTUSE(_IO_fflush) (s)
77# undef putc
78# define putc(c, fp) INTUSE(_IO_putc) (c, fp)
79
80# include <bits/libc-lock.h>
81
82#else /* not _LIBC */
83
84# if !HAVE_DECL_STRERROR_R && STRERROR_R_CHAR_P
85# ifndef HAVE_DECL_STRERROR_R
86"this configure-time declaration test was not run"
87# endif
88char *strerror_r ();
89# endif
90
91# ifndef SIZE_MAX
92# define SIZE_MAX ((size_t) -1)
93# endif
94
95/* The calling program should define program_name and set it to the
96 name of the executing program. */
97extern char *program_name;
98
99# if HAVE_STRERROR_R || defined strerror_r
100# define __strerror_r strerror_r
101# endif
102#endif /* not _LIBC */
103
/* Write ": " followed by a description of the system error ERRNUM to
   stderr.  No newline is written.  The description is taken from
   __strerror_r when HAVE_STRERROR_R or _LIBC is set; outside _LIBC,
   strerror and then the literal "Unknown system error" serve as
   fallbacks.  */
104static void
105print_errno_message (int errnum)
106{
107 char const *s = NULL;
108
109#if defined HAVE_STRERROR_R || _LIBC
110 char errbuf[1024];
111# if STRERROR_R_CHAR_P || _LIBC
 /* In this configuration the return value is used as the message.  */
112 s = __strerror_r (errnum, errbuf, sizeof errbuf);
113# else
 /* In this configuration a zero return means ERRBUF holds the message.  */
114 if (__strerror_r (errnum, errbuf, sizeof errbuf) == 0)
115 s = errbuf;
116# endif
117#endif
118
119#if !_LIBC
 /* Nothing obtained above: try strerror, then a fixed string.  */
120 if (! s && ! (s = strerror (errnum)))
121 s = _("Unknown system error");
122#endif
123
124#if _LIBC
 /* A positive _IO_fwide result selects the wide-character output call.  */
125 if (_IO_fwide (stderr, 0) > 0)
126 {
127 __fwprintf (stderr, L": %s", s);
128 return;
129 }
130#endif
131
132 fprintf (stderr, ": %s", s);
133}
134
/* Common tail of error and error_at_line.  Print MESSAGE, a printf-style
   format consumed together with ARGS, to stderr; finish ARGS with va_end;
   increment error_message_count; if ERRNUM is nonzero append its
   description via print_errno_message; write a newline and flush stderr;
   finally call exit (STATUS) if STATUS is nonzero.  In _LIBC builds where
   _IO_fwide reports a positive value for stderr, MESSAGE is first
   converted to a wide-character format string.  */
135static void
136error_tail (int status, int errnum, const char *message, va_list args)
137{
138#if _LIBC
139 if (_IO_fwide (stderr, 0) > 0)
140 {
141# define ALLOCA_LIMIT 2000
142 size_t len = strlen (message) + 1;
 /* Used as the format if no conversion buffer can be obtained.  */
143 const wchar_t *wmessage = L"out of memory";
 /* Short messages use alloca, longer ones malloc; a length whose byte
    count would overflow size_t gets no buffer at all.  */
144 wchar_t *wbuf = (len < ALLOCA_LIMIT
145 ? alloca (len * sizeof *wbuf)
146 : len <= SIZE_MAX / sizeof *wbuf
147 ? malloc (len * sizeof *wbuf)
148 : NULL);
149
150 if (wbuf)
151 {
152 size_t res;
153 mbstate_t st;
154 const char *tmp = message;
155 memset (&st, '\0', sizeof (st));
156 res = mbsrtowcs (wbuf, &tmp, len, &st);
 /* A failed conversion is reported as the literal "???".  */
157 wmessage = res == (size_t) -1 ? L"???" : wbuf;
158 }
159
160 __vfwprintf (stderr, wmessage, args);
 /* Only a buffer from the malloc path is freed; WBUF may be NULL here.  */
161 if (! (len < ALLOCA_LIMIT))
162 free (wbuf);
163 }
164 else
165#endif
166 vfprintf (stderr, message, args);
 /* The callers run va_start; the matching va_end happens here.  */
167 va_end (args);
168
169 ++error_message_count;
170 if (errnum)
171 print_errno_message (errnum);
172#if _LIBC
173 if (_IO_fwide (stderr, 0) > 0)
174 putwc (L'\n', stderr);
175 else
176#endif
177 putc ('\n', stderr);
178 fflush (stderr);
179 if (status)
180 exit (status);
181}
182
183
184/* Print the program name and error message MESSAGE, which is a printf-style
185 format string with optional args.
186 If ERRNUM is nonzero, print its corresponding system error message.
187 Exit with status STATUS if it is nonzero.
 stdout is flushed before anything is written to stderr.  If
 error_print_progname is non-null it is called in place of printing
 "program_name: ".  */
188void
189error (int status, int errnum, const char *message, ...)
190{
191 va_list args;
192
193#if defined _LIBC && defined __libc_ptf_call
194 /* We do not want this call to be cut short by a thread
195 cancellation. Therefore disable cancellation for now. */
196 int state = PTHREAD_CANCEL_ENABLE;
197 __libc_ptf_call (pthread_setcancelstate, (PTHREAD_CANCEL_DISABLE, &state),
198 0);
199#endif
200
201 fflush (stdout);
202#ifdef _LIBC
203 _IO_flockfile (stderr);
204#endif
205 if (error_print_progname)
206 (*error_print_progname) ();
207 else
208 {
209#if _LIBC
210 if (_IO_fwide (stderr, 0) > 0)
211 __fwprintf (stderr, L"%s: ", program_name);
212 else
213#endif
214 fprintf (stderr, "%s: ", program_name);
215 }
216
 /* error_tail consumes ARGS and calls va_end on it; it does not return
    when STATUS is nonzero, because it calls exit.  */
217 va_start (args, message);
218 error_tail (status, errnum, message, args);
219
220#ifdef _LIBC
221 _IO_funlockfile (stderr);
222# ifdef __libc_ptf_call
 /* Put back the cancellation state saved at the top of the function.  */
223 __libc_ptf_call (pthread_setcancelstate, (state, NULL), 0);
224# endif
225#endif
226}
227
228/* Sometimes we want to have at most one error per line. This
229 variable controls whether this mode is selected or not. */
230int error_one_per_line;
231
232void
233error_at_line (int status, int errnum, const char *file_name,
234 unsigned int line_number, const char *message, ...)
235{
236 va_list args;
237
238 if (error_one_per_line)
239 {
240 static const char *old_file_name;
241 static unsigned int old_line_number;
242
243 if (old_line_number == line_number
244 && (file_name == old_file_name
245 || strcmp (old_file_name, file_name) == 0))
246 /* Simply return and print nothing. */
247 return;
248
249 old_file_name = file_name;
250 old_line_number = line_number;
251 }
252
253#if defined _LIBC && defined __libc_ptf_call
254 /* We do not want this call to be cut short by a thread
255 cancellation. Therefore disable cancellation for now. */
256 int state = PTHREAD_CANCEL_ENABLE;
257 __libc_ptf_call (pthread_setcancelstate, (PTHREAD_CANCEL_DISABLE, &state),
258 0);
259#endif
260
261 fflush (stdout);
262#ifdef _LIBC
263 _IO_flockfile (stderr);
264#endif
265 if (error_print_progname)
266 (*error_print_progname) ();
267 else
268 {
269#if _LIBC
270 if (_IO_fwide (stderr, 0) > 0)
271 __fwprintf (stderr, L"%s: ", program_name);
272 else
273#endif
274 fprintf (stderr, "%s:", program_name);
275 }
276
277 if (file_name != NULL)
278 {
279#if _LIBC
280 if (_IO_fwide (stderr, 0) > 0)
281 __fwprintf (stderr, L"%s:%d: ", file_name, line_number);
282 else
283#endif
284 fprintf (stderr, "%s:%d: ", file_name, line_number);
285 }
286
287 va_start (args, message);
288 error_tail (status, errnum, message, args);
289
290#ifdef _LIBC
291 _IO_funlockfile (stderr);
292# ifdef __libc_ptf_call
293 __libc_ptf_call (pthread_setcancelstate, (state, NULL), 0);
294# endif
295#endif
296}
297
298#ifdef _LIBC
299/* Make the weak alias. */
300# undef error
301# undef error_at_line
302weak_alias (__error, error)
303weak_alias (__error_at_line, error_at_line)
304#endif
diff --git a/lib/error.h b/lib/error.h
deleted file mode 100644
index e5220a2f..00000000
--- a/lib/error.h
+++ /dev/null
@@ -1,66 +0,0 @@
1/* Declaration for error-reporting function
2 Copyright (C) 1995, 1996, 1997, 2003 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19#ifndef _ERROR_H
20#define _ERROR_H 1
21
22#ifndef __attribute__
23/* This feature is available in gcc versions 2.5 and later. */
24# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5)
25# define __attribute__(Spec) /* empty */
26# endif
27/* The __-protected variants of `format' and `printf' attributes
28 are accepted by gcc versions 2.6.4 (effectively 2.7) and later. */
29# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 7)
30# define __format__ format
31# define __printf__ printf
32# endif
33#endif
34
35#ifdef __cplusplus
36extern "C" {
37#endif
38
39/* Print a message with `fprintf (stderr, FORMAT, ...)';
40 if ERRNUM is nonzero, follow it with ": " and strerror (ERRNUM).
41 If STATUS is nonzero, terminate the program with `exit (STATUS)'. */
42
43extern void error (int __status, int __errnum, const char *__format, ...)
44 __attribute__ ((__format__ (__printf__, 3, 4)));
45
46extern void error_at_line (int __status, int __errnum, const char *__fname,
47 unsigned int __lineno, const char *__format, ...)
48 __attribute__ ((__format__ (__printf__, 5, 6)));
49
50/* If NULL, error will flush stdout, then print on stderr the program
51 name, a colon and a space. Otherwise, error will call this
52 function without parameters instead. */
53extern void (*error_print_progname) (void);
54
55/* This variable is incremented each time `error' is called. */
56extern unsigned int error_message_count;
57
58/* Sometimes we want to have at most one error per line. This
59 variable controls whether this mode is selected or not. */
60extern int error_one_per_line;
61
62#ifdef __cplusplus
63}
64#endif
65
66#endif /* error.h */
diff --git a/lib/exit.h b/lib/exit.h
deleted file mode 100644
index 9dbfb7ce..00000000
--- a/lib/exit.h
+++ /dev/null
@@ -1,32 +0,0 @@
1/* exit() function.
2 Copyright (C) 1995, 2001 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18#ifndef _EXIT_H
19#define _EXIT_H
20
21/* Get exit() declaration. */
22#include <stdlib.h>
23
24/* Some systems do not define EXIT_*, even with STDC_HEADERS. */
25#ifndef EXIT_SUCCESS
26# define EXIT_SUCCESS 0
27#endif
28#ifndef EXIT_FAILURE
29# define EXIT_FAILURE 1
30#endif
31
32#endif /* _EXIT_H */
diff --git a/lib/exitfail.c b/lib/exitfail.c
deleted file mode 100644
index 27d38c32..00000000
--- a/lib/exitfail.c
+++ /dev/null
@@ -1,27 +0,0 @@
1/* Failure exit status
2
3 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; see the file COPYING.
17 If not, write to the Free Software Foundation,
18 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24#include "exitfail.h"
25#include "exit.h"
26
27int volatile exit_failure = EXIT_FAILURE;
diff --git a/lib/exitfail.h b/lib/exitfail.h
deleted file mode 100644
index e46cf9c1..00000000
--- a/lib/exitfail.h
+++ /dev/null
@@ -1,20 +0,0 @@
1/* Failure exit status
2
3 Copyright (C) 2002 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; see the file COPYING.
17 If not, write to the Free Software Foundation,
18 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20extern int volatile exit_failure;
diff --git a/lib/fcntl--.h b/lib/fcntl--.h
deleted file mode 100644
index 51b869e6..00000000
--- a/lib/fcntl--.h
+++ /dev/null
@@ -1,28 +0,0 @@
1/* Like fcntl.h, but redefine some names to avoid glitches.
2
3 Copyright (C) 2005 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19/* Written by Paul Eggert. */
20
21#include <fcntl.h>
22#include "fcntl-safer.h"
23
24#undef open
25#define open open_safer
26
27#undef creat
28#define creat creat_safer
diff --git a/lib/fcntl-safer.h b/lib/fcntl-safer.h
deleted file mode 100644
index cab6aab1..00000000
--- a/lib/fcntl-safer.h
+++ /dev/null
@@ -1,24 +0,0 @@
1/* Invoke fcntl-like functions, but avoid some glitches.
2
3 Copyright (C) 2005 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19/* Written by Paul Eggert. */
20
21#include <sys/types.h>
22
23int open_safer (char const *, int, ...);
24int creat_safer (char const *, mode_t);
diff --git a/lib/fd-safer.c b/lib/fd-safer.c
deleted file mode 100644
index 5933bcbd..00000000
--- a/lib/fd-safer.c
+++ /dev/null
@@ -1,59 +0,0 @@
1/* Return a safer copy of a file descriptor.
2
3 Copyright (C) 2005 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19/* Written by Paul Eggert. */
20
21#ifdef HAVE_CONFIG_H
22# include <config.h>
23#endif
24
25#include "unistd-safer.h"
26
27#include <errno.h>
28
29#include <unistd.h>
30#ifndef STDIN_FILENO
31# define STDIN_FILENO 0
32#endif
33#ifndef STDERR_FILENO
34# define STDERR_FILENO 2
35#endif
36
/* Keep FD clear of the standard descriptors.  If FD lies outside the
   range STDIN_FILENO..STDERR_FILENO it is returned unchanged; that
   includes a negative FD, in which case errno is not touched, so the
   caller can always inspect errno after a negative result.  Otherwise
   FD is duplicated with dup_safer, FD itself is closed, and the
   duplicate is returned; if duplication fails the result is -1 and
   errno is the value dup_safer left, not whatever close set.

   Intended to wrap calls that produce a descriptor, e.g.,
   fd_safer (open ("file", O_RDONLY)).  */

int
fd_safer (int fd)
{
  int replacement;
  int saved_errno;

  if (fd < STDIN_FILENO || STDERR_FILENO < fd)
    return fd;

  replacement = dup_safer (fd);
  saved_errno = errno;
  close (fd);
  errno = saved_errno;

  return replacement;
}
diff --git a/lib/fsusage.c b/lib/fsusage.c
deleted file mode 100644
index b1377907..00000000
--- a/lib/fsusage.c
+++ /dev/null
@@ -1,289 +0,0 @@
1/* fsusage.c -- return space usage of mounted file systems
2
3 Copyright (C) 1991, 1992, 1996, 1998, 1999, 2002, 2003, 2004, 2005
4 Free Software Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24#if HAVE_INTTYPES_H
25# include <inttypes.h>
26#endif
27#if HAVE_STDINT_H
28# include <stdint.h>
29#endif
30#include <unistd.h>
31#ifndef UINTMAX_MAX
32# define UINTMAX_MAX ((uintmax_t) -1)
33#endif
34
35#include <sys/types.h>
36#include <sys/stat.h>
37#include "fsusage.h"
38
39#include <limits.h>
40
41#if HAVE_SYS_PARAM_H
42# include <sys/param.h>
43#endif
44
45#if HAVE_SYS_MOUNT_H
46# include <sys/mount.h>
47#endif
48
49#if HAVE_SYS_VFS_H
50# include <sys/vfs.h>
51#endif
52
53#if HAVE_SYS_FS_S5PARAM_H /* Fujitsu UXP/V */
54# include <sys/fs/s5param.h>
55#endif
56
57#if defined HAVE_SYS_FILSYS_H && !defined _CRAY
58# include <sys/filsys.h> /* SVR2 */
59#endif
60
61#include <fcntl.h>
62
63#if HAVE_SYS_STATFS_H
64# include <sys/statfs.h>
65#endif
66
67#if HAVE_DUSTAT_H /* AIX PS/2 */
68# include <sys/dustat.h>
69#endif
70
71#if HAVE_SYS_STATVFS_H /* SVR4 */
72# include <sys/statvfs.h>
73#endif
74
75#include "full-read.h"
76
77/* Many space usage primitives use all 1 bits to denote a value that is
78 not applicable or unknown. Propagate this information by returning
79 a uintmax_t value that is all 1 bits if X is all 1 bits, even if X
80 is unsigned and narrower than uintmax_t. */
81#define PROPAGATE_ALL_ONES(x) \
82 ((sizeof (x) < sizeof (uintmax_t) \
83 && (~ (x) == (sizeof (x) < sizeof (int) \
84 ? - (1 << (sizeof (x) * CHAR_BIT)) \
85 : 0))) \
86 ? UINTMAX_MAX : (x))
87
88/* Extract the top bit of X as an uintmax_t value. */
89#define EXTRACT_TOP_BIT(x) ((x) \
90 & ((uintmax_t) 1 << (sizeof (x) * CHAR_BIT - 1)))
91
92/* If a value is negative, many space usage primitives store it into an
93 integer variable by assignment, even if the variable's type is unsigned.
94 So, if a space usage variable X's top bit is set, convert X to the
95 uintmax_t value V such that (- (uintmax_t) V) is the negative of
96 the original value. If X's top bit is clear, just yield X.
97 Use PROPAGATE_TOP_BIT if the original value might be negative;
98 otherwise, use PROPAGATE_ALL_ONES. */
99#define PROPAGATE_TOP_BIT(x) ((x) | ~ (EXTRACT_TOP_BIT (x) - 1))
100
101/* Fill in the fields of FSP with information about space usage for
102 the file system on which FILE resides.
103 DISK is the device on which FILE is mounted, for space-getting
104 methods that need to know it.
105 Return 0 if successful, -1 if not. When returning -1, ensure that
106 ERRNO is either a system error value, or zero if DISK is NULL
107 on a system that requires a non-NULL value.
 The body is assembled from the STAT_* sections below; presumably
 exactly one of those macros is defined by configure (TODO confirm).
 Only the STAT_READ_FILSYS section reads DISK.  */
108int
109get_fs_usage (char const *file, char const *disk, struct fs_usage *fsp)
110{
111#ifdef STAT_STATFS3_OSF1
112
113 struct statfs fsd;
114
115 if (statfs (file, &fsd, sizeof (struct statfs)) != 0)
116 return -1;
117
118 fsp->fsu_blocksize = PROPAGATE_ALL_ONES (fsd.f_fsize);
119
120#endif /* STAT_STATFS3_OSF1 */
121
122#ifdef STAT_STATFS2_FS_DATA /* Ultrix */
123
124 struct fs_data fsd;
125
 /* NOTE(review): success is tested as a return of exactly 1 here, unlike
    the other statfs variants in this function; presumably that is the
    Ultrix convention -- confirm.  */
126 if (statfs (file, &fsd) != 1)
127 return -1;
128
129 fsp->fsu_blocksize = 1024;
130 fsp->fsu_blocks = PROPAGATE_ALL_ONES (fsd.fd_req.btot);
131 fsp->fsu_bfree = PROPAGATE_ALL_ONES (fsd.fd_req.bfree);
132 fsp->fsu_bavail = PROPAGATE_TOP_BIT (fsd.fd_req.bfreen);
133 fsp->fsu_bavail_top_bit_set = EXTRACT_TOP_BIT (fsd.fd_req.bfreen) != 0;
134 fsp->fsu_files = PROPAGATE_ALL_ONES (fsd.fd_req.gtot);
135 fsp->fsu_ffree = PROPAGATE_ALL_ONES (fsd.fd_req.gfree);
136
137#endif /* STAT_STATFS2_FS_DATA */
138
139#ifdef STAT_READ_FILSYS /* SVR2 */
140# ifndef SUPERBOFF
141# define SUPERBOFF (SUPERB * 512)
142# endif
143
144 struct filsys fsd;
145 int fd;
146
 /* This method reads DISK directly, so DISK must be supplied; a missing
    DISK is reported as -1 with errno set to zero.
    NOTE(review): <errno.h> is not among this file's visible includes;
    confirm errno is declared through another header.  */
147 if (! disk)
148 {
149 errno = 0;
150 return -1;
151 }
152
153 fd = open (disk, O_RDONLY);
154 if (fd < 0)
155 return -1;
 /* NOTE(review): the lseek result is not checked.  */
156 lseek (fd, (off_t) SUPERBOFF, 0);
157 if (full_read (fd, (char *) &fsd, sizeof fsd) != sizeof fsd)
158 {
159 close (fd);
160 return -1;
161 }
162 close (fd);
163
164 fsp->fsu_blocksize = (fsd.s_type == Fs2b ? 1024 : 512);
165 fsp->fsu_blocks = PROPAGATE_ALL_ONES (fsd.s_fsize);
166 fsp->fsu_bfree = PROPAGATE_ALL_ONES (fsd.s_tfree);
167 fsp->fsu_bavail = PROPAGATE_TOP_BIT (fsd.s_tfree);
168 fsp->fsu_bavail_top_bit_set = EXTRACT_TOP_BIT (fsd.s_tfree) != 0;
169 fsp->fsu_files = (fsd.s_isize == -1
170 ? UINTMAX_MAX
171 : (fsd.s_isize - 2) * INOPB * (fsd.s_type == Fs2b ? 2 : 1));
172 fsp->fsu_ffree = PROPAGATE_ALL_ONES (fsd.s_tinode);
173
174#endif /* STAT_READ_FILSYS */
175
176#ifdef STAT_STATFS2_BSIZE /* 4.3BSD, SunOS 4, HP-UX, AIX */
177
178 struct statfs fsd;
179
180 if (statfs (file, &fsd) < 0)
181 return -1;
182
183 fsp->fsu_blocksize = PROPAGATE_ALL_ONES (fsd.f_bsize);
184
185# ifdef STATFS_TRUNCATES_BLOCK_COUNTS
186
187 /* In SunOS 4.1.2, 4.1.3, and 4.1.3_U1, the block counts in the
188 struct statfs are truncated to 2GB. These conditions detect that
189 truncation, presumably without botching the 4.1.1 case, in which
190 the values are not truncated. The correct counts are stored in
191 undocumented spare fields. */
192 if (fsd.f_blocks == 0x7fffffff / fsd.f_bsize && fsd.f_spare[0] > 0)
193 {
194 fsd.f_blocks = fsd.f_spare[0];
195 fsd.f_bfree = fsd.f_spare[1];
196 fsd.f_bavail = fsd.f_spare[2];
197 }
198# endif /* STATFS_TRUNCATES_BLOCK_COUNTS */
199
200#endif /* STAT_STATFS2_BSIZE */
201
202#ifdef STAT_STATFS2_FSIZE /* 4.4BSD */
203
204 struct statfs fsd;
205
206 if (statfs (file, &fsd) < 0)
207 return -1;
208
209 fsp->fsu_blocksize = PROPAGATE_ALL_ONES (fsd.f_fsize);
210
211#endif /* STAT_STATFS2_FSIZE */
212
213#ifdef STAT_STATFS4 /* SVR3, Dynix, Irix, AIX */
214
 /* Outside the listed systems, f_bavail is mapped onto f_bfree so that the
    shared assignments at the end of the function read f_bfree instead.  */
215# if !_AIX && !defined _SEQUENT_ && !defined DOLPHIN
216# define f_bavail f_bfree
217# endif
218
219 struct statfs fsd;
220
221 if (statfs (file, &fsd, sizeof fsd, 0) < 0)
222 return -1;
223
224 /* Empirically, the block counts on most SVR3 and SVR3-derived
225 systems seem to always be in terms of 512-byte blocks,
226 no matter what value f_bsize has. */
227# if _AIX || defined _CRAY
228 fsp->fsu_blocksize = PROPAGATE_ALL_ONES (fsd.f_bsize);
229# else
230 fsp->fsu_blocksize = 512;
231# endif
232
233#endif /* STAT_STATFS4 */
234
235#ifdef STAT_STATVFS /* SVR4 */
236
237 struct statvfs fsd;
238
239 if (statvfs (file, &fsd) < 0)
240 return -1;
241
242 /* f_frsize isn't guaranteed to be supported. */
243 fsp->fsu_blocksize = (fsd.f_frsize
244 ? PROPAGATE_ALL_ONES (fsd.f_frsize)
245 : PROPAGATE_ALL_ONES (fsd.f_bsize));
246
247#endif /* STAT_STATVFS */
248
249#if !defined STAT_STATFS2_FS_DATA && !defined STAT_READ_FILSYS
250 /* !Ultrix && !SVR2 */
251
 /* The Ultrix and SVR2 sections filled in every field themselves; the
    other sections set only fsu_blocksize, and the remaining fields are
    copied here from the f_* members of FSD.  */
252 fsp->fsu_blocks = PROPAGATE_ALL_ONES (fsd.f_blocks);
253 fsp->fsu_bfree = PROPAGATE_ALL_ONES (fsd.f_bfree);
254 fsp->fsu_bavail = PROPAGATE_TOP_BIT (fsd.f_bavail);
255 fsp->fsu_bavail_top_bit_set = EXTRACT_TOP_BIT (fsd.f_bavail) != 0;
256 fsp->fsu_files = PROPAGATE_ALL_ONES (fsd.f_files);
257 fsp->fsu_ffree = PROPAGATE_ALL_ONES (fsd.f_ffree);
258
259#endif /* not STAT_STATFS2_FS_DATA && not STAT_READ_FILSYS */
260
261 return 0;
262}
263
264#if defined _AIX && defined _I386
265/* AIX PS/2 does not supply statfs. */
266
267int
268statfs (char *file, struct statfs *fsb)
269{
270 struct stat stats;
271 struct dustat fsd;
272
273 if (stat (file, &stats) != 0)
274 return -1;
275 if (dustat (stats.st_dev, 0, &fsd, sizeof (fsd)))
276 return -1;
277 fsb->f_type = 0;
278 fsb->f_bsize = fsd.du_bsize;
279 fsb->f_blocks = fsd.du_fsize - fsd.du_isize;
280 fsb->f_bfree = fsd.du_tfree;
281 fsb->f_bavail = fsd.du_tfree;
282 fsb->f_files = (fsd.du_isize - 2) * fsd.du_inopb;
283 fsb->f_ffree = fsd.du_tinode;
284 fsb->f_fsid.val[0] = fsd.du_site;
285 fsb->f_fsid.val[1] = fsd.du_pckno;
286 return 0;
287}
288
289#endif /* _AIX && _I386 */
diff --git a/lib/fsusage.h b/lib/fsusage.h
deleted file mode 100644
index f3eda2d0..00000000
--- a/lib/fsusage.h
+++ /dev/null
@@ -1,40 +0,0 @@
1/* fsusage.h -- declarations for file system space usage info
2
3 Copyright (C) 1991, 1992, 1997, 2003, 2004, 2005 Free Software
4 Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20/* Space usage statistics for a file system. Block counts are in units of fsu_blocksize. */
21
22#if !defined FSUSAGE_H_
23# define FSUSAGE_H_
24
25# include <stdbool.h>
26
/* Space usage figures as stored by get_fs_usage through its FSP
   argument.  */
27struct fs_usage
28{
29 uintmax_t fsu_blocksize; /* Size of a block. */
30 uintmax_t fsu_blocks; /* Total blocks. */
31 uintmax_t fsu_bfree; /* Free blocks available to superuser. */
32 uintmax_t fsu_bavail; /* Free blocks available to non-superuser. */
33 bool fsu_bavail_top_bit_set; /* True if fsu_bavail represents a value < 0,
 i.e. the top bit of the value reported by the system was set. */
34 uintmax_t fsu_files; /* Total file nodes. */
35 uintmax_t fsu_ffree; /* Free file nodes. */
36};
37
38int get_fs_usage (char const *file, char const *disk, struct fs_usage *fsp);
39
40#endif
diff --git a/lib/full-read.c b/lib/full-read.c
deleted file mode 100644
index 8c3472a4..00000000
--- a/lib/full-read.c
+++ /dev/null
@@ -1,19 +0,0 @@
1/* An interface to read that retries after partial reads and interrupts.
2 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18#define FULL_READ
19#include "full-write.c"
diff --git a/lib/full-read.h b/lib/full-read.h
deleted file mode 100644
index 05d83a76..00000000
--- a/lib/full-read.h
+++ /dev/null
@@ -1,24 +0,0 @@
1/* An interface to read() that reads all it is asked to read.
2
3 Copyright (C) 2002 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19#include <stddef.h>
20
21/* Read COUNT bytes into BUF from descriptor FD, retrying if interrupted
22 or if partial reads occur. Return the number of bytes successfully
23 read, setting errno if that is less than COUNT. errno = 0 means EOF. */
24extern size_t full_read (int fd, void *buf, size_t count);
diff --git a/lib/full-write.c b/lib/full-write.c
deleted file mode 100644
index 106f3c7e..00000000
--- a/lib/full-write.c
+++ /dev/null
@@ -1,83 +0,0 @@
1/* An interface to read and write that retries (if necessary) until complete.
2
3 Copyright (C) 1993, 1994, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
4 2004 Free Software Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24/* Specification. */
25#ifdef FULL_READ
26# include "full-read.h"
27#else
28# include "full-write.h"
29#endif
30
31#include <errno.h>
32
33#ifdef FULL_READ
34# include "safe-read.h"
35# define safe_rw safe_read
36# define full_rw full_read
37# undef const
38# define const /* empty */
39#else
40# include "safe-write.h"
41# define safe_rw safe_write
42# define full_rw full_write
43#endif
44
45#ifdef FULL_READ
46/* Set errno to zero upon EOF. */
47# define ZERO_BYTE_TRANSFER_ERRNO 0
48#else
49/* Some buggy drivers return 0 when one tries to write beyond
50 a device's end. (Example: Linux 1.2.13 on /dev/fd0.)
51 Set errno to ENOSPC so they get a sensible diagnostic. */
52# define ZERO_BYTE_TRANSFER_ERRNO ENOSPC
53#endif
54
55/* Write(read) COUNT bytes at BUF to(from) descriptor FD, retrying if
56 interrupted or if a partial write(read) occurs. Return the number
57 of bytes transferred.
58 When writing, set errno if fewer than COUNT bytes are written.
59 When reading, if fewer than COUNT bytes are read, you must examine
60 errno to distinguish failure from EOF (errno == 0). */
61size_t
62full_rw (int fd, const void *buf, size_t count)
63{
64 size_t total = 0;
65 const char *ptr = buf;
66
67 while (count > 0)
68 {
69 size_t n_rw = safe_rw (fd, ptr, count);
70 if (n_rw == (size_t) -1)
71 break;
72 if (n_rw == 0)
73 {
74 errno = ZERO_BYTE_TRANSFER_ERRNO;
75 break;
76 }
77 total += n_rw;
78 ptr += n_rw;
79 count -= n_rw;
80 }
81
82 return total;
83}
diff --git a/lib/full-write.h b/lib/full-write.h
deleted file mode 100644
index d20d2fe4..00000000
--- a/lib/full-write.h
+++ /dev/null
@@ -1,35 +0,0 @@
1/* An interface to write() that writes all it is asked to write.
2
3 Copyright (C) 2002-2003 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19#include <stddef.h>
20
21
22#ifdef __cplusplus
23extern "C" {
24#endif
25
26
27/* Write COUNT bytes at BUF to descriptor FD, retrying if interrupted
28 or if partial writes occur. Return the number of bytes successfully
29 written, setting errno if that is less than COUNT. */
30extern size_t full_write (int fd, const void *buf, size_t count);
31
32
33#ifdef __cplusplus
34}
35#endif
diff --git a/lib/getloadavg.c b/lib/getloadavg.c
deleted file mode 100644
index 2a501314..00000000
--- a/lib/getloadavg.c
+++ /dev/null
@@ -1,1017 +0,0 @@
1/* Get the system load averages.
2
3 Copyright (C) 1985, 1986, 1987, 1988, 1989, 1991, 1992, 1993, 1994,
4 1995, 1997, 1999, 2000, 2003, 2004, 2005 Free Software Foundation, Inc.
5
6 NOTE: The canonical source of this file is maintained with gnulib.
7 Bugs can be reported to bug-gnulib@gnu.org.
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
22 USA. */
23
24/* Compile-time symbols that this file uses:
25
26 HAVE_PSTAT_GETDYNAMIC Define this if your system has the
27 pstat_getdynamic function. I think it
28 is unique to HPUX9. The best way to get the
29 definition is through the AC_FUNC_GETLOADAVG
30 macro that comes with autoconf 2.13 or newer.
31 If that isn't an option, then just put
32 AC_CHECK_FUNCS(pstat_getdynamic) in your
33 configure.in file.
34 FIXUP_KERNEL_SYMBOL_ADDR() Adjust address in returned struct nlist.
35 KERNEL_FILE Name of the kernel file to nlist.
36 LDAV_CVT() Scale the load average from the kernel.
37 Returns a double.
38 LDAV_SYMBOL Name of kernel symbol giving load average.
39 LOAD_AVE_TYPE Type of the load average array in the kernel.
40 Must be defined unless one of
41 apollo, DGUX, NeXT, or UMAX is defined;
42 or we have libkstat;
43 otherwise, no load average is available.
44 HAVE_NLIST_H nlist.h is available. NLIST_STRUCT defaults
45 to this.
46 NLIST_STRUCT Include nlist.h, not a.out.h, and
47 the nlist n_name element is a pointer,
48 not an array.
49 HAVE_STRUCT_NLIST_N_UN_N_NAME `n_un.n_name' is member of `struct nlist'.
50 LINUX_LDAV_FILE [__linux__, __CYGWIN__]: File containing
51 load averages.
52
53 Specific system predefines this file uses, aside from setting
54 default values if not emacs:
55
56 apollo
57 BSD Real BSD, not just BSD-like.
58 convex
59 DGUX
60 eunice UNIX emulator under VMS.
61 hpux
62 __MSDOS__ No-op for MSDOS.
63 NeXT
64 sgi
65 sequent Sequent Dynix 3.x.x (BSD)
66 _SEQUENT_ Sequent DYNIX/ptx 1.x.x (SYSV)
67 sony_news NEWS-OS (works at least for 4.1C)
68 UMAX
69 UMAX_43
70 VMS
71 WINDOWS32 No-op for Windows95/NT.
72 __linux__ Linux: assumes /proc file system mounted.
73 Support from Michael K. Johnson.
74 __CYGWIN__ Cygwin emulates linux /proc/loadavg.
75 __NetBSD__ NetBSD: assumes /kern file system mounted.
76
77 In addition, to avoid nesting many #ifdefs, we internally set
78 LDAV_DONE to indicate that the load average has been computed.
79
80 We also #define LDAV_PRIVILEGED if a program will require
81 special installation to be able to call getloadavg. */
82
83/* This should always be first. */
84#ifdef HAVE_CONFIG_H
85# include <config.h>
86#endif
87
88#include <errno.h>
89#include <stdbool.h>
90#include <stdio.h>
91#include <stdlib.h>
92
93/* Exclude all the code except the test program at the end
94 if the system has its own `getloadavg' function. */
95
96#ifndef HAVE_GETLOADAVG
97
98# include <sys/types.h>
99
100/* Both the Emacs and non-Emacs sections want this. Some
101 configuration files' definitions for the LOAD_AVE_CVT macro (like
102 sparc.h's) use macros like FSCALE, defined here. */
103# if defined (unix) || defined (__unix)
104# include <sys/param.h>
105# endif
106
107# include "c-strtod.h"
108# include "cloexec.h"
109# include "intprops.h"
110# include "xalloc.h"
111
112/* The existing Emacs configuration files define a macro called
113 LOAD_AVE_CVT, which accepts a value of type LOAD_AVE_TYPE, and
114 returns the load average multiplied by 100. What we actually want
115 is a macro called LDAV_CVT, which returns the load average as an
116 unmultiplied double.
117
118 For backwards compatibility, we'll define LDAV_CVT in terms of
119 LOAD_AVE_CVT, but future machine config files should just define
120 LDAV_CVT directly. */
121
122# if !defined (LDAV_CVT) && defined (LOAD_AVE_CVT)
123# define LDAV_CVT(n) (LOAD_AVE_CVT (n) / 100.0)
124# endif
125
126# if !defined (BSD) && defined (ultrix)
127/* Ultrix behaves like BSD on Vaxen. */
128# define BSD
129# endif
130
131# ifdef NeXT
132/* NeXT in the 2.{0,1,2} releases defines BSD in <sys/param.h>, which
133 conflicts with the definition understood in this file, that this
134 really is BSD. */
135# undef BSD
136
137/* NeXT defines FSCALE in <sys/param.h>. However, we take FSCALE being
138 defined to mean that the nlist method should be used, which is not true. */
139# undef FSCALE
140# endif
141
142/* Same issues as for NeXT apply to the HURD-based GNU system. */
143# ifdef __GNU__
144# undef BSD
145# undef FSCALE
146# endif /* __GNU__ */
147
148/* Set values that are different from the defaults, which are
149 set a little farther down with #ifndef. */
150
151
152/* Some shorthands. */
153
154# if defined (HPUX) && !defined (hpux)
155# define hpux
156# endif
157
158# if defined (__hpux) && !defined (hpux)
159# define hpux
160# endif
161
162# if defined (__sun) && !defined (sun)
163# define sun
164# endif
165
166# if defined (hp300) && !defined (hpux)
167# define MORE_BSD
168# endif
169
170# if defined (ultrix) && defined (mips)
171# define decstation
172# endif
173
174# if defined (__SVR4) && !defined (SVR4)
175# define SVR4
176# endif
177
178# if (defined (sun) && defined (SVR4)) || defined (SOLARIS2)
179# define SUNOS_5
180# endif
181
182# if defined (__osf__) && (defined (__alpha) || defined (__alpha__))
183# define OSF_ALPHA
184# include <sys/mbuf.h>
185# include <sys/socket.h>
186# include <net/route.h>
187# include <sys/table.h>
188# endif
189
190# if defined (__osf__) && (defined (mips) || defined (__mips__))
191# define OSF_MIPS
192# include <sys/table.h>
193# endif
194
195/* UTek's /bin/cc on the 4300 has no architecture specific cpp define by
196 default, but _MACH_IND_SYS_TYPES is defined in <sys/types.h>. Combine
197 that with a couple of other things and we'll have a unique match. */
198# if !defined (tek4300) && defined (unix) && defined (m68k) && defined (mc68000) && defined (mc68020) && defined (_MACH_IND_SYS_TYPES)
199# define tek4300 /* Define by emacs, but not by other users. */
200# endif
201
202
203/* VAX C can't handle multi-line #ifs, or lines longer than 256 chars. */
204# ifndef LOAD_AVE_TYPE
205
206# ifdef MORE_BSD
207# define LOAD_AVE_TYPE long
208# endif
209
210# ifdef sun
211# define LOAD_AVE_TYPE long
212# endif
213
214# ifdef decstation
215# define LOAD_AVE_TYPE long
216# endif
217
218# ifdef _SEQUENT_
219# define LOAD_AVE_TYPE long
220# endif
221
222# ifdef sgi
223# define LOAD_AVE_TYPE long
224# endif
225
226# ifdef SVR4
227# define LOAD_AVE_TYPE long
228# endif
229
230# ifdef sony_news
231# define LOAD_AVE_TYPE long
232# endif
233
234# ifdef sequent
235# define LOAD_AVE_TYPE long
236# endif
237
238# ifdef OSF_ALPHA
239# define LOAD_AVE_TYPE long
240# endif
241
242# if defined (ardent) && defined (titan)
243# define LOAD_AVE_TYPE long
244# endif
245
246# ifdef tek4300
247# define LOAD_AVE_TYPE long
248# endif
249
250# if defined (alliant) && defined (i860) /* Alliant FX/2800 */
251# define LOAD_AVE_TYPE long
252# endif
253
254# ifdef _AIX
255# define LOAD_AVE_TYPE long
256# endif
257
258# ifdef convex
259# define LOAD_AVE_TYPE double
260# ifndef LDAV_CVT
261# define LDAV_CVT(n) (n)
262# endif
263# endif
264
265# endif /* No LOAD_AVE_TYPE. */
266
267# ifdef OSF_ALPHA
268/* <sys/param.h> defines an incorrect value for FSCALE on Alpha OSF/1,
269 according to ghazi@noc.rutgers.edu. */
270# undef FSCALE
271# define FSCALE 1024.0
272# endif
273
274# if defined (alliant) && defined (i860) /* Alliant FX/2800 */
275/* <sys/param.h> defines an incorrect value for FSCALE on an
276 Alliant FX/2800 Concentrix 2.2, according to ghazi@noc.rutgers.edu. */
277# undef FSCALE
278# define FSCALE 100.0
279# endif
280
281
282# ifndef FSCALE
283
284/* SunOS and some others define FSCALE in sys/param.h. */
285
286# ifdef MORE_BSD
287# define FSCALE 2048.0
288# endif
289
290# if defined (MIPS) || defined (SVR4) || defined (decstation)
291# define FSCALE 256
292# endif
293
294# if defined (sgi) || defined (sequent)
295/* Sometimes both MIPS and sgi are defined, so FSCALE was just defined
296 above under #ifdef MIPS. But we want the sgi value. */
297# undef FSCALE
298# define FSCALE 1000.0
299# endif
300
301# if defined (ardent) && defined (titan)
302# define FSCALE 65536.0
303# endif
304
305# ifdef tek4300
306# define FSCALE 100.0
307# endif
308
309# ifdef _AIX
310# define FSCALE 65536.0
311# endif
312
313# endif /* Not FSCALE. */
314
315# if !defined (LDAV_CVT) && defined (FSCALE)
316# define LDAV_CVT(n) (((double) (n)) / FSCALE)
317# endif
318
319# ifndef NLIST_STRUCT
320# if HAVE_NLIST_H
321# define NLIST_STRUCT
322# endif
323# endif
324
325# if defined (sgi) || (defined (mips) && !defined (BSD))
326# define FIXUP_KERNEL_SYMBOL_ADDR(nl) ((nl)[0].n_value &= ~(1 << 31))
327# endif
328
329
330# if !defined (KERNEL_FILE) && defined (sequent)
331# define KERNEL_FILE "/dynix"
332# endif
333
334# if !defined (KERNEL_FILE) && defined (hpux)
335# define KERNEL_FILE "/hp-ux"
336# endif
337
338# if !defined (KERNEL_FILE) && (defined (_SEQUENT_) || defined (MIPS) || defined (SVR4) || defined (ISC) || defined (sgi) || (defined (ardent) && defined (titan)))
339# define KERNEL_FILE "/unix"
340# endif
341
342
343# if !defined (LDAV_SYMBOL) && defined (alliant)
344# define LDAV_SYMBOL "_Loadavg"
345# endif
346
347# if !defined (LDAV_SYMBOL) && ((defined (hpux) && !defined (hp9000s300)) || defined (_SEQUENT_) || defined (SVR4) || defined (ISC) || defined (sgi) || (defined (ardent) && defined (titan)) || defined (_AIX))
348# define LDAV_SYMBOL "avenrun"
349# endif
350
351# include <unistd.h>
352
353/* LOAD_AVE_TYPE should only get defined if we're going to use the
354 nlist method. */
355# if !defined (LOAD_AVE_TYPE) && (defined (BSD) || defined (LDAV_CVT) || defined (KERNEL_FILE) || defined (LDAV_SYMBOL))
356# define LOAD_AVE_TYPE double
357# endif
358
359# ifdef LOAD_AVE_TYPE
360
361# ifndef VMS
362# ifndef __linux__
363# ifndef NLIST_STRUCT
364# include <a.out.h>
365# else /* NLIST_STRUCT */
366# include <nlist.h>
367# endif /* NLIST_STRUCT */
368
369# ifdef SUNOS_5
370# include <fcntl.h>
371# include <kvm.h>
372# include <kstat.h>
373# endif
374
375# if defined (hpux) && defined (HAVE_PSTAT_GETDYNAMIC)
376# include <sys/pstat.h>
377# endif
378
379# ifndef KERNEL_FILE
380# define KERNEL_FILE "/vmunix"
381# endif /* KERNEL_FILE */
382
383# ifndef LDAV_SYMBOL
384# define LDAV_SYMBOL "_avenrun"
385# endif /* LDAV_SYMBOL */
386# endif /* __linux__ */
387
388# else /* VMS */
389
390# ifndef eunice
391# include <iodef.h>
392# include <descrip.h>
393# else /* eunice */
394# include <vms/iodef.h>
395# endif /* eunice */
396# endif /* VMS */
397
398# ifndef LDAV_CVT
399# define LDAV_CVT(n) ((double) (n))
400# endif /* !LDAV_CVT */
401
402# endif /* LOAD_AVE_TYPE */
403
404# if defined (__GNU__) && !defined (NeXT)
405/* Note that NeXT Openstep defines __GNU__ even though it should not. */
406/* GNU system acts much like NeXT, for load average purposes,
407 but not exactly. */
408# define NeXT
409# define host_self mach_host_self
410# endif
411
412# ifdef NeXT
413# ifdef HAVE_MACH_MACH_H
414# include <mach/mach.h>
415# else
416# include <mach.h>
417# endif
418# endif /* NeXT */
419
420# ifdef sgi
421# include <sys/sysmp.h>
422# endif /* sgi */
423
424# ifdef UMAX
425# include <signal.h>
426# include <sys/time.h>
427# include <sys/wait.h>
428# include <sys/syscall.h>
429
430# ifdef UMAX_43
431# include <machine/cpu.h>
432# include <inq_stats/statistics.h>
433# include <inq_stats/sysstats.h>
434# include <inq_stats/cpustats.h>
435# include <inq_stats/procstats.h>
436# else /* Not UMAX_43. */
437# include <sys/sysdefs.h>
438# include <sys/statistics.h>
439# include <sys/sysstats.h>
440# include <sys/cpudefs.h>
441# include <sys/cpustats.h>
442# include <sys/procstats.h>
443# endif /* Not UMAX_43. */
444# endif /* UMAX */
445
446# ifdef DGUX
447# include <sys/dg_sys_info.h>
448# endif
449
450# include "fcntl--.h"
451
452/* Avoid static vars inside a function since in HPUX they dump as pure. */
453
454# ifdef NeXT
455static processor_set_t default_set;
456static bool getloadavg_initialized;
457# endif /* NeXT */
458
459# ifdef UMAX
460static unsigned int cpus = 0;
461static unsigned int samples;
462# endif /* UMAX */
463
464# ifdef DGUX
465static struct dg_sys_info_load_info load_info; /* what-a-mouthful! */
466# endif /* DGUX */
467
468# if !defined (HAVE_LIBKSTAT) && defined (LOAD_AVE_TYPE)
469/* File descriptor open to /dev/kmem or VMS load ave driver. */
470static int channel;
471/* True iff channel is valid. */
472static bool getloadavg_initialized;
473/* Offset in kmem to seek to read load average, or 0 means invalid. */
474static long offset;
475
476# if !defined (VMS) && !defined (sgi) && !defined (__linux__)
477static struct nlist nl[2];
478# endif /* Not VMS or sgi */
479
480# ifdef SUNOS_5
481static kvm_t *kd;
482# endif /* SUNOS_5 */
483
484# endif /* LOAD_AVE_TYPE && !HAVE_LIBKSTAT */
485
486/* Put the 1 minute, 5 minute and 15 minute load averages
487 into the first NELEM elements of LOADAVG.
488 Return the number written (never more than 3, but may be less than NELEM),
489 or -1 if an error occurred. */
490
491int
492getloadavg (double loadavg[], int nelem)
493{
494 int elem = 0; /* Return value. */
495
496# ifdef NO_GET_LOAD_AVG
497# define LDAV_DONE
498 /* Set errno to zero to indicate that there was no particular error;
499 this function just can't work at all on this system. */
500 errno = 0;
501 elem = -1;
502# endif
503
504# if !defined (LDAV_DONE) && defined (HAVE_LIBKSTAT)
505/* Use libkstat because we don't have to be root. */
506# define LDAV_DONE
507 kstat_ctl_t *kc;
508 kstat_t *ksp;
509 kstat_named_t *kn;
510
511 kc = kstat_open ();
512 if (kc == 0)
513 return -1;
514 ksp = kstat_lookup (kc, "unix", 0, "system_misc");
515 if (ksp == 0)
516 return -1;
517 if (kstat_read (kc, ksp, 0) == -1)
518 return -1;
519
520
521 kn = kstat_data_lookup (ksp, "avenrun_1min");
522 if (kn == 0)
523 {
524 /* Return -1 if no load average information is available. */
525 nelem = 0;
526 elem = -1;
527 }
528
529 if (nelem >= 1)
530 loadavg[elem++] = (double) kn->value.ul / FSCALE;
531
532 if (nelem >= 2)
533 {
534 kn = kstat_data_lookup (ksp, "avenrun_5min");
535 if (kn != 0)
536 {
537 loadavg[elem++] = (double) kn->value.ul / FSCALE;
538
539 if (nelem >= 3)
540 {
541 kn = kstat_data_lookup (ksp, "avenrun_15min");
542 if (kn != 0)
543 loadavg[elem++] = (double) kn->value.ul / FSCALE;
544 }
545 }
546 }
547
548 kstat_close (kc);
549# endif /* HAVE_LIBKSTAT */
550
551# if !defined (LDAV_DONE) && defined (hpux) && defined (HAVE_PSTAT_GETDYNAMIC)
552/* Use pstat_getdynamic() because we don't have to be root. */
553# define LDAV_DONE
554# undef LOAD_AVE_TYPE
555
556 struct pst_dynamic dyn_info;
557 if (pstat_getdynamic (&dyn_info, sizeof (dyn_info), 0, 0) < 0)
558 return -1;
559 if (nelem > 0)
560 loadavg[elem++] = dyn_info.psd_avg_1_min;
561 if (nelem > 1)
562 loadavg[elem++] = dyn_info.psd_avg_5_min;
563 if (nelem > 2)
564 loadavg[elem++] = dyn_info.psd_avg_15_min;
565
566# endif /* hpux && HAVE_PSTAT_GETDYNAMIC */
567
568# if !defined (LDAV_DONE) && (defined (__linux__) || defined (__CYGWIN__))
569# define LDAV_DONE
570# undef LOAD_AVE_TYPE
571
572# ifndef LINUX_LDAV_FILE
573# define LINUX_LDAV_FILE "/proc/loadavg"
574# endif
575
576 char ldavgbuf[3 * (INT_STRLEN_BOUND (int) + sizeof ".00 ")];
577 char const *ptr = ldavgbuf;
578 int fd, count;
579
580 fd = open (LINUX_LDAV_FILE, O_RDONLY);
581 if (fd == -1)
582 return -1;
583 count = read (fd, ldavgbuf, sizeof ldavgbuf - 1);
584 (void) close (fd);
585 if (count <= 0)
586 return -1;
587 ldavgbuf[count] = '\0';
588
589 for (elem = 0; elem < nelem; elem++)
590 {
591 char *endptr;
592 double d = c_strtod (ptr, &endptr);
593 if (ptr == endptr)
594 {
595 if (elem == 0)
596 return -1;
597 break;
598 }
599 loadavg[elem] = d;
600 ptr = endptr;
601 }
602
603 return elem;
604
605# endif /* __linux__ || __CYGWIN__ */
606
607# if !defined (LDAV_DONE) && defined (__NetBSD__)
608# define LDAV_DONE
609# undef LOAD_AVE_TYPE
610
611# ifndef NETBSD_LDAV_FILE
612# define NETBSD_LDAV_FILE "/kern/loadavg"
613# endif
614
615 unsigned long int load_ave[3], scale;
616 int count;
617 FILE *fp;
618
619 fp = fopen (NETBSD_LDAV_FILE, "r");
620 if (fp == NULL)
621 return -1;
622 count = fscanf (fp, "%lu %lu %lu %lu\n",
623 &load_ave[0], &load_ave[1], &load_ave[2],
624 &scale);
625 (void) fclose (fp);
626 if (count != 4)
627 return -1;
628
629 for (elem = 0; elem < nelem; elem++)
630 loadavg[elem] = (double) load_ave[elem] / (double) scale;
631
632 return elem;
633
634# endif /* __NetBSD__ */
635
636# if !defined (LDAV_DONE) && defined (NeXT)
637# define LDAV_DONE
638 /* The NeXT code was adapted from iscreen 3.2. */
639
640 host_t host;
641 struct processor_set_basic_info info;
642 unsigned int info_count;
643
644 /* We only know how to get the 1-minute average for this system,
645 so even if the caller asks for more than 1, we only return 1. */
646
647 if (!getloadavg_initialized)
648 {
649 if (processor_set_default (host_self (), &default_set) == KERN_SUCCESS)
650 getloadavg_initialized = true;
651 }
652
653 if (getloadavg_initialized)
654 {
655 info_count = PROCESSOR_SET_BASIC_INFO_COUNT;
656 if (processor_set_info (default_set, PROCESSOR_SET_BASIC_INFO, &host,
657 (processor_set_info_t) &info, &info_count)
658 != KERN_SUCCESS)
659 getloadavg_initialized = false;
660 else
661 {
662 if (nelem > 0)
663 loadavg[elem++] = (double) info.load_average / LOAD_SCALE;
664 }
665 }
666
667 if (!getloadavg_initialized)
668 return -1;
669# endif /* NeXT */
670
671# if !defined (LDAV_DONE) && defined (UMAX)
672# define LDAV_DONE
673/* UMAX 4.2, which runs on the Encore Multimax multiprocessor, does not
674 have a /dev/kmem. Information about the workings of the running kernel
675 can be gathered with inq_stats system calls.
676 We only know how to get the 1-minute average for this system. */
677
678 struct proc_summary proc_sum_data;
679 struct stat_descr proc_info;
680 double load;
681 register unsigned int i, j;
682
683 if (cpus == 0)
684 {
685 register unsigned int c, i;
686 struct cpu_config conf;
687 struct stat_descr desc;
688
689 desc.sd_next = 0;
690 desc.sd_subsys = SUBSYS_CPU;
691 desc.sd_type = CPUTYPE_CONFIG;
692 desc.sd_addr = (char *) &conf;
693 desc.sd_size = sizeof conf;
694
695 if (inq_stats (1, &desc))
696 return -1;
697
698 c = 0;
699 for (i = 0; i < conf.config_maxclass; ++i)
700 {
701 struct class_stats stats;
702 bzero ((char *) &stats, sizeof stats);
703
704 desc.sd_type = CPUTYPE_CLASS;
705 desc.sd_objid = i;
706 desc.sd_addr = (char *) &stats;
707 desc.sd_size = sizeof stats;
708
709 if (inq_stats (1, &desc))
710 return -1;
711
712 c += stats.class_numcpus;
713 }
714 cpus = c;
715 samples = cpus < 2 ? 3 : (2 * cpus / 3);
716 }
717
718 proc_info.sd_next = 0;
719 proc_info.sd_subsys = SUBSYS_PROC;
720 proc_info.sd_type = PROCTYPE_SUMMARY;
721 proc_info.sd_addr = (char *) &proc_sum_data;
722 proc_info.sd_size = sizeof (struct proc_summary);
723 proc_info.sd_sizeused = 0;
724
725 if (inq_stats (1, &proc_info) != 0)
726 return -1;
727
728 load = proc_sum_data.ps_nrunnable;
729 j = 0;
730 for (i = samples - 1; i > 0; --i)
731 {
732 load += proc_sum_data.ps_nrun[j];
733 if (j++ == PS_NRUNSIZE)
734 j = 0;
735 }
736
737 if (nelem > 0)
738 loadavg[elem++] = load / samples / cpus;
739# endif /* UMAX */
740
741# if !defined (LDAV_DONE) && defined (DGUX)
742# define LDAV_DONE
743 /* This call can return -1 for an error, but with good args
744 it's not supposed to fail. The first argument is for no
745 apparent reason of type `long int *'. */
746 dg_sys_info ((long int *) &load_info,
747 DG_SYS_INFO_LOAD_INFO_TYPE,
748 DG_SYS_INFO_LOAD_VERSION_0);
749
750 if (nelem > 0)
751 loadavg[elem++] = load_info.one_minute;
752 if (nelem > 1)
753 loadavg[elem++] = load_info.five_minute;
754 if (nelem > 2)
755 loadavg[elem++] = load_info.fifteen_minute;
756# endif /* DGUX */
757
758# if !defined (LDAV_DONE) && defined (apollo)
759# define LDAV_DONE
760/* Apollo code from lisch@mentorg.com (Ray Lischner).
761
762 This system call is not documented. The load average is obtained as
763 three long integers, for the load average over the past minute,
764 five minutes, and fifteen minutes. Each value is a scaled integer,
765 with 16 bits of integer part and 16 bits of fraction part.
766
767 I'm not sure which operating system first supported this system call,
768 but I know that SR10.2 supports it. */
769
770 extern void proc1_$get_loadav ();
771 unsigned long load_ave[3];
772
773 proc1_$get_loadav (load_ave);
774
775 if (nelem > 0)
776 loadavg[elem++] = load_ave[0] / 65536.0;
777 if (nelem > 1)
778 loadavg[elem++] = load_ave[1] / 65536.0;
779 if (nelem > 2)
780 loadavg[elem++] = load_ave[2] / 65536.0;
781# endif /* apollo */
782
783# if !defined (LDAV_DONE) && defined (OSF_MIPS)
784# define LDAV_DONE
785
786 struct tbl_loadavg load_ave;
787 table (TBL_LOADAVG, 0, &load_ave, 1, sizeof (load_ave));
788 loadavg[elem++]
789 = (load_ave.tl_lscale == 0
790 ? load_ave.tl_avenrun.d[0]
791 : (load_ave.tl_avenrun.l[0] / (double) load_ave.tl_lscale));
792# endif /* OSF_MIPS */
793
794# if !defined (LDAV_DONE) && (defined (__MSDOS__) || defined (WINDOWS32))
795# define LDAV_DONE
796
797 /* A faithful emulation is going to have to be saved for a rainy day. */
798 for ( ; elem < nelem; elem++)
799 {
800 loadavg[elem] = 0.0;
801 }
802# endif /* __MSDOS__ || WINDOWS32 */
803
804# if !defined (LDAV_DONE) && defined (OSF_ALPHA)
805# define LDAV_DONE
806
807 struct tbl_loadavg load_ave;
808 table (TBL_LOADAVG, 0, &load_ave, 1, sizeof (load_ave));
809 for (elem = 0; elem < nelem; elem++)
810 loadavg[elem]
811 = (load_ave.tl_lscale == 0
812 ? load_ave.tl_avenrun.d[elem]
813 : (load_ave.tl_avenrun.l[elem] / (double) load_ave.tl_lscale));
814# endif /* OSF_ALPHA */
815
816# if !defined (LDAV_DONE) && defined (VMS)
817 /* VMS specific code -- read from the Load Ave driver. */
818
819 LOAD_AVE_TYPE load_ave[3];
820 static bool getloadavg_initialized;
821# ifdef eunice
822 struct
823 {
824 int dsc$w_length;
825 char *dsc$a_pointer;
826 } descriptor;
827# endif
828
829 /* Ensure that there is a channel open to the load ave device. */
830 if (!getloadavg_initialized)
831 {
832 /* Attempt to open the channel. */
833# ifdef eunice
834 descriptor.dsc$w_length = 18;
835 descriptor.dsc$a_pointer = "$$VMS_LOAD_AVERAGE";
836# else
837 $DESCRIPTOR (descriptor, "LAV0:");
838# endif
839 if (sys$assign (&descriptor, &channel, 0, 0) & 1)
840 getloadavg_initialized = true;
841 }
842
843 /* Read the load average vector. */
844 if (getloadavg_initialized
845 && !(sys$qiow (0, channel, IO$_READVBLK, 0, 0, 0,
846 load_ave, 12, 0, 0, 0, 0) & 1))
847 {
848 sys$dassgn (channel);
849 getloadavg_initialized = false;
850 }
851
852 if (!getloadavg_initialized)
853 return -1;
854# endif /* VMS */
855
856# if !defined (LDAV_DONE) && defined (LOAD_AVE_TYPE) && !defined (VMS)
857
858 /* UNIX-specific code -- read the average from /dev/kmem. */
859
860# define LDAV_PRIVILEGED /* This code requires special installation. */
861
862 LOAD_AVE_TYPE load_ave[3];
863
864 /* Get the address of LDAV_SYMBOL. */
865 if (offset == 0)
866 {
867# ifndef sgi
868# ifndef NLIST_STRUCT
869 strcpy (nl[0].n_name, LDAV_SYMBOL);
870 strcpy (nl[1].n_name, "");
871# else /* NLIST_STRUCT */
872# ifdef HAVE_STRUCT_NLIST_N_UN_N_NAME
873 nl[0].n_un.n_name = LDAV_SYMBOL;
874 nl[1].n_un.n_name = 0;
875# else /* not HAVE_STRUCT_NLIST_N_UN_N_NAME */
876 nl[0].n_name = LDAV_SYMBOL;
877 nl[1].n_name = 0;
878# endif /* not HAVE_STRUCT_NLIST_N_UN_N_NAME */
879# endif /* NLIST_STRUCT */
880
881# ifndef SUNOS_5
882 if (
883# if !(defined (_AIX) && !defined (ps2))
884 nlist (KERNEL_FILE, nl)
885# else /* _AIX */
886 knlist (nl, 1, sizeof (nl[0]))
887# endif
888 >= 0)
889 /* Omit "&& nl[0].n_type != 0 " -- it breaks on Sun386i. */
890 {
891# ifdef FIXUP_KERNEL_SYMBOL_ADDR
892 FIXUP_KERNEL_SYMBOL_ADDR (nl);
893# endif
894 offset = nl[0].n_value;
895 }
896# endif /* !SUNOS_5 */
897# else /* sgi */
898 int ldav_off;
899
900 ldav_off = sysmp (MP_KERNADDR, MPKA_AVENRUN);
901 if (ldav_off != -1)
902 offset = (long int) ldav_off & 0x7fffffff;
903# endif /* sgi */
904 }
905
906 /* Make sure we have /dev/kmem open. */
907 if (!getloadavg_initialized)
908 {
909# ifndef SUNOS_5
910 channel = open ("/dev/kmem", O_RDONLY);
911 if (channel >= 0)
912 {
913 /* Set the channel to close on exec, so it does not
914 litter any child's descriptor table. */
915 set_cloexec_flag (channel, true);
916 getloadavg_initialized = true;
917 }
918# else /* SUNOS_5 */
919 /* We pass 0 for the kernel, corefile, and swapfile names
920 to use the currently running kernel. */
921 kd = kvm_open (0, 0, 0, O_RDONLY, 0);
922 if (kd != 0)
923 {
924 /* nlist the currently running kernel. */
925 kvm_nlist (kd, nl);
926 offset = nl[0].n_value;
927 getloadavg_initialized = true;
928 }
929# endif /* SUNOS_5 */
930 }
931
932 /* If we can, get the load average values. */
933 if (offset && getloadavg_initialized)
934 {
935 /* Try to read the load. */
936# ifndef SUNOS_5
937 if (lseek (channel, offset, 0) == -1L
938 || read (channel, (char *) load_ave, sizeof (load_ave))
939 != sizeof (load_ave))
940 {
941 close (channel);
942 getloadavg_initialized = false;
943 }
944# else /* SUNOS_5 */
945 if (kvm_read (kd, offset, (char *) load_ave, sizeof (load_ave))
946 != sizeof (load_ave))
947 {
948 kvm_close (kd);
949 getloadavg_initialized = false;
950 }
951# endif /* SUNOS_5 */
952 }
953
954 if (offset == 0 || !getloadavg_initialized)
955 return -1;
956# endif /* LOAD_AVE_TYPE and not VMS */
957
958# if !defined (LDAV_DONE) && defined (LOAD_AVE_TYPE) /* Including VMS. */
959 if (nelem > 0)
960 loadavg[elem++] = LDAV_CVT (load_ave[0]);
961 if (nelem > 1)
962 loadavg[elem++] = LDAV_CVT (load_ave[1]);
963 if (nelem > 2)
964 loadavg[elem++] = LDAV_CVT (load_ave[2]);
965
966# define LDAV_DONE
967# endif /* !LDAV_DONE && LOAD_AVE_TYPE */
968
969# if !defined LDAV_DONE
970 /* Set errno to zero to indicate that there was no particular error;
971 this function just can't work at all on this system. */
972 errno = 0;
973 elem = -1;
974# endif
975 return elem;
976}
977
978#endif /* ! HAVE_GETLOADAVG */
979
980#ifdef TEST
/* Test driver, compiled only when TEST is defined.  Print the load
   averages reported by getloadavg once; if a nonzero interval is given
   as the first command-line argument (parsed with atoi), keep printing
   them, sleeping that many seconds between rounds.  Exit with
   EXIT_FAILURE as soon as getloadavg reports an error.  */
int
main (int argc, char **argv)
{
  int naptime = argc > 1 ? atoi (argv[1]) : 0;

  for (;;)
    {
      double avg[3];
      int loads;

      /* Clear errno first so perror is not misled by a stale value
         when getloadavg fails without setting it.  */
      errno = 0;
      loads = getloadavg (avg, 3);
      if (loads == -1)
        {
          perror ("Error getting load average");
          return EXIT_FAILURE;
        }

      /* Print as many of the three averages as were returned, then
         terminate the line if anything was printed at all.  */
      if (loads > 0)
        {
          printf ("1-minute: %f ", avg[0]);
          if (loads > 1)
            {
              printf ("5-minute: %f ", avg[1]);
              if (loads > 2)
                printf ("15-minute: %f ", avg[2]);
            }
          putchar ('\n');
        }

      /* No interval requested: a single report is all we do.  */
      if (naptime == 0)
        break;
      sleep (naptime);
    }

  return EXIT_SUCCESS;
}
1017#endif /* TEST */
diff --git a/lib/getopt.c b/lib/getopt.c
deleted file mode 100644
index bcb81c83..00000000
--- a/lib/getopt.c
+++ /dev/null
@@ -1,1241 +0,0 @@
1/* Getopt for GNU.
2 NOTE: getopt is now part of the C library, so if you don't know what
3 "Keep this file name-space clean" means, talk to drepper@gnu.org
4 before changing it!
5 Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001,2002,2003,2004
6 Free Software Foundation, Inc.
7 This file is part of the GNU C Library.
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License along
20 with this program; if not, write to the Free Software Foundation,
21 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
24 Ditto for AIX 3.2 and <stdlib.h>. */
25#ifndef _NO_PROTO
26# define _NO_PROTO
27#endif
28
29#ifdef HAVE_CONFIG_H
30# include <config.h>
31#endif
32
33#include <stdio.h>
34
35/* This needs to come after some library #include
36 to get __GNU_LIBRARY__ defined. */
37#ifdef __GNU_LIBRARY__
38/* Don't include stdlib.h for non-GNU C libraries because some of them
39 contain conflicting prototypes for getopt. */
40# include <stdlib.h>
41# include <unistd.h>
42#endif /* GNU C library. */
43
44#include <string.h>
45
46#ifdef VMS
47# include <unixlib.h>
48#endif
49
50#ifdef _LIBC
51# include <libintl.h>
52#else
53# include "gettext.h"
54# define _(msgid) gettext (msgid)
55#endif
56
57#if defined _LIBC && defined USE_IN_LIBIO
58# include <wchar.h>
59#endif
60
61#ifndef attribute_hidden
62# define attribute_hidden
63#endif
64
65/* Unlike standard Unix `getopt', functions like `getopt_long'
66 let the user intersperse the options with the other arguments.
67
68 As `getopt_long' works, it permutes the elements of ARGV so that,
69 when it is done, all the options precede everything else. Thus
70 all application programs are extended to handle flexible argument order.
71
72 Using `getopt' or setting the environment variable POSIXLY_CORRECT
73 disables permutation.
74 Then the application's behavior is completely standard.
75
76 GNU application programs can use a third alternative mode in which
77 they can distinguish the relative order of options and other arguments. */
78
79#include "getopt.h"
80#include "getopt_int.h"
81
82/* For communication from `getopt' to the caller.
83 When `getopt' finds an option that takes an argument,
84 the argument value is returned here.
85 Also, when `ordering' is RETURN_IN_ORDER,
86 each non-option ARGV-element is returned here. */
87
88char *optarg;
89
90/* Index in ARGV of the next element to be scanned.
91 This is used for communication to and from the caller
92 and for communication between successive calls to `getopt'.
93
94 On entry to `getopt', zero means this is the first call; initialize.
95
96 When `getopt' returns -1, this is the index of the first of the
97 non-option elements that the caller should itself scan.
98
99 Otherwise, `optind' communicates from one call to the next
100 how much of ARGV has been scanned so far. */
101
102/* 1003.2 says this must be 1 before any call. */
103int optind = 1;
104
105/* Callers store zero here to inhibit the error message
106 for unrecognized options. */
107
108int opterr = 1;
109
110/* Set to an option character which was unrecognized.
111 This must be initialized on some systems to avoid linking in the
112 system's own getopt implementation. */
113
114int optopt = '?';
115
116/* Keep a global copy of all internal members of getopt_data. */
117
118static struct _getopt_data getopt_data;
119
120
121#ifndef __GNU_LIBRARY__
122
123/* Avoid depending on library functions or files
124 whose names are inconsistent. */
125
126#ifndef getenv
127extern char *getenv ();
128#endif
129
130#endif /* not __GNU_LIBRARY__ */
131
132#ifdef _LIBC
133/* Stored original parameters.
134 XXX This is no good solution. We should rather copy the args so
135 that we can compare them later. But we must not use malloc(3). */
136extern int __libc_argc;
137extern char **__libc_argv;
138
139/* Bash 2.0 gives us an environment variable containing flags
140 indicating ARGV elements that should not be considered arguments. */
141
142# ifdef USE_NONOPTION_FLAGS
143/* Defined in getopt_init.c */
144extern char *__getopt_nonoption_flags;
145# endif
146
147# ifdef USE_NONOPTION_FLAGS
148# define SWAP_FLAGS(ch1, ch2) \
149 if (d->__nonoption_flags_len > 0) \
150 { \
151 char __tmp = __getopt_nonoption_flags[ch1]; \
152 __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \
153 __getopt_nonoption_flags[ch2] = __tmp; \
154 }
155# else
156# define SWAP_FLAGS(ch1, ch2)
157# endif
158#else /* !_LIBC */
159# define SWAP_FLAGS(ch1, ch2)
160#endif /* _LIBC */
161
162/* Exchange two adjacent subsequences of ARGV.
163 One subsequence is elements [first_nonopt,last_nonopt)
164 which contains all the non-options that have been skipped so far.
165 The other is elements [last_nonopt,optind), which contains all
166 the options processed since those non-options were skipped.
167
168 `first_nonopt' and `last_nonopt' are relocated so that they describe
169 the new indices of the non-options in ARGV after they are moved. */
170
171static void
172exchange (char **argv, struct _getopt_data *d)
173{
174 int bottom = d->__first_nonopt;
175 int middle = d->__last_nonopt;
176 int top = d->optind;
177 char *tem;
178
179 /* Exchange the shorter segment with the far end of the longer segment.
180 That puts the shorter segment into the right place.
181 It leaves the longer segment in the right place overall,
182 but it consists of two parts that need to be swapped next. */
183
184#if defined _LIBC && defined USE_NONOPTION_FLAGS
185 /* First make sure the handling of the `__getopt_nonoption_flags'
186 string can work normally. Our top argument must be in the range
187 of the string. */
188 if (d->__nonoption_flags_len > 0 && top >= d->__nonoption_flags_max_len)
189 {
190 /* We must extend the array. The user plays games with us and
191 presents new arguments. */
192 char *new_str = malloc (top + 1);
193 if (new_str == NULL)
194 d->__nonoption_flags_len = d->__nonoption_flags_max_len = 0;
195 else
196 {
197 memset (__mempcpy (new_str, __getopt_nonoption_flags,
198 d->__nonoption_flags_max_len),
199 '\0', top + 1 - d->__nonoption_flags_max_len);
200 d->__nonoption_flags_max_len = top + 1;
201 __getopt_nonoption_flags = new_str;
202 }
203 }
204#endif
205
206 while (top > middle && middle > bottom)
207 {
208 if (top - middle > middle - bottom)
209 {
210 /* Bottom segment is the short one. */
211 int len = middle - bottom;
212 register int i;
213
214 /* Swap it with the top part of the top segment. */
215 for (i = 0; i < len; i++)
216 {
217 tem = argv[bottom + i];
218 argv[bottom + i] = argv[top - (middle - bottom) + i];
219 argv[top - (middle - bottom) + i] = tem;
220 SWAP_FLAGS (bottom + i, top - (middle - bottom) + i);
221 }
222 /* Exclude the moved bottom segment from further swapping. */
223 top -= len;
224 }
225 else
226 {
227 /* Top segment is the short one. */
228 int len = top - middle;
229 register int i;
230
231 /* Swap it with the bottom part of the bottom segment. */
232 for (i = 0; i < len; i++)
233 {
234 tem = argv[bottom + i];
235 argv[bottom + i] = argv[middle + i];
236 argv[middle + i] = tem;
237 SWAP_FLAGS (bottom + i, middle + i);
238 }
239 /* Exclude the moved top segment from further swapping. */
240 bottom += len;
241 }
242 }
243
244 /* Update records for the slots the non-options now occupy. */
245
246 d->__first_nonopt += (d->optind - d->__last_nonopt);
247 d->__last_nonopt = d->optind;
248}
249
250/* Initialize the internal data when the first call is made. */
251
252static const char *
253_getopt_initialize (int argc, char **argv, const char *optstring,
254 int posixly_correct, struct _getopt_data *d)
255{
256 /* Start processing options with ARGV-element 1 (since ARGV-element 0
257 is the program name); the sequence of previously skipped
258 non-option ARGV-elements is empty. */
259
260 d->__first_nonopt = d->__last_nonopt = d->optind;
261
262 d->__nextchar = NULL;
263
264 d->__posixly_correct = posixly_correct || !!getenv ("POSIXLY_CORRECT");
265
266 /* Determine how to handle the ordering of options and nonoptions. */
267
268 if (optstring[0] == '-')
269 {
270 d->__ordering = RETURN_IN_ORDER;
271 ++optstring;
272 }
273 else if (optstring[0] == '+')
274 {
275 d->__ordering = REQUIRE_ORDER;
276 ++optstring;
277 }
278 else if (d->__posixly_correct)
279 d->__ordering = REQUIRE_ORDER;
280 else
281 d->__ordering = PERMUTE;
282
283#if defined _LIBC && defined USE_NONOPTION_FLAGS
284 if (!d->__posixly_correct
285 && argc == __libc_argc && argv == __libc_argv)
286 {
287 if (d->__nonoption_flags_max_len == 0)
288 {
289 if (__getopt_nonoption_flags == NULL
290 || __getopt_nonoption_flags[0] == '\0')
291 d->__nonoption_flags_max_len = -1;
292 else
293 {
294 const char *orig_str = __getopt_nonoption_flags;
295 int len = d->__nonoption_flags_max_len = strlen (orig_str);
296 if (d->__nonoption_flags_max_len < argc)
297 d->__nonoption_flags_max_len = argc;
298 __getopt_nonoption_flags =
299 (char *) malloc (d->__nonoption_flags_max_len);
300 if (__getopt_nonoption_flags == NULL)
301 d->__nonoption_flags_max_len = -1;
302 else
303 memset (__mempcpy (__getopt_nonoption_flags, orig_str, len),
304 '\0', d->__nonoption_flags_max_len - len);
305 }
306 }
307 d->__nonoption_flags_len = d->__nonoption_flags_max_len;
308 }
309 else
310 d->__nonoption_flags_len = 0;
311#endif
312
313 return optstring;
314}
315
316/* Scan elements of ARGV (whose length is ARGC) for option characters
317 given in OPTSTRING.
318
319 If an element of ARGV starts with '-', and is not exactly "-" or "--",
320 then it is an option element. The characters of this element
321 (aside from the initial '-') are option characters. If `getopt'
322 is called repeatedly, it returns successively each of the option characters
323 from each of the option elements.
324
325 If `getopt' finds another option character, it returns that character,
326 updating `optind' and `nextchar' so that the next call to `getopt' can
327 resume the scan with the following option character or ARGV-element.
328
329 If there are no more option characters, `getopt' returns -1.
330 Then `optind' is the index in ARGV of the first ARGV-element
331 that is not an option. (The ARGV-elements have been permuted
332 so that those that are not options now come last.)
333
334 OPTSTRING is a string containing the legitimate option characters.
335 If an option character is seen that is not listed in OPTSTRING,
336 return '?' after printing an error message. If you set `opterr' to
337 zero, the error message is suppressed but we still return '?'.
338
339 If a char in OPTSTRING is followed by a colon, that means it wants an arg,
340 so the following text in the same ARGV-element, or the text of the following
341 ARGV-element, is returned in `optarg'. Two colons mean an option that
342 wants an optional arg; if there is text in the current ARGV-element,
343 it is returned in `optarg', otherwise `optarg' is set to zero.
344
345 If OPTSTRING starts with `-' or `+', it requests different methods of
346 handling the non-option ARGV-elements.
347 See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
348
349 Long-named options begin with `--' instead of `-'.
350 Their names may be abbreviated as long as the abbreviation is unique
351 or is an exact match for some defined option. If they have an
352 argument, it follows the option name in the same ARGV-element, separated
353 from the option name by a `=', or else the in next ARGV-element.
354 When `getopt' finds a long-named option, it returns 0 if that option's
355 `flag' field is nonzero, the value of the option's `val' field
356 if the `flag' field is zero.
357
358 LONGOPTS is a vector of `struct option' terminated by an
359 element containing a name which is zero.
360
361 LONGIND returns the index in LONGOPT of the long-named option found.
362 It is only valid when a long-named option has been found by the most
363 recent call.
364
365 If LONG_ONLY is nonzero, '-' as well as '--' can introduce
366 long-named options.
367
368 If POSIXLY_CORRECT is nonzero, behave as if the POSIXLY_CORRECT
369 environment variable were set. */
370
371int
372_getopt_internal_r (int argc, char **argv, const char *optstring,
373 const struct option *longopts, int *longind,
374 int long_only, int posixly_correct, struct _getopt_data *d)
375{
376 int print_errors = d->opterr;
377 if (optstring[0] == ':')
378 print_errors = 0;
379
380 if (argc < 1)
381 return -1;
382
383 d->optarg = NULL;
384
385 if (d->optind == 0 || !d->__initialized)
386 {
387 if (d->optind == 0)
388 d->optind = 1; /* Don't scan ARGV[0], the program name. */
389 optstring = _getopt_initialize (argc, argv, optstring,
390 posixly_correct, d);
391 d->__initialized = 1;
392 }
393
394 /* Test whether ARGV[optind] points to a non-option argument.
395 Either it does not have option syntax, or there is an environment flag
396 from the shell indicating it is not an option. The later information
397 is only used when the used in the GNU libc. */
398#if defined _LIBC && defined USE_NONOPTION_FLAGS
399# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0' \
400 || (d->optind < d->__nonoption_flags_len \
401 && __getopt_nonoption_flags[d->optind] == '1'))
402#else
403# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0')
404#endif
405
406 if (d->__nextchar == NULL || *d->__nextchar == '\0')
407 {
408 /* Advance to the next ARGV-element. */
409
410 /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
411 moved back by the user (who may also have changed the arguments). */
412 if (d->__last_nonopt > d->optind)
413 d->__last_nonopt = d->optind;
414 if (d->__first_nonopt > d->optind)
415 d->__first_nonopt = d->optind;
416
417 if (d->__ordering == PERMUTE)
418 {
419 /* If we have just processed some options following some non-options,
420 exchange them so that the options come first. */
421
422 if (d->__first_nonopt != d->__last_nonopt
423 && d->__last_nonopt != d->optind)
424 exchange ((char **) argv, d);
425 else if (d->__last_nonopt != d->optind)
426 d->__first_nonopt = d->optind;
427
428 /* Skip any additional non-options
429 and extend the range of non-options previously skipped. */
430
431 while (d->optind < argc && NONOPTION_P)
432 d->optind++;
433 d->__last_nonopt = d->optind;
434 }
435
436 /* The special ARGV-element `--' means premature end of options.
437 Skip it like a null option,
438 then exchange with previous non-options as if it were an option,
439 then skip everything else like a non-option. */
440
441 if (d->optind != argc && !strcmp (argv[d->optind], "--"))
442 {
443 d->optind++;
444
445 if (d->__first_nonopt != d->__last_nonopt
446 && d->__last_nonopt != d->optind)
447 exchange ((char **) argv, d);
448 else if (d->__first_nonopt == d->__last_nonopt)
449 d->__first_nonopt = d->optind;
450 d->__last_nonopt = argc;
451
452 d->optind = argc;
453 }
454
455 /* If we have done all the ARGV-elements, stop the scan
456 and back over any non-options that we skipped and permuted. */
457
458 if (d->optind == argc)
459 {
460 /* Set the next-arg-index to point at the non-options
461 that we previously skipped, so the caller will digest them. */
462 if (d->__first_nonopt != d->__last_nonopt)
463 d->optind = d->__first_nonopt;
464 return -1;
465 }
466
467 /* If we have come to a non-option and did not permute it,
468 either stop the scan or describe it to the caller and pass it by. */
469
470 if (NONOPTION_P)
471 {
472 if (d->__ordering == REQUIRE_ORDER)
473 return -1;
474 d->optarg = argv[d->optind++];
475 return 1;
476 }
477
478 /* We have found another option-ARGV-element.
479 Skip the initial punctuation. */
480
481 d->__nextchar = (argv[d->optind] + 1
482 + (longopts != NULL && argv[d->optind][1] == '-'));
483 }
484
485 /* Decode the current option-ARGV-element. */
486
487 /* Check whether the ARGV-element is a long option.
488
489 If long_only and the ARGV-element has the form "-f", where f is
490 a valid short option, don't consider it an abbreviated form of
491 a long option that starts with f. Otherwise there would be no
492 way to give the -f short option.
493
494 On the other hand, if there's a long option "fubar" and
495 the ARGV-element is "-fu", do consider that an abbreviation of
496 the long option, just like "--fu", and not "-f" with arg "u".
497
498 This distinction seems to be the most useful approach. */
499
500 if (longopts != NULL
501 && (argv[d->optind][1] == '-'
502 || (long_only && (argv[d->optind][2]
503 || !strchr (optstring, argv[d->optind][1])))))
504 {
505 char *nameend;
506 const struct option *p;
507 const struct option *pfound = NULL;
508 int exact = 0;
509 int ambig = 0;
510 int indfound = -1;
511 int option_index;
512
513 for (nameend = d->__nextchar; *nameend && *nameend != '='; nameend++)
514 /* Do nothing. */ ;
515
516 /* Test all long options for either exact match
517 or abbreviated matches. */
518 for (p = longopts, option_index = 0; p->name; p++, option_index++)
519 if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar))
520 {
521 if ((unsigned int) (nameend - d->__nextchar)
522 == (unsigned int) strlen (p->name))
523 {
524 /* Exact match found. */
525 pfound = p;
526 indfound = option_index;
527 exact = 1;
528 break;
529 }
530 else if (pfound == NULL)
531 {
532 /* First nonexact match found. */
533 pfound = p;
534 indfound = option_index;
535 }
536 else if (long_only
537 || pfound->has_arg != p->has_arg
538 || pfound->flag != p->flag
539 || pfound->val != p->val)
540 /* Second or later nonexact match found. */
541 ambig = 1;
542 }
543
544 if (ambig && !exact)
545 {
546 if (print_errors)
547 {
548#if defined _LIBC && defined USE_IN_LIBIO
549 char *buf;
550
551 if (__asprintf (&buf, _("%s: option `%s' is ambiguous\n"),
552 argv[0], argv[d->optind]) >= 0)
553 {
554 _IO_flockfile (stderr);
555
556 int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
557 ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
558
559 if (_IO_fwide (stderr, 0) > 0)
560 __fwprintf (stderr, L"%s", buf);
561 else
562 fputs (buf, stderr);
563
564 ((_IO_FILE *) stderr)->_flags2 = old_flags2;
565 _IO_funlockfile (stderr);
566
567 free (buf);
568 }
569#else
570 fprintf (stderr, _("%s: option `%s' is ambiguous\n"),
571 argv[0], argv[d->optind]);
572#endif
573 }
574 d->__nextchar += strlen (d->__nextchar);
575 d->optind++;
576 d->optopt = 0;
577 return '?';
578 }
579
580 if (pfound != NULL)
581 {
582 option_index = indfound;
583 d->optind++;
584 if (*nameend)
585 {
586 /* Don't test has_arg with >, because some C compilers don't
587 allow it to be used on enums. */
588 if (pfound->has_arg)
589 d->optarg = nameend + 1;
590 else
591 {
592 if (print_errors)
593 {
594#if defined _LIBC && defined USE_IN_LIBIO
595 char *buf;
596 int n;
597#endif
598
599 if (argv[d->optind - 1][1] == '-')
600 {
601 /* --option */
602#if defined _LIBC && defined USE_IN_LIBIO
603 n = __asprintf (&buf, _("\
604%s: option `--%s' doesn't allow an argument\n"),
605 argv[0], pfound->name);
606#else
607 fprintf (stderr, _("\
608%s: option `--%s' doesn't allow an argument\n"),
609 argv[0], pfound->name);
610#endif
611 }
612 else
613 {
614 /* +option or -option */
615#if defined _LIBC && defined USE_IN_LIBIO
616 n = __asprintf (&buf, _("\
617%s: option `%c%s' doesn't allow an argument\n"),
618 argv[0], argv[d->optind - 1][0],
619 pfound->name);
620#else
621 fprintf (stderr, _("\
622%s: option `%c%s' doesn't allow an argument\n"),
623 argv[0], argv[d->optind - 1][0],
624 pfound->name);
625#endif
626 }
627
628#if defined _LIBC && defined USE_IN_LIBIO
629 if (n >= 0)
630 {
631 _IO_flockfile (stderr);
632
633 int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
634 ((_IO_FILE *) stderr)->_flags2
635 |= _IO_FLAGS2_NOTCANCEL;
636
637 if (_IO_fwide (stderr, 0) > 0)
638 __fwprintf (stderr, L"%s", buf);
639 else
640 fputs (buf, stderr);
641
642 ((_IO_FILE *) stderr)->_flags2 = old_flags2;
643 _IO_funlockfile (stderr);
644
645 free (buf);
646 }
647#endif
648 }
649
650 d->__nextchar += strlen (d->__nextchar);
651
652 d->optopt = pfound->val;
653 return '?';
654 }
655 }
656 else if (pfound->has_arg == 1)
657 {
658 if (d->optind < argc)
659 d->optarg = argv[d->optind++];
660 else
661 {
662 if (print_errors)
663 {
664#if defined _LIBC && defined USE_IN_LIBIO
665 char *buf;
666
667 if (__asprintf (&buf, _("\
668%s: option `%s' requires an argument\n"),
669 argv[0], argv[d->optind - 1]) >= 0)
670 {
671 _IO_flockfile (stderr);
672
673 int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
674 ((_IO_FILE *) stderr)->_flags2
675 |= _IO_FLAGS2_NOTCANCEL;
676
677 if (_IO_fwide (stderr, 0) > 0)
678 __fwprintf (stderr, L"%s", buf);
679 else
680 fputs (buf, stderr);
681
682 ((_IO_FILE *) stderr)->_flags2 = old_flags2;
683 _IO_funlockfile (stderr);
684
685 free (buf);
686 }
687#else
688 fprintf (stderr,
689 _("%s: option `%s' requires an argument\n"),
690 argv[0], argv[d->optind - 1]);
691#endif
692 }
693 d->__nextchar += strlen (d->__nextchar);
694 d->optopt = pfound->val;
695 return optstring[0] == ':' ? ':' : '?';
696 }
697 }
698 d->__nextchar += strlen (d->__nextchar);
699 if (longind != NULL)
700 *longind = option_index;
701 if (pfound->flag)
702 {
703 *(pfound->flag) = pfound->val;
704 return 0;
705 }
706 return pfound->val;
707 }
708
709 /* Can't find it as a long option. If this is not getopt_long_only,
710 or the option starts with '--' or is not a valid short
711 option, then it's an error.
712 Otherwise interpret it as a short option. */
713 if (!long_only || argv[d->optind][1] == '-'
714 || strchr (optstring, *d->__nextchar) == NULL)
715 {
716 if (print_errors)
717 {
718#if defined _LIBC && defined USE_IN_LIBIO
719 char *buf;
720 int n;
721#endif
722
723 if (argv[d->optind][1] == '-')
724 {
725 /* --option */
726#if defined _LIBC && defined USE_IN_LIBIO
727 n = __asprintf (&buf, _("%s: unrecognized option `--%s'\n"),
728 argv[0], d->__nextchar);
729#else
730 fprintf (stderr, _("%s: unrecognized option `--%s'\n"),
731 argv[0], d->__nextchar);
732#endif
733 }
734 else
735 {
736 /* +option or -option */
737#if defined _LIBC && defined USE_IN_LIBIO
738 n = __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"),
739 argv[0], argv[d->optind][0], d->__nextchar);
740#else
741 fprintf (stderr, _("%s: unrecognized option `%c%s'\n"),
742 argv[0], argv[d->optind][0], d->__nextchar);
743#endif
744 }
745
746#if defined _LIBC && defined USE_IN_LIBIO
747 if (n >= 0)
748 {
749 _IO_flockfile (stderr);
750
751 int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
752 ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
753
754 if (_IO_fwide (stderr, 0) > 0)
755 __fwprintf (stderr, L"%s", buf);
756 else
757 fputs (buf, stderr);
758
759 ((_IO_FILE *) stderr)->_flags2 = old_flags2;
760 _IO_funlockfile (stderr);
761
762 free (buf);
763 }
764#endif
765 }
766 d->__nextchar = (char *) "";
767 d->optind++;
768 d->optopt = 0;
769 return '?';
770 }
771 }
772
773 /* Look at and handle the next short option-character. */
774
775 {
776 char c = *d->__nextchar++;
777 char *temp = strchr (optstring, c);
778
779 /* Increment `optind' when we start to process its last character. */
780 if (*d->__nextchar == '\0')
781 ++d->optind;
782
783 if (temp == NULL || c == ':')
784 {
785 if (print_errors)
786 {
787#if defined _LIBC && defined USE_IN_LIBIO
788 char *buf;
789 int n;
790#endif
791
792 if (d->__posixly_correct)
793 {
794 /* 1003.2 specifies the format of this message. */
795#if defined _LIBC && defined USE_IN_LIBIO
796 n = __asprintf (&buf, _("%s: illegal option -- %c\n"),
797 argv[0], c);
798#else
799 fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c);
800#endif
801 }
802 else
803 {
804#if defined _LIBC && defined USE_IN_LIBIO
805 n = __asprintf (&buf, _("%s: invalid option -- %c\n"),
806 argv[0], c);
807#else
808 fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c);
809#endif
810 }
811
812#if defined _LIBC && defined USE_IN_LIBIO
813 if (n >= 0)
814 {
815 _IO_flockfile (stderr);
816
817 int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
818 ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
819
820 if (_IO_fwide (stderr, 0) > 0)
821 __fwprintf (stderr, L"%s", buf);
822 else
823 fputs (buf, stderr);
824
825 ((_IO_FILE *) stderr)->_flags2 = old_flags2;
826 _IO_funlockfile (stderr);
827
828 free (buf);
829 }
830#endif
831 }
832 d->optopt = c;
833 return '?';
834 }
835 /* Convenience. Treat POSIX -W foo same as long option --foo */
836 if (temp[0] == 'W' && temp[1] == ';')
837 {
838 char *nameend;
839 const struct option *p;
840 const struct option *pfound = NULL;
841 int exact = 0;
842 int ambig = 0;
843 int indfound = 0;
844 int option_index;
845
846 /* This is an option that requires an argument. */
847 if (*d->__nextchar != '\0')
848 {
849 d->optarg = d->__nextchar;
850 /* If we end this ARGV-element by taking the rest as an arg,
851 we must advance to the next element now. */
852 d->optind++;
853 }
854 else if (d->optind == argc)
855 {
856 if (print_errors)
857 {
858 /* 1003.2 specifies the format of this message. */
859#if defined _LIBC && defined USE_IN_LIBIO
860 char *buf;
861
862 if (__asprintf (&buf,
863 _("%s: option requires an argument -- %c\n"),
864 argv[0], c) >= 0)
865 {
866 _IO_flockfile (stderr);
867
868 int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
869 ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
870
871 if (_IO_fwide (stderr, 0) > 0)
872 __fwprintf (stderr, L"%s", buf);
873 else
874 fputs (buf, stderr);
875
876 ((_IO_FILE *) stderr)->_flags2 = old_flags2;
877 _IO_funlockfile (stderr);
878
879 free (buf);
880 }
881#else
882 fprintf (stderr, _("%s: option requires an argument -- %c\n"),
883 argv[0], c);
884#endif
885 }
886 d->optopt = c;
887 if (optstring[0] == ':')
888 c = ':';
889 else
890 c = '?';
891 return c;
892 }
893 else
894 /* We already incremented `d->optind' once;
895 increment it again when taking next ARGV-elt as argument. */
896 d->optarg = argv[d->optind++];
897
898 /* optarg is now the argument, see if it's in the
899 table of longopts. */
900
901 for (d->__nextchar = nameend = d->optarg; *nameend && *nameend != '=';
902 nameend++)
903 /* Do nothing. */ ;
904
905 /* Test all long options for either exact match
906 or abbreviated matches. */
907 for (p = longopts, option_index = 0; p->name; p++, option_index++)
908 if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar))
909 {
910 if ((unsigned int) (nameend - d->__nextchar) == strlen (p->name))
911 {
912 /* Exact match found. */
913 pfound = p;
914 indfound = option_index;
915 exact = 1;
916 break;
917 }
918 else if (pfound == NULL)
919 {
920 /* First nonexact match found. */
921 pfound = p;
922 indfound = option_index;
923 }
924 else
925 /* Second or later nonexact match found. */
926 ambig = 1;
927 }
928 if (ambig && !exact)
929 {
930 if (print_errors)
931 {
932#if defined _LIBC && defined USE_IN_LIBIO
933 char *buf;
934
935 if (__asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"),
936 argv[0], argv[d->optind]) >= 0)
937 {
938 _IO_flockfile (stderr);
939
940 int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
941 ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
942
943 if (_IO_fwide (stderr, 0) > 0)
944 __fwprintf (stderr, L"%s", buf);
945 else
946 fputs (buf, stderr);
947
948 ((_IO_FILE *) stderr)->_flags2 = old_flags2;
949 _IO_funlockfile (stderr);
950
951 free (buf);
952 }
953#else
954 fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"),
955 argv[0], argv[d->optind]);
956#endif
957 }
958 d->__nextchar += strlen (d->__nextchar);
959 d->optind++;
960 return '?';
961 }
962 if (pfound != NULL)
963 {
964 option_index = indfound;
965 if (*nameend)
966 {
967 /* Don't test has_arg with >, because some C compilers don't
968 allow it to be used on enums. */
969 if (pfound->has_arg)
970 d->optarg = nameend + 1;
971 else
972 {
973 if (print_errors)
974 {
975#if defined _LIBC && defined USE_IN_LIBIO
976 char *buf;
977
978 if (__asprintf (&buf, _("\
979%s: option `-W %s' doesn't allow an argument\n"),
980 argv[0], pfound->name) >= 0)
981 {
982 _IO_flockfile (stderr);
983
984 int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
985 ((_IO_FILE *) stderr)->_flags2
986 |= _IO_FLAGS2_NOTCANCEL;
987
988 if (_IO_fwide (stderr, 0) > 0)
989 __fwprintf (stderr, L"%s", buf);
990 else
991 fputs (buf, stderr);
992
993 ((_IO_FILE *) stderr)->_flags2 = old_flags2;
994 _IO_funlockfile (stderr);
995
996 free (buf);
997 }
998#else
999 fprintf (stderr, _("\
1000%s: option `-W %s' doesn't allow an argument\n"),
1001 argv[0], pfound->name);
1002#endif
1003 }
1004
1005 d->__nextchar += strlen (d->__nextchar);
1006 return '?';
1007 }
1008 }
1009 else if (pfound->has_arg == 1)
1010 {
1011 if (d->optind < argc)
1012 d->optarg = argv[d->optind++];
1013 else
1014 {
1015 if (print_errors)
1016 {
1017#if defined _LIBC && defined USE_IN_LIBIO
1018 char *buf;
1019
1020 if (__asprintf (&buf, _("\
1021%s: option `%s' requires an argument\n"),
1022 argv[0], argv[d->optind - 1]) >= 0)
1023 {
1024 _IO_flockfile (stderr);
1025
1026 int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
1027 ((_IO_FILE *) stderr)->_flags2
1028 |= _IO_FLAGS2_NOTCANCEL;
1029
1030 if (_IO_fwide (stderr, 0) > 0)
1031 __fwprintf (stderr, L"%s", buf);
1032 else
1033 fputs (buf, stderr);
1034
1035 ((_IO_FILE *) stderr)->_flags2 = old_flags2;
1036 _IO_funlockfile (stderr);
1037
1038 free (buf);
1039 }
1040#else
1041 fprintf (stderr,
1042 _("%s: option `%s' requires an argument\n"),
1043 argv[0], argv[d->optind - 1]);
1044#endif
1045 }
1046 d->__nextchar += strlen (d->__nextchar);
1047 return optstring[0] == ':' ? ':' : '?';
1048 }
1049 }
1050 d->__nextchar += strlen (d->__nextchar);
1051 if (longind != NULL)
1052 *longind = option_index;
1053 if (pfound->flag)
1054 {
1055 *(pfound->flag) = pfound->val;
1056 return 0;
1057 }
1058 return pfound->val;
1059 }
1060 d->__nextchar = NULL;
1061 return 'W'; /* Let the application handle it. */
1062 }
1063 if (temp[1] == ':')
1064 {
1065 if (temp[2] == ':')
1066 {
1067 /* This is an option that accepts an argument optionally. */
1068 if (*d->__nextchar != '\0')
1069 {
1070 d->optarg = d->__nextchar;
1071 d->optind++;
1072 }
1073 else
1074 d->optarg = NULL;
1075 d->__nextchar = NULL;
1076 }
1077 else
1078 {
1079 /* This is an option that requires an argument. */
1080 if (*d->__nextchar != '\0')
1081 {
1082 d->optarg = d->__nextchar;
1083 /* If we end this ARGV-element by taking the rest as an arg,
1084 we must advance to the next element now. */
1085 d->optind++;
1086 }
1087 else if (d->optind == argc)
1088 {
1089 if (print_errors)
1090 {
1091 /* 1003.2 specifies the format of this message. */
1092#if defined _LIBC && defined USE_IN_LIBIO
1093 char *buf;
1094
1095 if (__asprintf (&buf, _("\
1096%s: option requires an argument -- %c\n"),
1097 argv[0], c) >= 0)
1098 {
1099 _IO_flockfile (stderr);
1100
1101 int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
1102 ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
1103
1104 if (_IO_fwide (stderr, 0) > 0)
1105 __fwprintf (stderr, L"%s", buf);
1106 else
1107 fputs (buf, stderr);
1108
1109 ((_IO_FILE *) stderr)->_flags2 = old_flags2;
1110 _IO_funlockfile (stderr);
1111
1112 free (buf);
1113 }
1114#else
1115 fprintf (stderr,
1116 _("%s: option requires an argument -- %c\n"),
1117 argv[0], c);
1118#endif
1119 }
1120 d->optopt = c;
1121 if (optstring[0] == ':')
1122 c = ':';
1123 else
1124 c = '?';
1125 }
1126 else
1127 /* We already incremented `optind' once;
1128 increment it again when taking next ARGV-elt as argument. */
1129 d->optarg = argv[d->optind++];
1130 d->__nextchar = NULL;
1131 }
1132 }
1133 return c;
1134 }
1135}
1136
1137int
1138_getopt_internal (int argc, char **argv, const char *optstring,
1139 const struct option *longopts, int *longind,
1140 int long_only, int posixly_correct)
1141{
1142 int result;
1143
1144 getopt_data.optind = optind;
1145 getopt_data.opterr = opterr;
1146
1147 result = _getopt_internal_r (argc, argv, optstring, longopts, longind,
1148 long_only, posixly_correct, &getopt_data);
1149
1150 optind = getopt_data.optind;
1151 optarg = getopt_data.optarg;
1152 optopt = getopt_data.optopt;
1153
1154 return result;
1155}
1156
/* Value passed as the `posixly_correct' argument by plain `getopt':
   0 when built inside glibc (LSB-compliant behaviour), 1 when built
   standalone (POSIX-compliant behaviour).  */
enum
{
#if _LIBC
  POSIXLY_CORRECT = 0
#else
  POSIXLY_CORRECT = 1
#endif
};

/* Classic short-option entry point: no long-option table, no long
   index, not long-only.  */
int
getopt (int argc, char *const *argv, const char *optstring)
{
  char **args = (char **) argv;

  return _getopt_internal (argc, args, optstring, NULL, NULL, 0,
                           POSIXLY_CORRECT);
}
1171
1172
1173#ifdef TEST
1174
1175/* Compile with -DTEST to make an executable for use in testing
1176 the above definition of `getopt'. */
1177
1178int
1179main (int argc, char **argv)
1180{
1181 int c;
1182 int digit_optind = 0;
1183
1184 while (1)
1185 {
1186 int this_option_optind = optind ? optind : 1;
1187
1188 c = getopt (argc, argv, "abc:d:0123456789");
1189 if (c == -1)
1190 break;
1191
1192 switch (c)
1193 {
1194 case '0':
1195 case '1':
1196 case '2':
1197 case '3':
1198 case '4':
1199 case '5':
1200 case '6':
1201 case '7':
1202 case '8':
1203 case '9':
1204 if (digit_optind != 0 && digit_optind != this_option_optind)
1205 printf ("digits occur in two different argv-elements.\n");
1206 digit_optind = this_option_optind;
1207 printf ("option %c\n", c);
1208 break;
1209
1210 case 'a':
1211 printf ("option a\n");
1212 break;
1213
1214 case 'b':
1215 printf ("option b\n");
1216 break;
1217
1218 case 'c':
1219 printf ("option c with value `%s'\n", optarg);
1220 break;
1221
1222 case '?':
1223 break;
1224
1225 default:
1226 printf ("?? getopt returned character code 0%o ??\n", c);
1227 }
1228 }
1229
1230 if (optind < argc)
1231 {
1232 printf ("non-option ARGV-elements: ");
1233 while (optind < argc)
1234 printf ("%s ", argv[optind++]);
1235 printf ("\n");
1236 }
1237
1238 exit (0);
1239}
1240
1241#endif /* TEST */
diff --git a/lib/getopt1.c b/lib/getopt1.c
deleted file mode 100644
index 25d79265..00000000
--- a/lib/getopt1.c
+++ /dev/null
@@ -1,174 +0,0 @@
1/* getopt_long and getopt_long_only entry points for GNU getopt.
2 Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98,2004
3 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License along
17 with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24#ifdef _LIBC
25# include <getopt.h>
26#else
27# include "getopt.h"
28#endif
29#include "getopt_int.h"
30
31#include <stdio.h>
32
33/* This needs to come after some library #include
34 to get __GNU_LIBRARY__ defined. */
35#ifdef __GNU_LIBRARY__
36#include <stdlib.h>
37#endif
38
39#ifndef NULL
40#define NULL 0
41#endif
42
43int
44getopt_long (int argc, char *__getopt_argv_const *argv, const char *options,
45 const struct option *long_options, int *opt_index)
46{
47 return _getopt_internal (argc, (char **) argv, options, long_options,
48 opt_index, 0, 0);
49}
50
/* Variant of getopt_long that keeps its scanning state in *D instead
   of the global variables; forwards to _getopt_internal_r with
   long-only matching off and the posixly-correct flag set to 0.  */
int
_getopt_long_r (int argc, char **argv, const char *options,
                const struct option *long_options, int *opt_index,
                struct _getopt_data *d)
{
  const int long_only = 0;
  const int posixly_correct = 0;

  return _getopt_internal_r (argc, argv, options, long_options, opt_index,
                             long_only, posixly_correct, d);
}
59
60/* Like getopt_long, but '-' as well as '--' can indicate a long option.
61 If an option that starts with '-' (not '--') doesn't match a long option,
62 but does match a short option, it is parsed as a short option
63 instead. */
64
65int
66getopt_long_only (int argc, char *__getopt_argv_const *argv,
67 const char *options,
68 const struct option *long_options, int *opt_index)
69{
70 return _getopt_internal (argc, (char **) argv, options, long_options,
71 opt_index, 1, 0);
72}
73
/* Variant of getopt_long_only that keeps its scanning state in *D;
   forwards to _getopt_internal_r with long-only matching on and the
   posixly-correct flag set to 0.  */
int
_getopt_long_only_r (int argc, char **argv, const char *options,
                     const struct option *long_options, int *opt_index,
                     struct _getopt_data *d)
{
  const int long_only = 1;
  const int posixly_correct = 0;

  return _getopt_internal_r (argc, argv, options, long_options, opt_index,
                             long_only, posixly_correct, d);
}
82
83
84#ifdef TEST
85
86#include <stdio.h>
87
/* Test driver for getopt_long: parse ARGV against a small table of
   long options plus the short-option string "abc:d:0123456789", print
   one line per option found, then list the remaining non-option
   arguments.  Always exits with status 0.  */
int
main (int argc, char **argv)
{
  /* Long options understood by the driver.  None of them sets a flag
     variable and every `val' is 0, so getopt_long reports each one by
     returning 0.  */
  static struct option long_options[] =
    {
      {"add", 1, 0, 0},
      {"append", 0, 0, 0},
      {"delete", 1, 0, 0},
      {"verbose", 0, 0, 0},
      {"create", 0, 0, 0},
      {"file", 1, 0, 0},
      {0, 0, 0, 0}
    };
  int last_digit_optind = 0;
  int opt;

  for (;;)
    {
      /* Must be sampled before the call: it names the argv element the
         upcoming option is taken from.  */
      int optind_before = optind ? optind : 1;
      int option_index = 0;

      opt = getopt_long (argc, argv, "abc:d:0123456789",
                         long_options, &option_index);
      if (opt == -1)
        break;

      /* Digit options share one handler that also notices digits
         spread over more than one argv element.  */
      if (opt >= '0' && opt <= '9')
        {
          if (last_digit_optind != 0 && last_digit_optind != optind_before)
            printf ("digits occur in two different argv-elements.\n");
          last_digit_optind = optind_before;
          printf ("option %c\n", opt);
          continue;
        }

      switch (opt)
        {
        case 0:
          /* A long option from the table above.  */
          printf ("option %s", long_options[option_index].name);
          if (optarg)
            printf (" with arg %s", optarg);
          printf ("\n");
          break;

        case 'a':
          printf ("option a\n");
          break;

        case 'b':
          printf ("option b\n");
          break;

        case 'c':
          printf ("option c with value `%s'\n", optarg);
          break;

        case 'd':
          printf ("option d with value `%s'\n", optarg);
          break;

        case '?':
          break;

        default:
          printf ("?? getopt returned character code 0%o ??\n", opt);
        }
    }

  if (optind < argc)
    {
      printf ("non-option ARGV-elements: ");
      for (; optind < argc; optind++)
        printf ("%s ", argv[optind]);
      printf ("\n");
    }

  exit (0);
}
173
174#endif /* TEST */
diff --git a/lib/getopt_.h b/lib/getopt_.h
deleted file mode 100644
index 3c406e53..00000000
--- a/lib/getopt_.h
+++ /dev/null
@@ -1,225 +0,0 @@
1/* Declarations for getopt.
2 Copyright (C) 1989-1994,1996-1999,2001,2003,2004,2005
3 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License along
17 with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20#ifndef _GETOPT_H
21
22#ifndef __need_getopt
23# define _GETOPT_H 1
24#endif
25
26/* Standalone applications should #define __GETOPT_PREFIX to an
27 identifier that prefixes the external functions and variables
28 defined in this header. When this happens, include the
29 headers that might declare getopt so that they will not cause
30 confusion if included after this file. Then systematically rename
31 identifiers so that they do not collide with the system functions
32 and variables. Renaming avoids problems with some compilers and
33 linkers. */
34#if defined __GETOPT_PREFIX && !defined __need_getopt
35# include <stdlib.h>
36# include <stdio.h>
37# include <unistd.h>
38# undef __need_getopt
39# undef getopt
40# undef getopt_long
41# undef getopt_long_only
42# undef optarg
43# undef opterr
44# undef optind
45# undef optopt
46# define __GETOPT_CONCAT(x, y) x ## y
47# define __GETOPT_XCONCAT(x, y) __GETOPT_CONCAT (x, y)
48# define __GETOPT_ID(y) __GETOPT_XCONCAT (__GETOPT_PREFIX, y)
49# define getopt __GETOPT_ID (getopt)
50# define getopt_long __GETOPT_ID (getopt_long)
51# define getopt_long_only __GETOPT_ID (getopt_long_only)
52# define optarg __GETOPT_ID (optarg)
53# define opterr __GETOPT_ID (opterr)
54# define optind __GETOPT_ID (optind)
55# define optopt __GETOPT_ID (optopt)
56#endif
57
58/* Standalone applications get correct prototypes for getopt_long and
59 getopt_long_only; they declare "char **argv". libc uses prototypes
60 with "char *const *argv" that are incorrect because getopt_long and
61 getopt_long_only can permute argv; this is required for backward
62 compatibility (e.g., for LSB 2.0.1).
63
64 This used to be `#if defined __GETOPT_PREFIX && !defined __need_getopt',
65 but it caused redefinition warnings if both unistd.h and getopt.h were
66 included, since unistd.h includes getopt.h having previously defined
67 __need_getopt.
68
69 The only place where __getopt_argv_const is used is in definitions
70 of getopt_long and getopt_long_only below, but these are visible
71 only if __need_getopt is not defined, so it is quite safe to rewrite
72 the conditional as follows:
73*/
74#if !defined __need_getopt
75# if defined __GETOPT_PREFIX
76# define __getopt_argv_const /* empty */
77# else
78# define __getopt_argv_const const
79# endif
80#endif
81
82/* If __GNU_LIBRARY__ is not already defined, either we are being used
83 standalone, or this is the first header included in the source file.
84 If we are being used with glibc, we need to include <features.h>, but
85 that does not exist if we are standalone. So: if __GNU_LIBRARY__ is
86 not defined, include <ctype.h>, which will pull in <features.h> for us
87 if it's from glibc. (Why ctype.h? It's guaranteed to exist and it
88 doesn't flood the namespace with stuff the way some other headers do.) */
89#if !defined __GNU_LIBRARY__
90# include <ctype.h>
91#endif
92
93#ifndef __THROW
94# ifndef __GNUC_PREREQ
95# define __GNUC_PREREQ(maj, min) (0)
96# endif
97# if defined __cplusplus && __GNUC_PREREQ (2,8)
98# define __THROW throw ()
99# else
100# define __THROW
101# endif
102#endif
103
104#ifdef __cplusplus
105extern "C" {
106#endif
107
108/* For communication from `getopt' to the caller.
109 When `getopt' finds an option that takes an argument,
110 the argument value is returned here.
111 Also, when `ordering' is RETURN_IN_ORDER,
112 each non-option ARGV-element is returned here. */
113
114extern char *optarg;
115
116/* Index in ARGV of the next element to be scanned.
117 This is used for communication to and from the caller
118 and for communication between successive calls to `getopt'.
119
120 On entry to `getopt', zero means this is the first call; initialize.
121
122 When `getopt' returns -1, this is the index of the first of the
123 non-option elements that the caller should itself scan.
124
125 Otherwise, `optind' communicates from one call to the next
126 how much of ARGV has been scanned so far. */
127
128extern int optind;
129
130/* Callers store zero here to inhibit the error message `getopt' prints
131 for unrecognized options. */
132
133extern int opterr;
134
135/* Set to an option character which was unrecognized. */
136
137extern int optopt;
138
#ifndef __need_getopt
/* One entry of the long-option table handed to getopt_long or
   getopt_long_only.  The table is a vector of these structures whose
   last element has a zero `name'.

   `has_arg' is one of:
     no_argument        (or 0)  the option takes no argument,
     required_argument  (or 1)  the option requires an argument,
     optional_argument  (or 2)  the option takes an optional argument.

   When `flag' is not NULL it points to a variable that is set to
   `val' when the option is found and left unchanged otherwise.

   To make a long option do something other than store a compiled-in
   constant in an `int' (for instance, pick up a value from `optarg'),
   set `flag' to zero and `val' to a nonzero value -- the equivalent
   single-letter option character, if there is one.  For long options
   with a zero `flag', `getopt' returns the contents of `val'.  */

struct option
{
  const char *name;   /* Option name.  */
  int has_arg;        /* Deliberately an int rather than an enum: some
                         compilers complain about type mismatches in
                         all the code that assumes it is an int.  */
  int *flag;          /* Variable to set, or zero.  */
  int val;            /* Value stored in *flag, or returned.  */
};

/* Symbolic names for the values of the `has_arg' member.  */

# define no_argument            0
# define required_argument      1
# define optional_argument      2
#endif  /* need getopt */
177
178
179/* Get definitions and prototypes for functions to process the
180 arguments in ARGV (ARGC of them, minus the program name) for
181 options given in OPTS.
182
183 Return the option character from OPTS just read. Return -1 when
184 there are no more options. For unrecognized options, or options
185 missing arguments, `optopt' is set to the option letter, and '?' is
186 returned.
187
188 The OPTS string is a list of characters which are recognized option
189 letters, optionally followed by colons, specifying that that letter
190 takes an argument, to be placed in `optarg'.
191
192 If a letter in OPTS is followed by two colons, its argument is
193 optional. This behavior is specific to the GNU `getopt'.
194
195 The argument `--' causes premature termination of argument
196 scanning, explicitly telling `getopt' that there are no more
197 options.
198
199 If OPTS begins with `-', then non-option arguments are treated as
200 arguments to the option '\1'. This behavior is specific to the GNU
201 `getopt'. */
202
203extern int getopt (int ___argc, char *const *___argv, const char *__shortopts)
204 __THROW;
205
206#ifndef __need_getopt
207extern int getopt_long (int ___argc, char *__getopt_argv_const *___argv,
208 const char *__shortopts,
209 const struct option *__longopts, int *__longind)
210 __THROW;
211extern int getopt_long_only (int ___argc, char *__getopt_argv_const *___argv,
212 const char *__shortopts,
213 const struct option *__longopts, int *__longind)
214 __THROW;
215
216#endif
217
218#ifdef __cplusplus
219}
220#endif
221
222/* Make sure we later can get all the definitions and declarations. */
223#undef __need_getopt
224
225#endif /* getopt.h */
diff --git a/lib/getopt_int.h b/lib/getopt_int.h
deleted file mode 100644
index 401579fd..00000000
--- a/lib/getopt_int.h
+++ /dev/null
@@ -1,131 +0,0 @@
1/* Internal declarations for getopt.
2 Copyright (C) 1989-1994,1996-1999,2001,2003,2004
3 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License along
17 with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20#ifndef _GETOPT_INT_H
21#define _GETOPT_INT_H 1
22
23extern int _getopt_internal (int ___argc, char **___argv,
24 const char *__shortopts,
25 const struct option *__longopts, int *__longind,
26 int __long_only, int __posixly_correct);
27
28
29/* Reentrant versions which can handle parsing multiple argument
30 vectors at the same time. */
31
/* State block used by the reentrant getopt functions.  */
struct _getopt_data
{
  /* Counterparts of the global variables of the same names, with
     exactly the same meaning, used by the reentrant versions of
     getopt.  */
  int optind;
  int opterr;
  int optopt;
  char *optarg;

  /* Everything below is internal.  */

  /* Nonzero once the internal members have been initialized.  */
  int __initialized;

  /* Next character to scan within the option element that produced
     the last option character returned, so that scanning can resume
     where it stopped.

     Zero, or a pointer to an empty string, means: resume by advancing
     to the next ARGV element.  */
  char *__nextchar;

  /* How options that follow non-option ARGV elements are handled.

     If the caller did not specify anything, the default is
     REQUIRE_ORDER when the environment variable POSIXLY_CORRECT is
     defined and PERMUTE otherwise.

     REQUIRE_ORDER: do not recognize them as options; option
     processing stops at the first non-option.  This is what Unix
     does.  The mode is selected by setting the environment variable
     POSIXLY_CORRECT, by using `+' as the first character of the list
     of option characters, or by calling getopt.

     PERMUTE: the default.  The contents of ARGV are permuted during
     the scan so that eventually all non-options are at the end.  This
     lets options be given in any order, even with programs that were
     not written to expect this.

     RETURN_IN_ORDER: for programs written to expect options and other
     ARGV elements in any order and that care about the ordering of
     the two.  Each non-option ARGV element is described as if it were
     the argument of an option with character code 1.  Using `-' as
     the first character of the list of option characters selects this
     mode.

     The special argument `--' forces an end of option scanning
     regardless of the value of `ordering'.  With RETURN_IN_ORDER,
     only `--' can cause `getopt' to return -1 with `optind' != ARGC.  */
  enum
    {
      REQUIRE_ORDER,
      PERMUTE,
      RETURN_IN_ORDER
    } __ordering;

  /* Nonzero if the POSIXLY_CORRECT environment variable is set or if
     getopt was the function called.  */
  int __posixly_correct;

  /* Bookkeeping for the permutation of arguments: the part of ARGV
     holding non-options that have been skipped.  `first_nonopt' is
     the index in ARGV of the first of them; `last_nonopt' is the
     index after the last of them.  */
  int __first_nonopt;
  int __last_nonopt;

#if defined _LIBC && defined USE_NONOPTION_FLAGS
  int __nonoption_flags_max_len;
  int __nonoption_flags_len;
#endif
};
109
110/* The initializer is necessary to set OPTIND and OPTERR to their
111 default values and to clear the initialization flag. */
112#define _GETOPT_DATA_INITIALIZER { 1, 1 }
113
114extern int _getopt_internal_r (int ___argc, char **___argv,
115 const char *__shortopts,
116 const struct option *__longopts, int *__longind,
117 int __long_only, int __posixly_correct,
118 struct _getopt_data *__data);
119
120extern int _getopt_long_r (int ___argc, char **___argv,
121 const char *__shortopts,
122 const struct option *__longopts, int *__longind,
123 struct _getopt_data *__data);
124
125extern int _getopt_long_only_r (int ___argc, char **___argv,
126 const char *__shortopts,
127 const struct option *__longopts,
128 int *__longind,
129 struct _getopt_data *__data);
130
131#endif /* getopt_int.h */
diff --git a/lib/gettext.h b/lib/gettext.h
deleted file mode 100644
index 285cb314..00000000
--- a/lib/gettext.h
+++ /dev/null
@@ -1,78 +0,0 @@
1/* Convenience header for conditional use of GNU <libintl.h>.
2 Copyright (C) 1995-1998, 2000-2002, 2004 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along
15 with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18#ifndef _LIBGETTEXT_H
19#define _LIBGETTEXT_H 1
20
21/* NLS can be disabled through the configure --disable-nls option. */
22#if ENABLE_NLS
23
24/* Get declarations of GNU message catalog functions. */
25# include <libintl.h>
26
27#else
28
29/* Solaris /usr/include/locale.h includes /usr/include/libintl.h, which
30 chokes if dcgettext is defined as a macro. So include it now, to make
31 later inclusions of <locale.h> a NOP. We don't include <libintl.h>
32 as well because people using "gettext.h" will not include <libintl.h>,
33 and also including <libintl.h> would fail on SunOS 4, whereas <locale.h>
34 is OK. */
35#if defined(__sun)
36# include <locale.h>
37#endif
38
39/* Many header files from the libstdc++ coming with g++ 3.3 or newer include
40 <libintl.h>, which chokes if dcgettext is defined as a macro. So include
41 it now, to make later inclusions of <libintl.h> a NOP. */
42#if defined(__cplusplus) && defined(__GNUG__) && (__GNUC__ >= 3)
43# include <cstdlib>
44# if (__GLIBC__ >= 2) || _GLIBCXX_HAVE_LIBINTL_H
45# include <libintl.h>
46# endif
47#endif
48
/* NLS is disabled: every message-catalog function becomes a macro that
   simply yields one of its string arguments.
   Each result is cast to 'const char *' so that invalid uses of the
   value returned from these functions produce warnings.
   On pre-ANSI systems without 'const', the config.h file is supposed
   to contain "#define const".  */
# define gettext(msgid) \
    ((const char *) (msgid))
# define dgettext(domain, msgid) \
    ((const char *) (msgid))
# define dcgettext(domain, msgid, category) \
    ((const char *) (msgid))
/* The plural-aware forms yield the first message when the count is 1
   and the second message otherwise.  */
# define ngettext(singular, plural, n) \
    ((n) == 1 ? (const char *) (singular) : (const char *) (plural))
# define dngettext(domain, singular, plural, n) \
    ((n) == 1 ? (const char *) (singular) : (const char *) (plural))
# define dcngettext(domain, singular, plural, n, category) \
    ((n) == 1 ? (const char *) (singular) : (const char *) (plural))
# define textdomain(domain) \
    ((const char *) (domain))
# define bindtextdomain(domain, dirname) \
    ((const char *) (dirname))
# define bind_textdomain_codeset(domain, codeset) \
    ((const char *) (codeset))
66
67#endif
68
69/* A pseudo function call that serves as a marker for the automated
70 extraction of messages, but does not call gettext(). The run-time
71 translation is done at a different place in the code.
72 The argument, String, should be a literal string. Concatenated strings
73 and other string expressions won't work.
74 The macro's expansion is not parenthesized, so that it is suitable as
75 initializer for static 'char[]' or 'const char[]' variables. */
76#define gettext_noop(String) String
77
78#endif /* _LIBGETTEXT_H */
diff --git a/lib/intprops.h b/lib/intprops.h
deleted file mode 100644
index 65280b15..00000000
--- a/lib/intprops.h
+++ /dev/null
@@ -1,65 +0,0 @@
1/* intprops.h -- properties of integer types
2
3 Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19/* Written by Paul Eggert. */
20
21#include <limits.h>
22
23/* The extra casts in the following macros work around compiler bugs,
24 e.g., in Cray C 5.0.3.0. */
25
/* True if the arithmetic type T is an integer type.  bool counts as
   an integer.  (Converting 1.5 to an integer type truncates it to 1;
   a floating type keeps 1.5.)  */
#define TYPE_IS_INTEGER(t) ((t) 1.5 == 1)

/* True if negative values of the signed integer type T use two's
   complement, ones' complement, or signed magnitude representation,
   respectively.  Much GNU code assumes two's complement, but some
   people like to be portable to all possible C hosts.  */
#define TYPE_TWOS_COMPLEMENT(t) ((t) ~ (t) 0 == (t) -1)
#define TYPE_ONES_COMPLEMENT(t) ((t) ~ (t) 0 == 0)
#define TYPE_SIGNED_MAGNITUDE(t) ((t) ~ (t) 0 < (t) -1)

/* True if the arithmetic type T is signed.  */
#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))

/* The maximum and minimum values for the integer type T.  These
   macros have undefined behavior if T is signed and has padding bits.
   If this is a problem for you, please let us know how to fix it for
   your host.

   Neither macro left-shifts a negative value, which C99 leaves
   undefined.  The signed maximum is assembled from the positive value
   1 << (width - 2), and the signed minimum is the complement of that
   maximum (two's and ones' complement) or all bits set (signed
   magnitude).  */
#define TYPE_MINIMUM(t) \
  ((t) (! TYPE_SIGNED (t) \
        ? (t) 0 \
        : TYPE_SIGNED_MAGNITUDE (t) \
        ? ~ (t) 0 \
        : ~ TYPE_MAXIMUM (t)))
#define TYPE_MAXIMUM(t) \
  ((t) (! TYPE_SIGNED (t) \
        ? (t) -1 \
        : ((((t) 1 << (sizeof (t) * CHAR_BIT - 2)) - 1) * 2 + 1)))
55
/* Upper bound on the length of the decimal string for an integer type
   or expression T.  One bit is subtracted for the sign bit of a signed
   T; the remaining bits are scaled by 146/485, which exceeds
   log10 (2.0); then 1 is added for the truncation of the integer
   division and 1 more for a possible minus sign.  */
#define INT_STRLEN_BOUND(t) \
  (2 + (sizeof (t) * CHAR_BIT - 1) * 146 / 485)

/* Upper bound on the buffer size needed to hold the string for an
   integer type or expression T, terminating null included.  */
#define INT_BUFSIZE_BOUND(t) (1 + INT_STRLEN_BOUND (t))
diff --git a/lib/malloc.c b/lib/malloc.c
deleted file mode 100644
index 58fa6116..00000000
--- a/lib/malloc.c
+++ /dev/null
@@ -1,36 +0,0 @@
1/* malloc() function that is glibc compatible.
2 Copyright (C) 1997, 1998 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18/* written by Jim Meyering */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23#undef malloc
24
25#include <stdlib.h>
26
/* Return an N-byte block of heap memory obtained from malloc.  A
   request for zero bytes is turned into a request for one byte.  */

void *
rpl_malloc (size_t n)
{
  return malloc (n != 0 ? n : 1);
}
diff --git a/lib/mountlist.c b/lib/mountlist.c
deleted file mode 100644
index a1dca0aa..00000000
--- a/lib/mountlist.c
+++ /dev/null
@@ -1,820 +0,0 @@
1/* mountlist.c -- return a list of mounted file systems
2
3 Copyright (C) 1991, 1992, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
4 2004, 2005 Free Software Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24#include "mountlist.h"
25
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29
30#include "xalloc.h"
31
32#ifndef strstr
33char *strstr ();
34#endif
35
36#include <errno.h>
37
38#include <fcntl.h>
39
40#include <unistd.h>
41
42#if HAVE_SYS_PARAM_H
43# include <sys/param.h>
44#endif
45
46#if defined MOUNTED_GETFSSTAT /* OSF_1 and Darwin1.3.x */
47# if HAVE_SYS_UCRED_H
48# include <grp.h> /* needed on OSF V4.0 for definition of NGROUPS,
49 NGROUPS is used as an array dimension in ucred.h */
50# include <sys/ucred.h> /* needed by powerpc-apple-darwin1.3.7 */
51# endif
52# if HAVE_SYS_MOUNT_H
53# include <sys/mount.h>
54# endif
55# if HAVE_SYS_FS_TYPES_H
56# include <sys/fs_types.h> /* needed by powerpc-apple-darwin1.3.7 */
57# endif
58# if HAVE_STRUCT_FSSTAT_F_FSTYPENAME
59# define FS_TYPE(Ent) ((Ent).f_fstypename)
60# else
61# define FS_TYPE(Ent) mnt_names[(Ent).f_type]
62# endif
63#endif /* MOUNTED_GETFSSTAT */
64
65#ifdef MOUNTED_GETMNTENT1 /* 4.3BSD, SunOS, HP-UX, Dynix, Irix. */
66# include <mntent.h>
67# if !defined MOUNTED
68# if defined _PATH_MOUNTED /* GNU libc */
69# define MOUNTED _PATH_MOUNTED
70# endif
71# if defined MNT_MNTTAB /* HP-UX. */
72# define MOUNTED MNT_MNTTAB
73# endif
74# if defined MNTTABNAME /* Dynix. */
75# define MOUNTED MNTTABNAME
76# endif
77# endif
78#endif
79
80#ifdef MOUNTED_GETMNTINFO /* 4.4BSD. */
81# include <sys/mount.h>
82#endif
83
84#ifdef MOUNTED_GETMNT /* Ultrix. */
85# include <sys/mount.h>
86# include <sys/fs_types.h>
87#endif
88
89#ifdef MOUNTED_FS_STAT_DEV /* BeOS. */
90# include <fs_info.h>
91# include <dirent.h>
92#endif
93
94#ifdef MOUNTED_FREAD /* SVR2. */
95# include <mnttab.h>
96#endif
97
98#ifdef MOUNTED_FREAD_FSTYP /* SVR3. */
99# include <mnttab.h>
100# include <sys/fstyp.h>
101# include <sys/statfs.h>
102#endif
103
104#ifdef STAT_STATVFS
105# include <sys/statvfs.h>
106# define statfs statvfs
107#endif
108
109#ifdef MOUNTED_LISTMNTENT
110# include <mntent.h>
111#endif
112
113#ifdef MOUNTED_GETMNTENT2 /* SVR4. */
114# include <sys/mnttab.h>
115#endif
116
117#ifdef MOUNTED_VMOUNT /* AIX. */
118# include <fshelp.h>
119# include <sys/vfs.h>
120#endif
121
122#ifdef DOLPHIN
123/* So special that it's not worth putting this in autoconf. */
124# undef MOUNTED_FREAD_FSTYP
125# define MOUNTED_GETMNTTBL
126#endif
127
128#if HAVE_SYS_MNTENT_H
129/* This is to get MNTOPT_IGNORE on e.g. SVR4. */
130# include <sys/mntent.h>
131#endif
132
133#undef MNT_IGNORE
134#if defined MNTOPT_IGNORE && defined HAVE_HASMNTOPT
135# define MNT_IGNORE(M) hasmntopt ((M), MNTOPT_IGNORE)
136#else
137# define MNT_IGNORE(M) 0
138#endif
139
140#if USE_UNLOCKED_IO
141# include "unlocked-io.h"
142#endif
143
144#ifndef SIZE_MAX
145# define SIZE_MAX ((size_t) -1)
146#endif
147
#ifndef ME_DUMMY
/* Nonzero when the file system type is one of "autofs", "none",
   "proc", "subfs" or "ignore" (the last is for Irix 6.5).  The device
   name argument is accepted but not examined.  */
# define ME_DUMMY(dev_name, type_name) \
    (!strcmp (type_name, "autofs") \
     || !strcmp (type_name, "none") \
     || !strcmp (type_name, "proc") \
     || !strcmp (type_name, "subfs") \
     || !strcmp (type_name, "ignore"))
#endif
157
#ifndef ME_REMOTE
/* Nonzero for a `remote' file system: the device name contains a `:',
   or the device name starts with `//' and the type is "smbfs".  */
# define ME_REMOTE(dev_name, type_name) \
    (strchr (dev_name, ':') != 0 \
     || ((dev_name)[0] == '/' \
         && (dev_name)[1] == '/' \
         && !strcmp (type_name, "smbfs")))
#endif
167
168#if MOUNTED_GETMNTINFO
169
170# if ! HAVE_F_FSTYPENAME_IN_STATFS && ! STAT_STATVFS
/* Translate the numeric file system type T into a short name.  Only
   the MOUNT_* constants that are defined as macros on the host are
   recognised; any other value yields "?".  */
static char *
fstype_to_string (short int t)
{
  /* Lookup table; the entry with a null name marks the end.  */
  static const struct
  {
    int code;
    char *name;
  } known_types[] =
    {
# ifdef MOUNT_PC
      { MOUNT_PC, "pc" },
# endif
# ifdef MOUNT_MFS
      { MOUNT_MFS, "mfs" },
# endif
# ifdef MOUNT_LO
      { MOUNT_LO, "lo" },
# endif
# ifdef MOUNT_TFS
      { MOUNT_TFS, "tfs" },
# endif
# ifdef MOUNT_TMP
      { MOUNT_TMP, "tmp" },
# endif
# ifdef MOUNT_UFS
      { MOUNT_UFS, "ufs" },
# endif
# ifdef MOUNT_NFS
      { MOUNT_NFS, "nfs" },
# endif
# ifdef MOUNT_MSDOS
      { MOUNT_MSDOS, "msdos" },
# endif
# ifdef MOUNT_LFS
      { MOUNT_LFS, "lfs" },
# endif
# ifdef MOUNT_LOFS
      { MOUNT_LOFS, "lofs" },
# endif
# ifdef MOUNT_FDESC
      { MOUNT_FDESC, "fdesc" },
# endif
# ifdef MOUNT_PORTAL
      { MOUNT_PORTAL, "portal" },
# endif
# ifdef MOUNT_NULL
      { MOUNT_NULL, "null" },
# endif
# ifdef MOUNT_UMAP
      { MOUNT_UMAP, "umap" },
# endif
# ifdef MOUNT_KERNFS
      { MOUNT_KERNFS, "kernfs" },
# endif
# ifdef MOUNT_PROCFS
      { MOUNT_PROCFS, "procfs" },
# endif
# ifdef MOUNT_AFS
      { MOUNT_AFS, "afs" },
# endif
# ifdef MOUNT_CD9660
      { MOUNT_CD9660, "cd9660" },
# endif
# ifdef MOUNT_UNION
      { MOUNT_UNION, "union" },
# endif
# ifdef MOUNT_DEVFS
      { MOUNT_DEVFS, "devfs" },
# endif
# ifdef MOUNT_EXT2FS
      { MOUNT_EXT2FS, "ext2fs" },
# endif
      { 0, NULL }
    };
  int i;

  for (i = 0; known_types[i].name != NULL; i++)
    if (known_types[i].code == t)
      return known_types[i].name;

  return "?";
}
264# endif /* ! HAVE_F_FSTYPENAME_IN_STATFS */
265
266/* __NetBSD__ || BSD_NET2 || __OpenBSD__ */
267static char *
268fsp_to_string (const struct statfs *fsp)
269{
270# if defined HAVE_F_FSTYPENAME_IN_STATFS || defined STAT_STATVFS
271 return (char *) (fsp->f_fstypename);
272# else
273 return fstype_to_string (fsp->f_type);
274# endif
275}
276
277#endif /* MOUNTED_GETMNTINFO */
278
279#ifdef MOUNTED_VMOUNT /* AIX. */
280static char *
281fstype_to_string (int t)
282{
283 struct vfs_ent *e;
284
285 e = getvfsbytype (t);
286 if (!e || !e->vfsent_name)
287 return "none";
288 else
289 return e->vfsent_name;
290}
291#endif /* MOUNTED_VMOUNT */
292
/* Return a list of the currently mounted file systems, or NULL on error.
   Add each entry to the tail of the list so that they stay in order.
   If NEED_FS_TYPE is true, ensure that the file system type fields in
   the returned list are valid.  Otherwise, they might not be.

   Each block below is compiled only when the corresponding MOUNTED_*
   macro is defined.  Every block allocates one `struct mount_entry' per
   mounted file system and links it in through MTAIL; the list is
   NULL-terminated just before the successful return.  Failures that
   occur after entries may already have been linked jump to
   free_then_fail, which releases the partial list and returns NULL
   with errno preserved.  */

struct mount_entry *
read_file_system_list (bool need_fs_type)
{
  struct mount_entry *mount_list;
  struct mount_entry *me;
  /* MTAIL points at the link field that must receive the next entry;
     initially that is the list head itself.  */
  struct mount_entry **mtail = &mount_list;

#ifdef MOUNTED_LISTMNTENT
  {
    struct tabmntent *mntlist, *p;
    struct mntent *mnt;
    /* Note: this declaration shadows the function-level ME.  */
    struct mount_entry *me;

    /* the third and fourth arguments could be used to filter mounts,
       but Crays don't seem to have any mounts that we want to
       remove. Specifically, automount creates normal NFS mounts.
    */

    if (listmntent (&mntlist, KMTAB, NULL, NULL) < 0)
      return NULL;
    for (p = mntlist; p; p = p->next) {
      mnt = p->ment;
      me = xmalloc (sizeof *me);
      me->me_devname = xstrdup (mnt->mnt_fsname);
      me->me_mountdir = xstrdup (mnt->mnt_dir);
      me->me_type = xstrdup (mnt->mnt_type);
      me->me_type_malloced = 1;
      me->me_dummy = ME_DUMMY (me->me_devname, me->me_type);
      me->me_remote = ME_REMOTE (me->me_devname, me->me_type);
      me->me_dev = -1;
      *mtail = me;
      mtail = &me->me_next;
    }
    freemntlist (mntlist);
  }
#endif

#ifdef MOUNTED_GETMNTENT1 /* 4.3BSD, SunOS, HP-UX, Dynix, Irix. */
  {
    struct mntent *mnt;
    char *table = MOUNTED;
    FILE *fp;
    char *devopt;

    fp = setmntent (table, "r");
    if (fp == NULL)
      return NULL;

    while ((mnt = getmntent (fp)))
      {
        me = xmalloc (sizeof *me);
        me->me_devname = xstrdup (mnt->mnt_fsname);
        me->me_mountdir = xstrdup (mnt->mnt_dir);
        me->me_type = xstrdup (mnt->mnt_type);
        me->me_type_malloced = 1;
        me->me_dummy = ME_DUMMY (me->me_devname, me->me_type);
        me->me_remote = ME_REMOTE (me->me_devname, me->me_type);
        /* If the option string contains "dev=", the text after it is
           parsed as a hexadecimal device number.  */
        devopt = strstr (mnt->mnt_opts, "dev=");
        if (devopt)
          me->me_dev = strtoul (devopt + 4, NULL, 16);
        else
          me->me_dev = (dev_t) -1; /* Magic; means not known yet. */

        /* Add to the linked list. */
        *mtail = me;
        mtail = &me->me_next;
      }

    if (endmntent (fp) == 0)
      goto free_then_fail;
  }
#endif /* MOUNTED_GETMNTENT1. */

#ifdef MOUNTED_GETMNTINFO /* 4.4BSD. */
  {
    struct statfs *fsp;
    int entries;

    entries = getmntinfo (&fsp, MNT_NOWAIT);
    if (entries < 0)
      return NULL;
    for (; entries-- > 0; fsp++)
      {
        char *fs_type = fsp_to_string (fsp);

        me = xmalloc (sizeof *me);
        me->me_devname = xstrdup (fsp->f_mntfromname);
        me->me_mountdir = xstrdup (fsp->f_mntonname);
        /* The type string is stored without copying, hence
           me_type_malloced is 0 and it is never freed here.  */
        me->me_type = fs_type;
        me->me_type_malloced = 0;
        me->me_dummy = ME_DUMMY (me->me_devname, me->me_type);
        me->me_remote = ME_REMOTE (me->me_devname, me->me_type);
        me->me_dev = (dev_t) -1; /* Magic; means not known yet. */

        /* Add to the linked list. */
        *mtail = me;
        mtail = &me->me_next;
      }
  }
#endif /* MOUNTED_GETMNTINFO */

#ifdef MOUNTED_GETMNT /* Ultrix. */
  {
    int offset = 0;
    int val;
    struct fs_data fsd;

    /* errno is cleared before every getmnt call; the loop runs while
       getmnt returns a positive value.  */
    while (errno = 0,
           0 < (val = getmnt (&offset, &fsd, sizeof (fsd), NOSTAT_MANY,
                              (char *) 0)))
      {
        me = xmalloc (sizeof *me);
        me->me_devname = xstrdup (fsd.fd_req.devname);
        me->me_mountdir = xstrdup (fsd.fd_req.path);
        me->me_type = gt_names[fsd.fd_req.fstype];
        me->me_type_malloced = 0;
        me->me_dummy = ME_DUMMY (me->me_devname, me->me_type);
        me->me_remote = ME_REMOTE (me->me_devname, me->me_type);
        me->me_dev = fsd.fd_req.dev;

        /* Add to the linked list. */
        *mtail = me;
        mtail = &me->me_next;
      }
    /* A negative final result is treated as an error.  */
    if (val < 0)
      goto free_then_fail;
  }
#endif /* MOUNTED_GETMNT. */

#if defined MOUNTED_FS_STAT_DEV /* BeOS */
  {
    /* The next_dev() and fs_stat_dev() system calls give the list of
       all file systems, including the information returned by statvfs()
       (fs type, total blocks, free blocks etc.), but without the mount
       point. But on BeOS all file systems except / are mounted in the
       rootfs, directly under /.
       The directory name of the mount point is often, but not always,
       identical to the volume name of the device.
       We therefore get the list of subdirectories of /, and the list
       of all file systems, and match the two lists. */

    DIR *dirp;
    struct rootdir_entry
      {
        char *name;
        dev_t dev;
        ino_t ino;
        struct rootdir_entry *next;
      };
    struct rootdir_entry *rootdir_list;
    struct rootdir_entry **rootdir_tail;
    int32 pos;
    dev_t dev;
    fs_info fi;

    /* All volumes are mounted in the rootfs, directly under /. */
    rootdir_list = NULL;
    rootdir_tail = &rootdir_list;
    dirp = opendir ("/");
    if (dirp)
      {
        struct dirent *d;

        while ((d = readdir (dirp)) != NULL)
          {
            char *name;
            struct stat statbuf;

            if (strcmp (d->d_name, "..") == 0)
              continue;

            /* "." stands for the root directory itself; every other
               entry becomes "/" followed by its name.  */
            if (strcmp (d->d_name, ".") == 0)
              name = xstrdup ("/");
            else
              {
                name = xmalloc (1 + strlen (d->d_name) + 1);
                name[0] = '/';
                strcpy (name + 1, d->d_name);
              }

            /* Only directories are remembered; NAME is owned by the
               list entry in that case and freed otherwise.  */
            if (lstat (name, &statbuf) >= 0 && S_ISDIR (statbuf.st_mode))
              {
                struct rootdir_entry *re = xmalloc (sizeof *re);
                re->name = name;
                re->dev = statbuf.st_dev;
                re->ino = statbuf.st_ino;

                /* Add to the linked list. */
                *rootdir_tail = re;
                rootdir_tail = &re->next;
              }
            else
              free (name);
          }
        closedir (dirp);
      }
    *rootdir_tail = NULL;

    for (pos = 0; (dev = next_dev (&pos)) >= 0; )
      if (fs_stat_dev (dev, &fi) >= 0)
        {
          /* Note: fi.dev == dev. */
          struct rootdir_entry *re;

          /* Look for the root subdirectory whose device and inode match
             this file system's root; RE is NULL if none matches.  */
          for (re = rootdir_list; re; re = re->next)
            if (re->dev == fi.dev && re->ino == fi.root)
              break;

          me = xmalloc (sizeof *me);
          me->me_devname = xstrdup (fi.device_name[0] != '\0' ? fi.device_name : fi.fsh_name);
          me->me_mountdir = xstrdup (re != NULL ? re->name : fi.fsh_name);
          me->me_type = xstrdup (fi.fsh_name);
          me->me_type_malloced = 1;
          me->me_dev = fi.dev;
          me->me_dummy = 0;
          me->me_remote = (fi.flags & B_FS_IS_SHARED) != 0;

          /* Add to the linked list. */
          *mtail = me;
          mtail = &me->me_next;
        }
    *mtail = NULL;

    /* Release the temporary list of root subdirectories.  */
    while (rootdir_list != NULL)
      {
        struct rootdir_entry *re = rootdir_list;
        rootdir_list = re->next;
        free (re->name);
        free (re);
      }
  }
#endif /* MOUNTED_FS_STAT_DEV */

#if defined MOUNTED_GETFSSTAT /* __alpha running OSF_1 */
  {
    int numsys, counter;
    size_t bufsize;
    struct statfs *stats;

    /* First call with a null buffer: only the count is wanted.  */
    numsys = getfsstat ((struct statfs *)0, 0L, MNT_NOWAIT);
    if (numsys < 0)
      return (NULL);
    /* Guard the size computation below against overflow.  */
    if (SIZE_MAX / sizeof *stats <= numsys)
      xalloc_die ();

    bufsize = (1 + numsys) * sizeof *stats;
    stats = xmalloc (bufsize);
    numsys = getfsstat (stats, bufsize, MNT_NOWAIT);

    if (numsys < 0)
      {
        free (stats);
        return (NULL);
      }

    for (counter = 0; counter < numsys; counter++)
      {
        me = xmalloc (sizeof *me);
        me->me_devname = xstrdup (stats[counter].f_mntfromname);
        me->me_mountdir = xstrdup (stats[counter].f_mntonname);
        me->me_type = xstrdup (FS_TYPE (stats[counter]));
        me->me_type_malloced = 1;
        me->me_dummy = ME_DUMMY (me->me_devname, me->me_type);
        me->me_remote = ME_REMOTE (me->me_devname, me->me_type);
        me->me_dev = (dev_t) -1; /* Magic; means not known yet. */

        /* Add to the linked list. */
        *mtail = me;
        mtail = &me->me_next;
      }

    free (stats);
  }
#endif /* MOUNTED_GETFSSTAT */

#if defined MOUNTED_FREAD || defined MOUNTED_FREAD_FSTYP /* SVR[23]. */
  {
    struct mnttab mnt;
    char *table = "/etc/mnttab";
    FILE *fp;

    fp = fopen (table, "r");
    if (fp == NULL)
      return NULL;

    /* The table is read as a sequence of fixed-size binary records.  */
    while (fread (&mnt, sizeof mnt, 1, fp) > 0)
      {
        me = xmalloc (sizeof *me);
# ifdef GETFSTYP /* SVR3. */
        me->me_devname = xstrdup (mnt.mt_dev);
# else
        /* Prefix the device name with "/dev/": 5 bytes for the prefix
           plus 1 for the terminating NUL.  */
        me->me_devname = xmalloc (strlen (mnt.mt_dev) + 6);
        strcpy (me->me_devname, "/dev/");
        strcpy (me->me_devname + 5, mnt.mt_dev);
# endif
        me->me_mountdir = xstrdup (mnt.mt_filsys);
        me->me_dev = (dev_t) -1; /* Magic; means not known yet. */
        /* Default to an empty, non-malloced type string; it is replaced
           below only when GETFSTYP is available and the caller asked
           for file system types.  */
        me->me_type = "";
        me->me_type_malloced = 0;
# ifdef GETFSTYP /* SVR3. */
        if (need_fs_type)
          {
            struct statfs fsd;
            char typebuf[FSTYPSZ];

            if (statfs (me->me_mountdir, &fsd, sizeof fsd, 0) != -1
                && sysfs (GETFSTYP, fsd.f_fstyp, typebuf) != -1)
              {
                me->me_type = xstrdup (typebuf);
                me->me_type_malloced = 1;
              }
          }
# endif
        me->me_dummy = ME_DUMMY (me->me_devname, me->me_type);
        me->me_remote = ME_REMOTE (me->me_devname, me->me_type);

        /* Add to the linked list. */
        *mtail = me;
        mtail = &me->me_next;
      }

    if (ferror (fp))
      {
        /* The last fread() call must have failed. */
        int saved_errno = errno;
        fclose (fp);
        errno = saved_errno;
        goto free_then_fail;
      }

    if (fclose (fp) == EOF)
      goto free_then_fail;
  }
#endif /* MOUNTED_FREAD || MOUNTED_FREAD_FSTYP. */

#ifdef MOUNTED_GETMNTTBL /* DolphinOS goes its own way */
  {
    struct mntent **mnttbl = getmnttbl (), **ent;
    /* The table is walked until a null entry pointer is found.  */
    for (ent=mnttbl;*ent;ent++)
      {
        me = xmalloc (sizeof *me);
        me->me_devname = xstrdup ( (*ent)->mt_resource);
        me->me_mountdir = xstrdup ( (*ent)->mt_directory);
        me->me_type = xstrdup ((*ent)->mt_fstype);
        me->me_type_malloced = 1;
        me->me_dummy = ME_DUMMY (me->me_devname, me->me_type);
        me->me_remote = ME_REMOTE (me->me_devname, me->me_type);
        me->me_dev = (dev_t) -1; /* Magic; means not known yet. */

        /* Add to the linked list. */
        *mtail = me;
        mtail = &me->me_next;
      }
    endmnttbl ();
  }
#endif

#ifdef MOUNTED_GETMNTENT2 /* SVR4. */
  {
    struct mnttab mnt;
    char *table = MNTTAB;
    FILE *fp;
    int ret;
    int lockfd = -1;

# if defined F_RDLCK && defined F_SETLKW
    /* MNTTAB_LOCK is a macro name of our own invention; it's not present in
       e.g. Solaris 2.6. If the SVR4 folks ever define a macro
       for this file name, we should use their macro name instead.
       (Why not just lock MNTTAB directly? We don't know.) */
# ifndef MNTTAB_LOCK
# define MNTTAB_LOCK "/etc/.mnttab.lock"
# endif
    lockfd = open (MNTTAB_LOCK, O_RDONLY);
    if (0 <= lockfd)
      {
        /* Take a read lock on the whole lock file, retrying when the
           wait is interrupted (EINTR).  */
        struct flock flock;
        flock.l_type = F_RDLCK;
        flock.l_whence = SEEK_SET;
        flock.l_start = 0;
        flock.l_len = 0;
        while (fcntl (lockfd, F_SETLKW, &flock) == -1)
          if (errno != EINTR)
            {
              int saved_errno = errno;
              close (lockfd);
              errno = saved_errno;
              return NULL;
            }
      }
    /* A missing lock file is tolerated; any other open failure is not.  */
    else if (errno != ENOENT)
      return NULL;
# endif

    errno = 0;
    fp = fopen (table, "r");
    if (fp == NULL)
      ret = errno;
    else
      {
        while ((ret = getmntent (fp, &mnt)) == 0)
          {
            me = xmalloc (sizeof *me);
            me->me_devname = xstrdup (mnt.mnt_special);
            me->me_mountdir = xstrdup (mnt.mnt_mountp);
            me->me_type = xstrdup (mnt.mnt_fstype);
            me->me_type_malloced = 1;
            me->me_dummy = MNT_IGNORE (&mnt) != 0;
            me->me_remote = ME_REMOTE (me->me_devname, me->me_type);
            me->me_dev = (dev_t) -1; /* Magic; means not known yet. */

            /* Add to the linked list. */
            *mtail = me;
            mtail = &me->me_next;
          }

        /* Fold the outcome into RET, where a negative value means
           success and a non-negative value is the errno to report:
           an fclose failure yields its errno; otherwise a positive
           getmntent result becomes 0 (failure without a specific
           errno) and a non-positive one becomes -1 (success).  */
        ret = fclose (fp) == EOF ? errno : 0 < ret ? 0 : -1;
      }

    if (0 <= lockfd && close (lockfd) != 0)
      ret = errno;

    if (0 <= ret)
      {
        errno = ret;
        goto free_then_fail;
      }
  }
#endif /* MOUNTED_GETMNTENT2. */

#ifdef MOUNTED_VMOUNT /* AIX. */
  {
    int bufsize;
    char *entries, *thisent;
    struct vmount *vmp;
    int n_entries;
    int i;

    /* Ask how many bytes to allocate for the mounted file system info. */
    if (mntctl (MCTL_QUERY, sizeof bufsize, (struct vmount *) &bufsize) != 0)
      return NULL;
    entries = xmalloc (bufsize);

    /* Get the list of mounted file systems. */
    n_entries = mntctl (MCTL_QUERY, bufsize, (struct vmount *) entries);
    if (n_entries < 0)
      {
        int saved_errno = errno;
        free (entries);
        errno = saved_errno;
        return NULL;
      }

    /* The records are variable-length: each one says in vmt_length how
       far ahead the next record starts.  */
    for (i = 0, thisent = entries;
         i < n_entries;
         i++, thisent += vmp->vmt_length)
      {
        char *options, *ignore;

        vmp = (struct vmount *) thisent;
        me = xmalloc (sizeof *me);
        if (vmp->vmt_flags & MNT_REMOTE)
          {
            char *host, *dir;

            me->me_remote = 1;
            /* Prepend the remote dirname. */
            host = thisent + vmp->vmt_data[VMT_HOSTNAME].vmt_off;
            dir = thisent + vmp->vmt_data[VMT_OBJECT].vmt_off;
            /* Room for "HOST:DIR" plus the terminating NUL.  */
            me->me_devname = xmalloc (strlen (host) + strlen (dir) + 2);
            strcpy (me->me_devname, host);
            strcat (me->me_devname, ":");
            strcat (me->me_devname, dir);
          }
        else
          {
            me->me_remote = 0;
            me->me_devname = xstrdup (thisent +
                                      vmp->vmt_data[VMT_OBJECT].vmt_off);
          }
        me->me_mountdir = xstrdup (thisent + vmp->vmt_data[VMT_STUB].vmt_off);
        me->me_type = xstrdup (fstype_to_string (vmp->vmt_gfstype));
        me->me_type_malloced = 1;
        options = thisent + vmp->vmt_data[VMT_ARGS].vmt_off;
        /* The entry is a dummy when the first occurrence of "ignore" in
           the option string is a complete comma-delimited option, i.e.
           it starts the string or follows a comma, and is followed by a
           comma or the end of the string.  */
        ignore = strstr (options, "ignore");
        me->me_dummy = (ignore
                        && (ignore == options || ignore[-1] == ',')
                        && (ignore[sizeof "ignore" - 1] == ','
                            || ignore[sizeof "ignore" - 1] == '\0'));
        me->me_dev = (dev_t) -1; /* vmt_fsid might be the info we want. */

        /* Add to the linked list. */
        *mtail = me;
        mtail = &me->me_next;
      }
    free (entries);
  }
#endif /* MOUNTED_VMOUNT. */

  /* Success: terminate the list and hand it to the caller.  */
  *mtail = NULL;
  return mount_list;


 free_then_fail:
  {
    /* Remember errno so the cleanup below cannot disturb the value the
       caller sees.  */
    int saved_errno = errno;
    /* Terminate the partial list so that the loop below stops.  */
    *mtail = NULL;

    while (mount_list)
      {
        me = mount_list->me_next;
        free (mount_list->me_devname);
        free (mount_list->me_mountdir);
        /* The type string is freed only when this entry owns it.  */
        if (mount_list->me_type_malloced)
          free (mount_list->me_type);
        free (mount_list);
        mount_list = me;
      }

    errno = saved_errno;
    return NULL;
  }
}
diff --git a/lib/mountlist.h b/lib/mountlist.h
deleted file mode 100644
index 7f5a6f77..00000000
--- a/lib/mountlist.h
+++ /dev/null
@@ -1,41 +0,0 @@
1/* mountlist.h -- declarations for list of mounted file systems
2
3 Copyright (C) 1991, 1992, 1998, 2000, 2001, 2002, 2003, 2004, 2005
4 Free Software Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20#ifndef MOUNTLIST_H_
21# define MOUNTLIST_H_
22
23# include <stdbool.h>
24# include <sys/types.h>
25
/* A mount table entry.  Entries form a singly linked list through
   me_next, as built by read_file_system_list.  */
struct mount_entry
{
  char *me_devname; /* Device node name, including "/dev/". */
  char *me_mountdir; /* Mount point directory name. */
  char *me_type; /* "nfs", "4.2", etc. */
  dev_t me_dev; /* Device number of me_mountdir. */
  unsigned int me_dummy : 1; /* Nonzero for dummy file systems. */
  unsigned int me_remote : 1; /* Nonzero for remote file systems. */
  unsigned int me_type_malloced : 1; /* Nonzero if me_type was malloced. */
  struct mount_entry *me_next; /* Next entry in the list, or NULL. */
};
38
39struct mount_entry *read_file_system_list (bool need_fs_type);
40
41#endif
diff --git a/lib/open-safer.c b/lib/open-safer.c
deleted file mode 100644
index d3ba894a..00000000
--- a/lib/open-safer.c
+++ /dev/null
@@ -1,51 +0,0 @@
1/* Invoke open, but avoid some glitches.
2 Copyright (C) 2005 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18/* Written by Paul Eggert. */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24#include "fcntl-safer.h"
25
26#include <fcntl.h>
27#include <stdarg.h>
28#include "unistd-safer.h"
29
/* Open FILE with FLAGS via open, passing the optional third argument
   as the mode when FLAGS includes O_CREAT (mode 0 otherwise), and
   return the resulting descriptor after filtering it through
   fd_safer.  */
int
open_safer (char const *file, int flags, ...)
{
  mode_t mode = 0;

  if (flags & O_CREAT)
    {
      va_list args;

      va_start (args, flags);

      /* Assume mode_t promotes to int if and only if it is smaller.
         This assumption isn't guaranteed by the C standard, but we
         don't know of any real-world counterexamples.  */
      if (sizeof (mode_t) < sizeof (int))
        mode = va_arg (args, int);
      else
        mode = va_arg (args, mode_t);

      va_end (args);
    }

  return fd_safer (open (file, flags, mode));
}
diff --git a/lib/pipe-safer.c b/lib/pipe-safer.c
deleted file mode 100644
index fb02d721..00000000
--- a/lib/pipe-safer.c
+++ /dev/null
@@ -1,50 +0,0 @@
1/* Invoke pipe, but avoid some glitches.
2 Copyright (C) 2005 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18/* Written by Jim Meyering. */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
#include "unistd-safer.h"

#include <errno.h>
#include <unistd.h>
27
/* Like pipe, but ensure that neither of the file descriptors is
   STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO.

   Return 0 on success, with both descriptors stored in FD.  If pipe
   itself fails, return its (nonzero) result.  If moving one of the
   descriptors out of the standard range fails, close the other end of
   the pipe so that no descriptor is leaked, and return -1 with errno
   describing the original failure.  */

int
pipe_safer (int fd[2])
{
  int fail = pipe (fd);
  if (fail)
    return fail;

  {
    int i;
    for (i = 0; i < 2; i++)
      {
        int f = fd_safer (fd[i]);
        if (f < 0)
          {
            /* fd[i] is presumably already disposed of by fd_safer on
               failure (confirm against fd-safer.c); the other end is
               still open and would leak if we just returned.  When
               i == 1, fd[0] already holds its safe replacement.
               Preserve errno across close so the caller sees why
               fd_safer failed.  */
            int saved_errno = errno;
            close (fd[1 - i]);
            errno = saved_errno;
            return -1;
          }
        fd[i] = f;
      }
  }

  return 0;
}
diff --git a/lib/realloc.c b/lib/realloc.c
deleted file mode 100644
index fe948222..00000000
--- a/lib/realloc.c
+++ /dev/null
@@ -1,46 +0,0 @@
1/* realloc() function that is glibc compatible.
2 Copyright (C) 1997, 2003, 2004 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18/* written by Jim Meyering */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23#undef realloc
24
25#include <stdlib.h>
26
/* Resize the allocated block P to N bytes, glibc style.
   A request for zero bytes releases P and yields a fresh 1-byte
   block instead; a null P is handled by plain malloc.  */

void *
rpl_realloc (void *p, size_t n)
{
  if (n == 0)
    {
      /* In theory realloc might fail, so don't rely on it to free.  */
      free (p);
      return malloc (1);
    }

  return p != NULL ? realloc (p, n) : malloc (n);
}
diff --git a/lib/regcomp.c b/lib/regcomp.c
deleted file mode 100644
index 279b20c4..00000000
--- a/lib/regcomp.c
+++ /dev/null
@@ -1,3779 +0,0 @@
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License along
17 with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
21 Idx length, reg_syntax_t syntax);
22static void re_compile_fastmap_iter (regex_t *bufp,
23 const re_dfastate_t *init_state,
24 char *fastmap);
25static reg_errcode_t init_dfa (re_dfa_t *dfa, Idx pat_len);
26#ifdef RE_ENABLE_I18N
27static void free_charset (re_charset_t *cset);
28#endif /* RE_ENABLE_I18N */
29static void free_workarea_compile (regex_t *preg);
30static reg_errcode_t create_initial_state (re_dfa_t *dfa);
31#ifdef RE_ENABLE_I18N
32static void optimize_utf8 (re_dfa_t *dfa);
33#endif
34static reg_errcode_t analyze (regex_t *preg);
35static reg_errcode_t preorder (bin_tree_t *root,
36 reg_errcode_t (fn (void *, bin_tree_t *)),
37 void *extra);
38static reg_errcode_t postorder (bin_tree_t *root,
39 reg_errcode_t (fn (void *, bin_tree_t *)),
40 void *extra);
41static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
42static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
43static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
44 bin_tree_t *node);
45static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
46static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
47static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
48static Idx duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint);
49static Idx search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
50 unsigned int constraint);
51static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
52static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
53 Idx node, bool root);
54static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
55static Idx fetch_number (re_string_t *input, re_token_t *token,
56 reg_syntax_t syntax);
57static int peek_token (re_token_t *token, re_string_t *input,
58 reg_syntax_t syntax);
59static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
60 reg_syntax_t syntax, reg_errcode_t *err);
61static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
62 re_token_t *token, reg_syntax_t syntax,
63 Idx nest, reg_errcode_t *err);
64static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
65 re_token_t *token, reg_syntax_t syntax,
66 Idx nest, reg_errcode_t *err);
67static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
68 re_token_t *token, reg_syntax_t syntax,
69 Idx nest, reg_errcode_t *err);
70static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
71 re_token_t *token, reg_syntax_t syntax,
72 Idx nest, reg_errcode_t *err);
73static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
74 re_dfa_t *dfa, re_token_t *token,
75 reg_syntax_t syntax, reg_errcode_t *err);
76static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
77 re_token_t *token, reg_syntax_t syntax,
78 reg_errcode_t *err);
79static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
80 re_string_t *regexp,
81 re_token_t *token, int token_len,
82 re_dfa_t *dfa,
83 reg_syntax_t syntax,
84 bool accept_hyphen);
85static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
86 re_string_t *regexp,
87 re_token_t *token);
88#ifdef RE_ENABLE_I18N
89static reg_errcode_t build_equiv_class (bitset sbcset,
90 re_charset_t *mbcset,
91 Idx *equiv_class_alloc,
92 const unsigned char *name);
93static reg_errcode_t build_charclass (unsigned REG_TRANSLATE_TYPE trans,
94 bitset sbcset,
95 re_charset_t *mbcset,
96 Idx *char_class_alloc,
97 const unsigned char *class_name,
98 reg_syntax_t syntax);
99#else /* not RE_ENABLE_I18N */
100static reg_errcode_t build_equiv_class (bitset sbcset,
101 const unsigned char *name);
102static reg_errcode_t build_charclass (unsigned REG_TRANSLATE_TYPE trans,
103 bitset sbcset,
104 const unsigned char *class_name,
105 reg_syntax_t syntax);
106#endif /* not RE_ENABLE_I18N */
107static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
108 unsigned REG_TRANSLATE_TYPE trans,
109 const unsigned char *class_name,
110 const unsigned char *extra,
111 bool non_match, reg_errcode_t *err);
112static bin_tree_t *create_tree (re_dfa_t *dfa,
113 bin_tree_t *left, bin_tree_t *right,
114 re_token_type_t type);
115static bin_tree_t *create_token_tree (re_dfa_t *dfa,
116 bin_tree_t *left, bin_tree_t *right,
117 const re_token_t *token);
118static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
119static void free_token (re_token_t *node);
120static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
121static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
122
123/* This table gives an error message for each of the error codes listed
124 in regex.h. Obviously the order here has to be same as there.
125 POSIX doesn't require that we do anything for REG_NOERROR,
126 but why not be nice? */
127
/* All error messages live in one char array, built from adjacent string
   literals separated by explicit "\0" literals.  Each REG_*_IDX macro
   is the byte offset of its message within that array: the previous
   offset plus sizeof of the previous message literal (sizeof counts the
   literal's terminating NUL, which accounts for the "\0" separator).
   The literal in each sizeof must therefore stay identical to the
   message it follows.  This presumes gettext_noop expands to just its
   argument -- confirm against its definition.  */
const char __re_error_msgid[] attribute_hidden =
  {
#define REG_NOERROR_IDX 0
    gettext_noop ("Success") /* REG_NOERROR */
    "\0"
#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
    gettext_noop ("No match") /* REG_NOMATCH */
    "\0"
#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
    gettext_noop ("Invalid regular expression") /* REG_BADPAT */
    "\0"
#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
    gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
    "\0"
#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
    gettext_noop ("Invalid character class name") /* REG_ECTYPE */
    "\0"
#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
    gettext_noop ("Trailing backslash") /* REG_EESCAPE */
    "\0"
#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
    gettext_noop ("Invalid back reference") /* REG_ESUBREG */
    "\0"
#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
    gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
    "\0"
#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
    gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
    "\0"
#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
    gettext_noop ("Unmatched \\{") /* REG_EBRACE */
    "\0"
#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
    gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
    "\0"
#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
    gettext_noop ("Invalid range end") /* REG_ERANGE */
    "\0"
#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
    gettext_noop ("Memory exhausted") /* REG_ESPACE */
    "\0"
#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
    gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
    "\0"
#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
    gettext_noop ("Premature end of regular expression") /* REG_EEND */
    "\0"
#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
    gettext_noop ("Regular expression too big") /* REG_ESIZE */
    "\0"
#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
    gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
  };

/* Offsets into __re_error_msgid, listed in the same order as the
   messages above.  re_compile_pattern indexes this table with the
   numeric error code to find the start of the matching message.  */
const size_t __re_error_msgid_idx[] attribute_hidden =
  {
    REG_NOERROR_IDX,
    REG_NOMATCH_IDX,
    REG_BADPAT_IDX,
    REG_ECOLLATE_IDX,
    REG_ECTYPE_IDX,
    REG_EESCAPE_IDX,
    REG_ESUBREG_IDX,
    REG_EBRACK_IDX,
    REG_EPAREN_IDX,
    REG_EBRACE_IDX,
    REG_BADBR_IDX,
    REG_ERANGE_IDX,
    REG_ESPACE_IDX,
    REG_BADRPT_IDX,
    REG_EEND_IDX,
    REG_ESIZE_IDX,
    REG_ERPAREN_IDX
  };
202
203/* Entry points for GNU code. */
204
205/* re_compile_pattern is the GNU regular expression compiler: it
206 compiles PATTERN (of length LENGTH) and puts the result in BUFP.
207 Returns 0 if the pattern was valid, otherwise an error string.
208
209 Assumes the `re_allocated' (and perhaps `re_buffer') and `translate' fields
210 are set in BUFP on entry. */
211
212const char *
213re_compile_pattern (const char *pattern, size_t length,
214 struct re_pattern_buffer *bufp)
215{
216 reg_errcode_t ret;
217
218 /* And GNU code determines whether or not to get register information
219 by passing null for the REGS argument to re_match, etc., not by
220 setting re_no_sub, unless REG_NO_SUB is set. */
221 bufp->re_no_sub = !!(re_syntax_options & REG_NO_SUB);
222
223 /* Match anchors at newline. */
224 bufp->re_newline_anchor = 1;
225
226 ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
227
228 if (!ret)
229 return NULL;
230 return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
231}
232#ifdef _LIBC
233weak_alias (__re_compile_pattern, re_compile_pattern)
234#endif
235
236/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
237 also be assigned to arbitrarily: each pattern buffer stores its own
238 syntax, so it can be changed between regex compilations. */
239/* This has no initializer because initialized variables in Emacs
240 become read-only after dumping. */
241reg_syntax_t re_syntax_options;
242
243
244/* Specify the precise syntax of regexps for compilation. This provides
245 for compatibility for various utilities which historically have
246 different, incompatible syntaxes.
247
248 The argument SYNTAX is a bit mask comprised of the various bits
249 defined in regex.h. We return the old syntax. */
250
251reg_syntax_t
252re_set_syntax (reg_syntax_t syntax)
253{
254 reg_syntax_t ret = re_syntax_options;
255
256 re_syntax_options = syntax;
257 return ret;
258}
259#ifdef _LIBC
260weak_alias (__re_set_syntax, re_set_syntax)
261#endif
262
263int
264re_compile_fastmap (struct re_pattern_buffer *bufp)
265{
266 re_dfa_t *dfa = (re_dfa_t *) bufp->re_buffer;
267 char *fastmap = bufp->re_fastmap;
268
269 memset (fastmap, '\0', sizeof (char) * SBC_MAX);
270 re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
271 if (dfa->init_state != dfa->init_state_word)
272 re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
273 if (dfa->init_state != dfa->init_state_nl)
274 re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
275 if (dfa->init_state != dfa->init_state_begbuf)
276 re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
277 bufp->re_fastmap_accurate = 1;
278 return 0;
279}
280#ifdef _LIBC
281weak_alias (__re_compile_fastmap, re_compile_fastmap)
282#endif
283
/* Mark byte CH in FASTMAP.  When ICASE is true, the entry for
   tolower (CH) is marked as well.  */
static inline void
__attribute ((always_inline))
re_set_fastmap (char *fastmap, bool icase, int ch)
{
  char *slot = fastmap + ch;

  *slot = 1;
  if (!icase)
    return;
  fastmap[tolower (ch)] = 1;
}
292
293/* Helper function for re_compile_fastmap.
294 Compile fastmap for the initial_state INIT_STATE. */
295
296static void
297re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
298 char *fastmap)
299{
300 re_dfa_t *dfa = (re_dfa_t *) bufp->re_buffer;
301 Idx node_cnt;
302 bool icase = (dfa->mb_cur_max == 1 && (bufp->re_syntax & REG_IGNORE_CASE));
303 for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
304 {
305 Idx node = init_state->nodes.elems[node_cnt];
306 re_token_type_t type = dfa->nodes[node].type;
307
308 if (type == CHARACTER)
309 {
310 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
311#ifdef RE_ENABLE_I18N
312 if ((bufp->re_syntax & REG_IGNORE_CASE) && dfa->mb_cur_max > 1)
313 {
314 unsigned char buf[MB_LEN_MAX];
315 unsigned char *p;
316 wchar_t wc;
317 mbstate_t state;
318
319 p = buf;
320 *p++ = dfa->nodes[node].opr.c;
321 while (++node < dfa->nodes_len
322 && dfa->nodes[node].type == CHARACTER
323 && dfa->nodes[node].mb_partial)
324 *p++ = dfa->nodes[node].opr.c;
325 memset (&state, 0, sizeof (state));
326 if (mbrtowc (&wc, (const char *) buf, p - buf,
327 &state) == p - buf
328 && (__wcrtomb ((char *) buf, towlower (wc), &state)
329 != (size_t) -1))
330 re_set_fastmap (fastmap, false, buf[0]);
331 }
332#endif
333 }
334 else if (type == SIMPLE_BRACKET)
335 {
336 int i, j, ch;
337 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
338 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
339 if (dfa->nodes[node].opr.sbcset[i] & ((bitset_word) 1 << j))
340 re_set_fastmap (fastmap, icase, ch);
341 }
342#ifdef RE_ENABLE_I18N
343 else if (type == COMPLEX_BRACKET)
344 {
345 Idx i;
346 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
347 if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
348 || cset->nranges || cset->nchar_classes)
349 {
350# ifdef _LIBC
351 if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
352 {
353 /* In this case we want to catch the bytes which are
354 the first byte of any collation elements.
355 e.g. In da_DK, we want to catch 'a' since "aa"
356 is a valid collation element, and don't catch
357 'b' since 'b' is the only collation element
358 which starts from 'b'. */
359 const int32_t *table = (const int32_t *)
360 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
361 for (i = 0; i < SBC_MAX; ++i)
362 if (table[i] < 0)
363 re_set_fastmap (fastmap, icase, i);
364 }
365# else
366 if (dfa->mb_cur_max > 1)
367 for (i = 0; i < SBC_MAX; ++i)
368 if (__btowc (i) == WEOF)
369 re_set_fastmap (fastmap, icase, i);
370# endif /* not _LIBC */
371 }
372 for (i = 0; i < cset->nmbchars; ++i)
373 {
374 char buf[256];
375 mbstate_t state;
376 memset (&state, '\0', sizeof (state));
377 if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
378 re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
379 if ((bufp->re_syntax & REG_IGNORE_CASE) && dfa->mb_cur_max > 1)
380 {
381 if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
382 != (size_t) -1)
383 re_set_fastmap (fastmap, false, *(unsigned char *) buf);
384 }
385 }
386 }
387#endif /* RE_ENABLE_I18N */
388 else if (type == OP_PERIOD
389#ifdef RE_ENABLE_I18N
390 || type == OP_UTF8_PERIOD
391#endif /* RE_ENABLE_I18N */
392 || type == END_OF_RE)
393 {
394 memset (fastmap, '\1', sizeof (char) * SBC_MAX);
395 if (type == END_OF_RE)
396 bufp->re_can_be_null = 1;
397 return;
398 }
399 }
400}
401
402/* Entry point for POSIX code. */
403/* regcomp takes a regular expression as a string and compiles it.
404
405 PREG is a regex_t *. We do not expect any fields to be initialized,
406 since POSIX says we shouldn't. Thus, we set
407
408 `re_buffer' to the compiled pattern;
409 `re_used' to the length of the compiled pattern;
410 `re_syntax' to REG_SYNTAX_POSIX_EXTENDED if the
411 REG_EXTENDED bit in CFLAGS is set; otherwise, to
412 REG_SYNTAX_POSIX_BASIC;
413 `re_newline_anchor' to REG_NEWLINE being set in CFLAGS;
414 `re_fastmap' to an allocated space for the fastmap;
415 `re_fastmap_accurate' to zero;
416 `re_nsub' to the number of subexpressions in PATTERN.
417
418 PATTERN is the address of the pattern string.
419
420 CFLAGS is a series of bits which affect compilation.
421
422 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
423 use POSIX basic syntax.
424
425 If REG_NEWLINE is set, then . and [^...] don't match newline.
426 Also, regexec will try a match beginning after every newline.
427
428 If REG_ICASE is set, then we consider upper- and lowercase
429 versions of letters to be equivalent when matching.
430
431 If REG_NOSUB is set, then when PREG is passed to regexec, that
432 routine will report only success or failure, and nothing about the
433 registers.
434
435 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
436 the return codes and their meanings.) */
437
438int
439regcomp (regex_t *__restrict preg, const char *__restrict pattern, int cflags)
440{
441 reg_errcode_t ret;
442 reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? REG_SYNTAX_POSIX_EXTENDED
443 : REG_SYNTAX_POSIX_BASIC);
444
445 preg->re_buffer = NULL;
446 preg->re_allocated = 0;
447 preg->re_used = 0;
448
449 /* Try to allocate space for the fastmap. */
450 preg->re_fastmap = re_malloc (char, SBC_MAX);
451 if (BE (preg->re_fastmap == NULL, 0))
452 return REG_ESPACE;
453
454 syntax |= (cflags & REG_ICASE) ? REG_IGNORE_CASE : 0;
455
456 /* If REG_NEWLINE is set, newlines are treated differently. */
457 if (cflags & REG_NEWLINE)
458 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
459 syntax &= ~REG_DOT_NEWLINE;
460 syntax |= REG_HAT_LISTS_NOT_NEWLINE;
461 /* It also changes the matching behavior. */
462 preg->re_newline_anchor = 1;
463 }
464 else
465 preg->re_newline_anchor = 0;
466 preg->re_no_sub = !!(cflags & REG_NOSUB);
467 preg->re_translate = NULL;
468
469 ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
470
471 /* POSIX doesn't distinguish between an unmatched open-group and an
472 unmatched close-group: both are REG_EPAREN. */
473 if (ret == REG_ERPAREN)
474 ret = REG_EPAREN;
475
476 /* We have already checked preg->re_fastmap != NULL. */
477 if (BE (ret == REG_NOERROR, 1))
478 /* Compute the fastmap now, since regexec cannot modify the pattern
479 buffer. This function never fails in this implementation. */
480 (void) re_compile_fastmap (preg);
481 else
482 {
483 /* Some error occurred while compiling the expression. */
484 re_free (preg->re_fastmap);
485 preg->re_fastmap = NULL;
486 }
487
488 return (int) ret;
489}
490#ifdef _LIBC
491weak_alias (__regcomp, regcomp)
492#endif
493
494/* Returns a message corresponding to an error code, ERRCODE, returned
495 from either regcomp or regexec. We don't use PREG here. */
496
497size_t
498regerror (int errcode, const regex_t *__restrict preg,
499 char *__restrict errbuf, size_t errbuf_size)
500{
501 const char *msg;
502 size_t msg_size;
503
504 if (BE (errcode < 0
505 || errcode >= (int) (sizeof (__re_error_msgid_idx)
506 / sizeof (__re_error_msgid_idx[0])), 0))
507 /* Only error codes returned by the rest of the code should be passed
508 to this routine. If we are given anything else, or if other regex
509 code generates an invalid error code, then the program has a bug.
510 Dump core so we can fix it. */
511 abort ();
512
513 msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
514
515 msg_size = strlen (msg) + 1; /* Includes the null. */
516
517 if (BE (errbuf_size != 0, 1))
518 {
519 if (BE (msg_size > errbuf_size, 0))
520 {
521#if defined HAVE_MEMPCPY || defined _LIBC
522 *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
523#else
524 memcpy (errbuf, msg, errbuf_size - 1);
525 errbuf[errbuf_size - 1] = 0;
526#endif
527 }
528 else
529 memcpy (errbuf, msg, msg_size);
530 }
531
532 return msg_size;
533}
534#ifdef _LIBC
535weak_alias (__regerror, regerror)
536#endif
537
538
#ifdef RE_ENABLE_I18N
/* The initializer below can emit at most three optional leading words
   plus one final word.  */
# if 8 < BITSET_WORDS
#  error "Invalid BITSET_WORDS"
# endif

/* Shared, read-only single-byte-character map used for UTF-8, so that
   no memory has to be allocated and filled identically for every
   compiled pattern.  */
static const bitset utf8_sb_map =
{
  /* Set the first 128 bits.  */
# if 2 < BITSET_WORDS
  BITSET_WORD_MAX,
# endif
# if 4 < BITSET_WORDS
  BITSET_WORD_MAX,
# endif
# if 6 < BITSET_WORDS
  BITSET_WORD_MAX,
# endif
  /* Final word: the shift count is zero when SBC_MAX is a multiple of
     the word width.  */
  (BITSET_WORD_MAX
   >> ((BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS) % BITSET_WORD_BITS))
};
#endif
565
566
567static void
568free_dfa_content (re_dfa_t *dfa)
569{
570 Idx i, j;
571
572 if (dfa->nodes)
573 for (i = 0; i < dfa->nodes_len; ++i)
574 free_token (dfa->nodes + i);
575 re_free (dfa->nexts);
576 for (i = 0; i < dfa->nodes_len; ++i)
577 {
578 if (dfa->eclosures != NULL)
579 re_node_set_free (dfa->eclosures + i);
580 if (dfa->inveclosures != NULL)
581 re_node_set_free (dfa->inveclosures + i);
582 if (dfa->edests != NULL)
583 re_node_set_free (dfa->edests + i);
584 }
585 re_free (dfa->edests);
586 re_free (dfa->eclosures);
587 re_free (dfa->inveclosures);
588 re_free (dfa->nodes);
589
590 if (dfa->state_table)
591 for (i = 0; i <= dfa->state_hash_mask; ++i)
592 {
593 struct re_state_table_entry *entry = dfa->state_table + i;
594 for (j = 0; j < entry->num; ++j)
595 {
596 re_dfastate_t *state = entry->array[j];
597 free_state (state);
598 }
599 re_free (entry->array);
600 }
601 re_free (dfa->state_table);
602#ifdef RE_ENABLE_I18N
603 if (dfa->sb_char != utf8_sb_map)
604 re_free (dfa->sb_char);
605#endif
606 re_free (dfa->subexp_map);
607#ifdef DEBUG
608 re_free (dfa->re_str);
609#endif
610
611 re_free (dfa);
612}
613
614
615/* Free dynamically allocated space used by PREG. */
616
617void
618regfree (regex_t *preg)
619{
620 re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
621 if (BE (dfa != NULL, 1))
622 free_dfa_content (dfa);
623 preg->re_buffer = NULL;
624 preg->re_allocated = 0;
625
626 re_free (preg->re_fastmap);
627 preg->re_fastmap = NULL;
628
629 re_free (preg->re_translate);
630 preg->re_translate = NULL;
631}
632#ifdef _LIBC
633weak_alias (__regfree, regfree)
634#endif
635
636/* Entry points compatible with 4.2 BSD regex library. We don't define
637 them unless specifically requested. */
638
639#if defined _REGEX_RE_COMP || defined _LIBC
640
641/* BSD has one and only one pattern buffer. */
642static struct re_pattern_buffer re_comp_buf;
643
644char *
645# ifdef _LIBC
646/* Make these definitions weak in libc, so POSIX programs can redefine
647 these names if they don't use our functions, and still use
648 regcomp/regexec above without link errors. */
649weak_function
650# endif
651re_comp (const char *s)
652{
653 reg_errcode_t ret;
654 char *fastmap;
655
656 if (!s)
657 {
658 if (!re_comp_buf.re_buffer)
659 return gettext ("No previous regular expression");
660 return 0;
661 }
662
663 if (re_comp_buf.re_buffer)
664 {
665 fastmap = re_comp_buf.re_fastmap;
666 re_comp_buf.re_fastmap = NULL;
667 __regfree (&re_comp_buf);
668 memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
669 re_comp_buf.re_fastmap = fastmap;
670 }
671
672 if (re_comp_buf.re_fastmap == NULL)
673 {
674 re_comp_buf.re_fastmap = (char *) malloc (SBC_MAX);
675 if (re_comp_buf.re_fastmap == NULL)
676 return (char *) gettext (__re_error_msgid
677 + __re_error_msgid_idx[(int) REG_ESPACE]);
678 }
679
680 /* Since `re_exec' always passes NULL for the `regs' argument, we
681 don't need to initialize the pattern buffer fields which affect it. */
682
683 /* Match anchors at newlines. */
684 re_comp_buf.re_newline_anchor = 1;
685
686 ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
687
688 if (!ret)
689 return NULL;
690
691 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
692 return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
693}
694
#ifdef _LIBC
/* libc resource-release hook: free whatever the single BSD-style
   pattern buffer still owns.  */
libc_freeres_fn (free_mem)
{
  struct re_pattern_buffer *bsd_buf = &re_comp_buf;

  __regfree (bsd_buf);
}
#endif
701
702#endif /* _REGEX_RE_COMP */
703
704/* Internal entry point.
705 Compile the regular expression PATTERN, whose length is LENGTH.
706 SYNTAX indicates the regular expression's syntax. */
707
708static reg_errcode_t
709re_compile_internal (regex_t *preg, const char *pattern, Idx length,
710 reg_syntax_t syntax)
711{
712 reg_errcode_t err = REG_NOERROR;
713 re_dfa_t *dfa;
714 re_string_t regexp;
715
716 /* Initialize the pattern buffer. */
717 preg->re_fastmap_accurate = 0;
718 preg->re_syntax = syntax;
719 preg->re_not_bol = preg->re_not_eol = 0;
720 preg->re_used = 0;
721 preg->re_nsub = 0;
722 preg->re_can_be_null = 0;
723 preg->re_regs_allocated = REG_UNALLOCATED;
724
725 /* Initialize the dfa. */
726 dfa = (re_dfa_t *) preg->re_buffer;
727 if (BE (preg->re_allocated < sizeof (re_dfa_t), 0))
728 {
729 /* If zero allocated, but buffer is non-null, try to realloc
730 enough space. This loses if buffer's address is bogus, but
731 that is the user's responsibility. If buffer is null this
732 is a simple allocation. */
733 dfa = re_realloc (preg->re_buffer, re_dfa_t, 1);
734 if (dfa == NULL)
735 return REG_ESPACE;
736 preg->re_allocated = sizeof (re_dfa_t);
737 preg->re_buffer = (unsigned char *) dfa;
738 }
739 preg->re_used = sizeof (re_dfa_t);
740
741 __libc_lock_init (dfa->lock);
742
743 err = init_dfa (dfa, length);
744 if (BE (err != REG_NOERROR, 0))
745 {
746 free_dfa_content (dfa);
747 preg->re_buffer = NULL;
748 preg->re_allocated = 0;
749 return err;
750 }
751#ifdef DEBUG
752 dfa->re_str = re_malloc (char, length + 1);
753 strncpy (dfa->re_str, pattern, length + 1);
754#endif
755
756 err = re_string_construct (&regexp, pattern, length, preg->re_translate,
757 syntax & REG_IGNORE_CASE, dfa);
758 if (BE (err != REG_NOERROR, 0))
759 {
760 re_compile_internal_free_return:
761 free_workarea_compile (preg);
762 re_string_destruct (&regexp);
763 free_dfa_content (dfa);
764 preg->re_buffer = NULL;
765 preg->re_allocated = 0;
766 return err;
767 }
768
769 /* Parse the regular expression, and build a structure tree. */
770 preg->re_nsub = 0;
771 dfa->str_tree = parse (&regexp, preg, syntax, &err);
772 if (BE (dfa->str_tree == NULL, 0))
773 goto re_compile_internal_free_return;
774
775 /* Analyze the tree and create the nfa. */
776 err = analyze (preg);
777 if (BE (err != REG_NOERROR, 0))
778 goto re_compile_internal_free_return;
779
780#ifdef RE_ENABLE_I18N
781 /* If possible, do searching in single byte encoding to speed things up. */
782 if (dfa->is_utf8 && !(syntax & REG_IGNORE_CASE) && preg->re_translate == NULL)
783 optimize_utf8 (dfa);
784#endif
785
786 /* Then create the initial state of the dfa. */
787 err = create_initial_state (dfa);
788
789 /* Release work areas. */
790 free_workarea_compile (preg);
791 re_string_destruct (&regexp);
792
793 if (BE (err != REG_NOERROR, 0))
794 {
795 free_dfa_content (dfa);
796 preg->re_buffer = NULL;
797 preg->re_allocated = 0;
798 }
799
800 return err;
801}
802
803/* Initialize DFA. We use the length of the regular expression PAT_LEN
804 as the initial length of some arrays. */
805
806static reg_errcode_t
807init_dfa (re_dfa_t *dfa, Idx pat_len)
808{
809 __re_size_t table_size;
810#ifndef _LIBC
811 char *codeset_name;
812#endif
813
814 memset (dfa, '\0', sizeof (re_dfa_t));
815
816 /* Force allocation of str_tree_storage the first time. */
817 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
818
819 dfa->nodes_alloc = pat_len + 1;
820 dfa->nodes = re_xmalloc (re_token_t, dfa->nodes_alloc);
821
822 /* table_size = 2 ^ ceil(log pat_len) */
823 for (table_size = 1; table_size <= pat_len; table_size <<= 1)
824 if (0 < (Idx) -1 && table_size == 0)
825 return REG_ESPACE;
826
827 dfa->state_table = re_calloc (struct re_state_table_entry, table_size);
828 dfa->state_hash_mask = table_size - 1;
829
830 dfa->mb_cur_max = MB_CUR_MAX;
831#ifdef _LIBC
832 if (dfa->mb_cur_max == 6
833 && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
834 dfa->is_utf8 = 1;
835 dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
836 != 0);
837#else
838# ifdef HAVE_LANGINFO_CODESET
839 codeset_name = nl_langinfo (CODESET);
840# else
841 codeset_name = getenv ("LC_ALL");
842 if (codeset_name == NULL || codeset_name[0] == '\0')
843 codeset_name = getenv ("LC_CTYPE");
844 if (codeset_name == NULL || codeset_name[0] == '\0')
845 codeset_name = getenv ("LANG");
846 if (codeset_name == NULL)
847 codeset_name = "";
848 else if (strchr (codeset_name, '.') != NULL)
849 codeset_name = strchr (codeset_name, '.') + 1;
850# endif
851
852 if (strcasecmp (codeset_name, "UTF-8") == 0
853 || strcasecmp (codeset_name, "UTF8") == 0)
854 dfa->is_utf8 = 1;
855
856 /* We check exhaustively in the loop below if this charset is a
857 superset of ASCII. */
858 dfa->map_notascii = 0;
859#endif
860
861#ifdef RE_ENABLE_I18N
862 if (dfa->mb_cur_max > 1)
863 {
864 if (dfa->is_utf8)
865 dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
866 else
867 {
868 int i, j, ch;
869
870 dfa->sb_char = re_calloc (bitset_word, BITSET_WORDS);
871 if (BE (dfa->sb_char == NULL, 0))
872 return REG_ESPACE;
873
874 /* Set the bits corresponding to single byte chars. */
875 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
876 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
877 {
878 wint_t wch = __btowc (ch);
879 if (wch != WEOF)
880 dfa->sb_char[i] |= (bitset_word) 1 << j;
881# ifndef _LIBC
882 if (isascii (ch) && wch != ch)
883 dfa->map_notascii = 1;
884# endif
885 }
886 }
887 }
888#endif
889
890 if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
891 return REG_ESPACE;
892 return REG_NOERROR;
893}
894
895/* Initialize WORD_CHAR table, which indicate which character is
896 "word". In this case "word" means that it is the word construction
897 character used by some operators like "\<", "\>", etc. */
898
899static void
900init_word_char (re_dfa_t *dfa)
901{
902 int i, j, ch;
903 dfa->word_ops_used = 1;
904 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
905 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
906 if (isalnum (ch) || ch == '_')
907 dfa->word_char[i] |= (bitset_word) 1 << j;
908}
909
910/* Free the work areas that are only used while compiling. */
911
912static void
913free_workarea_compile (regex_t *preg)
914{
915 re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
916 bin_tree_storage_t *storage, *next;
917 for (storage = dfa->str_tree_storage; storage; storage = next)
918 {
919 next = storage->next;
920 re_free (storage);
921 }
922 dfa->str_tree_storage = NULL;
923 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
924 dfa->str_tree = NULL;
925 re_free (dfa->org_indices);
926 dfa->org_indices = NULL;
927}
928
929/* Create initial states for all contexts. */
930
931static reg_errcode_t
932create_initial_state (re_dfa_t *dfa)
933{
934 Idx first, i;
935 reg_errcode_t err;
936 re_node_set init_nodes;
937
938 /* Initial states have the epsilon closure of the node which is
939 the first node of the regular expression. */
940 first = dfa->str_tree->first->node_idx;
941 dfa->init_node = first;
942 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
943 if (BE (err != REG_NOERROR, 0))
944 return err;
945
946 /* The back-references which are in initial states can epsilon transit,
947 since in this case all of the subexpressions can be null.
948 Then we add epsilon closures of the nodes which are the next nodes of
949 the back-references. */
950 if (dfa->nbackref > 0)
951 for (i = 0; i < init_nodes.nelem; ++i)
952 {
953 Idx node_idx = init_nodes.elems[i];
954 re_token_type_t type = dfa->nodes[node_idx].type;
955
956 Idx clexp_idx;
957 if (type != OP_BACK_REF)
958 continue;
959 for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
960 {
961 re_token_t *clexp_node;
962 clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
963 if (clexp_node->type == OP_CLOSE_SUBEXP
964 && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
965 break;
966 }
967 if (clexp_idx == init_nodes.nelem)
968 continue;
969
970 if (type == OP_BACK_REF)
971 {
972 Idx dest_idx = dfa->edests[node_idx].elems[0];
973 if (!re_node_set_contains (&init_nodes, dest_idx))
974 {
975 re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
976 i = 0;
977 }
978 }
979 }
980
981 /* It must be the first time to invoke acquire_state. */
982 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
983 /* We don't check ERR here, since the initial state must not be NULL. */
984 if (BE (dfa->init_state == NULL, 0))
985 return err;
986 if (dfa->init_state->has_constraint)
987 {
988 dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
989 CONTEXT_WORD);
990 dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
991 CONTEXT_NEWLINE);
992 dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
993 &init_nodes,
994 CONTEXT_NEWLINE
995 | CONTEXT_BEGBUF);
996 if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
997 || dfa->init_state_begbuf == NULL, 0))
998 return err;
999 }
1000 else
1001 dfa->init_state_word = dfa->init_state_nl
1002 = dfa->init_state_begbuf = dfa->init_state;
1003
1004 re_node_set_free (&init_nodes);
1005 return REG_NOERROR;
1006}
1007
1008#ifdef RE_ENABLE_I18N
1009/* If it is possible to do searching in single byte encoding instead of UTF-8
1010 to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
1011 DFA nodes where needed. */
1012
1013static void
1014optimize_utf8 (re_dfa_t *dfa)
1015{
1016 Idx node;
1017 int i;
1018 bool mb_chars = false;
1019 bool has_period = false;
1020
1021 for (node = 0; node < dfa->nodes_len; ++node)
1022 switch (dfa->nodes[node].type)
1023 {
1024 case CHARACTER:
1025 if (dfa->nodes[node].opr.c >= 0x80)
1026 mb_chars = true;
1027 break;
1028 case ANCHOR:
1029 switch (dfa->nodes[node].opr.idx)
1030 {
1031 case LINE_FIRST:
1032 case LINE_LAST:
1033 case BUF_FIRST:
1034 case BUF_LAST:
1035 break;
1036 default:
1037 /* Word anchors etc. cannot be handled. */
1038 return;
1039 }
1040 break;
1041 case OP_PERIOD:
1042 has_period = true;
1043 break;
1044 case OP_BACK_REF:
1045 case OP_ALT:
1046 case END_OF_RE:
1047 case OP_DUP_ASTERISK:
1048 case OP_OPEN_SUBEXP:
1049 case OP_CLOSE_SUBEXP:
1050 break;
1051 case COMPLEX_BRACKET:
1052 return;
1053 case SIMPLE_BRACKET:
1054 /* Just double check. */
1055 {
1056 int rshift =
1057 (SBC_MAX / 2 % BITSET_WORD_BITS == 0
1058 ? 0
1059 : BITSET_WORD_BITS - SBC_MAX / 2 % BITSET_WORD_BITS);
1060 for (i = SBC_MAX / 2 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
1061 {
1062 if (dfa->nodes[node].opr.sbcset[i] >> rshift != 0)
1063 return;
1064 rshift = 0;
1065 }
1066 }
1067 break;
1068 default:
1069 abort ();
1070 }
1071
1072 if (mb_chars || has_period)
1073 for (node = 0; node < dfa->nodes_len; ++node)
1074 {
1075 if (dfa->nodes[node].type == CHARACTER
1076 && dfa->nodes[node].opr.c >= 0x80)
1077 dfa->nodes[node].mb_partial = 0;
1078 else if (dfa->nodes[node].type == OP_PERIOD)
1079 dfa->nodes[node].type = OP_UTF8_PERIOD;
1080 }
1081
1082 /* The search can be in single byte locale. */
1083 dfa->mb_cur_max = 1;
1084 dfa->is_utf8 = 0;
1085 dfa->has_mb_node = dfa->nbackref > 0 || has_period;
1086}
1087#endif
1088
1089/* Analyze the structure tree, and calculate "first", "next", "edest",
1090 "eclosure", and "inveclosure". */
1091
1092static reg_errcode_t
1093analyze (regex_t *preg)
1094{
1095 re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
1096 reg_errcode_t ret;
1097
1098 /* Allocate arrays. */
1099 dfa->nexts = re_malloc (Idx, dfa->nodes_alloc);
1100 dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc);
1101 dfa->edests = re_xmalloc (re_node_set, dfa->nodes_alloc);
1102 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
1103 if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
1104 || dfa->eclosures == NULL, 0))
1105 return REG_ESPACE;
1106
1107 dfa->subexp_map = re_xmalloc (Idx, preg->re_nsub);
1108 if (dfa->subexp_map != NULL)
1109 {
1110 Idx i;
1111 for (i = 0; i < preg->re_nsub; i++)
1112 dfa->subexp_map[i] = i;
1113 preorder (dfa->str_tree, optimize_subexps, dfa);
1114 for (i = 0; i < preg->re_nsub; i++)
1115 if (dfa->subexp_map[i] != i)
1116 break;
1117 if (i == preg->re_nsub)
1118 {
1119 free (dfa->subexp_map);
1120 dfa->subexp_map = NULL;
1121 }
1122 }
1123
1124 ret = postorder (dfa->str_tree, lower_subexps, preg);
1125 if (BE (ret != REG_NOERROR, 0))
1126 return ret;
1127 ret = postorder (dfa->str_tree, calc_first, dfa);
1128 if (BE (ret != REG_NOERROR, 0))
1129 return ret;
1130 preorder (dfa->str_tree, calc_next, dfa);
1131 ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
1132 if (BE (ret != REG_NOERROR, 0))
1133 return ret;
1134 ret = calc_eclosure (dfa);
1135 if (BE (ret != REG_NOERROR, 0))
1136 return ret;
1137
1138 /* We only need this during the prune_impossible_nodes pass in regexec.c;
1139 skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */
1140 if ((!preg->re_no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
1141 || dfa->nbackref)
1142 {
1143 dfa->inveclosures = re_xmalloc (re_node_set, dfa->nodes_len);
1144 if (BE (dfa->inveclosures == NULL, 0))
1145 return REG_ESPACE;
1146 ret = calc_inveclosure (dfa);
1147 }
1148
1149 return ret;
1150}
1151
1152/* Our parse trees are very unbalanced, so we cannot use a stack to
1153 implement parse tree visits. Instead, we use parent pointers and
1154 some hairy code in these two functions. */
1155static reg_errcode_t
1156postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
1157 void *extra)
1158{
1159 bin_tree_t *node, *prev;
1160
1161 for (node = root; ; )
1162 {
1163 /* Descend down the tree, preferably to the left (or to the right
1164 if that's the only child). */
1165 while (node->left || node->right)
1166 if (node->left)
1167 node = node->left;
1168 else
1169 node = node->right;
1170
1171 do
1172 {
1173 reg_errcode_t err = fn (extra, node);
1174 if (BE (err != REG_NOERROR, 0))
1175 return err;
1176 if (node->parent == NULL)
1177 return REG_NOERROR;
1178 prev = node;
1179 node = node->parent;
1180 }
1181 /* Go up while we have a node that is reached from the right. */
1182 while (node->right == prev || node->right == NULL);
1183 node = node->right;
1184 }
1185}
1186
1187static reg_errcode_t
1188preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
1189 void *extra)
1190{
1191 bin_tree_t *node;
1192
1193 for (node = root; ; )
1194 {
1195 reg_errcode_t err = fn (extra, node);
1196 if (BE (err != REG_NOERROR, 0))
1197 return err;
1198
1199 /* Go to the left node, or up and to the right. */
1200 if (node->left)
1201 node = node->left;
1202 else
1203 {
1204 bin_tree_t *prev = NULL;
1205 while (node->right == prev || node->right == NULL)
1206 {
1207 prev = node;
1208 node = node->parent;
1209 if (!node)
1210 return REG_NOERROR;
1211 }
1212 node = node->right;
1213 }
1214 }
1215}
1216
1217/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
1218 re_search_internal to map the inner one's opr.idx to this one's. Adjust
1219 backreferences as well. Requires a preorder visit. */
1220static reg_errcode_t
1221optimize_subexps (void *extra, bin_tree_t *node)
1222{
1223 re_dfa_t *dfa = (re_dfa_t *) extra;
1224
1225 if (node->token.type == OP_BACK_REF && dfa->subexp_map)
1226 {
1227 int idx = node->token.opr.idx;
1228 node->token.opr.idx = dfa->subexp_map[idx];
1229 dfa->used_bkref_map |= 1 << node->token.opr.idx;
1230 }
1231
1232 else if (node->token.type == SUBEXP
1233 && node->left && node->left->token.type == SUBEXP)
1234 {
1235 Idx other_idx = node->left->token.opr.idx;
1236
1237 node->left = node->left->left;
1238 if (node->left)
1239 node->left->parent = node;
1240
1241 dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
1242 if (other_idx < BITSET_WORD_BITS)
1243 dfa->used_bkref_map &= ~ ((bitset_word) 1 << other_idx);
1244 }
1245
1246 return REG_NOERROR;
1247}
1248
1249/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
1250 of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */
1251static reg_errcode_t
1252lower_subexps (void *extra, bin_tree_t *node)
1253{
1254 regex_t *preg = (regex_t *) extra;
1255 reg_errcode_t err = REG_NOERROR;
1256
1257 if (node->left && node->left->token.type == SUBEXP)
1258 {
1259 node->left = lower_subexp (&err, preg, node->left);
1260 if (node->left)
1261 node->left->parent = node;
1262 }
1263 if (node->right && node->right->token.type == SUBEXP)
1264 {
1265 node->right = lower_subexp (&err, preg, node->right);
1266 if (node->right)
1267 node->right->parent = node;
1268 }
1269
1270 return err;
1271}
1272
1273static bin_tree_t *
1274lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
1275{
1276 re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
1277 bin_tree_t *body = node->left;
1278 bin_tree_t *op, *cls, *tree1, *tree;
1279
1280 if (preg->re_no_sub
1281 /* We do not optimize empty subexpressions, because otherwise we may
1282 have bad CONCAT nodes with NULL children. This is obviously not
1283 very common, so we do not lose much. An example that triggers
1284 this case is the sed "script" /\(\)/x. */
1285 && node->left != NULL
1286 && ! (node->token.opr.idx < BITSET_WORD_BITS
1287 && dfa->used_bkref_map & ((bitset_word) 1 << node->token.opr.idx)))
1288 return node->left;
1289
1290 /* Convert the SUBEXP node to the concatenation of an
1291 OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */
1292 op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
1293 cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
1294 tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
1295 tree = create_tree (dfa, op, tree1, CONCAT);
1296 if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0))
1297 {
1298 *err = REG_ESPACE;
1299 return NULL;
1300 }
1301
1302 op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
1303 op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
1304 return tree;
1305}
1306
1307/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
1308 nodes. Requires a postorder visit. */
1309static reg_errcode_t
1310calc_first (void *extra, bin_tree_t *node)
1311{
1312 re_dfa_t *dfa = (re_dfa_t *) extra;
1313 if (node->token.type == CONCAT)
1314 {
1315 node->first = node->left->first;
1316 node->node_idx = node->left->node_idx;
1317 }
1318 else
1319 {
1320 node->first = node;
1321 node->node_idx = re_dfa_add_node (dfa, node->token);
1322 if (BE (node->node_idx == REG_MISSING, 0))
1323 return REG_ESPACE;
1324 }
1325 return REG_NOERROR;
1326}
1327
1328/* Pass 2: compute NEXT on the tree. Preorder visit. */
1329static reg_errcode_t
1330calc_next (void *extra, bin_tree_t *node)
1331{
1332 switch (node->token.type)
1333 {
1334 case OP_DUP_ASTERISK:
1335 node->left->next = node;
1336 break;
1337 case CONCAT:
1338 node->left->next = node->right->first;
1339 node->right->next = node->next;
1340 break;
1341 default:
1342 if (node->left)
1343 node->left->next = node->next;
1344 if (node->right)
1345 node->right->next = node->next;
1346 break;
1347 }
1348 return REG_NOERROR;
1349}
1350
1351/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */
1352static reg_errcode_t
1353link_nfa_nodes (void *extra, bin_tree_t *node)
1354{
1355 re_dfa_t *dfa = (re_dfa_t *) extra;
1356 Idx idx = node->node_idx;
1357 reg_errcode_t err = REG_NOERROR;
1358
1359 switch (node->token.type)
1360 {
1361 case CONCAT:
1362 break;
1363
1364 case END_OF_RE:
1365 assert (node->next == NULL);
1366 break;
1367
1368 case OP_DUP_ASTERISK:
1369 case OP_ALT:
1370 {
1371 Idx left, right;
1372 dfa->has_plural_match = 1;
1373 if (node->left != NULL)
1374 left = node->left->first->node_idx;
1375 else
1376 left = node->next->node_idx;
1377 if (node->right != NULL)
1378 right = node->right->first->node_idx;
1379 else
1380 right = node->next->node_idx;
1381 assert (REG_VALID_INDEX (left));
1382 assert (REG_VALID_INDEX (right));
1383 err = re_node_set_init_2 (dfa->edests + idx, left, right);
1384 }
1385 break;
1386
1387 case ANCHOR:
1388 case OP_OPEN_SUBEXP:
1389 case OP_CLOSE_SUBEXP:
1390 err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
1391 break;
1392
1393 case OP_BACK_REF:
1394 dfa->nexts[idx] = node->next->node_idx;
1395 if (node->token.type == OP_BACK_REF)
1396 re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
1397 break;
1398
1399 default:
1400 assert (!IS_EPSILON_NODE (node->token.type));
1401 dfa->nexts[idx] = node->next->node_idx;
1402 break;
1403 }
1404
1405 return err;
1406}
1407
1408/* Duplicate the epsilon closure of the node ROOT_NODE.
1409 Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
1410 to their own constraint. */
1411
1412static reg_errcode_t
1413duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node,
1414 Idx top_clone_node, Idx root_node,
1415 unsigned int init_constraint)
1416{
1417 Idx org_node, clone_node;
1418 bool ok;
1419 unsigned int constraint = init_constraint;
1420 for (org_node = top_org_node, clone_node = top_clone_node;;)
1421 {
1422 Idx org_dest, clone_dest;
1423 if (dfa->nodes[org_node].type == OP_BACK_REF)
1424 {
1425 /* If the back reference epsilon-transit, its destination must
1426 also have the constraint. Then duplicate the epsilon closure
1427 of the destination of the back reference, and store it in
1428 edests of the back reference. */
1429 org_dest = dfa->nexts[org_node];
1430 re_node_set_empty (dfa->edests + clone_node);
1431 clone_dest = duplicate_node (dfa, org_dest, constraint);
1432 if (BE (clone_dest == REG_MISSING, 0))
1433 return REG_ESPACE;
1434 dfa->nexts[clone_node] = dfa->nexts[org_node];
1435 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1436 if (BE (! ok, 0))
1437 return REG_ESPACE;
1438 }
1439 else if (dfa->edests[org_node].nelem == 0)
1440 {
1441 /* In case of the node can't epsilon-transit, don't duplicate the
1442 destination and store the original destination as the
1443 destination of the node. */
1444 dfa->nexts[clone_node] = dfa->nexts[org_node];
1445 break;
1446 }
1447 else if (dfa->edests[org_node].nelem == 1)
1448 {
1449 /* In case of the node can epsilon-transit, and it has only one
1450 destination. */
1451 org_dest = dfa->edests[org_node].elems[0];
1452 re_node_set_empty (dfa->edests + clone_node);
1453 if (dfa->nodes[org_node].type == ANCHOR)
1454 {
1455 /* In case of the node has another constraint, append it. */
1456 if (org_node == root_node && clone_node != org_node)
1457 {
1458 /* ...but if the node is root_node itself, it means the
1459 epsilon closure have a loop, then tie it to the
1460 destination of the root_node. */
1461 ok = re_node_set_insert (dfa->edests + clone_node,
1462 org_dest);
1463 if (BE (! ok, 0))
1464 return REG_ESPACE;
1465 break;
1466 }
1467 constraint |= dfa->nodes[org_node].opr.ctx_type;
1468 }
1469 clone_dest = duplicate_node (dfa, org_dest, constraint);
1470 if (BE (clone_dest == REG_MISSING, 0))
1471 return REG_ESPACE;
1472 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1473 if (BE (! ok, 0))
1474 return REG_ESPACE;
1475 }
1476 else /* dfa->edests[org_node].nelem == 2 */
1477 {
1478 /* In case of the node can epsilon-transit, and it has two
1479 destinations. In the bin_tree_t and DFA, that's '|' and '*'. */
1480 org_dest = dfa->edests[org_node].elems[0];
1481 re_node_set_empty (dfa->edests + clone_node);
1482 /* Search for a duplicated node which satisfies the constraint. */
1483 clone_dest = search_duplicated_node (dfa, org_dest, constraint);
1484 if (clone_dest == REG_MISSING)
1485 {
1486 /* There are no such a duplicated node, create a new one. */
1487 reg_errcode_t err;
1488 clone_dest = duplicate_node (dfa, org_dest, constraint);
1489 if (BE (clone_dest == REG_MISSING, 0))
1490 return REG_ESPACE;
1491 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1492 if (BE (! ok, 0))
1493 return REG_ESPACE;
1494 err = duplicate_node_closure (dfa, org_dest, clone_dest,
1495 root_node, constraint);
1496 if (BE (err != REG_NOERROR, 0))
1497 return err;
1498 }
1499 else
1500 {
1501 /* There are a duplicated node which satisfy the constraint,
1502 use it to avoid infinite loop. */
1503 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1504 if (BE (! ok, 0))
1505 return REG_ESPACE;
1506 }
1507
1508 org_dest = dfa->edests[org_node].elems[1];
1509 clone_dest = duplicate_node (dfa, org_dest, constraint);
1510 if (BE (clone_dest == REG_MISSING, 0))
1511 return REG_ESPACE;
1512 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1513 if (BE (! ok, 0))
1514 return REG_ESPACE;
1515 }
1516 org_node = org_dest;
1517 clone_node = clone_dest;
1518 }
1519 return REG_NOERROR;
1520}
1521
1522/* Search for a node which is duplicated from the node ORG_NODE, and
1523 satisfies the constraint CONSTRAINT. */
1524
1525static Idx
1526search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
1527 unsigned int constraint)
1528{
1529 Idx idx;
1530 for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
1531 {
1532 if (org_node == dfa->org_indices[idx]
1533 && constraint == dfa->nodes[idx].constraint)
1534 return idx; /* Found. */
1535 }
1536 return REG_MISSING; /* Not found. */
1537}
1538
1539/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
1540 Return the index of the new node, or REG_MISSING if insufficient storage is
1541 available. */
1542
1543static Idx
1544duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint)
1545{
1546 Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
1547 if (BE (dup_idx != REG_MISSING, 1))
1548 {
1549 dfa->nodes[dup_idx].constraint = constraint;
1550 if (dfa->nodes[org_idx].type == ANCHOR)
1551 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type;
1552 dfa->nodes[dup_idx].duplicated = 1;
1553
1554 /* Store the index of the original node. */
1555 dfa->org_indices[dup_idx] = org_idx;
1556 }
1557 return dup_idx;
1558}
1559
1560static reg_errcode_t
1561calc_inveclosure (re_dfa_t *dfa)
1562{
1563 Idx src, idx;
1564 bool ok;
1565 for (idx = 0; idx < dfa->nodes_len; ++idx)
1566 re_node_set_init_empty (dfa->inveclosures + idx);
1567
1568 for (src = 0; src < dfa->nodes_len; ++src)
1569 {
1570 Idx *elems = dfa->eclosures[src].elems;
1571 for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
1572 {
1573 ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
1574 if (BE (! ok, 0))
1575 return REG_ESPACE;
1576 }
1577 }
1578
1579 return REG_NOERROR;
1580}
1581
1582/* Calculate "eclosure" for all the node in DFA. */
1583
1584static reg_errcode_t
1585calc_eclosure (re_dfa_t *dfa)
1586{
1587 Idx node_idx;
1588 bool incomplete;
1589#ifdef DEBUG
1590 assert (dfa->nodes_len > 0);
1591#endif
1592 incomplete = false;
1593 /* For each nodes, calculate epsilon closure. */
1594 for (node_idx = 0; ; ++node_idx)
1595 {
1596 reg_errcode_t err;
1597 re_node_set eclosure_elem;
1598 if (node_idx == dfa->nodes_len)
1599 {
1600 if (!incomplete)
1601 break;
1602 incomplete = false;
1603 node_idx = 0;
1604 }
1605
1606#ifdef DEBUG
1607 assert (dfa->eclosures[node_idx].nelem != REG_MISSING);
1608#endif
1609
1610 /* If we have already calculated, skip it. */
1611 if (dfa->eclosures[node_idx].nelem != 0)
1612 continue;
1613 /* Calculate epsilon closure of `node_idx'. */
1614 err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true);
1615 if (BE (err != REG_NOERROR, 0))
1616 return err;
1617
1618 if (dfa->eclosures[node_idx].nelem == 0)
1619 {
1620 incomplete = true;
1621 re_node_set_free (&eclosure_elem);
1622 }
1623 }
1624 return REG_NOERROR;
1625}
1626
1627/* Calculate epsilon closure of NODE. */
1628
1629static reg_errcode_t
1630calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
1631{
1632 reg_errcode_t err;
1633 unsigned int constraint;
1634 Idx i;
1635 bool incomplete;
1636 bool ok;
1637 re_node_set eclosure;
1638 incomplete = false;
1639 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
1640 if (BE (err != REG_NOERROR, 0))
1641 return err;
1642
1643 /* This indicates that we are calculating this node now.
1644 We reference this value to avoid infinite loop. */
1645 dfa->eclosures[node].nelem = REG_MISSING;
1646
1647 constraint = ((dfa->nodes[node].type == ANCHOR)
1648 ? dfa->nodes[node].opr.ctx_type : 0);
1649 /* If the current node has constraints, duplicate all nodes.
1650 Since they must inherit the constraints. */
1651 if (constraint
1652 && dfa->edests[node].nelem
1653 && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
1654 {
1655 Idx org_node, cur_node;
1656 org_node = cur_node = node;
1657 err = duplicate_node_closure (dfa, node, node, node, constraint);
1658 if (BE (err != REG_NOERROR, 0))
1659 return err;
1660 }
1661
1662 /* Expand each epsilon destination nodes. */
1663 if (IS_EPSILON_NODE(dfa->nodes[node].type))
1664 for (i = 0; i < dfa->edests[node].nelem; ++i)
1665 {
1666 re_node_set eclosure_elem;
1667 Idx edest = dfa->edests[node].elems[i];
1668 /* If calculating the epsilon closure of `edest' is in progress,
1669 return intermediate result. */
1670 if (dfa->eclosures[edest].nelem == REG_MISSING)
1671 {
1672 incomplete = true;
1673 continue;
1674 }
1675 /* If we haven't calculated the epsilon closure of `edest' yet,
1676 calculate now. Otherwise use calculated epsilon closure. */
1677 if (dfa->eclosures[edest].nelem == 0)
1678 {
1679 err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false);
1680 if (BE (err != REG_NOERROR, 0))
1681 return err;
1682 }
1683 else
1684 eclosure_elem = dfa->eclosures[edest];
1685 /* Merge the epsilon closure of `edest'. */
1686 re_node_set_merge (&eclosure, &eclosure_elem);
1687 /* If the epsilon closure of `edest' is incomplete,
1688 the epsilon closure of this node is also incomplete. */
1689 if (dfa->eclosures[edest].nelem == 0)
1690 {
1691 incomplete = true;
1692 re_node_set_free (&eclosure_elem);
1693 }
1694 }
1695
1696 /* Epsilon closures include itself. */
1697 ok = re_node_set_insert (&eclosure, node);
1698 if (BE (! ok, 0))
1699 return REG_ESPACE;
1700 if (incomplete && !root)
1701 dfa->eclosures[node].nelem = 0;
1702 else
1703 dfa->eclosures[node] = eclosure;
1704 *new_set = eclosure;
1705 return REG_NOERROR;
1706}
1707
1708/* Functions for token which are used in the parser. */
1709
1710/* Fetch a token from INPUT.
1711 We must not use this function inside bracket expressions. */
1712
1713static void
1714fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
1715{
1716 re_string_skip_bytes (input, peek_token (result, input, syntax));
1717}
1718
1719/* Peek a token from INPUT, and return the length of the token.
1720 We must not use this function inside bracket expressions. */
1721
1722static int
1723peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1724{
1725 unsigned char c;
1726
1727 if (re_string_eoi (input))
1728 {
1729 token->type = END_OF_RE;
1730 return 0;
1731 }
1732
1733 c = re_string_peek_byte (input, 0);
1734 token->opr.c = c;
1735
1736 token->word_char = 0;
1737#ifdef RE_ENABLE_I18N
1738 token->mb_partial = 0;
1739 if (input->mb_cur_max > 1 &&
1740 !re_string_first_byte (input, re_string_cur_idx (input)))
1741 {
1742 token->type = CHARACTER;
1743 token->mb_partial = 1;
1744 return 1;
1745 }
1746#endif
1747 if (c == '\\')
1748 {
1749 unsigned char c2;
1750 if (re_string_cur_idx (input) + 1 >= re_string_length (input))
1751 {
1752 token->type = BACK_SLASH;
1753 return 1;
1754 }
1755
1756 c2 = re_string_peek_byte_case (input, 1);
1757 token->opr.c = c2;
1758 token->type = CHARACTER;
1759#ifdef RE_ENABLE_I18N
1760 if (input->mb_cur_max > 1)
1761 {
1762 wint_t wc = re_string_wchar_at (input,
1763 re_string_cur_idx (input) + 1);
1764 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1765 }
1766 else
1767#endif
1768 token->word_char = IS_WORD_CHAR (c2) != 0;
1769
1770 switch (c2)
1771 {
1772 case '|':
1773 if (!(syntax & REG_LIMITED_OPS) && !(syntax & REG_NO_BK_VBAR))
1774 token->type = OP_ALT;
1775 break;
1776 case '1': case '2': case '3': case '4': case '5':
1777 case '6': case '7': case '8': case '9':
1778 if (!(syntax & REG_NO_BK_REFS))
1779 {
1780 token->type = OP_BACK_REF;
1781 token->opr.idx = c2 - '1';
1782 }
1783 break;
1784 case '<':
1785 if (!(syntax & REG_NO_GNU_OPS))
1786 {
1787 token->type = ANCHOR;
1788 token->opr.ctx_type = WORD_FIRST;
1789 }
1790 break;
1791 case '>':
1792 if (!(syntax & REG_NO_GNU_OPS))
1793 {
1794 token->type = ANCHOR;
1795 token->opr.ctx_type = WORD_LAST;
1796 }
1797 break;
1798 case 'b':
1799 if (!(syntax & REG_NO_GNU_OPS))
1800 {
1801 token->type = ANCHOR;
1802 token->opr.ctx_type = WORD_DELIM;
1803 }
1804 break;
1805 case 'B':
1806 if (!(syntax & REG_NO_GNU_OPS))
1807 {
1808 token->type = ANCHOR;
1809 token->opr.ctx_type = NOT_WORD_DELIM;
1810 }
1811 break;
1812 case 'w':
1813 if (!(syntax & REG_NO_GNU_OPS))
1814 token->type = OP_WORD;
1815 break;
1816 case 'W':
1817 if (!(syntax & REG_NO_GNU_OPS))
1818 token->type = OP_NOTWORD;
1819 break;
1820 case 's':
1821 if (!(syntax & REG_NO_GNU_OPS))
1822 token->type = OP_SPACE;
1823 break;
1824 case 'S':
1825 if (!(syntax & REG_NO_GNU_OPS))
1826 token->type = OP_NOTSPACE;
1827 break;
1828 case '`':
1829 if (!(syntax & REG_NO_GNU_OPS))
1830 {
1831 token->type = ANCHOR;
1832 token->opr.ctx_type = BUF_FIRST;
1833 }
1834 break;
1835 case '\'':
1836 if (!(syntax & REG_NO_GNU_OPS))
1837 {
1838 token->type = ANCHOR;
1839 token->opr.ctx_type = BUF_LAST;
1840 }
1841 break;
1842 case '(':
1843 if (!(syntax & REG_NO_BK_PARENS))
1844 token->type = OP_OPEN_SUBEXP;
1845 break;
1846 case ')':
1847 if (!(syntax & REG_NO_BK_PARENS))
1848 token->type = OP_CLOSE_SUBEXP;
1849 break;
1850 case '+':
1851 if (!(syntax & REG_LIMITED_OPS) && (syntax & REG_BK_PLUS_QM))
1852 token->type = OP_DUP_PLUS;
1853 break;
1854 case '?':
1855 if (!(syntax & REG_LIMITED_OPS) && (syntax & REG_BK_PLUS_QM))
1856 token->type = OP_DUP_QUESTION;
1857 break;
1858 case '{':
1859 if ((syntax & REG_INTERVALS) && (!(syntax & REG_NO_BK_BRACES)))
1860 token->type = OP_OPEN_DUP_NUM;
1861 break;
1862 case '}':
1863 if ((syntax & REG_INTERVALS) && (!(syntax & REG_NO_BK_BRACES)))
1864 token->type = OP_CLOSE_DUP_NUM;
1865 break;
1866 default:
1867 break;
1868 }
1869 return 2;
1870 }
1871
1872 token->type = CHARACTER;
1873#ifdef RE_ENABLE_I18N
1874 if (input->mb_cur_max > 1)
1875 {
1876 wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
1877 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1878 }
1879 else
1880#endif
1881 token->word_char = IS_WORD_CHAR (token->opr.c);
1882
1883 switch (c)
1884 {
1885 case '\n':
1886 if (syntax & REG_NEWLINE_ALT)
1887 token->type = OP_ALT;
1888 break;
1889 case '|':
1890 if (!(syntax & REG_LIMITED_OPS) && (syntax & REG_NO_BK_VBAR))
1891 token->type = OP_ALT;
1892 break;
1893 case '*':
1894 token->type = OP_DUP_ASTERISK;
1895 break;
1896 case '+':
1897 if (!(syntax & REG_LIMITED_OPS) && !(syntax & REG_BK_PLUS_QM))
1898 token->type = OP_DUP_PLUS;
1899 break;
1900 case '?':
1901 if (!(syntax & REG_LIMITED_OPS) && !(syntax & REG_BK_PLUS_QM))
1902 token->type = OP_DUP_QUESTION;
1903 break;
1904 case '{':
1905 if ((syntax & REG_INTERVALS) && (syntax & REG_NO_BK_BRACES))
1906 token->type = OP_OPEN_DUP_NUM;
1907 break;
1908 case '}':
1909 if ((syntax & REG_INTERVALS) && (syntax & REG_NO_BK_BRACES))
1910 token->type = OP_CLOSE_DUP_NUM;
1911 break;
1912 case '(':
1913 if (syntax & REG_NO_BK_PARENS)
1914 token->type = OP_OPEN_SUBEXP;
1915 break;
1916 case ')':
1917 if (syntax & REG_NO_BK_PARENS)
1918 token->type = OP_CLOSE_SUBEXP;
1919 break;
1920 case '[':
1921 token->type = OP_OPEN_BRACKET;
1922 break;
1923 case '.':
1924 token->type = OP_PERIOD;
1925 break;
1926 case '^':
1927 if (!(syntax & (REG_CONTEXT_INDEP_ANCHORS | REG_CARET_ANCHORS_HERE)) &&
1928 re_string_cur_idx (input) != 0)
1929 {
1930 char prev = re_string_peek_byte (input, -1);
1931 if (!(syntax & REG_NEWLINE_ALT) || prev != '\n')
1932 break;
1933 }
1934 token->type = ANCHOR;
1935 token->opr.ctx_type = LINE_FIRST;
1936 break;
1937 case '$':
1938 if (!(syntax & REG_CONTEXT_INDEP_ANCHORS) &&
1939 re_string_cur_idx (input) + 1 != re_string_length (input))
1940 {
1941 re_token_t next;
1942 re_string_skip_bytes (input, 1);
1943 peek_token (&next, input, syntax);
1944 re_string_skip_bytes (input, -1);
1945 if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
1946 break;
1947 }
1948 token->type = ANCHOR;
1949 token->opr.ctx_type = LINE_LAST;
1950 break;
1951 default:
1952 break;
1953 }
1954 return 1;
1955}
1956
1957/* Peek a token from INPUT, and return the length of the token.
1958 We must not use this function out of bracket expressions. */
1959
1960static int
1961peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1962{
1963 unsigned char c;
1964 if (re_string_eoi (input))
1965 {
1966 token->type = END_OF_RE;
1967 return 0;
1968 }
1969 c = re_string_peek_byte (input, 0);
1970 token->opr.c = c;
1971
1972#ifdef RE_ENABLE_I18N
1973 if (input->mb_cur_max > 1 &&
1974 !re_string_first_byte (input, re_string_cur_idx (input)))
1975 {
1976 token->type = CHARACTER;
1977 return 1;
1978 }
1979#endif /* RE_ENABLE_I18N */
1980
1981 if (c == '\\' && (syntax & REG_BACKSLASH_ESCAPE_IN_LISTS)
1982 && re_string_cur_idx (input) + 1 < re_string_length (input))
1983 {
1984 /* In this case, '\' escape a character. */
1985 unsigned char c2;
1986 re_string_skip_bytes (input, 1);
1987 c2 = re_string_peek_byte (input, 0);
1988 token->opr.c = c2;
1989 token->type = CHARACTER;
1990 return 1;
1991 }
1992 if (c == '[') /* '[' is a special char in a bracket exps. */
1993 {
1994 unsigned char c2;
1995 int token_len;
1996 if (re_string_cur_idx (input) + 1 < re_string_length (input))
1997 c2 = re_string_peek_byte (input, 1);
1998 else
1999 c2 = 0;
2000 token->opr.c = c2;
2001 token_len = 2;
2002 switch (c2)
2003 {
2004 case '.':
2005 token->type = OP_OPEN_COLL_ELEM;
2006 break;
2007 case '=':
2008 token->type = OP_OPEN_EQUIV_CLASS;
2009 break;
2010 case ':':
2011 if (syntax & REG_CHAR_CLASSES)
2012 {
2013 token->type = OP_OPEN_CHAR_CLASS;
2014 break;
2015 }
2016 /* else fall through. */
2017 default:
2018 token->type = CHARACTER;
2019 token->opr.c = c;
2020 token_len = 1;
2021 break;
2022 }
2023 return token_len;
2024 }
2025 switch (c)
2026 {
2027 case '-':
2028 token->type = OP_CHARSET_RANGE;
2029 break;
2030 case ']':
2031 token->type = OP_CLOSE_BRACKET;
2032 break;
2033 case '^':
2034 token->type = OP_NON_MATCH_LIST;
2035 break;
2036 default:
2037 token->type = CHARACTER;
2038 }
2039 return 1;
2040}
2041
2042/* Functions for parser. */
2043
2044/* Entry point of the parser.
2045 Parse the regular expression REGEXP and return the structure tree.
2046 If an error is occured, ERR is set by error code, and return NULL.
2047 This function build the following tree, from regular expression <reg_exp>:
2048 CAT
2049 / \
2050 / \
2051 <reg_exp> EOR
2052
2053 CAT means concatenation.
2054 EOR means end of regular expression. */
2055
2056static bin_tree_t *
2057parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
2058 reg_errcode_t *err)
2059{
2060 re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
2061 bin_tree_t *tree, *eor, *root;
2062 re_token_t current_token;
2063 dfa->syntax = syntax;
2064 fetch_token (&current_token, regexp, syntax | REG_CARET_ANCHORS_HERE);
2065 tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
2066 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2067 return NULL;
2068 eor = create_tree (dfa, NULL, NULL, END_OF_RE);
2069 if (tree != NULL)
2070 root = create_tree (dfa, tree, eor, CONCAT);
2071 else
2072 root = eor;
2073 if (BE (eor == NULL || root == NULL, 0))
2074 {
2075 *err = REG_ESPACE;
2076 return NULL;
2077 }
2078 return root;
2079}
2080
2081/* This function build the following tree, from regular expression
2082 <branch1>|<branch2>:
2083 ALT
2084 / \
2085 / \
2086 <branch1> <branch2>
2087
2088 ALT means alternative, which represents the operator `|'. */
2089
2090static bin_tree_t *
2091parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2092 reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
2093{
2094 re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
2095 bin_tree_t *tree, *branch = NULL;
2096 tree = parse_branch (regexp, preg, token, syntax, nest, err);
2097 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2098 return NULL;
2099
2100 while (token->type == OP_ALT)
2101 {
2102 fetch_token (token, regexp, syntax | REG_CARET_ANCHORS_HERE);
2103 if (token->type != OP_ALT && token->type != END_OF_RE
2104 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
2105 {
2106 branch = parse_branch (regexp, preg, token, syntax, nest, err);
2107 if (BE (*err != REG_NOERROR && branch == NULL, 0))
2108 return NULL;
2109 }
2110 else
2111 branch = NULL;
2112 tree = create_tree (dfa, tree, branch, OP_ALT);
2113 if (BE (tree == NULL, 0))
2114 {
2115 *err = REG_ESPACE;
2116 return NULL;
2117 }
2118 }
2119 return tree;
2120}
2121
2122/* This function build the following tree, from regular expression
2123 <exp1><exp2>:
2124 CAT
2125 / \
2126 / \
2127 <exp1> <exp2>
2128
2129 CAT means concatenation. */
2130
2131static bin_tree_t *
2132parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
2133 reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
2134{
2135 bin_tree_t *tree, *exp;
2136 re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
2137 tree = parse_expression (regexp, preg, token, syntax, nest, err);
2138 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2139 return NULL;
2140
2141 while (token->type != OP_ALT && token->type != END_OF_RE
2142 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
2143 {
2144 exp = parse_expression (regexp, preg, token, syntax, nest, err);
2145 if (BE (*err != REG_NOERROR && exp == NULL, 0))
2146 {
2147 return NULL;
2148 }
2149 if (tree != NULL && exp != NULL)
2150 {
2151 tree = create_tree (dfa, tree, exp, CONCAT);
2152 if (tree == NULL)
2153 {
2154 *err = REG_ESPACE;
2155 return NULL;
2156 }
2157 }
2158 else if (tree == NULL)
2159 tree = exp;
2160 /* Otherwise exp == NULL, we don't need to create new tree. */
2161 }
2162 return tree;
2163}
2164
2165/* This function build the following tree, from regular expression a*:
2166 *
2167 |
2168 a
2169*/
2170
2171static bin_tree_t *
2172parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2173 reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
2174{
2175 re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
2176 bin_tree_t *tree;
2177 switch (token->type)
2178 {
2179 case CHARACTER:
2180 tree = create_token_tree (dfa, NULL, NULL, token);
2181 if (BE (tree == NULL, 0))
2182 {
2183 *err = REG_ESPACE;
2184 return NULL;
2185 }
2186#ifdef RE_ENABLE_I18N
2187 if (dfa->mb_cur_max > 1)
2188 {
2189 while (!re_string_eoi (regexp)
2190 && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
2191 {
2192 bin_tree_t *mbc_remain;
2193 fetch_token (token, regexp, syntax);
2194 mbc_remain = create_token_tree (dfa, NULL, NULL, token);
2195 tree = create_tree (dfa, tree, mbc_remain, CONCAT);
2196 if (BE (mbc_remain == NULL || tree == NULL, 0))
2197 {
2198 *err = REG_ESPACE;
2199 return NULL;
2200 }
2201 }
2202 }
2203#endif
2204 break;
2205 case OP_OPEN_SUBEXP:
2206 tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
2207 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2208 return NULL;
2209 break;
2210 case OP_OPEN_BRACKET:
2211 tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
2212 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2213 return NULL;
2214 break;
2215 case OP_BACK_REF:
2216 if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
2217 {
2218 *err = REG_ESUBREG;
2219 return NULL;
2220 }
2221 dfa->used_bkref_map |= 1 << token->opr.idx;
2222 tree = create_token_tree (dfa, NULL, NULL, token);
2223 if (BE (tree == NULL, 0))
2224 {
2225 *err = REG_ESPACE;
2226 return NULL;
2227 }
2228 ++dfa->nbackref;
2229 dfa->has_mb_node = 1;
2230 break;
2231 case OP_OPEN_DUP_NUM:
2232 if (syntax & REG_CONTEXT_INVALID_DUP)
2233 {
2234 *err = REG_BADRPT;
2235 return NULL;
2236 }
2237 /* FALLTHROUGH */
2238 case OP_DUP_ASTERISK:
2239 case OP_DUP_PLUS:
2240 case OP_DUP_QUESTION:
2241 if (syntax & REG_CONTEXT_INVALID_OPS)
2242 {
2243 *err = REG_BADRPT;
2244 return NULL;
2245 }
2246 else if (syntax & REG_CONTEXT_INDEP_OPS)
2247 {
2248 fetch_token (token, regexp, syntax);
2249 return parse_expression (regexp, preg, token, syntax, nest, err);
2250 }
2251 /* else fall through */
2252 case OP_CLOSE_SUBEXP:
2253 if ((token->type == OP_CLOSE_SUBEXP) &&
2254 !(syntax & REG_UNMATCHED_RIGHT_PAREN_ORD))
2255 {
2256 *err = REG_ERPAREN;
2257 return NULL;
2258 }
2259 /* else fall through */
2260 case OP_CLOSE_DUP_NUM:
2261 /* We treat it as a normal character. */
2262
2263 /* Then we can these characters as normal characters. */
2264 token->type = CHARACTER;
2265 /* mb_partial and word_char bits should be initialized already
2266 by peek_token. */
2267 tree = create_token_tree (dfa, NULL, NULL, token);
2268 if (BE (tree == NULL, 0))
2269 {
2270 *err = REG_ESPACE;
2271 return NULL;
2272 }
2273 break;
2274 case ANCHOR:
2275 if ((token->opr.ctx_type
2276 & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
2277 && dfa->word_ops_used == 0)
2278 init_word_char (dfa);
2279 if (token->opr.ctx_type == WORD_DELIM
2280 || token->opr.ctx_type == NOT_WORD_DELIM)
2281 {
2282 bin_tree_t *tree_first, *tree_last;
2283 if (token->opr.ctx_type == WORD_DELIM)
2284 {
2285 token->opr.ctx_type = WORD_FIRST;
2286 tree_first = create_token_tree (dfa, NULL, NULL, token);
2287 token->opr.ctx_type = WORD_LAST;
2288 }
2289 else
2290 {
2291 token->opr.ctx_type = INSIDE_WORD;
2292 tree_first = create_token_tree (dfa, NULL, NULL, token);
2293 token->opr.ctx_type = INSIDE_NOTWORD;
2294 }
2295 tree_last = create_token_tree (dfa, NULL, NULL, token);
2296 tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
2297 if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
2298 {
2299 *err = REG_ESPACE;
2300 return NULL;
2301 }
2302 }
2303 else
2304 {
2305 tree = create_token_tree (dfa, NULL, NULL, token);
2306 if (BE (tree == NULL, 0))
2307 {
2308 *err = REG_ESPACE;
2309 return NULL;
2310 }
2311 }
2312 /* We must return here, since ANCHORs can't be followed
2313 by repetition operators.
2314 eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
2315 it must not be "<ANCHOR(^)><REPEAT(*)>". */
2316 fetch_token (token, regexp, syntax);
2317 return tree;
2318 case OP_PERIOD:
2319 tree = create_token_tree (dfa, NULL, NULL, token);
2320 if (BE (tree == NULL, 0))
2321 {
2322 *err = REG_ESPACE;
2323 return NULL;
2324 }
2325 if (dfa->mb_cur_max > 1)
2326 dfa->has_mb_node = 1;
2327 break;
2328 case OP_WORD:
2329 case OP_NOTWORD:
2330 tree = build_charclass_op (dfa, regexp->trans,
2331 (const unsigned char *) "alnum",
2332 (const unsigned char *) "_",
2333 token->type == OP_NOTWORD, err);
2334 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2335 return NULL;
2336 break;
2337 case OP_SPACE:
2338 case OP_NOTSPACE:
2339 tree = build_charclass_op (dfa, regexp->trans,
2340 (const unsigned char *) "space",
2341 (const unsigned char *) "",
2342 token->type == OP_NOTSPACE, err);
2343 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2344 return NULL;
2345 break;
2346 case OP_ALT:
2347 case END_OF_RE:
2348 return NULL;
2349 case BACK_SLASH:
2350 *err = REG_EESCAPE;
2351 return NULL;
2352 default:
2353 /* Must not happen? */
2354#ifdef DEBUG
2355 assert (0);
2356#endif
2357 return NULL;
2358 }
2359 fetch_token (token, regexp, syntax);
2360
2361 while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
2362 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
2363 {
2364 tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
2365 if (BE (*err != REG_NOERROR && tree == NULL, 0))
2366 return NULL;
2367 /* In BRE consecutive duplications are not allowed. */
2368 if ((syntax & REG_CONTEXT_INVALID_DUP)
2369 && (token->type == OP_DUP_ASTERISK
2370 || token->type == OP_OPEN_DUP_NUM))
2371 {
2372 *err = REG_BADRPT;
2373 return NULL;
2374 }
2375 }
2376
2377 return tree;
2378}
2379
2380/* This function build the following tree, from regular expression
2381 (<reg_exp>):
2382 SUBEXP
2383 |
2384 <reg_exp>
2385*/
2386
2387static bin_tree_t *
2388parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2389 reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
2390{
2391 re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
2392 bin_tree_t *tree;
2393 size_t cur_nsub;
2394 cur_nsub = preg->re_nsub++;
2395
2396 fetch_token (token, regexp, syntax | REG_CARET_ANCHORS_HERE);
2397
2398 /* The subexpression may be a null string. */
2399 if (token->type == OP_CLOSE_SUBEXP)
2400 tree = NULL;
2401 else
2402 {
2403 tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
2404 if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
2405 *err = REG_EPAREN;
2406 if (BE (*err != REG_NOERROR, 0))
2407 return NULL;
2408 }
2409
2410 if (cur_nsub <= '9' - '1')
2411 dfa->completed_bkref_map |= 1 << cur_nsub;
2412
2413 tree = create_tree (dfa, tree, NULL, SUBEXP);
2414 if (BE (tree == NULL, 0))
2415 {
2416 *err = REG_ESPACE;
2417 return NULL;
2418 }
2419 tree->token.opr.idx = cur_nsub;
2420 return tree;
2421}
2422
2423/* This function parse repetition operators like "*", "+", "{1,3}" etc. */
2424
2425static bin_tree_t *
2426parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2427 re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
2428{
2429 bin_tree_t *tree = NULL, *old_tree = NULL;
2430 Idx i, start, end, start_idx = re_string_cur_idx (regexp);
2431 re_token_t start_token = *token;
2432
2433 if (token->type == OP_OPEN_DUP_NUM)
2434 {
2435 end = 0;
2436 start = fetch_number (regexp, token, syntax);
2437 if (start == REG_MISSING)
2438 {
2439 if (token->type == CHARACTER && token->opr.c == ',')
2440 start = 0; /* We treat "{,m}" as "{0,m}". */
2441 else
2442 {
2443 *err = REG_BADBR; /* <re>{} is invalid. */
2444 return NULL;
2445 }
2446 }
2447 if (BE (start != REG_ERROR, 1))
2448 {
2449 /* We treat "{n}" as "{n,n}". */
2450 end = ((token->type == OP_CLOSE_DUP_NUM) ? start
2451 : ((token->type == CHARACTER && token->opr.c == ',')
2452 ? fetch_number (regexp, token, syntax) : REG_ERROR));
2453 }
2454 if (BE (start == REG_ERROR || end == REG_ERROR, 0))
2455 {
2456 /* Invalid sequence. */
2457 if (BE (!(syntax & REG_INVALID_INTERVAL_ORD), 0))
2458 {
2459 if (token->type == END_OF_RE)
2460 *err = REG_EBRACE;
2461 else
2462 *err = REG_BADBR;
2463
2464 return NULL;
2465 }
2466
2467 /* If the syntax bit is set, rollback. */
2468 re_string_set_index (regexp, start_idx);
2469 *token = start_token;
2470 token->type = CHARACTER;
2471 /* mb_partial and word_char bits should be already initialized by
2472 peek_token. */
2473 return elem;
2474 }
2475
2476 if (BE (end != REG_MISSING && start > end, 0))
2477 {
2478 /* First number greater than second. */
2479 *err = REG_BADBR;
2480 return NULL;
2481 }
2482 }
2483 else
2484 {
2485 start = (token->type == OP_DUP_PLUS) ? 1 : 0;
2486 end = (token->type == OP_DUP_QUESTION) ? 1 : REG_MISSING;
2487 }
2488
2489 fetch_token (token, regexp, syntax);
2490
2491 if (BE (elem == NULL, 0))
2492 return NULL;
2493 if (BE (start == 0 && end == 0, 0))
2494 {
2495 postorder (elem, free_tree, NULL);
2496 return NULL;
2497 }
2498
2499 /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
2500 if (BE (start > 0, 0))
2501 {
2502 tree = elem;
2503 for (i = 2; i <= start; ++i)
2504 {
2505 elem = duplicate_tree (elem, dfa);
2506 tree = create_tree (dfa, tree, elem, CONCAT);
2507 if (BE (elem == NULL || tree == NULL, 0))
2508 goto parse_dup_op_espace;
2509 }
2510
2511 if (start == end)
2512 return tree;
2513
2514 /* Duplicate ELEM before it is marked optional. */
2515 elem = duplicate_tree (elem, dfa);
2516 old_tree = tree;
2517 }
2518 else
2519 old_tree = NULL;
2520
2521 if (elem->token.type == SUBEXP)
2522 postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
2523
2524 tree = create_tree (dfa, elem, NULL,
2525 (end == REG_MISSING ? OP_DUP_ASTERISK : OP_ALT));
2526 if (BE (tree == NULL, 0))
2527 goto parse_dup_op_espace;
2528
2529 /* This loop is actually executed only when end != REG_MISSING,
2530 to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
2531 already created the start+1-th copy. */
2532 if ((Idx) -1 < 0 || end != REG_MISSING)
2533 for (i = start + 2; i <= end; ++i)
2534 {
2535 elem = duplicate_tree (elem, dfa);
2536 tree = create_tree (dfa, tree, elem, CONCAT);
2537 if (BE (elem == NULL || tree == NULL, 0))
2538 goto parse_dup_op_espace;
2539
2540 tree = create_tree (dfa, tree, NULL, OP_ALT);
2541 if (BE (tree == NULL, 0))
2542 goto parse_dup_op_espace;
2543 }
2544
2545 if (old_tree)
2546 tree = create_tree (dfa, old_tree, tree, CONCAT);
2547
2548 return tree;
2549
2550 parse_dup_op_espace:
2551 *err = REG_ESPACE;
2552 return NULL;
2553}
2554
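2555/* Size, including the terminating NUL, of the buffers that hold the name of a
2556 collating symbol, equivalence class or character class.  The value is a
2556 heuristic; parse_bracket_symbol rejects longer names with REG_EBRACK. */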
2557#define BRACKET_NAME_BUF_SIZE 32
2558
2559#ifndef _LIBC
2560 /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
2561 Build the range expression which starts from START_ELEM, and ends
2562 at END_ELEM. The result are written to MBCSET and SBCSET.
2563 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2564 mbcset->range_ends, is a pointer argument sinse we may
2565 update it. */
2566
2567static reg_errcode_t
2568build_range_exp (bitset sbcset,
2569# ifdef RE_ENABLE_I18N
2570 re_charset_t *mbcset, Idx *range_alloc,
2571# endif
2572 bracket_elem_t *start_elem, bracket_elem_t *end_elem)
2573{
2574 unsigned int start_ch, end_ch;
2575 /* Equivalence Classes and Character Classes can't be a range start/end. */
2576 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
2577 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
2578 0))
2579 return REG_ERANGE;
2580
2581 /* We can handle no multi character collating elements without libc
2582 support. */
2583 if (BE ((start_elem->type == COLL_SYM
2584 && strlen ((char *) start_elem->opr.name) > 1)
2585 || (end_elem->type == COLL_SYM
2586 && strlen ((char *) end_elem->opr.name) > 1), 0))
2587 return REG_ECOLLATE;
2588
2589# ifdef RE_ENABLE_I18N
2590 {
2591 wchar_t wc;
2592 wint_t start_wc, end_wc;
2593 wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
2594
2595 start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
2596 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2597 : 0));
2598 end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
2599 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2600 : 0));
2601 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
2602 ? __btowc (start_ch) : start_elem->opr.wch);
2603 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
2604 ? __btowc (end_ch) : end_elem->opr.wch);
2605 if (start_wc == WEOF || end_wc == WEOF)
2606 return REG_ECOLLATE;
2607 cmp_buf[0] = start_wc;
2608 cmp_buf[4] = end_wc;
2609 if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
2610 return REG_ERANGE;
2611
2612 /* Got valid collation sequence values, add them as a new entry.
2613 However, for !_LIBC we have no collation elements: if the
2614 character set is single byte, the single byte character set
2615 that we build below suffices. parse_bracket_exp passes
2616 no MBCSET if dfa->mb_cur_max == 1. */
2617 if (mbcset)
2618 {
2619 /* Check the space of the arrays. */
2620 if (BE (*range_alloc == mbcset->nranges, 0))
2621 {
2622 /* There is not enough space, need realloc. */
2623 wchar_t *new_array_start, *new_array_end;
2624 Idx new_nranges;
2625
2626 new_nranges = mbcset->nranges;
2627 /* Use realloc since mbcset->range_starts and mbcset->range_ends
2628 are NULL if *range_alloc == 0. */
2629 new_array_start = re_x2realloc (mbcset->range_starts, wchar_t,
2630 &new_nranges);
2631 new_array_end = re_realloc (mbcset->range_ends, wchar_t,
2632 new_nranges);
2633
2634 if (BE (new_array_start == NULL || new_array_end == NULL, 0))
2635 return REG_ESPACE;
2636
2637 mbcset->range_starts = new_array_start;
2638 mbcset->range_ends = new_array_end;
2639 *range_alloc = new_nranges;
2640 }
2641
2642 mbcset->range_starts[mbcset->nranges] = start_wc;
2643 mbcset->range_ends[mbcset->nranges++] = end_wc;
2644 }
2645
2646 /* Build the table for single byte characters. */
2647 for (wc = 0; wc < SBC_MAX; ++wc)
2648 {
2649 cmp_buf[2] = wc;
2650 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
2651 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
2652 bitset_set (sbcset, wc);
2653 }
2654 }
2655# else /* not RE_ENABLE_I18N */
2656 {
2657 unsigned int ch;
2658 start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
2659 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2660 : 0));
2661 end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
2662 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2663 : 0));
2664 if (start_ch > end_ch)
2665 return REG_ERANGE;
2666 /* Build the table for single byte characters. */
2667 for (ch = 0; ch < SBC_MAX; ++ch)
2668 if (start_ch <= ch && ch <= end_ch)
2669 bitset_set (sbcset, ch);
2670 }
2671# endif /* not RE_ENABLE_I18N */
2672 return REG_NOERROR;
2673}
2674#endif /* not _LIBC */
2675
2676#ifndef _LIBC
2677/* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
2678 Build the collating element which is represented by NAME.
2679 The result are written to MBCSET and SBCSET.
2680 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2681 pointer argument since we may update it. */
2682
2683static reg_errcode_t
2684build_collating_symbol (bitset sbcset,
2685# ifdef RE_ENABLE_I18N
2686 re_charset_t *mbcset, Idx *coll_sym_alloc,
2687# endif
2688 const unsigned char *name)
2689{
2690 size_t name_len = strlen ((const char *) name);
2691 if (BE (name_len != 1, 0))
2692 return REG_ECOLLATE;
2693 else
2694 {
2695 bitset_set (sbcset, name[0]);
2696 return REG_NOERROR;
2697 }
2698}
2699#endif /* not _LIBC */
2700
2701/* This function parse bracket expression like "[abc]", "[a-c]",
2702 "[[.a-a.]]" etc. */
2703
2704static bin_tree_t *
2705parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
2706 reg_syntax_t syntax, reg_errcode_t *err)
2707{
2708#ifdef _LIBC
2709 const unsigned char *collseqmb;
2710 const char *collseqwc;
2711 uint32_t nrules;
2712 int32_t table_size;
2713 const int32_t *symb_table;
2714 const unsigned char *extra;
2715
2716 /* Local function for parse_bracket_exp used in _LIBC environement.
2717 Seek the collating symbol entry correspondings to NAME.
2718 Return the index of the symbol in the SYMB_TABLE. */
2719
2720 auto inline int32_t
2721 __attribute ((always_inline))
2722 seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
2723 {
2724 int32_t hash = elem_hash ((const char *) name, name_len);
2725 int32_t elem = hash % table_size;
2726 int32_t second = hash % (table_size - 2);
2727 while (symb_table[2 * elem] != 0)
2728 {
2729 /* First compare the hashing value. */
2730 if (symb_table[2 * elem] == hash
2731 /* Compare the length of the name. */
2732 && name_len == extra[symb_table[2 * elem + 1]]
2733 /* Compare the name. */
2734 && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
2735 name_len) == 0)
2736 {
2737 /* Yep, this is the entry. */
2738 break;
2739 }
2740
2741 /* Next entry. */
2742 elem += second;
2743 }
2744 return elem;
2745 }
2746
2747 /* Local function for parse_bracket_exp used in _LIBC environement.
2748 Look up the collation sequence value of BR_ELEM.
2749 Return the value if succeeded, UINT_MAX otherwise. */
2750
2751 auto inline unsigned int
2752 __attribute ((always_inline))
2753 lookup_collation_sequence_value (bracket_elem_t *br_elem)
2754 {
2755 if (br_elem->type == SB_CHAR)
2756 {
2757 /*
2758 if (MB_CUR_MAX == 1)
2759 */
2760 if (nrules == 0)
2761 return collseqmb[br_elem->opr.ch];
2762 else
2763 {
2764 wint_t wc = __btowc (br_elem->opr.ch);
2765 return __collseq_table_lookup (collseqwc, wc);
2766 }
2767 }
2768 else if (br_elem->type == MB_CHAR)
2769 {
2770 return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
2771 }
2772 else if (br_elem->type == COLL_SYM)
2773 {
2774 size_t sym_name_len = strlen ((char *) br_elem->opr.name);
2775 if (nrules != 0)
2776 {
2777 int32_t elem, idx;
2778 elem = seek_collating_symbol_entry (br_elem->opr.name,
2779 sym_name_len);
2780 if (symb_table[2 * elem] != 0)
2781 {
2782 /* We found the entry. */
2783 idx = symb_table[2 * elem + 1];
2784 /* Skip the name of collating element name. */
2785 idx += 1 + extra[idx];
2786 /* Skip the byte sequence of the collating element. */
2787 idx += 1 + extra[idx];
2788 /* Adjust for the alignment. */
2789 idx = (idx + 3) & ~3;
2790 /* Skip the multibyte collation sequence value. */
2791 idx += sizeof (unsigned int);
2792 /* Skip the wide char sequence of the collating element. */
2793 idx += sizeof (unsigned int) *
2794 (1 + *(unsigned int *) (extra + idx));
2795 /* Return the collation sequence value. */
2796 return *(unsigned int *) (extra + idx);
2797 }
2798 else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
2799 {
2800 /* No valid character. Match it as a single byte
2801 character. */
2802 return collseqmb[br_elem->opr.name[0]];
2803 }
2804 }
2805 else if (sym_name_len == 1)
2806 return collseqmb[br_elem->opr.name[0]];
2807 }
2808 return UINT_MAX;
2809 }
2810
2811 /* Local function for parse_bracket_exp used in _LIBC environement.
2812 Build the range expression which starts from START_ELEM, and ends
2813 at END_ELEM. The result are written to MBCSET and SBCSET.
2814 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2815 mbcset->range_ends, is a pointer argument sinse we may
2816 update it. */
2817
2818 auto inline reg_errcode_t
2819 __attribute ((always_inline))
2820 build_range_exp (bitset sbcset, re_charset_t *mbcset,
2821 Idx *range_alloc,
2822 bracket_elem_t *start_elem, bracket_elem_t *end_elem)
2823 {
2824 unsigned int ch;
2825 uint32_t start_collseq;
2826 uint32_t end_collseq;
2827
2828 /* Equivalence Classes and Character Classes can't be a range
2829 start/end. */
2830 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
2831 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
2832 0))
2833 return REG_ERANGE;
2834
2835 start_collseq = lookup_collation_sequence_value (start_elem);
2836 end_collseq = lookup_collation_sequence_value (end_elem);
2837 /* Check start/end collation sequence values. */
2838 if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
2839 return REG_ECOLLATE;
2840 if (BE ((syntax & REG_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
2841 return REG_ERANGE;
2842
2843 /* Got valid collation sequence values, add them as a new entry.
2844 However, if we have no collation elements, and the character set
2845 is single byte, the single byte character set that we
2846 build below suffices. */
2847 if (nrules > 0 || dfa->mb_cur_max > 1)
2848 {
2849 /* Check the space of the arrays. */
2850 if (BE (*range_alloc == mbcset->nranges, 0))
2851 {
2852 /* There is not enough space, need realloc. */
2853 uint32_t *new_array_start;
2854 uint32_t *new_array_end;
2855 Idx new_nranges;
2856
2857 new_nranges = mbcset->nranges;
2858 new_array_start = re_x2realloc (mbcset->range_starts, uint32_t,
2859 &new_nranges);
2860 new_array_end = re_realloc (mbcset->range_ends, uint32_t,
2861 new_nranges);
2862
2863 if (BE (new_array_start == NULL || new_array_end == NULL, 0))
2864 return REG_ESPACE;
2865
2866 mbcset->range_starts = new_array_start;
2867 mbcset->range_ends = new_array_end;
2868 *range_alloc = new_nranges;
2869 }
2870
2871 mbcset->range_starts[mbcset->nranges] = start_collseq;
2872 mbcset->range_ends[mbcset->nranges++] = end_collseq;
2873 }
2874
2875 /* Build the table for single byte characters. */
2876 for (ch = 0; ch < SBC_MAX; ch++)
2877 {
2878 uint32_t ch_collseq;
2879 /*
2880 if (MB_CUR_MAX == 1)
2881 */
2882 if (nrules == 0)
2883 ch_collseq = collseqmb[ch];
2884 else
2885 ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
2886 if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
2887 bitset_set (sbcset, ch);
2888 }
2889 return REG_NOERROR;
2890 }
2891
2892 /* Local function for parse_bracket_exp used in _LIBC environement.
2893 Build the collating element which is represented by NAME.
2894 The result are written to MBCSET and SBCSET.
2895 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2896 pointer argument sinse we may update it. */
2897
2898 auto inline reg_errcode_t
2899 __attribute ((always_inline))
2900 build_collating_symbol (bitset sbcset, re_charset_t *mbcset,
2901 Idx *coll_sym_alloc, const unsigned char *name)
2902 {
2903 int32_t elem, idx;
2904 size_t name_len = strlen ((const char *) name);
2905 if (nrules != 0)
2906 {
2907 elem = seek_collating_symbol_entry (name, name_len);
2908 if (symb_table[2 * elem] != 0)
2909 {
2910 /* We found the entry. */
2911 idx = symb_table[2 * elem + 1];
2912 /* Skip the name of collating element name. */
2913 idx += 1 + extra[idx];
2914 }
2915 else if (symb_table[2 * elem] == 0 && name_len == 1)
2916 {
2917 /* No valid character, treat it as a normal
2918 character. */
2919 bitset_set (sbcset, name[0]);
2920 return REG_NOERROR;
2921 }
2922 else
2923 return REG_ECOLLATE;
2924
2925 /* Got valid collation sequence, add it as a new entry. */
2926 /* Check the space of the arrays. */
2927 if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
2928 {
2929 /* Not enough, realloc it. */
2930 Idx new_coll_sym_alloc = mbcset->ncoll_syms;
2931 /* Use realloc since mbcset->coll_syms is NULL
2932 if *alloc == 0. */
2933 int32_t *new_coll_syms = re_x2realloc (mbcset->coll_syms, int32_t,
2934 &new_coll_sym_alloc);
2935 if (BE (new_coll_syms == NULL, 0))
2936 return REG_ESPACE;
2937 mbcset->coll_syms = new_coll_syms;
2938 *coll_sym_alloc = new_coll_sym_alloc;
2939 }
2940 mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
2941 return REG_NOERROR;
2942 }
2943 else
2944 {
2945 if (BE (name_len != 1, 0))
2946 return REG_ECOLLATE;
2947 else
2948 {
2949 bitset_set (sbcset, name[0]);
2950 return REG_NOERROR;
2951 }
2952 }
2953 }
2954#endif
2955
2956 re_token_t br_token;
2957 re_bitset_ptr_t sbcset;
2958#ifdef RE_ENABLE_I18N
2959 re_charset_t *mbcset;
2960 Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
2961 Idx equiv_class_alloc = 0, char_class_alloc = 0;
2962#endif /* not RE_ENABLE_I18N */
2963 bool non_match = false;
2964 bin_tree_t *work_tree;
2965 int token_len;
2966 bool first_round = true;
2967#ifdef _LIBC
2968 collseqmb = (const unsigned char *)
2969 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
2970 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2971 if (nrules)
2972 {
2973 /*
2974 if (MB_CUR_MAX > 1)
2975 */
2976 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
2977 table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
2978 symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
2979 _NL_COLLATE_SYMB_TABLEMB);
2980 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
2981 _NL_COLLATE_SYMB_EXTRAMB);
2982 }
2983#endif
2984 sbcset = re_calloc (bitset_word, BITSET_WORDS);
2985#ifdef RE_ENABLE_I18N
2986 mbcset = re_calloc (re_charset_t, 1);
2987#endif /* RE_ENABLE_I18N */
2988#ifdef RE_ENABLE_I18N
2989 if (BE (sbcset == NULL || mbcset == NULL, 0))
2990#else
2991 if (BE (sbcset == NULL, 0))
2992#endif /* RE_ENABLE_I18N */
2993 {
2994 *err = REG_ESPACE;
2995 return NULL;
2996 }
2997
2998 token_len = peek_token_bracket (token, regexp, syntax);
2999 if (BE (token->type == END_OF_RE, 0))
3000 {
3001 *err = REG_BADPAT;
3002 goto parse_bracket_exp_free_return;
3003 }
3004 if (token->type == OP_NON_MATCH_LIST)
3005 {
3006#ifdef RE_ENABLE_I18N
3007 mbcset->non_match = 1;
3008#endif /* not RE_ENABLE_I18N */
3009 non_match = true;
3010 if (syntax & REG_HAT_LISTS_NOT_NEWLINE)
3011 bitset_set (sbcset, '\0');
3012 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3013 token_len = peek_token_bracket (token, regexp, syntax);
3014 if (BE (token->type == END_OF_RE, 0))
3015 {
3016 *err = REG_BADPAT;
3017 goto parse_bracket_exp_free_return;
3018 }
3019 }
3020
3021 /* We treat the first ']' as a normal character. */
3022 if (token->type == OP_CLOSE_BRACKET)
3023 token->type = CHARACTER;
3024
3025 while (1)
3026 {
3027 bracket_elem_t start_elem, end_elem;
3028 unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
3029 unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
3030 reg_errcode_t ret;
3031 int token_len2 = 0;
3032 bool is_range_exp = false;
3033 re_token_t token2;
3034
3035 start_elem.opr.name = start_name_buf;
3036 ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
3037 syntax, first_round);
3038 if (BE (ret != REG_NOERROR, 0))
3039 {
3040 *err = ret;
3041 goto parse_bracket_exp_free_return;
3042 }
3043 first_round = false;
3044
3045 /* Get information about the next token. We need it in any case. */
3046 token_len = peek_token_bracket (token, regexp, syntax);
3047
3048 /* Do not check for ranges if we know they are not allowed. */
3049 if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
3050 {
3051 if (BE (token->type == END_OF_RE, 0))
3052 {
3053 *err = REG_EBRACK;
3054 goto parse_bracket_exp_free_return;
3055 }
3056 if (token->type == OP_CHARSET_RANGE)
3057 {
3058 re_string_skip_bytes (regexp, token_len); /* Skip '-'. */
3059 token_len2 = peek_token_bracket (&token2, regexp, syntax);
3060 if (BE (token2.type == END_OF_RE, 0))
3061 {
3062 *err = REG_EBRACK;
3063 goto parse_bracket_exp_free_return;
3064 }
3065 if (token2.type == OP_CLOSE_BRACKET)
3066 {
3067 /* We treat the last '-' as a normal character. */
3068 re_string_skip_bytes (regexp, -token_len);
3069 token->type = CHARACTER;
3070 }
3071 else
3072 is_range_exp = true;
3073 }
3074 }
3075
3076 if (is_range_exp == true)
3077 {
3078 end_elem.opr.name = end_name_buf;
3079 ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
3080 dfa, syntax, true);
3081 if (BE (ret != REG_NOERROR, 0))
3082 {
3083 *err = ret;
3084 goto parse_bracket_exp_free_return;
3085 }
3086
3087 token_len = peek_token_bracket (token, regexp, syntax);
3088
3089#ifdef _LIBC
3090 *err = build_range_exp (sbcset, mbcset, &range_alloc,
3091 &start_elem, &end_elem);
3092#else
3093# ifdef RE_ENABLE_I18N
3094 *err = build_range_exp (sbcset,
3095 dfa->mb_cur_max > 1 ? mbcset : NULL,
3096 &range_alloc, &start_elem, &end_elem);
3097# else
3098 *err = build_range_exp (sbcset, &start_elem, &end_elem);
3099# endif
3100#endif /* RE_ENABLE_I18N */
3101 if (BE (*err != REG_NOERROR, 0))
3102 goto parse_bracket_exp_free_return;
3103 }
3104 else
3105 {
3106 switch (start_elem.type)
3107 {
3108 case SB_CHAR:
3109 bitset_set (sbcset, start_elem.opr.ch);
3110 break;
3111#ifdef RE_ENABLE_I18N
3112 case MB_CHAR:
3113 /* Check whether the array has enough space. */
3114 if (BE (mbchar_alloc == mbcset->nmbchars, 0))
3115 {
3116 wchar_t *new_mbchars;
3117 /* Not enough, realloc it. */
3118 mbchar_alloc = mbcset->nmbchars;
3119 /* Use realloc since array is NULL if *alloc == 0. */
3120 new_mbchars = re_x2realloc (mbcset->mbchars, wchar_t,
3121 &mbchar_alloc);
3122 if (BE (new_mbchars == NULL, 0))
3123 goto parse_bracket_exp_espace;
3124 mbcset->mbchars = new_mbchars;
3125 }
3126 mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
3127 break;
3128#endif /* RE_ENABLE_I18N */
3129 case EQUIV_CLASS:
3130 *err = build_equiv_class (sbcset,
3131#ifdef RE_ENABLE_I18N
3132 mbcset, &equiv_class_alloc,
3133#endif /* RE_ENABLE_I18N */
3134 start_elem.opr.name);
3135 if (BE (*err != REG_NOERROR, 0))
3136 goto parse_bracket_exp_free_return;
3137 break;
3138 case COLL_SYM:
3139 *err = build_collating_symbol (sbcset,
3140#ifdef RE_ENABLE_I18N
3141 mbcset, &coll_sym_alloc,
3142#endif /* RE_ENABLE_I18N */
3143 start_elem.opr.name);
3144 if (BE (*err != REG_NOERROR, 0))
3145 goto parse_bracket_exp_free_return;
3146 break;
3147 case CHAR_CLASS:
3148 *err = build_charclass (regexp->trans, sbcset,
3149#ifdef RE_ENABLE_I18N
3150 mbcset, &char_class_alloc,
3151#endif /* RE_ENABLE_I18N */
3152 start_elem.opr.name, syntax);
3153 if (BE (*err != REG_NOERROR, 0))
3154 goto parse_bracket_exp_free_return;
3155 break;
3156 default:
3157 assert (0);
3158 break;
3159 }
3160 }
3161 if (BE (token->type == END_OF_RE, 0))
3162 {
3163 *err = REG_EBRACK;
3164 goto parse_bracket_exp_free_return;
3165 }
3166 if (token->type == OP_CLOSE_BRACKET)
3167 break;
3168 }
3169
3170 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3171
3172 /* If it is non-matching list. */
3173 if (non_match)
3174 bitset_not (sbcset);
3175
3176#ifdef RE_ENABLE_I18N
3177 /* Ensure only single byte characters are set. */
3178 if (dfa->mb_cur_max > 1)
3179 bitset_mask (sbcset, dfa->sb_char);
3180
3181 if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
3182 || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
3183 || mbcset->non_match)))
3184 {
3185 bin_tree_t *mbc_tree;
3186 int sbc_idx;
3187 /* Build a tree for complex bracket. */
3188 dfa->has_mb_node = 1;
3189 br_token.type = COMPLEX_BRACKET;
3190 br_token.opr.mbcset = mbcset;
3191 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3192 if (BE (mbc_tree == NULL, 0))
3193 goto parse_bracket_exp_espace;
3194 for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
3195 if (sbcset[sbc_idx])
3196 break;
3197 /* If there are no bits set in sbcset, there is no point
3198 of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
3199 if (sbc_idx < BITSET_WORDS)
3200 {
3201 /* Build a tree for simple bracket. */
3202 br_token.type = SIMPLE_BRACKET;
3203 br_token.opr.sbcset = sbcset;
3204 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3205 if (BE (work_tree == NULL, 0))
3206 goto parse_bracket_exp_espace;
3207
3208 /* Then join them by ALT node. */
3209 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
3210 if (BE (work_tree == NULL, 0))
3211 goto parse_bracket_exp_espace;
3212 }
3213 else
3214 {
3215 re_free (sbcset);
3216 work_tree = mbc_tree;
3217 }
3218 }
3219 else
3220#endif /* not RE_ENABLE_I18N */
3221 {
3222#ifdef RE_ENABLE_I18N
3223 free_charset (mbcset);
3224#endif
3225 /* Build a tree for simple bracket. */
3226 br_token.type = SIMPLE_BRACKET;
3227 br_token.opr.sbcset = sbcset;
3228 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3229 if (BE (work_tree == NULL, 0))
3230 goto parse_bracket_exp_espace;
3231 }
3232 return work_tree;
3233
3234 parse_bracket_exp_espace:
3235 *err = REG_ESPACE;
3236 parse_bracket_exp_free_return:
3237 re_free (sbcset);
3238#ifdef RE_ENABLE_I18N
3239 free_charset (mbcset);
3240#endif /* RE_ENABLE_I18N */
3241 return NULL;
3242}
3243
3244/* Parse an element in the bracket expression. */
3245
3246static reg_errcode_t
3247parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
3248 re_token_t *token, int token_len, re_dfa_t *dfa,
3249 reg_syntax_t syntax, bool accept_hyphen)
3250{
3251#ifdef RE_ENABLE_I18N
3252 int cur_char_size;
3253 cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
3254 if (cur_char_size > 1)
3255 {
3256 elem->type = MB_CHAR;
3257 elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
3258 re_string_skip_bytes (regexp, cur_char_size);
3259 return REG_NOERROR;
3260 }
3261#endif /* RE_ENABLE_I18N */
3262 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3263 if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
3264 || token->type == OP_OPEN_EQUIV_CLASS)
3265 return parse_bracket_symbol (elem, regexp, token);
3266 if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
3267 {
3268 /* A '-' must only appear as anything but a range indicator before
3269 the closing bracket. Everything else is an error. */
3270 re_token_t token2;
3271 (void) peek_token_bracket (&token2, regexp, syntax);
3272 if (token2.type != OP_CLOSE_BRACKET)
3273 /* The actual error value is not standardized since this whole
3274 case is undefined. But ERANGE makes good sense. */
3275 return REG_ERANGE;
3276 }
3277 elem->type = SB_CHAR;
3278 elem->opr.ch = token->opr.c;
3279 return REG_NOERROR;
3280}
3281
3282/* Parse a bracket symbol in the bracket expression. Bracket symbols are
3283 such as [:<character_class>:], [.<collating_element>.], and
3284 [=<equivalent_class>=]. */
3285
3286static reg_errcode_t
3287parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
3288 re_token_t *token)
3289{
3290 unsigned char ch, delim = token->opr.c;
3291 int i = 0;
3292 if (re_string_eoi(regexp))
3293 return REG_EBRACK;
3294 for (;; ++i)
3295 {
3296 if (i >= BRACKET_NAME_BUF_SIZE)
3297 return REG_EBRACK;
3298 if (token->type == OP_OPEN_CHAR_CLASS)
3299 ch = re_string_fetch_byte_case (regexp);
3300 else
3301 ch = re_string_fetch_byte (regexp);
3302 if (re_string_eoi(regexp))
3303 return REG_EBRACK;
3304 if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
3305 break;
3306 elem->opr.name[i] = ch;
3307 }
3308 re_string_skip_bytes (regexp, 1);
3309 elem->opr.name[i] = '\0';
3310 switch (token->type)
3311 {
3312 case OP_OPEN_COLL_ELEM:
3313 elem->type = COLL_SYM;
3314 break;
3315 case OP_OPEN_EQUIV_CLASS:
3316 elem->type = EQUIV_CLASS;
3317 break;
3318 case OP_OPEN_CHAR_CLASS:
3319 elem->type = CHAR_CLASS;
3320 break;
3321 default:
3322 break;
3323 }
3324 return REG_NOERROR;
3325}
3326
3327 /* Helper function for parse_bracket_exp.
3328 Build the equivalence class which is represented by NAME.
3329 The result are written to MBCSET and SBCSET.
3330 EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
3331 is a pointer argument sinse we may update it. */
3332
3333static reg_errcode_t
3334build_equiv_class (bitset sbcset,
3335#ifdef RE_ENABLE_I18N
3336 re_charset_t *mbcset, Idx *equiv_class_alloc,
3337#endif
3338 const unsigned char *name)
3339{
3340#if defined _LIBC
3341 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3342 if (nrules != 0)
3343 {
3344 const int32_t *table, *indirect;
3345 const unsigned char *weights, *extra, *cp;
3346 unsigned char char_buf[2];
3347 int32_t idx1, idx2;
3348 unsigned int ch;
3349 size_t len;
3350 /* This #include defines a local function! */
3351# include <locale/weight.h>
3352 /* Calculate the index for equivalence class. */
3353 cp = name;
3354 table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3355 weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3356 _NL_COLLATE_WEIGHTMB);
3357 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3358 _NL_COLLATE_EXTRAMB);
3359 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3360 _NL_COLLATE_INDIRECTMB);
3361 idx1 = findidx (&cp);
3362 if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
3363 /* This isn't a valid character. */
3364 return REG_ECOLLATE;
3365
3366 /* Build single byte matcing table for this equivalence class. */
3367 char_buf[1] = (unsigned char) '\0';
3368 len = weights[idx1];
3369 for (ch = 0; ch < SBC_MAX; ++ch)
3370 {
3371 char_buf[0] = ch;
3372 cp = char_buf;
3373 idx2 = findidx (&cp);
3374/*
3375 idx2 = table[ch];
3376*/
3377 if (idx2 == 0)
3378 /* This isn't a valid character. */
3379 continue;
3380 if (len == weights[idx2])
3381 {
3382 int cnt = 0;
3383 while (cnt <= len &&
3384 weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
3385 ++cnt;
3386
3387 if (cnt > len)
3388 bitset_set (sbcset, ch);
3389 }
3390 }
3391 /* Check whether the array has enough space. */
3392 if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
3393 {
3394 /* Not enough, realloc it. */
3395 Idx new_equiv_class_alloc = mbcset->nequiv_classes;
3396 /* Use realloc since the array is NULL if *alloc == 0. */
3397 int32_t *new_equiv_classes = re_x2realloc (mbcset->equiv_classes,
3398 int32_t,
3399 &new_equiv_class_alloc);
3400 if (BE (new_equiv_classes == NULL, 0))
3401 return REG_ESPACE;
3402 mbcset->equiv_classes = new_equiv_classes;
3403 *equiv_class_alloc = new_equiv_class_alloc;
3404 }
3405 mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
3406 }
3407 else
3408#endif /* _LIBC */
3409 {
3410 if (BE (strlen ((const char *) name) != 1, 0))
3411 return REG_ECOLLATE;
3412 bitset_set (sbcset, *name);
3413 }
3414 return REG_NOERROR;
3415}
3416
3417 /* Helper function for parse_bracket_exp.
3418 Build the character class which is represented by NAME.
3419 The result are written to MBCSET and SBCSET.
3420 CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
3421 is a pointer argument sinse we may update it. */
3422
3423static reg_errcode_t
3424build_charclass (unsigned REG_TRANSLATE_TYPE trans, bitset sbcset,
3425#ifdef RE_ENABLE_I18N
3426 re_charset_t *mbcset, Idx *char_class_alloc,
3427#endif
3428 const unsigned char *class_name, reg_syntax_t syntax)
3429{
3430 int i;
3431 const char *name = (const char *) class_name;
3432
3433 /* In case of REG_ICASE "upper" and "lower" match the both of
3434 upper and lower cases. */
3435 if ((syntax & REG_IGNORE_CASE)
3436 && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
3437 name = "alpha";
3438
3439#ifdef RE_ENABLE_I18N
3440 /* Check the space of the arrays. */
3441 if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
3442 {
3443 /* Not enough, realloc it. */
3444 Idx new_char_class_alloc = mbcset->nchar_classes;
3445 /* Use realloc since array is NULL if *alloc == 0. */
3446 wctype_t *new_char_classes = re_x2realloc (mbcset->char_classes, wctype_t,
3447 &new_char_class_alloc);
3448 if (BE (new_char_classes == NULL, 0))
3449 return REG_ESPACE;
3450 mbcset->char_classes = new_char_classes;
3451 *char_class_alloc = new_char_class_alloc;
3452 }
3453 mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
3454#endif /* RE_ENABLE_I18N */
3455
3456#define BUILD_CHARCLASS_LOOP(ctype_func) \
3457 for (i = 0; i < SBC_MAX; ++i) \
3458 { \
3459 if (ctype_func (i)) \
3460 { \
3461 int ch = trans ? trans[i] : i; \
3462 bitset_set (sbcset, ch); \
3463 } \
3464 }
3465
3466 if (strcmp (name, "alnum") == 0)
3467 BUILD_CHARCLASS_LOOP (isalnum)
3468 else if (strcmp (name, "cntrl") == 0)
3469 BUILD_CHARCLASS_LOOP (iscntrl)
3470 else if (strcmp (name, "lower") == 0)
3471 BUILD_CHARCLASS_LOOP (islower)
3472 else if (strcmp (name, "space") == 0)
3473 BUILD_CHARCLASS_LOOP (isspace)
3474 else if (strcmp (name, "alpha") == 0)
3475 BUILD_CHARCLASS_LOOP (isalpha)
3476 else if (strcmp (name, "digit") == 0)
3477 BUILD_CHARCLASS_LOOP (isdigit)
3478 else if (strcmp (name, "print") == 0)
3479 BUILD_CHARCLASS_LOOP (isprint)
3480 else if (strcmp (name, "upper") == 0)
3481 BUILD_CHARCLASS_LOOP (isupper)
3482 else if (strcmp (name, "blank") == 0)
3483 BUILD_CHARCLASS_LOOP (isblank)
3484 else if (strcmp (name, "graph") == 0)
3485 BUILD_CHARCLASS_LOOP (isgraph)
3486 else if (strcmp (name, "punct") == 0)
3487 BUILD_CHARCLASS_LOOP (ispunct)
3488 else if (strcmp (name, "xdigit") == 0)
3489 BUILD_CHARCLASS_LOOP (isxdigit)
3490 else
3491 return REG_ECTYPE;
3492
3493 return REG_NOERROR;
3494}
3495
3496static bin_tree_t *
3497build_charclass_op (re_dfa_t *dfa, unsigned REG_TRANSLATE_TYPE trans,
3498 const unsigned char *class_name,
3499 const unsigned char *extra,
3500 bool non_match, reg_errcode_t *err)
3501{
3502 re_bitset_ptr_t sbcset;
3503#ifdef RE_ENABLE_I18N
3504 re_charset_t *mbcset;
3505 Idx alloc = 0;
3506#endif /* not RE_ENABLE_I18N */
3507 reg_errcode_t ret;
3508 re_token_t br_token;
3509 bin_tree_t *tree;
3510
3511 sbcset = re_calloc (bitset_word, BITSET_WORDS);
3512#ifdef RE_ENABLE_I18N
3513 mbcset = re_calloc (re_charset_t, 1);
3514#endif /* RE_ENABLE_I18N */
3515
3516#ifdef RE_ENABLE_I18N
3517 if (BE (sbcset == NULL || mbcset == NULL, 0))
3518#else /* not RE_ENABLE_I18N */
3519 if (BE (sbcset == NULL, 0))
3520#endif /* not RE_ENABLE_I18N */
3521 {
3522 *err = REG_ESPACE;
3523 return NULL;
3524 }
3525
3526 if (non_match)
3527 {
3528#ifdef RE_ENABLE_I18N
3529 /*
3530 if (syntax & REG_HAT_LISTS_NOT_NEWLINE)
3531 bitset_set(cset->sbcset, '\0');
3532 */
3533 mbcset->non_match = 1;
3534#endif /* not RE_ENABLE_I18N */
3535 }
3536
3537 /* We don't care the syntax in this case. */
3538 ret = build_charclass (trans, sbcset,
3539#ifdef RE_ENABLE_I18N
3540 mbcset, &alloc,
3541#endif /* RE_ENABLE_I18N */
3542 class_name, 0);
3543
3544 if (BE (ret != REG_NOERROR, 0))
3545 {
3546 re_free (sbcset);
3547#ifdef RE_ENABLE_I18N
3548 free_charset (mbcset);
3549#endif /* RE_ENABLE_I18N */
3550 *err = ret;
3551 return NULL;
3552 }
3553 /* \w match '_' also. */
3554 for (; *extra; extra++)
3555 bitset_set (sbcset, *extra);
3556
3557 /* If it is non-matching list. */
3558 if (non_match)
3559 bitset_not (sbcset);
3560
3561#ifdef RE_ENABLE_I18N
3562 /* Ensure only single byte characters are set. */
3563 if (dfa->mb_cur_max > 1)
3564 bitset_mask (sbcset, dfa->sb_char);
3565#endif
3566
3567 /* Build a tree for simple bracket. */
3568 br_token.type = SIMPLE_BRACKET;
3569 br_token.opr.sbcset = sbcset;
3570 tree = create_token_tree (dfa, NULL, NULL, &br_token);
3571 if (BE (tree == NULL, 0))
3572 goto build_word_op_espace;
3573
3574#ifdef RE_ENABLE_I18N
3575 if (dfa->mb_cur_max > 1)
3576 {
3577 bin_tree_t *mbc_tree;
3578 /* Build a tree for complex bracket. */
3579 br_token.type = COMPLEX_BRACKET;
3580 br_token.opr.mbcset = mbcset;
3581 dfa->has_mb_node = 1;
3582 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3583 if (BE (mbc_tree == NULL, 0))
3584 goto build_word_op_espace;
3585 /* Then join them by ALT node. */
3586 tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
3587 if (BE (mbc_tree != NULL, 1))
3588 return tree;
3589 }
3590 else
3591 {
3592 free_charset (mbcset);
3593 return tree;
3594 }
3595#else /* not RE_ENABLE_I18N */
3596 return tree;
3597#endif /* not RE_ENABLE_I18N */
3598
3599 build_word_op_espace:
3600 re_free (sbcset);
3601#ifdef RE_ENABLE_I18N
3602 free_charset (mbcset);
3603#endif /* RE_ENABLE_I18N */
3604 *err = REG_ESPACE;
3605 return NULL;
3606}
3607
3608/* This is intended for the expressions like "a{1,3}".
3609 Fetch a number from `input', and return the number.
3610 Return REG_MISSING if the number field is empty like "{,1}".
3611 Return REG_ERROR if an error occurred. */
3612
3613static Idx
3614fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
3615{
3616 Idx num = REG_MISSING;
3617 unsigned char c;
3618 while (1)
3619 {
3620 fetch_token (token, input, syntax);
3621 c = token->opr.c;
3622 if (BE (token->type == END_OF_RE, 0))
3623 return REG_ERROR;
3624 if (token->type == OP_CLOSE_DUP_NUM || c == ',')
3625 break;
3626 num = ((token->type != CHARACTER || c < '0' || '9' < c
3627 || num == REG_ERROR)
3628 ? REG_ERROR
3629 : ((num == REG_MISSING) ? c - '0' : num * 10 + c - '0'));
3630 num = (num > REG_DUP_MAX) ? REG_ERROR : num;
3631 }
3632 return num;
3633}
3634
#ifdef RE_ENABLE_I18N
/* Release every array owned by the multibyte character set CSET and
   then CSET itself.  CSET must not be null: its members are read
   before anything is freed.  */
static void
free_charset (re_charset_t *cset)
{
  re_free (cset->char_classes);
# ifdef _LIBC
  re_free (cset->range_ends);
  re_free (cset->range_starts);
  re_free (cset->equiv_classes);
  re_free (cset->coll_syms);
# endif
  re_free (cset->mbchars);

  /* The container goes last, once none of its members are needed.  */
  re_free (cset);
}
#endif /* RE_ENABLE_I18N */
3650
3651/* Functions for binary tree operation. */
3652
3653/* Create a tree node. */
3654
3655static bin_tree_t *
3656create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3657 re_token_type_t type)
3658{
3659 re_token_t t;
3660 t.type = type;
3661 return create_token_tree (dfa, left, right, &t);
3662}
3663
3664static bin_tree_t *
3665create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3666 const re_token_t *token)
3667{
3668 bin_tree_t *tree;
3669 if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
3670 {
3671 bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
3672
3673 if (storage == NULL)
3674 return NULL;
3675 storage->next = dfa->str_tree_storage;
3676 dfa->str_tree_storage = storage;
3677 dfa->str_tree_storage_idx = 0;
3678 }
3679 tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
3680
3681 tree->parent = NULL;
3682 tree->left = left;
3683 tree->right = right;
3684 tree->token = *token;
3685 tree->token.duplicated = 0;
3686 tree->token.opt_subexp = 0;
3687 tree->first = NULL;
3688 tree->next = NULL;
3689 tree->node_idx = REG_MISSING;
3690
3691 if (left != NULL)
3692 left->parent = tree;
3693 if (right != NULL)
3694 right->parent = tree;
3695 return tree;
3696}
3697
3698/* Mark the tree SRC as an optional subexpression.
3699 To be called from preorder or postorder. */
3700
3701static reg_errcode_t
3702mark_opt_subexp (void *extra, bin_tree_t *node)
3703{
3704 Idx idx = (Idx) (long) extra;
3705 if (node->token.type == SUBEXP && node->token.opr.idx == idx)
3706 node->token.opt_subexp = 1;
3707
3708 return REG_NOERROR;
3709}
3710
3711/* Free the allocated memory inside NODE. */
3712
3713static void
3714free_token (re_token_t *node)
3715{
3716#ifdef RE_ENABLE_I18N
3717 if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
3718 free_charset (node->opr.mbcset);
3719 else
3720#endif /* RE_ENABLE_I18N */
3721 if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
3722 re_free (node->opr.sbcset);
3723}
3724
3725/* Worker function for tree walking. Free the allocated memory inside NODE
3726 and its children. */
3727
3728static reg_errcode_t
3729free_tree (void *extra, bin_tree_t *node)
3730{
3731 free_token (&node->token);
3732 return REG_NOERROR;
3733}
3734
3735
3736/* Duplicate the node SRC, and return new node. This is a preorder
3737 visit similar to the one implemented by the generic visitor, but
3738 we need more infrastructure to maintain two parallel trees --- so,
3739 it's easier to duplicate. */
3740
3741static bin_tree_t *
3742duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
3743{
3744 const bin_tree_t *node;
3745 bin_tree_t *dup_root;
3746 bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
3747
3748 for (node = root; ; )
3749 {
3750 /* Create a new tree and link it back to the current parent. */
3751 *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
3752 if (*p_new == NULL)
3753 return NULL;
3754 (*p_new)->parent = dup_node;
3755 (*p_new)->token.duplicated = 1;
3756 dup_node = *p_new;
3757
3758 /* Go to the left node, or up and to the right. */
3759 if (node->left)
3760 {
3761 node = node->left;
3762 p_new = &dup_node->left;
3763 }
3764 else
3765 {
3766 const bin_tree_t *prev = NULL;
3767 while (node->right == prev || node->right == NULL)
3768 {
3769 prev = node;
3770 node = node->parent;
3771 dup_node = dup_node->parent;
3772 if (!node)
3773 return dup_root;
3774 }
3775 node = node->right;
3776 p_new = &dup_node->right;
3777 }
3778 }
3779}
diff --git a/lib/regex.c b/lib/regex.c
deleted file mode 100644
index 82e76c09..00000000
--- a/lib/regex.c
+++ /dev/null
@@ -1,68 +0,0 @@
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License along
17 with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24#ifdef _LIBC
25/* We have to keep the namespace clean. */
26# define regfree(preg) __regfree (preg)
27# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
28# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
29# define regerror(errcode, preg, errbuf, errbuf_size) \
30 __regerror(errcode, preg, errbuf, errbuf_size)
31# define re_set_registers(bu, re, nu, st, en) \
32 __re_set_registers (bu, re, nu, st, en)
33# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
34 __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
35# define re_match(bufp, string, size, pos, regs) \
36 __re_match (bufp, string, size, pos, regs)
37# define re_search(bufp, string, size, startpos, range, regs) \
38 __re_search (bufp, string, size, startpos, range, regs)
39# define re_compile_pattern(pattern, length, bufp) \
40 __re_compile_pattern (pattern, length, bufp)
41# define re_set_syntax(syntax) __re_set_syntax (syntax)
42# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
43 __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
44# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
45
46# include "../locale/localeinfo.h"
47#endif
48
49/* On some systems, limits.h sets RE_DUP_MAX to a lower value than
50 GNU regex allows. Include it before <regex.h>, which correctly
51 #undefs RE_DUP_MAX and sets it to the right value. */
52#include <limits.h>
53
54#include <regex.h>
55#include "regex_internal.h"
56
57#include "regex_internal.c"
58#include "regcomp.c"
59#include "regexec.c"
60
61/* Binary backward compatibility. */
62#if _LIBC
63# include <shlib-compat.h>
64# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3)
65link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.")
66int re_max_failures = 2000;
67# endif
68#endif
diff --git a/lib/regex.h b/lib/regex.h
deleted file mode 100644
index c06a062c..00000000
--- a/lib/regex.h
+++ /dev/null
@@ -1,769 +0,0 @@
1/* Definitions for data structures and routines for the regular
2 expression library.
3 Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005
4 Free Software Foundation, Inc.
5 This file is part of the GNU C Library.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License along
18 with this program; if not, write to the Free Software Foundation,
19 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
20
21#ifndef _REGEX_H
22#define _REGEX_H 1
23
24#include <sys/types.h>
25
26/* Allow the use in C++ code. */
27#ifdef __cplusplus
28extern "C" {
29#endif
30
31/* Define _REGEX_SOURCE to get definitions that are incompatible with
32 POSIX. */
33#if (!defined _REGEX_SOURCE \
34 && (defined _GNU_SOURCE \
35 || (!defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE \
36 && !defined _XOPEN_SOURCE)))
37# define _REGEX_SOURCE 1
38#endif
39
40#if defined _REGEX_SOURCE && defined VMS
41/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
42 should be there. */
43# include <stddef.h>
44#endif
45
46#ifdef _REGEX_LARGE_OFFSETS
47
48/* Use types and values that are wide enough to represent signed and
49 unsigned byte offsets in memory. This currently works only when
50 the regex code is used outside of the GNU C library; it is not yet
51 supported within glibc itself, and glibc users should not define
52 _REGEX_LARGE_OFFSETS. */
53
54/* The type of the offset of a byte within a string.
55 For historical reasons POSIX 1003.1-2004 requires that regoff_t be
56 at least as wide as off_t. This is a bit odd (and many common
57 POSIX platforms set it to the more-sensible ssize_t) but we might
58 as well conform. We don't know of any hosts where ssize_t is wider
59 than off_t, so off_t is safe. */
60typedef off_t regoff_t;
61
62/* The type of nonnegative object indexes. Traditionally, GNU regex
63 uses 'int' for these. Code that uses __re_idx_t should work
64 regardless of whether the type is signed. */
65typedef size_t __re_idx_t;
66
67/* The type of object sizes. */
68typedef size_t __re_size_t;
69
70/* The type of object sizes, in places where the traditional code
71 uses unsigned long int. */
72typedef size_t __re_long_size_t;
73
74#else
75
76/* Use types that are binary-compatible with the traditional GNU regex
77 implementation, which mishandles strings longer than INT_MAX. */
78
79typedef int regoff_t;
80typedef int __re_idx_t;
81typedef unsigned int __re_size_t;
82typedef unsigned long int __re_long_size_t;
83
84#endif
85
86/* The following two types have to be signed and unsigned integer type
87 wide enough to hold a value of a pointer. For most ANSI compilers
88 ptrdiff_t and size_t should be likely OK. Still size of these two
89 types is 2 for Microsoft C. Ugh... */
90typedef long int s_reg_t;
91typedef unsigned long int active_reg_t;
92
93/* The following bits are used to determine the regexp syntax we
94 recognize. The set/not-set meanings are chosen so that Emacs syntax
95 remains the value 0. The bits are given in alphabetical order, and
96 the definitions shifted by one from the previous bit; thus, when we
97 add or remove a bit, only one other definition need change. */
98typedef unsigned long int reg_syntax_t;
99
100/* If this bit is not set, then \ inside a bracket expression is literal.
101 If set, then such a \ quotes the following character. */
102#define REG_BACKSLASH_ESCAPE_IN_LISTS 1ul
103
104/* If this bit is not set, then + and ? are operators, and \+ and \? are
105 literals.
106 If set, then \+ and \? are operators and + and ? are literals. */
107#define REG_BK_PLUS_QM (1ul << 1)
108
109/* If this bit is set, then character classes are supported. They are:
110 [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
111 [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
112 If not set, then character classes are not supported. */
113#define REG_CHAR_CLASSES (1ul << 2)
114
115/* If this bit is set, then ^ and $ are always anchors (outside bracket
116 expressions, of course).
117 If this bit is not set, then it depends:
118 ^ is an anchor if it is at the beginning of a regular
119 expression or after an open-group or an alternation operator;
120 $ is an anchor if it is at the end of a regular expression, or
121 before a close-group or an alternation operator.
122
123 This bit could be (re)combined with REG_CONTEXT_INDEP_OPS, because
124 POSIX draft 11.2 says that * etc. in leading positions is undefined.
125 We already implemented a previous draft which made those constructs
126 invalid, though, so we haven't changed the code back. */
127#define REG_CONTEXT_INDEP_ANCHORS (1ul << 3)
128
129/* If this bit is set, then special characters are always special
130 regardless of where they are in the pattern.
131 If this bit is not set, then special characters are special only in
132 some contexts; otherwise they are ordinary. Specifically,
133 * + ? and intervals are only special when not after the beginning,
134 open-group, or alternation operator. */
135#define REG_CONTEXT_INDEP_OPS (1ul << 4)
136
137/* If this bit is set, then *, +, ?, and { cannot be first in an re or
138 immediately after an alternation or begin-group operator. */
139#define REG_CONTEXT_INVALID_OPS (1ul << 5)
140
141/* If this bit is set, then . matches newline.
142 If not set, then it doesn't. */
143#define REG_DOT_NEWLINE (1ul << 6)
144
145/* If this bit is set, then . doesn't match NUL.
146 If not set, then it does. */
147#define REG_DOT_NOT_NULL (1ul << 7)
148
149/* If this bit is set, nonmatching lists [^...] do not match newline.
150 If not set, they do. */
151#define REG_HAT_LISTS_NOT_NEWLINE (1ul << 8)
152
153/* If this bit is set, either \{...\} or {...} defines an
154 interval, depending on REG_NO_BK_BRACES.
155 If not set, \{, \}, {, and } are literals. */
156#define REG_INTERVALS (1ul << 9)
157
158/* If this bit is set, +, ? and | aren't recognized as operators.
159 If not set, they are. */
160#define REG_LIMITED_OPS (1ul << 10)
161
162/* If this bit is set, newline is an alternation operator.
163 If not set, newline is literal. */
164#define REG_NEWLINE_ALT (1ul << 11)
165
166/* If this bit is set, then `{...}' defines an interval, and \{ and \}
167 are literals.
168 If not set, then `\{...\}' defines an interval. */
169#define REG_NO_BK_BRACES (1ul << 12)
170
171/* If this bit is set, (...) defines a group, and \( and \) are literals.
172 If not set, \(...\) defines a group, and ( and ) are literals. */
173#define REG_NO_BK_PARENS (1ul << 13)
174
175/* If this bit is set, then \<digit> matches <digit>.
176 If not set, then \<digit> is a back-reference. */
177#define REG_NO_BK_REFS (1ul << 14)
178
179/* If this bit is set, then | is an alternation operator, and \| is literal.
180 If not set, then \| is an alternation operator, and | is literal. */
181#define REG_NO_BK_VBAR (1ul << 15)
182
183/* If this bit is set, then an ending range point collating higher
184 than the starting range point, as in [z-a], is invalid.
185 If not set, the containing range is empty and does not match any string. */
186#define REG_NO_EMPTY_RANGES (1ul << 16)
187
188/* If this bit is set, then an unmatched ) is ordinary.
189 If not set, then an unmatched ) is invalid. */
190#define REG_UNMATCHED_RIGHT_PAREN_ORD (1ul << 17)
191
192/* If this bit is set, succeed as soon as we match the whole pattern,
193 without further backtracking. */
194#define REG_NO_POSIX_BACKTRACKING (1ul << 18)
195
196/* If this bit is set, do not process the GNU regex operators.
197 If not set, then the GNU regex operators are recognized. */
198#define REG_NO_GNU_OPS (1ul << 19)
199
200/* If this bit is set, turn on internal regex debugging.
201 If not set, and debugging was on, turn it off.
202 This only works if regex.c is compiled -DDEBUG.
203 We define this bit always, so that all that's needed to turn on
204 debugging is to recompile regex.c; the calling code can always have
205 this bit set, and it won't affect anything in the normal case. */
206#define REG_DEBUG (1ul << 20)
207
208/* If this bit is set, a syntactically invalid interval is treated as
209 a string of ordinary characters. For example, the ERE 'a{1' is
210 treated as 'a\{1'. */
211#define REG_INVALID_INTERVAL_ORD (1ul << 21)
212
213/* If this bit is set, then ignore case when matching.
214 If not set, then case is significant. */
215#define REG_IGNORE_CASE (1ul << 22)
216
217/* This bit is used internally like REG_CONTEXT_INDEP_ANCHORS but only
218 for ^, because it is difficult to scan the regex backwards to find
219 whether ^ should be special. */
220#define REG_CARET_ANCHORS_HERE (1ul << 23)
221
222/* If this bit is set, then \{ cannot be first in an bre or
223 immediately after an alternation or begin-group operator. */
224#define REG_CONTEXT_INVALID_DUP (1ul << 24)
225
226/* If this bit is set, then no_sub will be set to 1 during
227 re_compile_pattern. */
228#define REG_NO_SUB (1ul << 25)
229
230/* This global variable defines the particular regexp syntax to use (for
231 some interfaces). When a regexp is compiled, the syntax used is
232 stored in the pattern buffer, so changing this does not affect
233 already-compiled regexps. */
234extern reg_syntax_t re_syntax_options;
235
236/* Define combinations of the above bits for the standard possibilities.
237 (The [[[ comments delimit what gets put into the Texinfo file, so
238 don't delete them!) */
239/* [[[begin syntaxes]]] */
240#define REG_SYNTAX_EMACS 0
241
242#define REG_SYNTAX_AWK \
243 (REG_BACKSLASH_ESCAPE_IN_LISTS | REG_DOT_NOT_NULL \
244 | REG_NO_BK_PARENS | REG_NO_BK_REFS \
245 | REG_NO_BK_VBAR | REG_NO_EMPTY_RANGES \
246 | REG_DOT_NEWLINE | REG_CONTEXT_INDEP_ANCHORS \
247 | REG_UNMATCHED_RIGHT_PAREN_ORD | REG_NO_GNU_OPS)
248
249#define REG_SYNTAX_GNU_AWK \
250 ((REG_SYNTAX_POSIX_EXTENDED | REG_BACKSLASH_ESCAPE_IN_LISTS \
251 | REG_DEBUG) \
252 & ~(REG_DOT_NOT_NULL | REG_INTERVALS | REG_CONTEXT_INDEP_OPS \
253 | REG_CONTEXT_INVALID_OPS ))
254
255#define REG_SYNTAX_POSIX_AWK \
256 (REG_SYNTAX_POSIX_EXTENDED | REG_BACKSLASH_ESCAPE_IN_LISTS \
257 | REG_INTERVALS | REG_NO_GNU_OPS)
258
259#define REG_SYNTAX_GREP \
260 (REG_BK_PLUS_QM | REG_CHAR_CLASSES \
261 | REG_HAT_LISTS_NOT_NEWLINE | REG_INTERVALS \
262 | REG_NEWLINE_ALT)
263
264#define REG_SYNTAX_EGREP \
265 (REG_CHAR_CLASSES | REG_CONTEXT_INDEP_ANCHORS \
266 | REG_CONTEXT_INDEP_OPS | REG_HAT_LISTS_NOT_NEWLINE \
267 | REG_NEWLINE_ALT | REG_NO_BK_PARENS \
268 | REG_NO_BK_VBAR)
269
270#define REG_SYNTAX_POSIX_EGREP \
271 (REG_SYNTAX_EGREP | REG_INTERVALS | REG_NO_BK_BRACES \
272 | REG_INVALID_INTERVAL_ORD)
273
274/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
275#define REG_SYNTAX_ED REG_SYNTAX_POSIX_BASIC
276
277#define REG_SYNTAX_SED REG_SYNTAX_POSIX_BASIC
278
279/* Syntax bits common to both basic and extended POSIX regex syntax. */
280#define _REG_SYNTAX_POSIX_COMMON \
281 (REG_CHAR_CLASSES | REG_DOT_NEWLINE | REG_DOT_NOT_NULL \
282 | REG_INTERVALS | REG_NO_EMPTY_RANGES)
283
284#define REG_SYNTAX_POSIX_BASIC \
285 (_REG_SYNTAX_POSIX_COMMON | REG_BK_PLUS_QM | REG_CONTEXT_INVALID_DUP)
286
287/* Differs from ..._POSIX_BASIC only in that REG_BK_PLUS_QM becomes
288 REG_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
289 isn't minimal, since other operators, such as \`, aren't disabled. */
290#define REG_SYNTAX_POSIX_MINIMAL_BASIC \
291 (_REG_SYNTAX_POSIX_COMMON | REG_LIMITED_OPS)
292
293#define REG_SYNTAX_POSIX_EXTENDED \
294 (_REG_SYNTAX_POSIX_COMMON | REG_CONTEXT_INDEP_ANCHORS \
295 | REG_CONTEXT_INDEP_OPS | REG_NO_BK_BRACES \
296 | REG_NO_BK_PARENS | REG_NO_BK_VBAR \
297 | REG_CONTEXT_INVALID_OPS | REG_UNMATCHED_RIGHT_PAREN_ORD)
298
299/* Differs from ..._POSIX_EXTENDED in that REG_CONTEXT_INDEP_OPS is
300 removed and REG_NO_BK_REFS is added. */
301#define REG_SYNTAX_POSIX_MINIMAL_EXTENDED \
302 (_REG_SYNTAX_POSIX_COMMON | REG_CONTEXT_INDEP_ANCHORS \
303 | REG_CONTEXT_INVALID_OPS | REG_NO_BK_BRACES \
304 | REG_NO_BK_PARENS | REG_NO_BK_REFS \
305 | REG_NO_BK_VBAR | REG_UNMATCHED_RIGHT_PAREN_ORD)
306/* [[[end syntaxes]]] */
307
308/* Maximum number of duplicates an interval can allow. This is
309 distinct from RE_DUP_MAX, to conform to POSIX name space rules and
310 to avoid collisions with <limits.h>. */
311#define REG_DUP_MAX 32767
312
313
314/* POSIX `cflags' bits (i.e., information for `regcomp'). */
315
316/* If this bit is set, then use extended regular expression syntax.
317 If not set, then use basic regular expression syntax. */
318#define REG_EXTENDED 1
319
320/* If this bit is set, then ignore case when matching.
321 If not set, then case is significant. */
322#define REG_ICASE (1 << 1)
323
324/* If this bit is set, then anchors do not match at newline
325 characters in the string.
326 If not set, then anchors do match at newlines. */
327#define REG_NEWLINE (1 << 2)
328
329/* If this bit is set, then report only success or fail in regexec.
330 If not set, then returns differ between not matching and errors. */
331#define REG_NOSUB (1 << 3)
332
333
334/* POSIX `eflags' bits (i.e., information for regexec). */
335
336/* If this bit is set, then the beginning-of-line operator doesn't match
337 the beginning of the string (presumably because it's not the
338 beginning of a line).
339 If not set, then the beginning-of-line operator does match the
340 beginning of the string. */
341#define REG_NOTBOL 1
342
343/* Like REG_NOTBOL, except for the end-of-line. */
344#define REG_NOTEOL (1 << 1)
345
346/* Use PMATCH[0] to delimit the start and end of the search in the
347 buffer. */
348#define REG_STARTEND (1 << 2)
349
350
351/* If any error codes are removed, changed, or added, update the
352 `__re_error_msgid' table in regcomp.c. */
353
/* Error codes used by the regex routines.  Numbering starts at -1
   (_REG_ENOSYS) and each following enumerator is one greater than the
   previous, so _REG_NOERROR is 0.  Each enumerator has a macro alias
   without the leading underscore.  */
354typedef enum
355{
356 _REG_ENOSYS = -1, /* This will never happen for this implementation. */
357#define REG_ENOSYS _REG_ENOSYS
358
359 _REG_NOERROR, /* Success. */
360#define REG_NOERROR _REG_NOERROR
361
362 _REG_NOMATCH, /* Didn't find a match (for regexec). */
363#define REG_NOMATCH _REG_NOMATCH
364
365 /* POSIX regcomp return error codes. (In the order listed in the
366 standard.) */
367
368 _REG_BADPAT, /* Invalid pattern. */
369#define REG_BADPAT _REG_BADPAT
370
371 _REG_ECOLLATE, /* Invalid collating element. */
372#define REG_ECOLLATE _REG_ECOLLATE
373
374 _REG_ECTYPE, /* Invalid character class name. */
375#define REG_ECTYPE _REG_ECTYPE
376
377 _REG_EESCAPE, /* Trailing backslash. */
378#define REG_EESCAPE _REG_EESCAPE
379
380 _REG_ESUBREG, /* Invalid back reference. */
381#define REG_ESUBREG _REG_ESUBREG
382
383 _REG_EBRACK, /* Unmatched left bracket. */
384#define REG_EBRACK _REG_EBRACK
385
386 _REG_EPAREN, /* Parenthesis imbalance. */
387#define REG_EPAREN _REG_EPAREN
388
389 _REG_EBRACE, /* Unmatched \{. */
390#define REG_EBRACE _REG_EBRACE
391
392 _REG_BADBR, /* Invalid contents of \{\}. */
393#define REG_BADBR _REG_BADBR
394
395 _REG_ERANGE, /* Invalid range end. */
396#define REG_ERANGE _REG_ERANGE
397
398 _REG_ESPACE, /* Ran out of memory. */
399#define REG_ESPACE _REG_ESPACE
400
401 _REG_BADRPT, /* No preceding re for repetition op. */
402#define REG_BADRPT _REG_BADRPT
403
404 /* Error codes we've added. */
405
406 _REG_EEND, /* Premature end. */
407#define REG_EEND _REG_EEND
408
409 _REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
410#define REG_ESIZE _REG_ESIZE
411
412 _REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
413#define REG_ERPAREN _REG_ERPAREN
414
415} reg_errcode_t;
416
417/* In the traditional GNU implementation, regex.h defined member names
418 like `buffer' that POSIX does not allow. These members now have
419 names with leading `re_' (e.g., `re_buffer'). Support the old
420 names only if _REGEX_SOURCE is defined. New programs should use
421 the new names. */
422#ifdef _REGEX_SOURCE
423# define _REG_RE_NAME(id) id
424# define _REG_RM_NAME(id) id
425#else
426# define _REG_RE_NAME(id) re_##id
427# define _REG_RM_NAME(id) rm_##id
428#endif
429
430/* The user can specify the type of the re_translate member by
431 defining the macro REG_TRANSLATE_TYPE. In the traditional GNU
432 implementation, this macro was named RE_TRANSLATE_TYPE, but POSIX
433 does not allow this. Support the old name only if _REGEX_SOURCE
434 and if the new name is not defined. New programs should use the new
435 name. */
436#ifndef REG_TRANSLATE_TYPE
437# if defined _REGEX_SOURCE && defined RE_TRANSLATE_TYPE
438# define REG_TRANSLATE_TYPE RE_TRANSLATE_TYPE
439# else
440# define REG_TRANSLATE_TYPE char *
441# endif
442#endif
443
444/* This data structure represents a compiled pattern. Before calling
445 the pattern compiler, the fields `re_buffer', `re_allocated', `re_fastmap',
446 `re_translate', and `re_no_sub' can be set. After the pattern has been
447 compiled, the `re_nsub' field is available. All other fields are
448 private to the regex routines. */
449
/* Members declared through _REG_RE_NAME carry an `re_' prefix unless
   _REGEX_SOURCE is defined, in which case the bare traditional name is
   used.  `re_nsub' is declared directly and has the same name either
   way.  */
450struct re_pattern_buffer
451{
452/* [[[begin pattern_buffer]]] */
453 /* Space that holds the compiled pattern. It is declared as
454 `unsigned char *' because its elements are
455 sometimes used as array indexes. */
456 unsigned char *_REG_RE_NAME (buffer);
457
458 /* Number of bytes to which `re_buffer' points. */
459 __re_long_size_t _REG_RE_NAME (allocated);
460
461 /* Number of bytes actually used in `re_buffer'. */
462 __re_long_size_t _REG_RE_NAME (used);
463
464 /* Syntax setting with which the pattern was compiled. */
465 reg_syntax_t _REG_RE_NAME (syntax);
466
467 /* Pointer to a fastmap, if any, otherwise zero. re_search uses
468 the fastmap, if there is one, to skip over impossible
469 starting points for matches. */
470 char *_REG_RE_NAME (fastmap);
471
472 /* Either a translate table to apply to all characters before
473 comparing them, or zero for no translation. The translation
474 is applied to a pattern when it is compiled and to a string
475 when it is matched. */
476 REG_TRANSLATE_TYPE _REG_RE_NAME (translate);
477
478 /* Number of subexpressions found by the compiler. */
479 size_t re_nsub;
480
481 /* Zero if this pattern cannot match the empty string, one otherwise.
482 Well, in truth it's used only in `re_search_2', to see
483 whether or not we should use the fastmap, so we don't set
484 this absolutely perfectly; see `re_compile_fastmap' (the
485 `duplicate' case). */
486 unsigned int _REG_RE_NAME (can_be_null) : 1;
487
488 /* If REG_UNALLOCATED, allocate space in the `regs' structure
489 for `max (REG_NREGS, re_nsub + 1)' groups.
490 If REG_REALLOCATE, reallocate space if necessary.
491 If REG_FIXED, use what's there. */
492#define REG_UNALLOCATED 0
493#define REG_REALLOCATE 1
494#define REG_FIXED 2
495 unsigned int _REG_RE_NAME (regs_allocated) : 2;
496
497 /* Set to zero when `regex_compile' compiles a pattern; set to one
498 by `re_compile_fastmap' if it updates the fastmap. */
499 unsigned int _REG_RE_NAME (fastmap_accurate) : 1;
500
501 /* If set, `re_match_2' does not return information about
502 subexpressions. */
503 unsigned int _REG_RE_NAME (no_sub) : 1;
504
505 /* If set, a beginning-of-line anchor doesn't match at the
506 beginning of the string. */
507 unsigned int _REG_RE_NAME (not_bol) : 1;
508
509 /* Similarly for an end-of-line anchor. */
510 unsigned int _REG_RE_NAME (not_eol) : 1;
511
512 /* If true, an anchor at a newline matches. */
513 unsigned int _REG_RE_NAME (newline_anchor) : 1;
514
515/* [[[end pattern_buffer]]] */
516};
517
/* POSIX name for the compiled-pattern type.  */
518typedef struct re_pattern_buffer regex_t;
519
520/* This is the structure we store register match data in. See
521 regex.texinfo for a full description of what registers match. */
/* Member names get an `rm_' prefix unless _REGEX_SOURCE is defined
   (see _REG_RM_NAME).  */
522struct re_registers
523{
 /* Presumably the number of entries available in `start' and `end';
    confirm against re_set_registers.  */
524 __re_size_t _REG_RM_NAME (num_regs);
 /* Arrays of offsets; presumably one start/end pair per register --
    TODO confirm against the matcher.  */
525 regoff_t *_REG_RM_NAME (start);
526 regoff_t *_REG_RM_NAME (end);
527};
528
529
530/* If `regs_allocated' is REG_UNALLOCATED in the pattern buffer,
531 `re_match_2' returns information about at least this many registers
532 the first time a `regs' structure is passed. */
533#ifndef REG_NREGS
534# define REG_NREGS 30
535#endif
536
537
538/* POSIX specification for registers. Besides using different names from
539 `re_registers', POSIX uses an array of structures, instead of a
540 structure of arrays. */
541typedef struct
542{
543 regoff_t rm_so; /* Byte offset from string's start to substring's start. */
544 regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
545} regmatch_t;
546
547/* Declarations for routines. */
548
549/* Sets the current default syntax to SYNTAX, and return the old syntax.
550 You can also simply assign to the `re_syntax_options' variable. */
551extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
552
553/* Compile the regular expression PATTERN, with length LENGTH
554 and syntax given by the global `re_syntax_options', into the buffer
555 BUFFER. Return NULL if successful, and an error string if not. */
556extern const char *re_compile_pattern (const char *__pattern, size_t __length,
557 struct re_pattern_buffer *__buffer);
558
559
560/* Compile a fastmap for the compiled pattern in BUFFER; used to
561 accelerate searches. Return 0 if successful and -2 if was an
562 internal error. */
563extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
564
565
566/* Search in the string STRING (with length LENGTH) for the pattern
567 compiled into BUFFER. Start searching at position START, for RANGE
568 characters. Return the starting position of the match, -1 for no
569 match, or -2 for an internal error. Also return register
570 information in REGS (if REGS and BUFFER->re_no_sub are nonzero). */
571extern regoff_t re_search (struct re_pattern_buffer *__buffer,
572 const char *__string, __re_idx_t __length,
573 __re_idx_t __start, regoff_t __range,
574 struct re_registers *__regs);
575
576
577/* Like `re_search', but search in the concatenation of STRING1 and
578 STRING2. Also, stop searching at index START + STOP. */
579extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer,
580 const char *__string1, __re_idx_t __length1,
581 const char *__string2, __re_idx_t __length2,
582 __re_idx_t __start, regoff_t __range,
583 struct re_registers *__regs,
584 __re_idx_t __stop);
585
586
587/* Like `re_search', but return how many characters in STRING the regexp
588 in BUFFER matched, starting at position START. */
589extern regoff_t re_match (struct re_pattern_buffer *__buffer,
590 const char *__string, __re_idx_t __length,
591 __re_idx_t __start, struct re_registers *__regs);
592
593
594/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
595extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer,
596 const char *__string1, __re_idx_t __length1,
597 const char *__string2, __re_idx_t __length2,
598 __re_idx_t __start, struct re_registers *__regs,
599 __re_idx_t __stop);
600
601
602/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
603 ENDS. Subsequent matches using BUFFER and REGS will use this memory
604 for recording register information. STARTS and ENDS must be
605 allocated with malloc, and must each be at least `NUM_REGS * sizeof
606 (regoff_t)' bytes long.
607
608 If NUM_REGS == 0, then subsequent matches should allocate their own
609 register data.
610
611 Unless this function is called, the first search or match using
612 PATTERN_BUFFER will allocate its own register data, without
613 freeing the old data. */
614extern void re_set_registers (struct re_pattern_buffer *__buffer,
615 struct re_registers *__regs,
616 __re_size_t __num_regs,
617 regoff_t *__starts, regoff_t *__ends);
618
619#if defined _REGEX_RE_COMP || defined _LIBC
620# ifndef _CRAY
621/* 4.2 bsd compatibility. */
622extern char *re_comp (const char *);
623extern int re_exec (const char *);
624# endif
625#endif
626
627/* GCC 2.95 and later have "__restrict"; C99 compilers have
628 "restrict", and "configure" may have defined "restrict". */
629#ifndef __restrict
630# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
631# if defined restrict || 199901L <= __STDC_VERSION__
632# define __restrict restrict
633# else
634# define __restrict
635# endif
636# endif
637#endif
638/* gcc 3.1 and up support the [restrict] syntax, but g++ doesn't. */
639#ifndef __restrict_arr
640# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) && !defined __cplusplus
641# define __restrict_arr __restrict
642# else
643# define __restrict_arr
644# endif
645#endif
646
647/* POSIX compatibility. */
648extern int regcomp (regex_t *__restrict __preg,
649 const char *__restrict __pattern,
650 int __cflags);
651
652extern int regexec (const regex_t *__restrict __preg,
653 const char *__restrict __string, size_t __nmatch,
654 regmatch_t __pmatch[__restrict_arr],
655 int __eflags);
656
657extern size_t regerror (int __errcode, const regex_t *__restrict __preg,
658 char *__restrict __errbuf, size_t __errbuf_size);
659
660extern void regfree (regex_t *__preg);
661
662
663#ifdef _REGEX_SOURCE
664
665/* Define the POSIX-compatible member names in terms of the
666 incompatible (and deprecated) names established by _REG_RE_NAME.
667 New programs should use the re_* names. */
668
669# define re_allocated allocated
670# define re_buffer buffer
671# define re_can_be_null can_be_null
672# define re_fastmap fastmap
673# define re_fastmap_accurate fastmap_accurate
674# define re_newline_anchor newline_anchor
675# define re_no_sub no_sub
676# define re_not_bol not_bol
677# define re_not_eol not_eol
678# define re_regs_allocated regs_allocated
679# define re_syntax syntax
680# define re_translate translate
681# define re_used used
682
683/* Similarly for _REG_RM_NAME. */
684
685# define rm_end end
686# define rm_num_regs num_regs
687# define rm_start start
688
689/* Undef RE_DUP_MAX first, in case the user has already included a
690 <limits.h> with an incompatible definition.
691
692 On GNU systems, the most common spelling for RE_DUP_MAX's value in
693 <limits.h> is (0x7fff), so define RE_DUP_MAX to that, not to
694 REG_DUP_MAX. This avoids some duplicate-macro-definition warnings
695 with programs that include <limits.h> after this file.
696
697 New programs should not assume that regex.h defines RE_DUP_MAX; to
698 get the value of RE_DUP_MAX, they should instead include <limits.h>
699 and possibly invoke the sysconf function. */
700
701# undef RE_DUP_MAX
702# define RE_DUP_MAX (0x7fff)
703
704/* Define the following symbols for backward source compatibility.
705 These symbols violate the POSIX name space rules, and new programs
706 should avoid them. */
707
708# define REGS_FIXED REG_FIXED
709# define REGS_REALLOCATE REG_REALLOCATE
710# define REGS_UNALLOCATED REG_UNALLOCATED
711# define RE_BACKSLASH_ESCAPE_IN_LISTS REG_BACKSLASH_ESCAPE_IN_LISTS
712# define RE_BK_PLUS_QM REG_BK_PLUS_QM
713# define RE_CARET_ANCHORS_HERE REG_CARET_ANCHORS_HERE
714# define RE_CHAR_CLASSES REG_CHAR_CLASSES
715# define RE_CONTEXT_INDEP_ANCHORS REG_CONTEXT_INDEP_ANCHORS
716# define RE_CONTEXT_INDEP_OPS REG_CONTEXT_INDEP_OPS
717# define RE_CONTEXT_INVALID_DUP REG_CONTEXT_INVALID_DUP
718# define RE_CONTEXT_INVALID_OPS REG_CONTEXT_INVALID_OPS
719# define RE_DEBUG REG_DEBUG
720# define RE_DOT_NEWLINE REG_DOT_NEWLINE
721# define RE_DOT_NOT_NULL REG_DOT_NOT_NULL
722# define RE_HAT_LISTS_NOT_NEWLINE REG_HAT_LISTS_NOT_NEWLINE
723# define RE_ICASE REG_IGNORE_CASE /* avoid collision with REG_ICASE */
724# define RE_INTERVALS REG_INTERVALS
725# define RE_INVALID_INTERVAL_ORD REG_INVALID_INTERVAL_ORD
726# define RE_LIMITED_OPS REG_LIMITED_OPS
727# define RE_NEWLINE_ALT REG_NEWLINE_ALT
728# define RE_NO_BK_BRACES REG_NO_BK_BRACES
729# define RE_NO_BK_PARENS REG_NO_BK_PARENS
730# define RE_NO_BK_REFS REG_NO_BK_REFS
731# define RE_NO_BK_VBAR REG_NO_BK_VBAR
732# define RE_NO_EMPTY_RANGES REG_NO_EMPTY_RANGES
733# define RE_NO_GNU_OPS REG_NO_GNU_OPS
734# define RE_NO_POSIX_BACKTRACKING REG_NO_POSIX_BACKTRACKING
735# define RE_NO_SUB REG_NO_SUB
736# define RE_NREGS REG_NREGS
737# define RE_SYNTAX_AWK REG_SYNTAX_AWK
738# define RE_SYNTAX_ED REG_SYNTAX_ED
739# define RE_SYNTAX_EGREP REG_SYNTAX_EGREP
740# define RE_SYNTAX_EMACS REG_SYNTAX_EMACS
741# define RE_SYNTAX_GNU_AWK REG_SYNTAX_GNU_AWK
742# define RE_SYNTAX_GREP REG_SYNTAX_GREP
743# define RE_SYNTAX_POSIX_AWK REG_SYNTAX_POSIX_AWK
744# define RE_SYNTAX_POSIX_BASIC REG_SYNTAX_POSIX_BASIC
745# define RE_SYNTAX_POSIX_EGREP REG_SYNTAX_POSIX_EGREP
746# define RE_SYNTAX_POSIX_EXTENDED REG_SYNTAX_POSIX_EXTENDED
747# define RE_SYNTAX_POSIX_MINIMAL_BASIC REG_SYNTAX_POSIX_MINIMAL_BASIC
748# define RE_SYNTAX_POSIX_MINIMAL_EXTENDED REG_SYNTAX_POSIX_MINIMAL_EXTENDED
749# define RE_SYNTAX_SED REG_SYNTAX_SED
750# define RE_UNMATCHED_RIGHT_PAREN_ORD REG_UNMATCHED_RIGHT_PAREN_ORD
751# ifndef RE_TRANSLATE_TYPE
752# define RE_TRANSLATE_TYPE REG_TRANSLATE_TYPE
753# endif
754
755#endif /* defined _REGEX_SOURCE */
756
757#ifdef __cplusplus
758}
759#endif /* C++ */
760
761#endif /* regex.h */
762
763/*
764Local variables:
765make-backup-files: t
766version-control: t
767trim-versions-without-asking: nil
768End:
769*/
diff --git a/lib/regex_internal.c b/lib/regex_internal.c
deleted file mode 100644
index ad618cf6..00000000
--- a/lib/regex_internal.c
+++ /dev/null
@@ -1,1656 +0,0 @@
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License along
17 with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20static void re_string_construct_common (const char *str, Idx len,
21 re_string_t *pstr,
22 REG_TRANSLATE_TYPE trans, bool icase,
23 const re_dfa_t *dfa) internal_function;
24static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
25 const re_node_set *nodes,
26 re_hashval_t hash) internal_function;
27static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
28 const re_node_set *nodes,
29 unsigned int context,
30 re_hashval_t hash) internal_function;
31
32/* Functions for string operation. */
33
34/* This function allocate the buffers. It is necessary to call
35 re_string_reconstruct before using the object. */
36
37static reg_errcode_t
38internal_function
39re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
40 REG_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
41{
42 reg_errcode_t ret;
43 Idx init_buf_len;
44
45 /* Ensure at least one character fits into the buffers. */
46 if (init_len < dfa->mb_cur_max)
47 init_len = dfa->mb_cur_max;
48 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
49 re_string_construct_common (str, len, pstr, trans, icase, dfa);
50
51 ret = re_string_realloc_buffers (pstr, init_buf_len);
52 if (BE (ret != REG_NOERROR, 0))
53 return ret;
54
55 pstr->word_char = dfa->word_char;
56 pstr->word_ops_used = dfa->word_ops_used;
57 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
58 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
59 pstr->valid_raw_len = pstr->valid_len;
60 return REG_NOERROR;
61}
62
63/* This function allocate the buffers, and initialize them. */
64
65static reg_errcode_t
66internal_function
67re_string_construct (re_string_t *pstr, const char *str, Idx len,
68 REG_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
69{
70 reg_errcode_t ret;
71 memset (pstr, '\0', sizeof (re_string_t));
72 re_string_construct_common (str, len, pstr, trans, icase, dfa);
73
74 if (len > 0)
75 {
76 ret = re_string_realloc_buffers (pstr, len + 1);
77 if (BE (ret != REG_NOERROR, 0))
78 return ret;
79 }
80 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
81
82 if (icase)
83 {
84#ifdef RE_ENABLE_I18N
85 if (dfa->mb_cur_max > 1)
86 {
87 while (1)
88 {
89 ret = build_wcs_upper_buffer (pstr);
90 if (BE (ret != REG_NOERROR, 0))
91 return ret;
92 if (pstr->valid_raw_len >= len)
93 break;
94 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
95 break;
96 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
97 if (BE (ret != REG_NOERROR, 0))
98 return ret;
99 }
100 }
101 else
102#endif /* RE_ENABLE_I18N */
103 build_upper_buffer (pstr);
104 }
105 else
106 {
107#ifdef RE_ENABLE_I18N
108 if (dfa->mb_cur_max > 1)
109 build_wcs_buffer (pstr);
110 else
111#endif /* RE_ENABLE_I18N */
112 {
113 if (trans != NULL)
114 re_string_translate_buffer (pstr);
115 else
116 {
117 pstr->valid_len = pstr->bufs_len;
118 pstr->valid_raw_len = pstr->bufs_len;
119 }
120 }
121 }
122
123 return REG_NOERROR;
124}
125
126/* Helper functions for re_string_allocate, and re_string_construct. */
127
128static reg_errcode_t
129internal_function
130re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
131{
132#ifdef RE_ENABLE_I18N
133 if (pstr->mb_cur_max > 1)
134 {
135 wint_t *new_wcs = re_xrealloc (pstr->wcs, wint_t, new_buf_len);
136 if (BE (new_wcs == NULL, 0))
137 return REG_ESPACE;
138 pstr->wcs = new_wcs;
139 if (pstr->offsets != NULL)
140 {
141 Idx *new_offsets = re_xrealloc (pstr->offsets, Idx, new_buf_len);
142 if (BE (new_offsets == NULL, 0))
143 return REG_ESPACE;
144 pstr->offsets = new_offsets;
145 }
146 }
147#endif /* RE_ENABLE_I18N */
148 if (pstr->mbs_allocated)
149 {
150 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
151 new_buf_len);
152 if (BE (new_mbs == NULL, 0))
153 return REG_ESPACE;
154 pstr->mbs = new_mbs;
155 }
156 pstr->bufs_len = new_buf_len;
157 return REG_NOERROR;
158}
159
160
161static void
162internal_function
163re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
164 REG_TRANSLATE_TYPE trans, bool icase,
165 const re_dfa_t *dfa)
166{
167 pstr->raw_mbs = (const unsigned char *) str;
168 pstr->len = len;
169 pstr->raw_len = len;
170 pstr->trans = (unsigned REG_TRANSLATE_TYPE) trans;
171 pstr->icase = icase;
172 pstr->mbs_allocated = (trans != NULL || icase);
173 pstr->mb_cur_max = dfa->mb_cur_max;
174 pstr->is_utf8 = dfa->is_utf8;
175 pstr->map_notascii = dfa->map_notascii;
176 pstr->stop = pstr->len;
177 pstr->raw_stop = pstr->stop;
178}
179
180#ifdef RE_ENABLE_I18N
181
182/* Build wide character buffer PSTR->WCS.
183 If the byte sequence of the string are:
184 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
185 Then wide character buffer will be:
186 <wc1> , WEOF , <wc2> , WEOF , <wc3>
187 We use WEOF for padding, they indicate that the position isn't
188 a first byte of a multibyte character.
189
190 Note that this function assumes PSTR->VALID_LEN elements are already
191 built and starts from PSTR->VALID_LEN. */
192
193static void
194internal_function
195build_wcs_buffer (re_string_t *pstr)
196{
197#ifdef _LIBC
198 unsigned char buf[MB_LEN_MAX];
199 assert (MB_LEN_MAX >= pstr->mb_cur_max);
200#else
201 unsigned char buf[64];
202#endif
203 mbstate_t prev_st;
204 Idx byte_idx, end_idx, remain_len;
205 size_t mbclen;
206
207 /* Build the buffers from pstr->valid_len to either pstr->len or
208 pstr->bufs_len. */
209 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
210 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
211 {
212 wchar_t wc;
213 const char *p;
214
215 remain_len = end_idx - byte_idx;
216 prev_st = pstr->cur_state;
217 /* Apply the translation if we need. */
218 if (BE (pstr->trans != NULL, 0))
219 {
220 int i, ch;
221
222 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
223 {
224 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
225 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
226 }
227 p = (const char *) buf;
228 }
229 else
230 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
231 mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
232 if (BE (mbclen == (size_t) -2, 0))
233 {
234 /* The buffer doesn't have enough space, finish to build. */
235 pstr->cur_state = prev_st;
236 break;
237 }
238 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
239 {
240 /* We treat these cases as a singlebyte character. */
241 mbclen = 1;
242 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
243 if (BE (pstr->trans != NULL, 0))
244 wc = pstr->trans[wc];
245 pstr->cur_state = prev_st;
246 }
247
248 /* Write wide character and padding. */
249 pstr->wcs[byte_idx++] = wc;
250 /* Write paddings. */
251 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
252 pstr->wcs[byte_idx++] = WEOF;
253 }
254 pstr->valid_len = byte_idx;
255 pstr->valid_raw_len = byte_idx;
256}
257
258/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
259 but for REG_ICASE. */
260
261static reg_errcode_t
262internal_function
263build_wcs_upper_buffer (re_string_t *pstr)
264{
265 mbstate_t prev_st;
266 Idx src_idx, byte_idx, end_idx, remain_len;
267 size_t mbclen;
268#ifdef _LIBC
269 char buf[MB_LEN_MAX];
270 assert (MB_LEN_MAX >= pstr->mb_cur_max);
271#else
272 char buf[64];
273#endif
274
275 byte_idx = pstr->valid_len;
276 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
277
278 /* The following optimization assumes that ASCII characters can be
279 mapped to wide characters with a simple cast. */
280 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
281 {
282 while (byte_idx < end_idx)
283 {
284 wchar_t wc;
285
286 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
287 && mbsinit (&pstr->cur_state))
288 {
289 /* In case of a singlebyte character. */
290 pstr->mbs[byte_idx]
291 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
292 /* The next step uses the assumption that wchar_t is encoded
293 ASCII-safe: all ASCII values can be converted like this. */
294 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
295 ++byte_idx;
296 continue;
297 }
298
299 remain_len = end_idx - byte_idx;
300 prev_st = pstr->cur_state;
301 mbclen = mbrtowc (&wc,
302 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
303 + byte_idx), remain_len, &pstr->cur_state);
304 if (BE ((size_t) (mbclen + 2) > 2, 1))
305 {
306 wchar_t wcu = wc;
307 if (iswlower (wc))
308 {
309 size_t mbcdlen;
310
311 wcu = towupper (wc);
312 mbcdlen = wcrtomb (buf, wcu, &prev_st);
313 if (BE (mbclen == mbcdlen, 1))
314 memcpy (pstr->mbs + byte_idx, buf, mbclen);
315 else
316 {
317 src_idx = byte_idx;
318 goto offsets_needed;
319 }
320 }
321 else
322 memcpy (pstr->mbs + byte_idx,
323 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
324 pstr->wcs[byte_idx++] = wcu;
325 /* Write paddings. */
326 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
327 pstr->wcs[byte_idx++] = WEOF;
328 }
329 else if (mbclen == (size_t) -1 || mbclen == 0)
330 {
331 /* It is an invalid character or '\0'. Just use the byte. */
332 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
333 pstr->mbs[byte_idx] = ch;
334 /* And also cast it to wide char. */
335 pstr->wcs[byte_idx++] = (wchar_t) ch;
336 if (BE (mbclen == (size_t) -1, 0))
337 pstr->cur_state = prev_st;
338 }
339 else
340 {
341 /* The buffer doesn't have enough space, finish to build. */
342 pstr->cur_state = prev_st;
343 break;
344 }
345 }
346 pstr->valid_len = byte_idx;
347 pstr->valid_raw_len = byte_idx;
348 return REG_NOERROR;
349 }
350 else
351 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
352 {
353 wchar_t wc;
354 const char *p;
355 offsets_needed:
356 remain_len = end_idx - byte_idx;
357 prev_st = pstr->cur_state;
358 if (BE (pstr->trans != NULL, 0))
359 {
360 int i, ch;
361
362 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
363 {
364 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
365 buf[i] = pstr->trans[ch];
366 }
367 p = (const char *) buf;
368 }
369 else
370 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
371 mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
372 if (BE ((size_t) (mbclen + 2) > 2, 1))
373 {
374 wchar_t wcu = wc;
375 if (iswlower (wc))
376 {
377 size_t mbcdlen;
378
379 wcu = towupper (wc);
380 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
381 if (BE (mbclen == mbcdlen, 1))
382 memcpy (pstr->mbs + byte_idx, buf, mbclen);
383 else if (mbcdlen != (size_t) -1)
384 {
385 size_t i;
386
387 if (byte_idx + mbcdlen > pstr->bufs_len)
388 {
389 pstr->cur_state = prev_st;
390 break;
391 }
392
393 if (pstr->offsets == NULL)
394 {
395 pstr->offsets = re_xmalloc (Idx, pstr->bufs_len);
396
397 if (pstr->offsets == NULL)
398 return REG_ESPACE;
399 }
400 if (!pstr->offsets_needed)
401 {
402 for (i = 0; i < (size_t) byte_idx; ++i)
403 pstr->offsets[i] = i;
404 pstr->offsets_needed = 1;
405 }
406
407 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
408 pstr->wcs[byte_idx] = wcu;
409 pstr->offsets[byte_idx] = src_idx;
410 for (i = 1; i < mbcdlen; ++i)
411 {
412 pstr->offsets[byte_idx + i]
413 = src_idx + (i < mbclen ? i : mbclen - 1);
414 pstr->wcs[byte_idx + i] = WEOF;
415 }
416 pstr->len += mbcdlen - mbclen;
417 if (pstr->raw_stop > src_idx)
418 pstr->stop += mbcdlen - mbclen;
419 end_idx = (pstr->bufs_len > pstr->len)
420 ? pstr->len : pstr->bufs_len;
421 byte_idx += mbcdlen;
422 src_idx += mbclen;
423 continue;
424 }
425 else
426 memcpy (pstr->mbs + byte_idx, p, mbclen);
427 }
428 else
429 memcpy (pstr->mbs + byte_idx, p, mbclen);
430
431 if (BE (pstr->offsets_needed != 0, 0))
432 {
433 size_t i;
434 for (i = 0; i < mbclen; ++i)
435 pstr->offsets[byte_idx + i] = src_idx + i;
436 }
437 src_idx += mbclen;
438
439 pstr->wcs[byte_idx++] = wcu;
440 /* Write paddings. */
441 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
442 pstr->wcs[byte_idx++] = WEOF;
443 }
444 else if (mbclen == (size_t) -1 || mbclen == 0)
445 {
446 /* It is an invalid character or '\0'. Just use the byte. */
447 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
448
449 if (BE (pstr->trans != NULL, 0))
450 ch = pstr->trans [ch];
451 pstr->mbs[byte_idx] = ch;
452
453 if (BE (pstr->offsets_needed != 0, 0))
454 pstr->offsets[byte_idx] = src_idx;
455 ++src_idx;
456
457 /* And also cast it to wide char. */
458 pstr->wcs[byte_idx++] = (wchar_t) ch;
459 if (BE (mbclen == (size_t) -1, 0))
460 pstr->cur_state = prev_st;
461 }
462 else
463 {
464 /* The buffer doesn't have enough space, finish to build. */
465 pstr->cur_state = prev_st;
466 break;
467 }
468 }
469 pstr->valid_len = byte_idx;
470 pstr->valid_raw_len = src_idx;
471 return REG_NOERROR;
472}
473
474/* Skip characters until the index becomes greater than NEW_RAW_IDX.
475 Return the index. */
476
477static Idx
478internal_function
479re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
480{
481 mbstate_t prev_st;
482 Idx rawbuf_idx;
483 size_t mbclen;
484 wchar_t wc = 0;
485
486 /* Skip the characters which are not necessary to check. */
487 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
488 rawbuf_idx < new_raw_idx;)
489 {
490 Idx remain_len;
491 remain_len = pstr->len - rawbuf_idx;
492 prev_st = pstr->cur_state;
493 mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
494 remain_len, &pstr->cur_state);
495 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
496 {
497 /* We treat these cases as a singlebyte character. */
498 mbclen = 1;
499 pstr->cur_state = prev_st;
500 }
501 /* Then proceed the next character. */
502 rawbuf_idx += mbclen;
503 }
504 *last_wc = (wint_t) wc;
505 return rawbuf_idx;
506}
507#endif /* RE_ENABLE_I18N */
508
509/* Build the buffer PSTR->MBS, and apply the translation if we need.
510 This function is used in case of REG_ICASE. */
511
512static void
513internal_function
514build_upper_buffer (re_string_t *pstr)
515{
516 Idx char_idx, end_idx;
517 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
518
519 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
520 {
521 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
522 if (BE (pstr->trans != NULL, 0))
523 ch = pstr->trans[ch];
524 if (islower (ch))
525 pstr->mbs[char_idx] = toupper (ch);
526 else
527 pstr->mbs[char_idx] = ch;
528 }
529 pstr->valid_len = char_idx;
530 pstr->valid_raw_len = char_idx;
531}
532
533/* Apply TRANS to the buffer in PSTR. */
534
535static void
536internal_function
537re_string_translate_buffer (re_string_t *pstr)
538{
539 Idx buf_idx, end_idx;
540 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
541
542 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
543 {
544 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
545 pstr->mbs[buf_idx] = pstr->trans[ch];
546 }
547
548 pstr->valid_len = buf_idx;
549 pstr->valid_raw_len = buf_idx;
550}
551
552/* This function re-construct the buffers.
553 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
554 convert to upper case in case of REG_ICASE, apply translation. */
555
556static reg_errcode_t
557internal_function
558re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
559{
560 Idx offset;
561
562 if (BE (pstr->raw_mbs_idx <= idx, 0))
563 offset = idx - pstr->raw_mbs_idx;
564 else
565 {
566 /* Reset buffer. */
567#ifdef RE_ENABLE_I18N
568 if (pstr->mb_cur_max > 1)
569 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
570#endif /* RE_ENABLE_I18N */
571 pstr->len = pstr->raw_len;
572 pstr->stop = pstr->raw_stop;
573 pstr->valid_len = 0;
574 pstr->raw_mbs_idx = 0;
575 pstr->valid_raw_len = 0;
576 pstr->offsets_needed = 0;
577 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
578 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
579 if (!pstr->mbs_allocated)
580 pstr->mbs = (unsigned char *) pstr->raw_mbs;
581 offset = idx;
582 }
583
584 if (BE (offset != 0, 1))
585 {
586 /* Are the characters which are already checked remain? */
587 if (BE (offset < pstr->valid_raw_len, 1)
588#ifdef RE_ENABLE_I18N
589 /* Handling this would enlarge the code too much.
590 Accept a slowdown in that case. */
591 && pstr->offsets_needed == 0
592#endif
593 )
594 {
595 /* Yes, move them to the front of the buffer. */
596 pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags);
597#ifdef RE_ENABLE_I18N
598 if (pstr->mb_cur_max > 1)
599 memmove (pstr->wcs, pstr->wcs + offset,
600 (pstr->valid_len - offset) * sizeof (wint_t));
601#endif /* RE_ENABLE_I18N */
602 if (BE (pstr->mbs_allocated, 0))
603 memmove (pstr->mbs, pstr->mbs + offset,
604 pstr->valid_len - offset);
605 pstr->valid_len -= offset;
606 pstr->valid_raw_len -= offset;
607#if DEBUG
608 assert (pstr->valid_len > 0);
609#endif
610 }
611 else
612 {
613 /* No, skip all characters until IDX. */
614#ifdef RE_ENABLE_I18N
615 if (BE (pstr->offsets_needed, 0))
616 {
617 pstr->len = pstr->raw_len - idx + offset;
618 pstr->stop = pstr->raw_stop - idx + offset;
619 pstr->offsets_needed = 0;
620 }
621#endif
622 pstr->valid_len = 0;
623 pstr->valid_raw_len = 0;
624#ifdef RE_ENABLE_I18N
625 if (pstr->mb_cur_max > 1)
626 {
627 Idx wcs_idx;
628 wint_t wc = WEOF;
629
630 if (pstr->is_utf8)
631 {
632 const unsigned char *raw, *p, *q, *end;
633
634 /* Special case UTF-8. Multi-byte chars start with any
635 byte other than 0x80 - 0xbf. */
636 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
637 end = raw + (offset - pstr->mb_cur_max);
638 for (p = raw + offset - 1; p >= end; --p)
639 if ((*p & 0xc0) != 0x80)
640 {
641 mbstate_t cur_state;
642 wchar_t wc2;
643 Idx mlen = raw + pstr->len - p;
644 unsigned char buf[6];
645 size_t mbclen;
646
647 q = p;
648 if (BE (pstr->trans != NULL, 0))
649 {
650 int i = mlen < 6 ? mlen : 6;
651 while (--i >= 0)
652 buf[i] = pstr->trans[p[i]];
653 q = buf;
654 }
655 /* XXX Don't use mbrtowc, we know which conversion
656 to use (UTF-8 -> UCS4). */
657 memset (&cur_state, 0, sizeof (cur_state));
658 mbclen = mbrtowc (&wc2, (const char *) p, mlen,
659 &cur_state);
660 if (raw + offset - p <= mbclen && mbclen < (size_t) -2)
661 {
662 memset (&pstr->cur_state, '\0',
663 sizeof (mbstate_t));
664 pstr->valid_len = mbclen - (raw + offset - p);
665 wc = wc2;
666 }
667 break;
668 }
669 }
670
671 if (wc == WEOF)
672 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
673 if (BE (pstr->valid_len, 0))
674 {
675 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
676 pstr->wcs[wcs_idx] = WEOF;
677 if (pstr->mbs_allocated)
678 memset (pstr->mbs, -1, pstr->valid_len);
679 }
680 pstr->valid_raw_len = pstr->valid_len;
681 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
682 && IS_WIDE_WORD_CHAR (wc))
683 ? CONTEXT_WORD
684 : ((IS_WIDE_NEWLINE (wc)
685 && pstr->newline_anchor)
686 ? CONTEXT_NEWLINE : 0));
687 }
688 else
689#endif /* RE_ENABLE_I18N */
690 {
691 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
692 if (pstr->trans)
693 c = pstr->trans[c];
694 pstr->tip_context = (bitset_contain (pstr->word_char, c)
695 ? CONTEXT_WORD
696 : ((IS_NEWLINE (c) && pstr->newline_anchor)
697 ? CONTEXT_NEWLINE : 0));
698 }
699 }
700 if (!BE (pstr->mbs_allocated, 0))
701 pstr->mbs += offset;
702 }
703 pstr->raw_mbs_idx = idx;
704 pstr->len -= offset;
705 pstr->stop -= offset;
706
707 /* Then build the buffers. */
708#ifdef RE_ENABLE_I18N
709 if (pstr->mb_cur_max > 1)
710 {
711 if (pstr->icase)
712 {
713 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
714 if (BE (ret != REG_NOERROR, 0))
715 return ret;
716 }
717 else
718 build_wcs_buffer (pstr);
719 }
720 else
721#endif /* RE_ENABLE_I18N */
722 if (BE (pstr->mbs_allocated, 0))
723 {
724 if (pstr->icase)
725 build_upper_buffer (pstr);
726 else if (pstr->trans != NULL)
727 re_string_translate_buffer (pstr);
728 }
729 else
730 pstr->valid_len = pstr->len;
731
732 pstr->cur_idx = 0;
733 return REG_NOERROR;
734}
735
736static unsigned char
737internal_function __attribute ((pure))
738re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
739{
740 int ch;
741 Idx off;
742
743 /* Handle the common (easiest) cases first. */
744 if (BE (!pstr->mbs_allocated, 1))
745 return re_string_peek_byte (pstr, idx);
746
747#ifdef RE_ENABLE_I18N
748 if (pstr->mb_cur_max > 1
749 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
750 return re_string_peek_byte (pstr, idx);
751#endif
752
753 off = pstr->cur_idx + idx;
754#ifdef RE_ENABLE_I18N
755 if (pstr->offsets_needed)
756 off = pstr->offsets[off];
757#endif
758
759 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
760
761#ifdef RE_ENABLE_I18N
762 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
763 this function returns CAPITAL LETTER I instead of first byte of
764 DOTLESS SMALL LETTER I. The latter would confuse the parser,
765 since peek_byte_case doesn't advance cur_idx in any way. */
766 if (pstr->offsets_needed && !isascii (ch))
767 return re_string_peek_byte (pstr, idx);
768#endif
769
770 return ch;
771}
772
773static unsigned char
774internal_function __attribute ((pure))
775re_string_fetch_byte_case (re_string_t *pstr)
776{
777 if (BE (!pstr->mbs_allocated, 1))
778 return re_string_fetch_byte (pstr);
779
780#ifdef RE_ENABLE_I18N
781 if (pstr->offsets_needed)
782 {
783 Idx off;
784 int ch;
785
786 /* For tr_TR.UTF-8 [[:islower:]] there is
787 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
788 in that case the whole multi-byte character and return
789 the original letter. On the other side, with
790 [[: DOTLESS SMALL LETTER I return [[:I, as doing
791 anything else would complicate things too much. */
792
793 if (!re_string_first_byte (pstr, pstr->cur_idx))
794 return re_string_fetch_byte (pstr);
795
796 off = pstr->offsets[pstr->cur_idx];
797 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
798
799 if (! isascii (ch))
800 return re_string_fetch_byte (pstr);
801
802 re_string_skip_bytes (pstr,
803 re_string_char_size_at (pstr, pstr->cur_idx));
804 return ch;
805 }
806#endif
807
808 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
809}
810
811static void
812internal_function
813re_string_destruct (re_string_t *pstr)
814{
815#ifdef RE_ENABLE_I18N
816 re_free (pstr->wcs);
817 re_free (pstr->offsets);
818#endif /* RE_ENABLE_I18N */
819 if (pstr->mbs_allocated)
820 re_free (pstr->mbs);
821}
822
823/* Return the context at IDX in INPUT. */
824
825static unsigned int
826internal_function
827re_string_context_at (const re_string_t *input, Idx idx, int eflags)
828{
829 int c;
830 if (BE (! REG_VALID_INDEX (idx), 0))
831 /* In this case, we use the value stored in input->tip_context,
832 since we can't know the character in input->mbs[-1] here. */
833 return input->tip_context;
834 if (BE (idx == input->len, 0))
835 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
836 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
837#ifdef RE_ENABLE_I18N
838 if (input->mb_cur_max > 1)
839 {
840 wint_t wc;
841 Idx wc_idx = idx;
842 while(input->wcs[wc_idx] == WEOF)
843 {
844#ifdef DEBUG
845 /* It must not happen. */
846 assert (REG_VALID_INDEX (wc_idx));
847#endif
848 --wc_idx;
849 if (! REG_VALID_INDEX (wc_idx))
850 return input->tip_context;
851 }
852 wc = input->wcs[wc_idx];
853 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
854 return CONTEXT_WORD;
855 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
856 ? CONTEXT_NEWLINE : 0);
857 }
858 else
859#endif
860 {
861 c = re_string_byte_at (input, idx);
862 if (bitset_contain (input->word_char, c))
863 return CONTEXT_WORD;
864 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
865 }
866}
867
868/* Functions for set operation. */
869
870static reg_errcode_t
871internal_function
872re_node_set_alloc (re_node_set *set, Idx size)
873{
874 set->alloc = size;
875 set->nelem = 0;
876 set->elems = re_xmalloc (Idx, size);
877 if (BE (set->elems == NULL, 0))
878 return REG_ESPACE;
879 return REG_NOERROR;
880}
881
882static reg_errcode_t
883internal_function
884re_node_set_init_1 (re_node_set *set, Idx elem)
885{
886 set->alloc = 1;
887 set->nelem = 1;
888 set->elems = re_malloc (Idx, 1);
889 if (BE (set->elems == NULL, 0))
890 {
891 set->alloc = set->nelem = 0;
892 return REG_ESPACE;
893 }
894 set->elems[0] = elem;
895 return REG_NOERROR;
896}
897
898static reg_errcode_t
899internal_function
900re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
901{
902 set->alloc = 2;
903 set->elems = re_malloc (Idx, 2);
904 if (BE (set->elems == NULL, 0))
905 return REG_ESPACE;
906 if (elem1 == elem2)
907 {
908 set->nelem = 1;
909 set->elems[0] = elem1;
910 }
911 else
912 {
913 set->nelem = 2;
914 if (elem1 < elem2)
915 {
916 set->elems[0] = elem1;
917 set->elems[1] = elem2;
918 }
919 else
920 {
921 set->elems[0] = elem2;
922 set->elems[1] = elem1;
923 }
924 }
925 return REG_NOERROR;
926}
927
928static reg_errcode_t
929internal_function
930re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
931{
932 dest->nelem = src->nelem;
933 if (src->nelem > 0)
934 {
935 dest->alloc = dest->nelem;
936 dest->elems = re_malloc (Idx, dest->alloc);
937 if (BE (dest->elems == NULL, 0))
938 {
939 dest->alloc = dest->nelem = 0;
940 return REG_ESPACE;
941 }
942 memcpy (dest->elems, src->elems, src->nelem * sizeof dest->elems[0]);
943 }
944 else
945 re_node_set_init_empty (dest);
946 return REG_NOERROR;
947}
948
949/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
950 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
951 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
952
953static reg_errcode_t
954internal_function
955re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
956 const re_node_set *src2)
957{
958 Idx i1, i2, is, id, delta, sbase;
959 if (src1->nelem == 0 || src2->nelem == 0)
960 return REG_NOERROR;
961
962 /* We need dest->nelem + 2 * elems_in_intersection; this is a
963 conservative estimate. */
964 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
965 {
966 Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
967 Idx *new_elems;
968 if (sizeof (Idx) < 3
969 && (new_alloc < dest->alloc
970 || ((Idx) (src1->nelem + src2->nelem) < src1->nelem)))
971 return REG_ESPACE;
972 new_elems = re_xrealloc (dest->elems, Idx, new_alloc);
973 if (BE (new_elems == NULL, 0))
974 return REG_ESPACE;
975 dest->elems = new_elems;
976 dest->alloc = new_alloc;
977 }
978
979 /* Find the items in the intersection of SRC1 and SRC2, and copy
980 into the top of DEST those that are not already in DEST itself. */
981 sbase = dest->nelem + src1->nelem + src2->nelem;
982 i1 = src1->nelem - 1;
983 i2 = src2->nelem - 1;
984 id = dest->nelem - 1;
985 for (;;)
986 {
987 if (src1->elems[i1] == src2->elems[i2])
988 {
989 /* Try to find the item in DEST. Maybe we could binary search? */
990 while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
991 --id;
992
993 if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
994 dest->elems[--sbase] = src1->elems[i1];
995
996 if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
997 break;
998 }
999
1000 /* Lower the highest of the two items. */
1001 else if (src1->elems[i1] < src2->elems[i2])
1002 {
1003 if (! REG_VALID_INDEX (--i2))
1004 break;
1005 }
1006 else
1007 {
1008 if (! REG_VALID_INDEX (--i1))
1009 break;
1010 }
1011 }
1012
1013 id = dest->nelem - 1;
1014 is = dest->nelem + src1->nelem + src2->nelem - 1;
1015 delta = is - sbase + 1;
1016
1017 /* Now copy. When DELTA becomes zero, the remaining
1018 DEST elements are already in place; this is more or
1019 less the same loop that is in re_node_set_merge. */
1020 dest->nelem += delta;
1021 if (delta > 0 && REG_VALID_INDEX (id))
1022 for (;;)
1023 {
1024 if (dest->elems[is] > dest->elems[id])
1025 {
1026 /* Copy from the top. */
1027 dest->elems[id + delta--] = dest->elems[is--];
1028 if (delta == 0)
1029 break;
1030 }
1031 else
1032 {
1033 /* Slide from the bottom. */
1034 dest->elems[id + delta] = dest->elems[id];
1035 if (! REG_VALID_INDEX (--id))
1036 break;
1037 }
1038 }
1039
1040 /* Copy remaining SRC elements. */
1041 memcpy (dest->elems, dest->elems + sbase, delta * sizeof dest->elems[0]);
1042
1043 return REG_NOERROR;
1044}
1045
1046/* Calculate the union set of the sets SRC1 and SRC2. And store it to
1047 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1048
1049static reg_errcode_t
1050internal_function
1051re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1052 const re_node_set *src2)
1053{
1054 Idx i1, i2, id;
1055 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1056 {
1057 dest->alloc = src1->nelem + src2->nelem;
1058 if (sizeof (Idx) < 2 && dest->alloc < src1->nelem)
1059 return REG_ESPACE;
1060 dest->elems = re_xmalloc (Idx, dest->alloc);
1061 if (BE (dest->elems == NULL, 0))
1062 return REG_ESPACE;
1063 }
1064 else
1065 {
1066 if (src1 != NULL && src1->nelem > 0)
1067 return re_node_set_init_copy (dest, src1);
1068 else if (src2 != NULL && src2->nelem > 0)
1069 return re_node_set_init_copy (dest, src2);
1070 else
1071 re_node_set_init_empty (dest);
1072 return REG_NOERROR;
1073 }
1074 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1075 {
1076 if (src1->elems[i1] > src2->elems[i2])
1077 {
1078 dest->elems[id++] = src2->elems[i2++];
1079 continue;
1080 }
1081 if (src1->elems[i1] == src2->elems[i2])
1082 ++i2;
1083 dest->elems[id++] = src1->elems[i1++];
1084 }
1085 if (i1 < src1->nelem)
1086 {
1087 memcpy (dest->elems + id, src1->elems + i1,
1088 (src1->nelem - i1) * sizeof dest->elems[0]);
1089 id += src1->nelem - i1;
1090 }
1091 else if (i2 < src2->nelem)
1092 {
1093 memcpy (dest->elems + id, src2->elems + i2,
1094 (src2->nelem - i2) * sizeof dest->elems[0]);
1095 id += src2->nelem - i2;
1096 }
1097 dest->nelem = id;
1098 return REG_NOERROR;
1099}
1100
1101/* Calculate the union set of the sets DEST and SRC. And store it to
1102 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1103
1104static reg_errcode_t
1105internal_function
1106re_node_set_merge (re_node_set *dest, const re_node_set *src)
1107{
1108 Idx is, id, sbase, delta;
1109 if (src == NULL || src->nelem == 0)
1110 return REG_NOERROR;
1111 if (sizeof (Idx) < 3
1112 && ((Idx) (2 * src->nelem) < src->nelem
1113 || (Idx) (2 * src->nelem + dest->nelem) < dest->nelem))
1114 return REG_ESPACE;
1115 if (dest->alloc < 2 * src->nelem + dest->nelem)
1116 {
1117 Idx new_alloc = src->nelem + dest->alloc;
1118 Idx *new_buffer;
1119 if (sizeof (Idx) < 4 && new_alloc < dest->alloc)
1120 return REG_ESPACE;
1121 new_buffer = re_x2realloc (dest->elems, Idx, &new_alloc);
1122 if (BE (new_buffer == NULL, 0))
1123 return REG_ESPACE;
1124 dest->elems = new_buffer;
1125 dest->alloc = new_alloc;
1126 }
1127
1128 if (BE (dest->nelem == 0, 0))
1129 {
1130 dest->nelem = src->nelem;
1131 memcpy (dest->elems, src->elems, src->nelem * sizeof dest->elems[0]);
1132 return REG_NOERROR;
1133 }
1134
1135 /* Copy into the top of DEST the items of SRC that are not
1136 found in DEST. Maybe we could binary search in DEST? */
1137 for (sbase = dest->nelem + 2 * src->nelem,
1138 is = src->nelem - 1, id = dest->nelem - 1;
1139 REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
1140 {
1141 if (dest->elems[id] == src->elems[is])
1142 is--, id--;
1143 else if (dest->elems[id] < src->elems[is])
1144 dest->elems[--sbase] = src->elems[is--];
1145 else /* if (dest->elems[id] > src->elems[is]) */
1146 --id;
1147 }
1148
1149 if (REG_VALID_INDEX (is))
1150 {
1151 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1152 sbase -= is + 1;
1153 memcpy (dest->elems + sbase, src->elems,
1154 (is + 1) * sizeof dest->elems[0]);
1155 }
1156
1157 id = dest->nelem - 1;
1158 is = dest->nelem + 2 * src->nelem - 1;
1159 delta = is - sbase + 1;
1160 if (delta == 0)
1161 return REG_NOERROR;
1162
1163 /* Now copy. When DELTA becomes zero, the remaining
1164 DEST elements are already in place. */
1165 dest->nelem += delta;
1166 for (;;)
1167 {
1168 if (dest->elems[is] > dest->elems[id])
1169 {
1170 /* Copy from the top. */
1171 dest->elems[id + delta--] = dest->elems[is--];
1172 if (delta == 0)
1173 break;
1174 }
1175 else
1176 {
1177 /* Slide from the bottom. */
1178 dest->elems[id + delta] = dest->elems[id];
1179 if (! REG_VALID_INDEX (--id))
1180 {
1181 /* Copy remaining SRC elements. */
1182 memcpy (dest->elems, dest->elems + sbase,
1183 delta * sizeof dest->elems[0]);
1184 break;
1185 }
1186 }
1187 }
1188
1189 return REG_NOERROR;
1190}
1191
1192/* Insert the new element ELEM to the re_node_set* SET.
1193 SET should not already have ELEM.
1194 Return true if successful. */
1195
1196static bool
1197internal_function
1198re_node_set_insert (re_node_set *set, Idx elem)
1199{
1200 Idx idx;
1201 /* In case the set is empty. */
1202 if (set->alloc == 0)
1203 return re_node_set_init_1 (set, elem) == REG_NOERROR;
1204
1205 if (BE (set->nelem, 0) == 0)
1206 {
1207 /* We already guaranteed above that set->alloc != 0. */
1208 set->elems[0] = elem;
1209 ++set->nelem;
1210 return true;
1211 }
1212
1213 /* Realloc if we need. */
1214 if (set->alloc == set->nelem)
1215 {
1216 Idx *new_elems = re_x2realloc (set->elems, Idx, &set->alloc);
1217 if (BE (new_elems == NULL, 0))
1218 return false;
1219 set->elems = new_elems;
1220 }
1221
1222 /* Move the elements which follows the new element. Test the
1223 first element separately to skip a check in the inner loop. */
1224 if (elem < set->elems[0])
1225 {
1226 idx = 0;
1227 for (idx = set->nelem; idx > 0; idx--)
1228 set->elems[idx] = set->elems[idx - 1];
1229 }
1230 else
1231 {
1232 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1233 set->elems[idx] = set->elems[idx - 1];
1234 }
1235
1236 /* Insert the new element. */
1237 set->elems[idx] = elem;
1238 ++set->nelem;
1239 return true;
1240}
1241
1242/* Insert the new element ELEM to the re_node_set* SET.
1243 SET should not already have any element greater than or equal to ELEM.
1244 Return true if successful. */
1245
1246static bool
1247internal_function
1248re_node_set_insert_last (re_node_set *set, Idx elem)
1249{
1250 /* Realloc if we need. */
1251 if (set->alloc == set->nelem)
1252 {
1253 Idx *new_elems;
1254 new_elems = re_x2realloc (set->elems, Idx, &set->alloc);
1255 if (BE (new_elems == NULL, 0))
1256 return false;
1257 set->elems = new_elems;
1258 }
1259
1260 /* Insert the new element. */
1261 set->elems[set->nelem++] = elem;
1262 return true;
1263}
1264
1265/* Compare two node sets SET1 and SET2.
1266 Return true if SET1 and SET2 are equivalent. */
1267
1268static bool
1269internal_function __attribute ((pure))
1270re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1271{
1272 Idx i;
1273 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1274 return false;
1275 for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
1276 if (set1->elems[i] != set2->elems[i])
1277 return false;
1278 return true;
1279}
1280
1281/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1282
1283static Idx
1284internal_function __attribute ((pure))
1285re_node_set_contains (const re_node_set *set, Idx elem)
1286{
1287 __re_size_t idx, right, mid;
1288 if (! REG_VALID_NONZERO_INDEX (set->nelem))
1289 return 0;
1290
1291 /* Binary search the element. */
1292 idx = 0;
1293 right = set->nelem - 1;
1294 while (idx < right)
1295 {
1296 mid = (idx + right) / 2;
1297 if (set->elems[mid] < elem)
1298 idx = mid + 1;
1299 else
1300 right = mid;
1301 }
1302 return set->elems[idx] == elem ? idx + 1 : 0;
1303}
1304
1305static void
1306internal_function
1307re_node_set_remove_at (re_node_set *set, Idx idx)
1308{
1309 if (idx < 0 || idx >= set->nelem)
1310 return;
1311 --set->nelem;
1312 for (; idx < set->nelem; idx++)
1313 set->elems[idx] = set->elems[idx + 1];
1314}
1315
1316
1317/* Add the token TOKEN to dfa->nodes, and return the index of the token.
1318 Or return REG_MISSING if an error occurred. */
1319
1320static Idx
1321internal_function
1322re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1323{
1324 int type = token.type;
1325 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1326 {
1327 Idx new_nodes_alloc = dfa->nodes_alloc;
1328 Idx *new_nexts, *new_indices;
1329 re_node_set *new_edests, *new_eclosures;
1330
1331 re_token_t *new_nodes = re_x2realloc (dfa->nodes, re_token_t,
1332 &new_nodes_alloc);
1333 if (BE (new_nodes == NULL, 0))
1334 return REG_MISSING;
1335 dfa->nodes = new_nodes;
1336 new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1337 new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
1338 new_edests = re_xrealloc (dfa->edests, re_node_set, new_nodes_alloc);
1339 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1340 if (BE (new_nexts == NULL || new_indices == NULL
1341 || new_edests == NULL || new_eclosures == NULL, 0))
1342 return REG_MISSING;
1343 dfa->nexts = new_nexts;
1344 dfa->org_indices = new_indices;
1345 dfa->edests = new_edests;
1346 dfa->eclosures = new_eclosures;
1347 dfa->nodes_alloc = new_nodes_alloc;
1348 }
1349 dfa->nodes[dfa->nodes_len] = token;
1350 dfa->nodes[dfa->nodes_len].constraint = 0;
1351#ifdef RE_ENABLE_I18N
1352 dfa->nodes[dfa->nodes_len].accept_mb =
1353 (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1354#endif
1355 dfa->nexts[dfa->nodes_len] = REG_MISSING;
1356 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1357 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1358 return dfa->nodes_len++;
1359}
1360
1361static inline re_hashval_t
1362internal_function
1363calc_state_hash (const re_node_set *nodes, unsigned int context)
1364{
1365 re_hashval_t hash = nodes->nelem + context;
1366 Idx i;
1367 for (i = 0 ; i < nodes->nelem ; i++)
1368 hash += nodes->elems[i];
1369 return hash;
1370}
1371
1372/* Search for the state whose node_set is equivalent to NODES.
1373 Return the pointer to the state, if we found it in the DFA.
1374 Otherwise create the new one and return it. In case of an error
1375 return NULL and set the error code in ERR.
1376 Note: - We assume NULL as the invalid state, then it is possible that
1377 return value is NULL and ERR is REG_NOERROR.
1378 - We never return non-NULL value in case of any errors, it is for
1379 optimization. */
1380
1381static re_dfastate_t*
1382internal_function
1383re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa, const re_node_set *nodes)
1384{
1385 re_hashval_t hash;
1386 re_dfastate_t *new_state;
1387 struct re_state_table_entry *spot;
1388 Idx i;
1389#ifdef lint
1390 /* Suppress bogus uninitialized-variable warnings. */
1391 *err = REG_NOERROR;
1392#endif
1393 if (BE (nodes->nelem == 0, 0))
1394 {
1395 *err = REG_NOERROR;
1396 return NULL;
1397 }
1398 hash = calc_state_hash (nodes, 0);
1399 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1400
1401 for (i = 0 ; i < spot->num ; i++)
1402 {
1403 re_dfastate_t *state = spot->array[i];
1404 if (hash != state->hash)
1405 continue;
1406 if (re_node_set_compare (&state->nodes, nodes))
1407 return state;
1408 }
1409
1410 /* There are no appropriate state in the dfa, create the new one. */
1411 new_state = create_ci_newstate (dfa, nodes, hash);
1412 if (BE (new_state != NULL, 1))
1413 return new_state;
1414 else
1415 {
1416 *err = REG_ESPACE;
1417 return NULL;
1418 }
1419}
1420
1421/* Search for the state whose node_set is equivalent to NODES and
1422 whose context is equivalent to CONTEXT.
1423 Return the pointer to the state, if we found it in the DFA.
1424 Otherwise create the new one and return it. In case of an error
1425 return NULL and set the error code in ERR.
1426 Note: - We assume NULL as the invalid state, then it is possible that
1427 return value is NULL and ERR is REG_NOERROR.
1428 - We never return non-NULL value in case of any errors, it is for
1429 optimization. */
1430
1431static re_dfastate_t*
1432internal_function
1433re_acquire_state_context (reg_errcode_t *err, re_dfa_t *dfa,
1434 const re_node_set *nodes, unsigned int context)
1435{
1436 re_hashval_t hash;
1437 re_dfastate_t *new_state;
1438 struct re_state_table_entry *spot;
1439 Idx i;
1440#ifdef lint
1441 /* Suppress bogus uninitialized-variable warnings. */
1442 *err = REG_NOERROR;
1443#endif
1444 if (nodes->nelem == 0)
1445 {
1446 *err = REG_NOERROR;
1447 return NULL;
1448 }
1449 hash = calc_state_hash (nodes, context);
1450 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1451
1452 for (i = 0 ; i < spot->num ; i++)
1453 {
1454 re_dfastate_t *state = spot->array[i];
1455 if (state->hash == hash
1456 && state->context == context
1457 && re_node_set_compare (state->entrance_nodes, nodes))
1458 return state;
1459 }
1460 /* There are no appropriate state in `dfa', create the new one. */
1461 new_state = create_cd_newstate (dfa, nodes, context, hash);
1462 if (BE (new_state != NULL, 1))
1463 return new_state;
1464 else
1465 {
1466 *err = REG_ESPACE;
1467 return NULL;
1468 }
1469}
1470
1471/* Finish initialization of the new state NEWSTATE, and using its hash value
1472 HASH put in the appropriate bucket of DFA's state table. Return value
1473 indicates the error code if failed. */
1474
1475static reg_errcode_t
1476internal_function
1477register_state (const re_dfa_t *dfa, re_dfastate_t *newstate, re_hashval_t hash)
1478{
1479 struct re_state_table_entry *spot;
1480 reg_errcode_t err;
1481 Idx i;
1482
1483 newstate->hash = hash;
1484 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1485 if (BE (err != REG_NOERROR, 0))
1486 return REG_ESPACE;
1487 for (i = 0; i < newstate->nodes.nelem; i++)
1488 {
1489 Idx elem = newstate->nodes.elems[i];
1490 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1491 {
1492 bool ok = re_node_set_insert_last (&newstate->non_eps_nodes, elem);
1493 if (BE (! ok, 0))
1494 return REG_ESPACE;
1495 }
1496 }
1497
1498 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1499 if (BE (spot->alloc <= spot->num, 0))
1500 {
1501 Idx new_alloc = spot->num;
1502 re_dfastate_t **new_array = re_x2realloc (spot->array, re_dfastate_t *,
1503 &new_alloc);
1504 if (BE (new_array == NULL, 0))
1505 return REG_ESPACE;
1506 spot->array = new_array;
1507 spot->alloc = new_alloc;
1508 }
1509 spot->array[spot->num++] = newstate;
1510 return REG_NOERROR;
1511}
1512
1513/* Create the new state which is independ of contexts.
1514 Return the new state if succeeded, otherwise return NULL. */
1515
1516static re_dfastate_t *
1517internal_function
1518create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1519 re_hashval_t hash)
1520{
1521 Idx i;
1522 reg_errcode_t err;
1523 re_dfastate_t *newstate;
1524
1525 newstate = re_calloc (re_dfastate_t, 1);
1526 if (BE (newstate == NULL, 0))
1527 return NULL;
1528 err = re_node_set_init_copy (&newstate->nodes, nodes);
1529 if (BE (err != REG_NOERROR, 0))
1530 {
1531 re_free (newstate);
1532 return NULL;
1533 }
1534
1535 newstate->entrance_nodes = &newstate->nodes;
1536 for (i = 0 ; i < nodes->nelem ; i++)
1537 {
1538 re_token_t *node = dfa->nodes + nodes->elems[i];
1539 re_token_type_t type = node->type;
1540 if (type == CHARACTER && !node->constraint)
1541 continue;
1542#ifdef RE_ENABLE_I18N
1543 newstate->accept_mb |= node->accept_mb;
1544#endif /* RE_ENABLE_I18N */
1545
1546 /* If the state has the halt node, the state is a halt state. */
1547 if (type == END_OF_RE)
1548 newstate->halt = 1;
1549 else if (type == OP_BACK_REF)
1550 newstate->has_backref = 1;
1551 else if (type == ANCHOR || node->constraint)
1552 newstate->has_constraint = 1;
1553 }
1554 err = register_state (dfa, newstate, hash);
1555 if (BE (err != REG_NOERROR, 0))
1556 {
1557 free_state (newstate);
1558 newstate = NULL;
1559 }
1560 return newstate;
1561}
1562
1563/* Create the new state which is depend on the context CONTEXT.
1564 Return the new state if succeeded, otherwise return NULL. */
1565
1566static re_dfastate_t *
1567internal_function
1568create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1569 unsigned int context, re_hashval_t hash)
1570{
1571 Idx i, nctx_nodes = 0;
1572 reg_errcode_t err;
1573 re_dfastate_t *newstate;
1574
1575 newstate = re_calloc (re_dfastate_t, 1);
1576 if (BE (newstate == NULL, 0))
1577 return NULL;
1578 err = re_node_set_init_copy (&newstate->nodes, nodes);
1579 if (BE (err != REG_NOERROR, 0))
1580 {
1581 re_free (newstate);
1582 return NULL;
1583 }
1584
1585 newstate->context = context;
1586 newstate->entrance_nodes = &newstate->nodes;
1587
1588 for (i = 0 ; i < nodes->nelem ; i++)
1589 {
1590 unsigned int constraint = 0;
1591 re_token_t *node = dfa->nodes + nodes->elems[i];
1592 re_token_type_t type = node->type;
1593 if (node->constraint)
1594 constraint = node->constraint;
1595
1596 if (type == CHARACTER && !constraint)
1597 continue;
1598#ifdef RE_ENABLE_I18N
1599 newstate->accept_mb |= node->accept_mb;
1600#endif /* RE_ENABLE_I18N */
1601
1602 /* If the state has the halt node, the state is a halt state. */
1603 if (type == END_OF_RE)
1604 newstate->halt = 1;
1605 else if (type == OP_BACK_REF)
1606 newstate->has_backref = 1;
1607 else if (type == ANCHOR)
1608 constraint = node->opr.ctx_type;
1609
1610 if (constraint)
1611 {
1612 if (newstate->entrance_nodes == &newstate->nodes)
1613 {
1614 newstate->entrance_nodes = re_malloc (re_node_set, 1);
1615 if (BE (newstate->entrance_nodes == NULL, 0))
1616 {
1617 free_state (newstate);
1618 return NULL;
1619 }
1620 re_node_set_init_copy (newstate->entrance_nodes, nodes);
1621 nctx_nodes = 0;
1622 newstate->has_constraint = 1;
1623 }
1624
1625 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1626 {
1627 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1628 ++nctx_nodes;
1629 }
1630 }
1631 }
1632 err = register_state (dfa, newstate, hash);
1633 if (BE (err != REG_NOERROR, 0))
1634 {
1635 free_state (newstate);
1636 newstate = NULL;
1637 }
1638 return newstate;
1639}
1640
1641static void
1642internal_function
1643free_state (re_dfastate_t *state)
1644{
1645 re_node_set_free (&state->non_eps_nodes);
1646 re_node_set_free (&state->inveclosure);
1647 if (state->entrance_nodes != &state->nodes)
1648 {
1649 re_node_set_free (state->entrance_nodes);
1650 re_free (state->entrance_nodes);
1651 }
1652 re_node_set_free (&state->nodes);
1653 re_free (state->word_trtable);
1654 re_free (state->trtable);
1655 re_free (state);
1656}
diff --git a/lib/regex_internal.h b/lib/regex_internal.h
deleted file mode 100644
index a36ae4c8..00000000
--- a/lib/regex_internal.h
+++ /dev/null
@@ -1,911 +0,0 @@
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License along
17 with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20#ifndef _REGEX_INTERNAL_H
21#define _REGEX_INTERNAL_H 1
22
23#include <assert.h>
24#include <ctype.h>
25#include <stdbool.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29
30#ifndef _LIBC
31# include "strcase.h"
32#endif
33
34#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
35# include <langinfo.h>
36#endif
37#if defined HAVE_LOCALE_H || defined _LIBC
38# include <locale.h>
39#endif
40#if defined HAVE_WCHAR_H || defined _LIBC
41# include <wchar.h>
42#endif /* HAVE_WCHAR_H || _LIBC */
43#if defined HAVE_WCTYPE_H || defined _LIBC
44# include <wctype.h>
45#endif /* HAVE_WCTYPE_H || _LIBC */
46#if defined _LIBC
47# include <bits/libc-lock.h>
48#else
49# define __libc_lock_define(CLASS,NAME)
50# define __libc_lock_init(NAME) do { } while (0)
51# define __libc_lock_lock(NAME) do { } while (0)
52# define __libc_lock_unlock(NAME) do { } while (0)
53#endif
54
55/* In case that the system doesn't have isblank(). */
56#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
57# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
58#endif
59
60#ifdef _LIBC
61# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
62# define _RE_DEFINE_LOCALE_FUNCTIONS 1
63# include <locale/localeinfo.h>
64# include <locale/elem-hash.h>
65# include <locale/coll-lookup.h>
66# endif
67#endif
68
69/* This is for other GNU distributions with internationalized messages. */
70#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
71# include <libintl.h>
72# ifdef _LIBC
73# undef gettext
74# define gettext(msgid) \
75 INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
76# endif
77#else
78# define gettext(msgid) (msgid)
79#endif
80
81#ifndef gettext_noop
82/* This define is so xgettext can find the internationalizable
83 strings. */
84# define gettext_noop(String) String
85#endif
86
87#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
88# define RE_ENABLE_I18N
89#endif
90
/* BE (EXPR, VAL) evaluates to EXPR.  With GCC 3 or newer it additionally
   tells the compiler, through __builtin_expect, that EXPR is expected to
   equal VAL; with other compilers it is just EXPR.  */
91#if __GNUC__ >= 3
92# define BE(expr, val) __builtin_expect (expr, val)
93#else
94# define BE(expr, val) (expr)
95#endif
96
97/* Number of single-byte characters. */
98#define SBC_MAX 256
99
100#define COLL_ELEM_LEN_MAX 8
101
102/* The character which represents newline. */
103#define NEWLINE_CHAR '\n'
104#define WIDE_NEWLINE_CHAR L'\n'
105
106/* Rename to standard API for using out of glibc. */
107#ifndef _LIBC
108# define __wctype wctype
109# define __iswctype iswctype
110# define __btowc btowc
111# ifndef __mempcpy
112# define __mempcpy mempcpy
113# endif
114# define __wcrtomb wcrtomb
115# define __regfree regfree
116# define attribute_hidden
117#endif /* not _LIBC */
118
119#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
120# define __attribute(arg) __attribute__ (arg)
121#else
122# define __attribute(arg)
123#endif
124
125extern const char __re_error_msgid[] attribute_hidden;
126extern const size_t __re_error_msgid_idx[] attribute_hidden;
127
128typedef __re_idx_t Idx;
129
130/* Special return value for failure to match. */
131#define REG_MISSING ((Idx) -1)
132
133/* Special return value for internal error. */
134#define REG_ERROR ((Idx) -2)
135
136/* Test whether N is a valid index, and is not one of the above. */
137#ifdef _REGEX_LARGE_OFFSETS
138# define REG_VALID_INDEX(n) ((Idx) (n) < REG_ERROR)
139#else
140# define REG_VALID_INDEX(n) (0 <= (n))
141#endif
142
143/* Test whether N is a valid nonzero index. */
144#ifdef _REGEX_LARGE_OFFSETS
145# define REG_VALID_NONZERO_INDEX(n) ((Idx) ((n) - 1) < (Idx) (REG_ERROR - 1))
146#else
147# define REG_VALID_NONZERO_INDEX(n) (0 < (n))
148#endif
149
150/* A hash value, suitable for computing hash tables. */
151typedef __re_size_t re_hashval_t;
152
153/* An integer used to represent a set of bits. It must be unsigned,
154 and must be at least as wide as unsigned int. */
155typedef unsigned long int bitset_word;
156
157/* Maximum value of a bitset word. It must be useful in preprocessor
158 contexts, and must be consistent with bitset_word. */
159#define BITSET_WORD_MAX ULONG_MAX
160
161/* Number of bits in a bitset word. Avoid greater-than-32-bit
162 integers and unconditional shifts by more than 31 bits, as they're
163 not portable. */
164#if BITSET_WORD_MAX == 0xffffffff
165# define BITSET_WORD_BITS 32
166#elif BITSET_WORD_MAX >> 31 >> 5 == 1
167# define BITSET_WORD_BITS 36
168#elif BITSET_WORD_MAX >> 31 >> 16 == 1
169# define BITSET_WORD_BITS 48
170#elif BITSET_WORD_MAX >> 31 >> 28 == 1
171# define BITSET_WORD_BITS 60
172#elif BITSET_WORD_MAX >> 31 >> 31 >> 1 == 1
173# define BITSET_WORD_BITS 64
174#elif BITSET_WORD_MAX >> 31 >> 31 >> 9 == 1
175# define BITSET_WORD_BITS 72
176#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 3 == 1
177# define BITSET_WORD_BITS 128
178#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 == 1
179# define BITSET_WORD_BITS 256
180#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 > 1
181# define BITSET_WORD_BITS 257 /* any value > SBC_MAX will do here */
182# if BITSET_WORD_BITS <= SBC_MAX
183# error "Invalid SBC_MAX"
184# endif
185#else
186# error "Add case for new bitset_word size"
187#endif
188
189/* Number of bitset words in a bitset. */
190#define BITSET_WORDS ((SBC_MAX + BITSET_WORD_BITS - 1) / BITSET_WORD_BITS)
191
192typedef bitset_word bitset[BITSET_WORDS];
193typedef bitset_word *re_bitset_ptr_t;
194typedef const bitset_word *re_const_bitset_ptr_t;
195
/* Context constraint flags.  Each flag occupies its own bit, so they
   can be OR-ed together; ten bits are used in total.  */
196#define PREV_WORD_CONSTRAINT 0x0001
197#define PREV_NOTWORD_CONSTRAINT 0x0002
198#define NEXT_WORD_CONSTRAINT 0x0004
199#define NEXT_NOTWORD_CONSTRAINT 0x0008
200#define PREV_NEWLINE_CONSTRAINT 0x0010
201#define NEXT_NEWLINE_CONSTRAINT 0x0020
202#define PREV_BEGBUF_CONSTRAINT 0x0040
203#define NEXT_ENDBUF_CONSTRAINT 0x0080
204#define WORD_DELIM_CONSTRAINT 0x0100
205#define NOT_WORD_DELIM_CONSTRAINT 0x0200
206
/* Named contexts, each defined as one constraint flag or as the
   combination of a PREV_* flag with a NEXT_* flag.  This is the type of
   the opr.ctx_type member that re_token_t provides for ANCHOR tokens.  */
207typedef enum
208{
209 INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
210 WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
211 WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
212 INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
213 LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
214 LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
215 BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
216 BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
217 WORD_DELIM = WORD_DELIM_CONSTRAINT,
218 NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
219} re_context_type;
220
/* A set of node indices stored in a growable array.  The set functions
   keep ELEMS in ascending order (re_node_set_insert shifts elements to
   keep it so, and re_node_set_contains binary-searches it).  */
221typedef struct
222{
223 Idx alloc; /* Number of slots allocated in ELEMS. */
224 Idx nelem; /* Number of slots of ELEMS currently in use. */
225 Idx *elems; /* The elements themselves. */
226} re_node_set;
227
/* Kinds of tokens, nodes and trees.  The values are laid out in groups:
   1..7 are plain node types, the values with EPSILON_BIT set (8..12)
   are the node types that IS_EPSILON_NODE recognizes, 16..17 are used
   only by trees, and 18 onwards are used only by tokens.  */
228typedef enum
229{
230 NON_TYPE = 0,
231
232 /* Node types; these are used by token, node, tree. */
233 CHARACTER = 1,
234 END_OF_RE = 2,
235 SIMPLE_BRACKET = 3,
236 OP_BACK_REF = 4,
237 OP_PERIOD = 5,
238#ifdef RE_ENABLE_I18N
239 COMPLEX_BRACKET = 6,
240 OP_UTF8_PERIOD = 7,
241#endif /* RE_ENABLE_I18N */
242
243 /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
244 when the debugger shows values of this enum type. */
245#define EPSILON_BIT 8
246 OP_OPEN_SUBEXP = EPSILON_BIT | 0,
247 OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
248 OP_ALT = EPSILON_BIT | 2,
249 OP_DUP_ASTERISK = EPSILON_BIT | 3,
250 ANCHOR = EPSILON_BIT | 4,
251
252 /* Tree types; these are used only by tree. */
253 CONCAT = 16,
254 SUBEXP = 17,
255
256 /* Token types; these are used only by token. */
257 OP_DUP_PLUS = 18,
258 OP_DUP_QUESTION,
259 OP_OPEN_BRACKET,
260 OP_CLOSE_BRACKET,
261 OP_CHARSET_RANGE,
262 OP_OPEN_DUP_NUM,
263 OP_CLOSE_DUP_NUM,
264 OP_NON_MATCH_LIST,
265 OP_OPEN_COLL_ELEM,
266 OP_CLOSE_COLL_ELEM,
267 OP_OPEN_EQUIV_CLASS,
268 OP_CLOSE_EQUIV_CLASS,
269 OP_OPEN_CHAR_CLASS,
270 OP_CLOSE_CHAR_CLASS,
271 OP_WORD,
272 OP_NOTWORD,
273 OP_SPACE,
274 OP_NOTSPACE,
275 BACK_SLASH
276
277} re_token_type_t;
278
/* Description of a character set, defined only when RE_ENABLE_I18N is
   set.  A COMPLEX_BRACKET token refers to one through opr.mbcset in
   re_token_t.  Each kind of member is kept in its own array, and each
   array has a matching element count further down.  */
279#ifdef RE_ENABLE_I18N
280typedef struct
281{
282 /* Multibyte characters. */
283 wchar_t *mbchars;
284
285 /* Collating symbols. */
286# ifdef _LIBC
287 int32_t *coll_syms;
288# endif
289
290 /* Equivalence classes. */
291# ifdef _LIBC
292 int32_t *equiv_classes;
293# endif
294
295 /* Range expressions. */
296# ifdef _LIBC
297 uint32_t *range_starts;
298 uint32_t *range_ends;
299# else /* not _LIBC */
300 wchar_t *range_starts;
301 wchar_t *range_ends;
302# endif /* not _LIBC */
303
304 /* Character classes. */
305 wctype_t *char_classes;
306
307 /* If this character set is the non-matching list. */
308 unsigned int non_match : 1;
309
310 /* # of multibyte characters. */
311 Idx nmbchars;
312
313 /* # of collating symbols. */
314 Idx ncoll_syms;
315
316 /* # of equivalence classes. */
317 Idx nequiv_classes;
318
319 /* # of range expressions. */
320 Idx nranges;
321
322 /* # of character classes. */
323 Idx nchar_classes;
324} re_charset_t;
325#endif /* RE_ENABLE_I18N */
326
/* One token/node: a type tag, a type-dependent operand (OPR) and a set
   of flag bits.  Which union member is meaningful depends on TYPE, as
   noted on each member. */
327typedef struct
328{
329 union
330 {
331 unsigned char c; /* for CHARACTER */
332 re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */
333#ifdef RE_ENABLE_I18N
334 re_charset_t *mbcset; /* for COMPLEX_BRACKET */
335#endif /* RE_ENABLE_I18N */
336 Idx idx; /* for BACK_REF */
337 re_context_type ctx_type; /* for ANCHOR */
338 } opr;
/* With GCC the enum is stored in an 8-bit bit-field; other compilers get
   a plain enum member. */
339#if __GNUC__ >= 2
340 re_token_type_t type : 8;
341#else
342 re_token_type_t type;
343#endif
344 unsigned int constraint : 10; /* context constraint */
345 unsigned int duplicated : 1;
346 unsigned int opt_subexp : 1;
347#ifdef RE_ENABLE_I18N
348 unsigned int accept_mb : 1;
349 /* These 2 bits can be moved into the union if needed (e.g. if running out
350 of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */
351 unsigned int mb_partial : 1;
352#endif
353 unsigned int word_char : 1;
354} re_token_t;
355
/* Nonzero when TYPE has EPSILON_BIT set, i.e. it is one of
   OP_OPEN_SUBEXP, OP_CLOSE_SUBEXP, OP_ALT, OP_DUP_ASTERISK or ANCHOR. */
356#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
357
/* State of the input string being scanned: the caller's raw buffer, the
   working copy the matcher reads from (MBS, plus WCS when RE_ENABLE_I18N
   is defined), and the indices and lengths that relate the two. */
358struct re_string_t
359{
360 /* Indicate the raw buffer which is the original string passed as an
361 argument of regexec(), re_search(), etc.. */
362 const unsigned char *raw_mbs;
363 /* Store the multibyte string. In case of "case insensitive mode" like
364 REG_ICASE, upper cases of the string are stored, otherwise MBS points
365 the same address that RAW_MBS points. */
366 unsigned char *mbs;
367#ifdef RE_ENABLE_I18N
368 /* Store the wide character string which is corresponding to MBS. */
369 wint_t *wcs;
370 Idx *offsets; /* NOTE(review): looks like a per-position map back into RAW_MBS (see STOP below) -- confirm */
371 mbstate_t cur_state;
372#endif
373 /* Index in RAW_MBS. Each character mbs[i] corresponds to
374 raw_mbs[raw_mbs_idx + i]. */
375 Idx raw_mbs_idx;
376 /* The length of the valid characters in the buffers. */
377 Idx valid_len;
378 /* The corresponding number of bytes in raw_mbs array. */
379 Idx valid_raw_len;
380 /* The length of the buffers MBS and WCS. */
381 Idx bufs_len;
382 /* The index in MBS, which is updated by re_string_fetch_byte. */
383 Idx cur_idx;
384 /* length of RAW_MBS array. */
385 Idx raw_len;
386 /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */
387 Idx len;
388 /* End of the buffer may be shorter than its length in the cases such
389 as re_match_2, re_search_2. Then, we use STOP for end of the buffer
390 instead of LEN. */
391 Idx raw_stop;
392 /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */
393 Idx stop;
394
395 /* The context of mbs[0]. We store the context independently, since
396 the context of mbs[0] may be different from raw_mbs[0], which is
397 the beginning of the input string. */
398 unsigned int tip_context;
399 /* The translation passed as a part of an argument of re_compile_pattern. */
400 unsigned REG_TRANSLATE_TYPE trans;
401 /* Copy of re_dfa_t's word_char. */
402 re_const_bitset_ptr_t word_char;
403 /* true if REG_ICASE. */
404 unsigned char icase;
/* NOTE(review): is_utf8, map_notascii and word_ops_used share their names
   with re_dfa_t bit-fields and are presumably copies of them -- confirm
   where the string is initialized. */
405 unsigned char is_utf8;
406 unsigned char map_notascii;
407 unsigned char mbs_allocated; /* presumably nonzero when MBS is a separate allocation rather than an alias of RAW_MBS -- confirm */
408 unsigned char offsets_needed;
409 unsigned char newline_anchor;
410 unsigned char word_ops_used;
411 int mb_cur_max;
412};
413typedef struct re_string_t re_string_t;
414
415
/* re_dfa_t is only forward-declared here; the full definition appears
   further down in this header. */
416struct re_dfa_t;
417typedef struct re_dfa_t re_dfa_t;
418
/* Outside glibc this header supplies internal_function itself: on i386 it
   requests the regparm(3)/stdcall calling convention, and on every other
   target it expands to nothing. */
419#ifndef _LIBC
420# ifdef __i386__
421# define internal_function __attribute ((regparm (3), stdcall))
422# else
423# define internal_function
424# endif
425#endif
426
427static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
428 Idx new_buf_len)
429 internal_function;
430#ifdef RE_ENABLE_I18N
431static void build_wcs_buffer (re_string_t *pstr) internal_function;
432static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr)
433 internal_function;
434#endif /* RE_ENABLE_I18N */
435static void build_upper_buffer (re_string_t *pstr) internal_function;
436static void re_string_translate_buffer (re_string_t *pstr) internal_function;
437static unsigned int re_string_context_at (const re_string_t *input,
438 Idx idx, int eflags)
439 internal_function __attribute ((pure));
440
/* Yield the byte OFFSET positions past the current index of PSTR, without
   moving the index.  OFFSET is parenthesized so that an argument built
   from a lower-precedence operator (for example `a | b') is added to
   cur_idx as a whole instead of being split by operator precedence. */
#define re_string_peek_byte(pstr, offset) \
  ((pstr)->mbs[(pstr)->cur_idx + (offset)])
/* Return the byte at the current index of PSTR and then advance the
   index by one. */
443#define re_string_fetch_byte(pstr) \
444 ((pstr)->mbs[(pstr)->cur_idx++])
/* True if IDX equals valid_len or the wide-character slot at IDX is not
   WEOF.  Reads PSTR->wcs, which exists only under RE_ENABLE_I18N. */
445#define re_string_first_byte(pstr, idx) \
446 ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)
/* True if the wcs slot at IDX is not WEOF and the slot after it is either
   past the valid data or also not WEOF. */
447#define re_string_is_single_byte_char(pstr, idx) \
448 ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
449 || (pstr)->wcs[(idx) + 1] != WEOF))
/* End of input: the current index has reached or passed STOP. */
450#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
/* Plain field accessors and index updates. */
451#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
452#define re_string_get_buffer(pstr) ((pstr)->mbs)
453#define re_string_length(pstr) ((pstr)->len)
454#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
455#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
456#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
457
458#include <alloca.h>
459
/* Outside glibc, provide our own __libc_use_alloca (N): true when a block
   of N bytes may be taken from the stack. */
460#ifndef _LIBC
461# if HAVE_ALLOCA
462/* The OS usually guarantees only one guard page at the bottom of the stack,
463 and a page size can be as small as 4096 bytes. So we cannot safely
464 allocate anything larger than 4096 bytes. Also care for the possibility
465 of a few compiler-allocated temporary stack slots. */
466# define __libc_use_alloca(n) ((n) < 4032)
467# else
468/* alloca is implemented with malloc, so just use malloc. */
469# define __libc_use_alloca(n) 0
470# endif
471#endif
472
/* Typed allocation wrappers: T is the element type, N the element count
   (PN points to the count for re_x2realloc).  re_malloc and re_realloc
   multiply N by sizeof (T) with no overflow check; the re_x* forms go
   through re_xnmalloc, re_xnrealloc and re_x2nrealloc below, which yield
   NULL when the byte count would overflow size_t. */
473#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
474#define re_xmalloc(t,n) ((t *) re_xnmalloc (n, sizeof (t)))
475#define re_calloc(t,n) ((t *) calloc (n, sizeof (t)))
476#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
477#define re_xrealloc(p,t,n) ((t *) re_xnrealloc (p, n, sizeof (t)))
478#define re_x2realloc(p,t,pn) ((t *) re_x2nrealloc (p, pn, sizeof (t)))
479#define re_free(p) free (p)
480
/* Fallback for environments whose headers do not define SIZE_MAX. */
481#ifndef SIZE_MAX
482# define SIZE_MAX ((size_t) -1)
483#endif
484
/* Report whether N objects of S bytes each are too many to describe with
   a size_t byte count.  S must be nonzero.  The result is hinted to the
   compiler as unlikely. */
static inline bool
re_alloc_oversized (size_t n, size_t s)
{
  const size_t max_count = SIZE_MAX / s;

  return BE (n > max_count, 0);
}
492
/* Report whether (2 * N + 1) objects of S bytes each are too many to
   describe with a size_t byte count.  S must be nonzero. */
static inline bool
re_x2alloc_oversized (size_t n, size_t s)
{
  /* Largest N for which (2 * N + 1) * S still fits in a size_t. */
  const size_t largest_n = (SIZE_MAX / s - 1) / 2;

  return BE (n > largest_n, 0);
}
500
/* Allocate room for N objects of S bytes each.  S must be nonzero.
   Returns NULL without calling malloc when N * S would overflow;
   otherwise returns whatever malloc returns. */
static inline void *
re_xnmalloc (size_t n, size_t s)
{
  if (re_alloc_oversized (n, s))
    return NULL;

  return malloc (n * s);
}
508
/* Resize block P to hold N objects of S bytes each.  S must be nonzero.
   Returns NULL without calling realloc when N * S would overflow;
   otherwise returns whatever realloc returns. */
static inline void *
re_xnrealloc (void *p, size_t n, size_t s)
{
  if (re_alloc_oversized (n, s))
    return NULL;

  return realloc (p, n * s);
}
516
/* Grow block P from *PN objects to (2 * *PN + 1) objects of S bytes each.
   S must be nonzero.  On success store the new object count in *PN and
   return the new block.  Return NULL, leaving *PN untouched, when the new
   size would overflow or realloc fails. */
static inline void *
re_x2nrealloc (void *p, size_t *pn, size_t s)
{
  size_t grown;
  void *resized;

  if (re_x2alloc_oversized (*pn, s))
    return NULL;

  /* The "+ 1" makes the block grow even when *PN is zero. */
  grown = 2 * *pn + 1;
  resized = realloc (p, grown * s);
  if (BE (resized != NULL, 1))
    *pn = grown;
  return resized;
}
537
/* A node of the binary parse tree: links to the parent and both children,
   two further links (FIRST, NEXT), the token it carries, and an index. */
538struct bin_tree_t
539{
540 struct bin_tree_t *parent;
541 struct bin_tree_t *left;
542 struct bin_tree_t *right;
543 struct bin_tree_t *first; /* NOTE(review): FIRST/NEXT look like traversal links filled in after parsing -- confirm in regcomp.c */
544 struct bin_tree_t *next;
545
546 re_token_t token;
547
548 /* `node_idx' is the index in dfa->nodes, if `type' == 0.
549 Otherwise `type' indicates the type of this node. */
550 Idx node_idx;
551};
552typedef struct bin_tree_t bin_tree_t;
553
/* Number of bin_tree_t objects per storage chunk, chosen so that the
   DATA array plus one pointer occupies at most 1024 bytes. */
554#define BIN_TREE_STORAGE_SIZE \
555 ((1024 - sizeof (void *)) / sizeof (bin_tree_t))
556
/* One chunk of bin_tree_t nodes; chunks are chained through NEXT. */
557struct bin_tree_storage_t
558{
559 struct bin_tree_storage_t *next;
560 bin_tree_t data[BIN_TREE_STORAGE_SIZE];
561};
562typedef struct bin_tree_storage_t bin_tree_storage_t;
563
/* Context flags: four distinct bits (1, 2, 4, 8) that can be OR-ed
   together into a context mask. */
564#define CONTEXT_WORD 1
565#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
566#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
567#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
568
/* Tests on a context mask C.  The first four yield the masked bit
   (nonzero when set), not a normalized 0/1; IS_ORDINARY_CONTEXT is true
   only when no flag at all is set. */
569#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
570#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
571#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
572#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
573#define IS_ORDINARY_CONTEXT(c) ((c) == 0)
574
/* Character classification: a word character is alphanumeric (per
   isalnum / iswalnum) or an underscore; a newline is compared against
   NEWLINE_CHAR / WIDE_NEWLINE_CHAR, which are defined elsewhere. */
575#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
576#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
577#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
578#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
579
/* Evaluate to nonzero when the context mask CONTEXT (CONTEXT_* bits)
   violates any PREV_* requirement present in CONSTRAINT:
     - PREV_WORD_CONSTRAINT needs a word context,
     - PREV_NOTWORD_CONSTRAINT needs a non-word context,
     - PREV_NEWLINE_CONSTRAINT needs a newline context,
     - PREV_BEGBUF_CONSTRAINT needs a beginning-of-buffer context.
   CONSTRAINT is parenthesized at every use, matching
   NOT_SATISFY_NEXT_CONSTRAINT, so that an argument such as `a | b' is
   masked as a whole rather than being split by operator precedence. */
#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
  ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
   || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
   || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
   || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
585
/* Evaluate to nonzero when the context mask CONTEXT (CONTEXT_* bits)
   violates any NEXT_* requirement present in CONSTRAINT: a word, non-word,
   newline or end-of-buffer context respectively. */
586#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
587 ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
588 || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
589 || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
590 || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
591
/* One DFA state: the node sets it stands for, its transition tables, and
   a few summary flags. */
592struct re_dfastate_t
593{
594 re_hashval_t hash;
595 re_node_set nodes;
596 re_node_set non_eps_nodes;
597 re_node_set inveclosure;
598 re_node_set *entrance_nodes;
599 struct re_dfastate_t **trtable, **word_trtable;
600 unsigned int context : 4; /* presumably a mask of the four CONTEXT_* bits -- confirm */
601 unsigned int halt : 1;
602 /* If this state can accept `multi byte'.
603 Note that we refer to multibyte characters, and multi character
604 collating elements as `multi byte'. */
605 unsigned int accept_mb : 1;
606 /* If this state has backreference node(s). */
607 unsigned int has_backref : 1;
608 unsigned int has_constraint : 1;
609};
610typedef struct re_dfastate_t re_dfastate_t;
611
/* An array of DFA state pointers.  re_dfa_t.state_table points to entries
   of this type.  NOTE(review): NUM and ALLOC look like the used and
   allocated lengths of ARRAY -- confirm in regex_internal.c. */
612struct re_state_table_entry
613{
614 Idx num;
615 Idx alloc;
616 re_dfastate_t **array;
617};
618
619/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */
620
621typedef struct
622{
623 Idx next_idx; /* NOTE(review): presumably the next slot of ARRAY to fill -- confirm */
624 Idx alloc; /* NOTE(review): presumably the allocated length of ARRAY -- confirm */
625 re_dfastate_t **array;
626} state_array_t;
627
628/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */
629
630typedef struct
631{
632 Idx node;
633 Idx str_idx; /* The position NODE matches at. */
634 state_array_t path; /* embedded by value here; re_sub_match_top_t holds a pointer instead */
635} re_sub_match_last_t;
636
637/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
638 Information about the nodes of type OP_CLOSE_SUBEXP that correspond
639 to NODE is stored in LASTS. */
640
641typedef struct
642{
643 Idx str_idx;
644 Idx node;
645 state_array_t *path;
646 Idx alasts; /* Allocation size of LASTS. */
647 Idx nlasts; /* The number of LASTS. */
648 re_sub_match_last_t **lasts;
649} re_sub_match_top_t;
650
/* One entry of the back-reference cache kept in re_match_context_t
   (bkref_ents).  The first four fields line up with the node, str_idx,
   from and to arguments of match_ctx_add_entry. */
651struct re_backref_cache_entry
652{
653 Idx node;
654 Idx str_idx;
655 Idx subexp_from;
656 Idx subexp_to;
657 char more;
658 char unused;
659 unsigned short int eps_reachable_subexps_map;
660};
661
/* Per-search matcher state: the input string, the DFA in use, the state
   log, the back-reference cache and the sub-expression tops. */
662typedef struct
663{
664 /* The string object corresponding to the input string. */
665 re_string_t input;
/* The DFA pointer is const-qualified only inside glibc or when compiling
   as C99 or later. */
666#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
667 re_dfa_t *const dfa;
668#else
669 re_dfa_t *dfa;
670#endif
671 /* EFLAGS of the argument of regexec. */
672 int eflags;
673 /* Where the matching ends. */
674 Idx match_last;
675 Idx last_node;
676 /* The state log used by the matcher. */
677 re_dfastate_t **state_log;
678 Idx state_log_top;
679 /* Back reference cache.  NOTE(review): by analogy with alasts/nlasts in
   re_sub_match_top_t, the n* field is presumably the count in use and
   the a* field the allocated size -- confirm. */
680 Idx nbkref_ents;
681 Idx abkref_ents;
682 struct re_backref_cache_entry *bkref_ents;
683 int max_mb_elem_len;
684 Idx nsub_tops;
685 Idx asub_tops;
686 re_sub_match_top_t **sub_tops;
687} re_match_context_t;
688
/* Working state for the backward sifting pass.  The fields correspond to
   the arguments of sift_ctx_init (sifted_sts, limited_sts, last_node,
   last_str_idx), plus the node set LIMITS. */
689typedef struct
690{
691 re_dfastate_t **sifted_states;
692 re_dfastate_t **limited_states;
693 Idx last_node;
694 Idx last_str_idx;
695 re_node_set limits;
696} re_sift_context_t;
697
/* One saved alternative on the fail stack.  The fields mirror the
   str_idx, dest_node, regs and eps_via_nodes arguments of
   push_fail_stack. */
698struct re_fail_stack_ent_t
699{
700 Idx idx;
701 Idx node;
702 regmatch_t *regs;
703 re_node_set eps_via_nodes;
704};
705
/* Stack of re_fail_stack_ent_t entries.  NOTE(review): NUM and ALLOC look
   like the used and allocated lengths of STACK -- confirm in regexec.c. */
706struct re_fail_stack_t
707{
708 Idx num;
709 Idx alloc;
710 struct re_fail_stack_ent_t *stack;
711};
712
/* The compiled pattern: the node array with its per-node tables, the DFA
   state table and initial states, the parse tree and its storage, and
   pattern-wide flags. */
713struct re_dfa_t
714{
715 re_token_t *nodes;
716 Idx nodes_alloc;
717 Idx nodes_len;
718 Idx *nexts;
719 Idx *org_indices;
720 re_node_set *edests;
721 re_node_set *eclosures;
722 re_node_set *inveclosures;
723 struct re_state_table_entry *state_table;
/* NOTE(review): the four initial states look like one per starting
   context (ordinary, word, newline, beginning of buffer) -- confirm. */
724 re_dfastate_t *init_state;
725 re_dfastate_t *init_state_word;
726 re_dfastate_t *init_state_nl;
727 re_dfastate_t *init_state_begbuf;
728 bin_tree_t *str_tree;
729 bin_tree_storage_t *str_tree_storage;
730 re_bitset_ptr_t sb_char;
731 int str_tree_storage_idx;
732
733 /* The number of subexpressions, `re_nsub', is kept in regex_t, not here. */
734 re_hashval_t state_hash_mask;
735 Idx init_node;
736 Idx nbackref; /* The number of backreferences in this dfa. */
737
738 /* Bitmap expressing which backreference is used. */
739 bitset_word used_bkref_map;
740 bitset_word completed_bkref_map;
741
742 unsigned int has_plural_match : 1;
743 /* If this dfa has "multibyte node", which is a backreference or
744 a node which can accept multibyte character or multi character
745 collating element. */
746 unsigned int has_mb_node : 1;
747 unsigned int is_utf8 : 1;
748 unsigned int map_notascii : 1;
749 unsigned int word_ops_used : 1;
750 int mb_cur_max;
751 bitset word_char; /* re_string_t keeps a pointer copy of this set */
752 reg_syntax_t syntax;
753 Idx *subexp_map;
754#ifdef DEBUG
755 char* re_str;
756#endif
/* Lock member named `lock'; regexec brackets its search with
   __libc_lock_lock / __libc_lock_unlock on it. */
757 __libc_lock_define (, lock)
758};
759
/* Zero every field of the re_node_set that SET points to. */
760#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
/* Remove ID from SET: look it up with re_node_set_contains, subtract one
   from the result and pass that to re_node_set_remove_at. */
761#define re_node_set_remove(set,id) \
762 (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
/* Drop all elements by resetting the count; ELEMS is left allocated. */
763#define re_node_set_empty(p) ((p)->nelem = 0)
/* Release the element array.  The re_node_set object itself is not freed. */
764#define re_node_set_free(set) re_free ((set)->elems)
765
766static void free_state (re_dfastate_t *state) internal_function;
767
768
/* Kind of element found inside a bracket expression; used as the tag of
   bracket_elem_t. */
769typedef enum
770{
771 SB_CHAR,
772 MB_CHAR,
773 EQUIV_CLASS,
774 COLL_SYM,
775 CHAR_CLASS
776} bracket_elem_type;
777
/* One parsed bracket-expression element: a TYPE tag plus its operand.
   NOTE(review): presumably CH goes with SB_CHAR, WCH with MB_CHAR and
   NAME with the named kinds (EQUIV_CLASS, COLL_SYM, CHAR_CLASS) --
   confirm against the bracket parser. */
778typedef struct
779{
780 bracket_elem_type type;
781 union
782 {
783 unsigned char ch;
784 unsigned char *name;
785 wchar_t wch;
786 } opr;
787} bracket_elem_t;
788
789
790/* Inline functions for bitset operation. */
791
792static inline void
793bitset_set (bitset set, Idx i)
794{
795 set[i / BITSET_WORD_BITS] |= (bitset_word) 1 << i % BITSET_WORD_BITS;
796}
797
798static inline void
799bitset_clear (bitset set, Idx i)
800{
801 set[i / BITSET_WORD_BITS] &= ~ ((bitset_word) 1 << i % BITSET_WORD_BITS);
802}
803
804static inline bool
805bitset_contain (const bitset set, Idx i)
806{
807 return (set[i / BITSET_WORD_BITS] >> i % BITSET_WORD_BITS) & 1;
808}
809
810static inline void
811bitset_empty (bitset set)
812{
813 memset (set, 0, sizeof (bitset));
814}
815
816static inline void
817bitset_set_all (bitset set)
818{
819 memset (set, -1, sizeof (bitset_word) * (SBC_MAX / BITSET_WORD_BITS));
820 if (SBC_MAX % BITSET_WORD_BITS != 0)
821 set[BITSET_WORDS - 1] =
822 ((bitset_word) 1 << SBC_MAX % BITSET_WORD_BITS) - 1;
823}
824
825static inline void
826bitset_copy (bitset dest, const bitset src)
827{
828 memcpy (dest, src, sizeof (bitset));
829}
830
831static inline void
832bitset_not (bitset set)
833{
834 int i;
835 for (i = 0; i < SBC_MAX / BITSET_WORD_BITS; ++i)
836 set[i] = ~set[i];
837 if (SBC_MAX % BITSET_WORD_BITS != 0)
838 set[BITSET_WORDS - 1] =
839 ((((bitset_word) 1 << SBC_MAX % BITSET_WORD_BITS) - 1)
840 & ~set[BITSET_WORDS - 1]);
841}
842
843static inline void
844bitset_merge (bitset dest, const bitset src)
845{
846 int i;
847 for (i = 0; i < BITSET_WORDS; ++i)
848 dest[i] |= src[i];
849}
850
851static inline void
852bitset_mask (bitset dest, const bitset src)
853{
854 int i;
855 for (i = 0; i < BITSET_WORDS; ++i)
856 dest[i] &= src[i];
857}
858
859#if defined RE_ENABLE_I18N
860/* Inline functions for re_string. */
861static inline int
862internal_function __attribute ((pure))
863re_string_char_size_at (const re_string_t *pstr, Idx idx)
864{
865 int byte_idx;
866 if (pstr->mb_cur_max == 1)
867 return 1;
868 for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx)
869 if (pstr->wcs[idx + byte_idx] != WEOF)
870 break;
871 return byte_idx;
872}
873
874static inline wint_t
875internal_function __attribute ((pure))
876re_string_wchar_at (const re_string_t *pstr, Idx idx)
877{
878 if (pstr->mb_cur_max == 1)
879 return (wint_t) pstr->mbs[idx];
880 return (wint_t) pstr->wcs[idx];
881}
882
/* Return the byte length of the element at IDX in PSTR.
   Outside glibc this is always 1.  Inside glibc, when the current
   LC_COLLATE locale has collation rules (nrules != 0), the length is the
   distance that findidx moves P from its starting point; with no rules
   it is 1 as well. */
883static int
884internal_function __attribute ((pure))
885re_string_elem_size_at (const re_string_t *pstr, Idx idx)
886{
887#ifdef _LIBC
888 const unsigned char *p, *extra;
889 const int32_t *table, *indirect;
890 int32_t tmp;
/* Included inside the function body on purpose, after the locals above:
   NOTE(review): weight.h presumably defines findidx in terms of TABLE,
   EXTRA and INDIRECT -- confirm against glibc's locale/weight.h. */
891# include <locale/weight.h>
892 uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
893
894 if (nrules != 0)
895 {
896 table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
897 extra = (const unsigned char *)
898 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
899 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
900 _NL_COLLATE_INDIRECTMB);
901 p = pstr->mbs + idx;
902 tmp = findidx (&p); /* the result is not used; the call is made for its effect on P */
903 return p - pstr->mbs - idx; /* how far P moved past mbs + idx */
904 }
905 else
906#endif /* _LIBC */
907 return 1;
908}
909#endif /* RE_ENABLE_I18N */
910
911#endif /* _REGEX_INTERNAL_H */
diff --git a/lib/regexec.c b/lib/regexec.c
deleted file mode 100644
index a85077c9..00000000
--- a/lib/regexec.c
+++ /dev/null
@@ -1,4333 +0,0 @@
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License along
17 with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
21 Idx n) internal_function;
22static void match_ctx_clean (re_match_context_t *mctx) internal_function;
23static void match_ctx_free (re_match_context_t *cache) internal_function;
24static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25 Idx str_idx, Idx from, Idx to)
26 internal_function;
27static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
28 internal_function;
29static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
30 Idx str_idx) internal_function;
31static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
32 Idx node, Idx str_idx)
33 internal_function;
34static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
35 re_dfastate_t **limited_sts, Idx last_node,
36 Idx last_str_idx)
37 internal_function;
38static reg_errcode_t re_search_internal (const regex_t *preg,
39 const char *string, Idx length,
40 Idx start, Idx last_start, Idx stop,
41 size_t nmatch, regmatch_t pmatch[],
42 int eflags) internal_function;
43static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
44 const char *string1, Idx length1,
45 const char *string2, Idx length2,
46 Idx start, regoff_t range,
47 struct re_registers *regs,
48 Idx stop, bool ret_len) internal_function;
49static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
50 const char *string, Idx length, Idx start,
51 regoff_t range, Idx stop,
52 struct re_registers *regs,
53 bool ret_len) internal_function;
54static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
55 Idx nregs, int regs_allocated) internal_function;
56static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
57 internal_function;
58static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
59 Idx *p_match_first)
60 internal_function;
61static Idx check_halt_state_context (const re_match_context_t *mctx,
62 const re_dfastate_t *state, Idx idx)
63 internal_function;
64static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch,
65 regmatch_t *prev_idx_match, Idx cur_node,
66 Idx cur_idx, Idx nmatch) internal_function;
67static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
68 Idx str_idx, Idx dest_node, Idx nregs,
69 regmatch_t *regs,
70 re_node_set *eps_via_nodes) internal_function;
71static reg_errcode_t set_regs (const regex_t *preg,
72 const re_match_context_t *mctx,
73 size_t nmatch, regmatch_t *pmatch,
74 bool fl_backtrack) internal_function;
75static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function;
76
77#ifdef RE_ENABLE_I18N
78static int sift_states_iter_mb (const re_match_context_t *mctx,
79 re_sift_context_t *sctx,
80 Idx node_idx, Idx str_idx, Idx max_str_idx) internal_function;
81#endif /* RE_ENABLE_I18N */
82static reg_errcode_t sift_states_backward (re_match_context_t *mctx,
83 re_sift_context_t *sctx) internal_function;
84static reg_errcode_t build_sifted_states (re_match_context_t *mctx,
85 re_sift_context_t *sctx, Idx str_idx,
86 re_node_set *cur_dest) internal_function;
87static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx,
88 re_sift_context_t *sctx,
89 Idx str_idx,
90 re_node_set *dest_nodes) internal_function;
91static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
92 re_node_set *dest_nodes,
93 const re_node_set *candidates) internal_function;
94static bool check_dst_limits (const re_match_context_t *mctx,
95 const re_node_set *limits,
96 Idx dst_node, Idx dst_idx, Idx src_node,
97 Idx src_idx) internal_function;
98static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
99 int boundaries, Idx subexp_idx,
100 Idx from_node, Idx bkref_idx) internal_function;
101static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
102 Idx limit, Idx subexp_idx,
103 Idx node, Idx str_idx,
104 Idx bkref_idx) internal_function;
105static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
106 re_node_set *dest_nodes,
107 const re_node_set *candidates,
108 re_node_set *limits,
109 struct re_backref_cache_entry *bkref_ents,
110 Idx str_idx) internal_function;
111static reg_errcode_t sift_states_bkref (re_match_context_t *mctx,
112 re_sift_context_t *sctx,
113 Idx str_idx, const re_node_set *candidates) internal_function;
114static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
115 re_dfastate_t **src, Idx num) internal_function;
116static re_dfastate_t *find_recover_state (reg_errcode_t *err,
117 re_match_context_t *mctx) internal_function;
118static re_dfastate_t *transit_state (reg_errcode_t *err,
119 re_match_context_t *mctx,
120 re_dfastate_t *state) internal_function;
121static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
122 re_match_context_t *mctx,
123 re_dfastate_t *next_state) internal_function;
124static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
125 re_node_set *cur_nodes,
126 Idx str_idx) internal_function;
127#if 0
128static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
129 re_match_context_t *mctx,
130 re_dfastate_t *pstate) internal_function;
131#endif
132#ifdef RE_ENABLE_I18N
133static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
134 re_dfastate_t *pstate) internal_function;
135#endif /* RE_ENABLE_I18N */
136static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
137 const re_node_set *nodes) internal_function;
138static reg_errcode_t get_subexp (re_match_context_t *mctx,
139 Idx bkref_node, Idx bkref_str_idx) internal_function;
140static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
141 const re_sub_match_top_t *sub_top,
142 re_sub_match_last_t *sub_last,
143 Idx bkref_node, Idx bkref_str) internal_function;
144static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
145 Idx subexp_idx, int type) internal_function;
146static reg_errcode_t check_arrival (re_match_context_t *mctx,
147 state_array_t *path, Idx top_node,
148 Idx top_str, Idx last_node, Idx last_str,
149 int type) internal_function;
150static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
151 Idx str_idx,
152 re_node_set *cur_nodes,
153 re_node_set *next_nodes) internal_function;
154static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
155 re_node_set *cur_nodes,
156 Idx ex_subexp, int type) internal_function;
157static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
158 re_node_set *dst_nodes,
159 Idx target, Idx ex_subexp,
160 int type) internal_function;
161static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
162 re_node_set *cur_nodes, Idx cur_str,
163 Idx subexp_num, int type) internal_function;
164static bool build_trtable (re_dfa_t *dfa,
165 re_dfastate_t *state) internal_function;
166#ifdef RE_ENABLE_I18N
167static int check_node_accept_bytes (re_dfa_t *dfa, Idx node_idx,
168 const re_string_t *input, Idx idx) internal_function;
169# ifdef _LIBC
170static unsigned int find_collation_sequence_value (const unsigned char *mbs,
171 size_t name_len) internal_function;
172# endif /* _LIBC */
173#endif /* RE_ENABLE_I18N */
174static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
175 const re_dfastate_t *state,
176 re_node_set *states_node,
177 bitset *states_ch) internal_function;
178static bool check_node_accept (const re_match_context_t *mctx,
179 const re_token_t *node, Idx idx)
180 internal_function;
181static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function;
182
183/* Entry point for POSIX code. */
184
185/* regexec searches for a given pattern, specified by PREG, in the
186 string STRING.
187
188 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
189 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
190 least NMATCH elements, and we set them to the offsets of the
191 corresponding matched substrings.
192
193 EFLAGS specifies `execution flags' which affect matching: if
194 REG_NOTBOL is set, then ^ does not match at the beginning of the
195 string; if REG_NOTEOL is set, then $ does not match at the end.
196
197 We return 0 if we find a match and REG_NOMATCH if not. */
198
199int
200regexec (const regex_t *__restrict preg, const char *__restrict string,
201 size_t nmatch, regmatch_t pmatch[], int eflags)
202{
203 reg_errcode_t err;
204 Idx start, length;
205#ifdef _LIBC
206 re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
207#endif
208
209 if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
210 return REG_BADPAT;
211
212 if (eflags & REG_STARTEND)
213 {
214 start = pmatch[0].rm_so;
215 length = pmatch[0].rm_eo;
216 }
217 else
218 {
219 start = 0;
220 length = strlen (string);
221 }
222
223 __libc_lock_lock (dfa->lock);
224 if (preg->re_no_sub)
225 err = re_search_internal (preg, string, length, start, length,
226 length, 0, NULL, eflags);
227 else
228 err = re_search_internal (preg, string, length, start, length,
229 length, nmatch, pmatch, eflags);
230 __libc_lock_unlock (dfa->lock);
231 return err != REG_NOERROR;
232}
233
/* glibc only: symbol versioning for regexec.  The current implementation
   is exported under GLIBC_2_3_4.  For binaries linked against older
   versions, __compat_regexec is exported as regexec@GLIBC_2_0; it masks
   EFLAGS down to REG_NOTBOL | REG_NOTEOL before delegating, so
   REG_STARTEND is ignored through the compat symbol. */
234#ifdef _LIBC
235# include <shlib-compat.h>
236versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
237
238# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
239__typeof__ (__regexec) __compat_regexec;
240
241int
242attribute_compat_text_section
243__compat_regexec (const regex_t *__restrict preg,
244 const char *__restrict string, size_t nmatch,
245 regmatch_t pmatch[], int eflags)
246{
247 return regexec (preg, string, nmatch, pmatch,
248 eflags & (REG_NOTBOL | REG_NOTEOL));
249}
250compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
251# endif
252#endif
253
254/* Entry points for GNU code. */
255
256/* re_match, re_search, re_match_2, re_search_2
257
258 The former two functions operate on STRING with length LENGTH,
259 while the latter two operate on the concatenation of STRING1 and STRING2
260 with lengths LENGTH1 and LENGTH2, respectively.
261
262 re_match() matches the compiled pattern in BUFP against the string,
263 starting at index START.
264
265 re_search() first tries matching at index START, then it tries to match
266 starting from index START + 1, and so on. The last start position tried
267 is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
268 way as re_match().)
269
270 The parameter STOP of re_{match,search}_2 specifies that no match extending
271 beyond the first STOP characters of the concatenation of the strings is
272 considered.
273
274 If REGS is not NULL, and BUFP->re_no_sub is not set, the offsets of the match
275 and all groups are stored in REGS. (For the "_2" variants, the offsets are
276 computed relative to the concatenation, not relative to the individual
277 strings.)
278
279 On success, re_match* functions return the length of the match, re_search*
280 return the position of the start of the match. Return value -1 means no
281 match was found and -2 indicates an internal error. */
282
283regoff_t
284re_match (struct re_pattern_buffer *bufp, const char *string,
285 Idx length, Idx start, struct re_registers *regs)
286{
287 return re_search_stub (bufp, string, length, start, 0, length, regs, true);
288}
289#ifdef _LIBC
290weak_alias (__re_match, re_match)
291#endif
292
293regoff_t
294re_search (struct re_pattern_buffer *bufp, const char *string,
295 Idx length, Idx start, regoff_t range, struct re_registers *regs)
296{
297 return re_search_stub (bufp, string, length, start, range, length, regs,
298 false);
299}
300#ifdef _LIBC
301weak_alias (__re_search, re_search)
302#endif
303
304regoff_t
305re_match_2 (struct re_pattern_buffer *bufp,
306 const char *string1, Idx length1,
307 const char *string2, Idx length2,
308 Idx start, struct re_registers *regs, Idx stop)
309{
310 return re_search_2_stub (bufp, string1, length1, string2, length2,
311 start, 0, regs, stop, true);
312}
313#ifdef _LIBC
314weak_alias (__re_match_2, re_match_2)
315#endif
316
317regoff_t
318re_search_2 (struct re_pattern_buffer *bufp,
319 const char *string1, Idx length1,
320 const char *string2, Idx length2,
321 Idx start, regoff_t range, struct re_registers *regs, Idx stop)
322{
323 return re_search_2_stub (bufp, string1, length1, string2, length2,
324 start, range, regs, stop, false);
325}
326#ifdef _LIBC
327weak_alias (__re_search_2, re_search_2)
328#endif
329
330static regoff_t
331internal_function
332re_search_2_stub (struct re_pattern_buffer *bufp,
333 const char *string1, Idx length1,
334 const char *string2, Idx length2,
335 Idx start, regoff_t range, struct re_registers *regs,
336 Idx stop, bool ret_len)
337{
338 const char *str;
339 regoff_t rval;
340 Idx len = length1 + length2;
341 char *s = NULL;
342
343 if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
344 return -2;
345
346 /* Concatenate the strings. */
347 if (length2 > 0)
348 if (length1 > 0)
349 {
350 s = re_malloc (char, len);
351
352 if (BE (s == NULL, 0))
353 return -2;
354 memcpy (s, string1, length1);
355 memcpy (s + length1, string2, length2);
356 str = s;
357 }
358 else
359 str = string2;
360 else
361 str = string1;
362
363 rval = re_search_stub (bufp, str, len, start, range, stop, regs,
364 ret_len);
365 re_free (s);
366 return rval;
367}
368
369/* The parameters have the same meaning as those of re_search.
370 Additional parameters:
371 If RET_LEN is true the length of the match is returned (re_match style);
372 otherwise the position of the match is returned.
 STOP is passed through unchanged to re_search_internal.
 Returns -1 when START lies outside [0, LENGTH] or when
 re_search_internal reports anything other than REG_NOERROR, and -2
 when the temporary match array cannot be allocated or re_copy_regs
 reports REG_UNALLOCATED. */
373
374static regoff_t
375internal_function
376re_search_stub (struct re_pattern_buffer *bufp,
377 const char *string, Idx length,
378 Idx start, regoff_t range, Idx stop, struct re_registers *regs,
379 bool ret_len)
380{
381 reg_errcode_t result;
382 regmatch_t *pmatch;
383 Idx nregs;
384 regoff_t rval;
385 int eflags = 0;
386#ifdef _LIBC
387 re_dfa_t *dfa = (re_dfa_t *) bufp->re_buffer;
388#endif
 /* Tentative value; it is validated and clamped just below. */
389 Idx last_start = start + range;
390
391 /* Check for out-of-range. */
392 if (BE (start < 0 || start > length, 0))
393 return -1;
 /* When regoff_t is wider than Idx, redo the range comparison in
 regoff_t arithmetic and substitute LENGTH or 0 when START + RANGE
 falls outside [0, LENGTH]. */
394 if (sizeof start < sizeof range)
395 {
396 regoff_t length_offset = length;
397 regoff_t start_offset = start;
398 if (BE (length_offset - start_offset < range, 0))
399 last_start = length;
400 else if (BE (range < - start_offset, 0))
401 last_start = 0;
402 }
403 else
404 {
405 if (BE ((last_start < start) != (range < 0), 0))
406 {
407 /* Overflow occurred when computing last_start; substitute
408 the extreme value. */
409 last_start = range < 0 ? 0 : length;
410 }
411 else
412 {
413 if (BE (length < last_start, 0))
414 last_start = length;
415 else if (BE (last_start < 0, 0))
416 last_start = 0;
417 }
418 }
419
420 __libc_lock_lock (dfa->lock);
421
 /* Translate the pattern buffer's anchoring flags into regexec-style
 EFLAGS bits. */
422 eflags |= (bufp->re_not_bol) ? REG_NOTBOL : 0;
423 eflags |= (bufp->re_not_eol) ? REG_NOTEOL : 0;
424
425 /* Compile fastmap if we haven't yet. */
426 if (start < last_start && bufp->re_fastmap != NULL
427 && !bufp->re_fastmap_accurate)
428 re_compile_fastmap (bufp);
429
 /* With re_no_sub set, no register data is handed back to the caller. */
430 if (BE (bufp->re_no_sub, 0))
431 regs = NULL;
432
433 /* We need at least 1 register. */
434 if (regs == NULL)
435 nregs = 1;
436 else if (BE (bufp->re_regs_allocated == REG_FIXED
437 && regs->rm_num_regs <= bufp->re_nsub, 0))
438 {
 /* Fixed-size caller arrays that are too small for every group:
 fill only as many registers as they can hold. */
439 nregs = regs->rm_num_regs;
440 if (BE (nregs < 1, 0))
441 {
442 /* Nothing can be copied to regs. */
443 regs = NULL;
444 nregs = 1;
445 }
446 }
447 else
448 nregs = bufp->re_nsub + 1;
449 pmatch = re_xmalloc (regmatch_t, nregs);
450 if (BE (pmatch == NULL, 0))
451 {
452 rval = -2;
453 goto out;
454 }
455
456 result = re_search_internal (bufp, string, length, start, last_start, stop,
457 nregs, pmatch, eflags);
458
459 rval = 0;
460
461 /* I hope we needn't fill the regs with -1's when no match was found. */
462 if (result != REG_NOERROR)
463 rval = -1;
464 else if (regs != NULL)
465 {
466 /* If caller wants register contents data back, copy them. */
467 bufp->re_regs_allocated = re_copy_regs (regs, pmatch, nregs,
468 bufp->re_regs_allocated);
469 if (BE (bufp->re_regs_allocated == REG_UNALLOCATED, 0))
470 rval = -2;
471 }
472
 /* RVAL is still 0 only when a match was found and any register
 copy succeeded; convert it to the requested kind of result. */
473 if (BE (rval == 0, 1))
474 {
475 if (ret_len)
476 {
477 assert (pmatch[0].rm_so == start);
478 rval = pmatch[0].rm_eo - start;
479 }
480 else
481 rval = pmatch[0].rm_so;
482 }
483 re_free (pmatch);
484 out:
485 __libc_lock_unlock (dfa->lock);
486 return rval;
487}
488
489static unsigned
490internal_function
491re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
492 int regs_allocated)
493{
494 int rval = REG_REALLOCATE;
495 Idx i;
496 Idx need_regs = nregs + 1;
497 /* We need one extra element beyond `rm_num_regs' for the `-1' marker GNU code
498 uses. */
499
500 /* Have the register data arrays been allocated? */
501 if (regs_allocated == REG_UNALLOCATED)
502 { /* No. So allocate them with malloc. */
503 regs->rm_start = re_xmalloc (regoff_t, need_regs);
504 regs->rm_end = re_malloc (regoff_t, need_regs);
505 if (BE (regs->rm_start == NULL, 0) || BE (regs->rm_end == NULL, 0))
506 return REG_UNALLOCATED;
507 regs->rm_num_regs = need_regs;
508 }
509 else if (regs_allocated == REG_REALLOCATE)
510 { /* Yes. If we need more elements than were already
511 allocated, reallocate them. If we need fewer, just
512 leave it alone. */
513 if (BE (need_regs > regs->rm_num_regs, 0))
514 {
515 regoff_t *new_start =
516 re_xrealloc (regs->rm_start, regoff_t, need_regs);
517 regoff_t *new_end = re_realloc (regs->rm_end, regoff_t, need_regs);
518 if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
519 return REG_UNALLOCATED;
520 regs->rm_start = new_start;
521 regs->rm_end = new_end;
522 regs->rm_num_regs = need_regs;
523 }
524 }
525 else
526 {
527 assert (regs_allocated == REG_FIXED);
528 /* This function may not be called with REG_FIXED and nregs too big. */
529 assert (regs->rm_num_regs >= nregs);
530 rval = REG_FIXED;
531 }
532
533 /* Copy the regs. */
534 for (i = 0; i < nregs; ++i)
535 {
536 regs->rm_start[i] = pmatch[i].rm_so;
537 regs->rm_end[i] = pmatch[i].rm_eo;
538 }
539 for ( ; i < regs->rm_num_regs; ++i)
540 regs->rm_start[i] = regs->rm_end[i] = -1;
541
542 return rval;
543}
544
545/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
546 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
547 this memory for recording register information. STARTS and ENDS
548 must be allocated using the malloc library routine, and must each
549 be at least NUM_REGS * sizeof (regoff_t) bytes long.
550
551 If NUM_REGS == 0, then subsequent matches should allocate their own
552 register data.
553
554 Unless this function is called, the first search or match using
555 PATTERN_BUFFER will allocate its own register data, without
556 freeing the old data. */
557
558void
559re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
560 __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
561{
562 if (num_regs)
563 {
564 bufp->re_regs_allocated = REG_REALLOCATE;
565 regs->rm_num_regs = num_regs;
566 regs->rm_start = starts;
567 regs->rm_end = ends;
568 }
569 else
570 {
571 bufp->re_regs_allocated = REG_UNALLOCATED;
572 regs->rm_num_regs = 0;
573 regs->rm_start = regs->rm_end = NULL;
574 }
575}
576#ifdef _LIBC
577weak_alias (__re_set_registers, re_set_registers)
578#endif
579
/* Entry points compatible with 4.2 BSD regex library.  We don't define
   them unless specifically requested.  */

#if defined _REGEX_RE_COMP || defined _LIBC
/* Run regexec on S against the shared buffer re_comp_buf, asking for
   no sub-match data and passing no flags.  Returns 1 when regexec
   returns 0, and 0 otherwise.  */
int
# ifdef _LIBC
weak_function
# endif
re_exec (const char *s)
{
  int status = regexec (&re_comp_buf, s, 0, NULL, 0);

  return status == 0;
}
#endif /* _REGEX_RE_COMP */
593
594/* Internal entry point. */
595
596/* Searches for a compiled pattern PREG in the string STRING, whose
597 length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
598 meaning as with regexec. LAST_START is START + RANGE, where
599 START and RANGE have the same meaning as with re_search.
600 Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
601 otherwise return the error code.
 STOP is stored as the stop and raw_stop limits of the input buffer.
 PMATCH entries beyond PREG->re_nsub are set to -1 on success.
602 Note: We assume front end functions already check ranges.
603 (0 <= LAST_START && LAST_START <= LENGTH) */
604
605static reg_errcode_t
606internal_function
607re_search_internal (const regex_t *preg,
608 const char *string, Idx length,
609 Idx start, Idx last_start, Idx stop,
610 size_t nmatch, regmatch_t pmatch[],
611 int eflags)
612{
613 reg_errcode_t err;
614 re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
615 Idx left_lim, right_lim;
616 int incr;
617 bool fl_longest_match;
618 int match_kind;
619 Idx match_first, match_last = REG_MISSING;
620 Idx extra_nmatch;
621 bool sb;
622 int ch;
623#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
624 re_match_context_t mctx = { .dfa = dfa };
625#else
626 re_match_context_t mctx;
627#endif
 /* The fastmap is used only when it is accurate, the search range is
 not empty and the pattern cannot match the empty string. */
628 char *fastmap = ((preg->re_fastmap != NULL && preg->re_fastmap_accurate
629 && start != last_start && !preg->re_can_be_null)
630 ? preg->re_fastmap : NULL);
631 unsigned REG_TRANSLATE_TYPE t =
632 (unsigned REG_TRANSLATE_TYPE) preg->re_translate;
633
634#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
635 memset (&mctx, '\0', sizeof (re_match_context_t));
636 mctx.dfa = dfa;
637#endif
638
 /* At most re_nsub + 1 registers carry data; EXTRA_NMATCH counts the
 surplus PMATCH entries, which are filled with -1 near the end. */
639 extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
640 nmatch -= extra_nmatch;
641
642 /* Check whether the DFA has not been compiled. */
643 if (BE (preg->re_used == 0 || dfa->init_state == NULL
644 || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
645 || dfa->init_state_begbuf == NULL, 0))
646 return REG_NOMATCH;
647
648#ifdef DEBUG
649 /* We assume front-end functions already check them. */
650 assert (0 <= last_start && last_start <= length);
651#endif
652
653 /* If initial states with non-begbuf contexts have no elements,
654 the regex must be anchored. If preg->re_newline_anchor is set,
655 we'll never use init_state_nl, so do not check it. */
656 if (dfa->init_state->nodes.nelem == 0
657 && dfa->init_state_word->nodes.nelem == 0
658 && (dfa->init_state_nl->nodes.nelem == 0
659 || !preg->re_newline_anchor))
660 {
661 if (start != 0 && last_start != 0)
662 return REG_NOMATCH;
663 start = last_start = 0;
664 }
665
666 /* We must check the longest matching, if nmatch > 0. */
667 fl_longest_match = (nmatch != 0 || dfa->nbackref);
668
669 err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
670 preg->re_translate,
671 preg->re_syntax & REG_IGNORE_CASE, dfa);
672 if (BE (err != REG_NOERROR, 0))
673 goto free_return;
674 mctx.input.stop = stop;
675 mctx.input.raw_stop = stop;
676 mctx.input.newline_anchor = preg->re_newline_anchor;
677
678 err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
679 if (BE (err != REG_NOERROR, 0))
680 goto free_return;
681
682 /* We will log all the DFA states through which the dfa pass,
683 if nmatch > 1, or this dfa has "multibyte node", which is a
684 back-reference or a node which can accept multibyte character or
685 multi character collating element. */
686 if (nmatch > 1 || dfa->has_mb_node)
687 {
688 mctx.state_log = re_xmalloc (re_dfastate_t *, mctx.input.bufs_len + 1);
689 if (BE (mctx.state_log == NULL, 0))
690 {
691 err = REG_ESPACE;
692 goto free_return;
693 }
694 }
695 else
696 mctx.state_log = NULL;
697
698 match_first = start;
699 mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
700 : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
701
702 /* Check incrementally whether or not the input string matches. */
 /* INCR is -1 for a backward search (LAST_START < START), else +1;
 LEFT_LIM and RIGHT_LIM bound the candidate start positions. */
703 incr = (last_start < start) ? -1 : 1;
704 left_lim = (last_start < start) ? last_start : start;
705 right_lim = (last_start < start) ? start : last_start;
706 sb = dfa->mb_cur_max == 1;
 /* MATCH_KIND selects the fastmap scan used in the loop below:
 8 means no fastmap; otherwise it is a bit mask of
 4 (single-byte locale, or neither REG_IGNORE_CASE nor a
 translate table is in effect), 2 (forward search) and
 1 (a translate table is present). */
707 match_kind =
708 (fastmap
709 ? ((sb || !(preg->re_syntax & REG_IGNORE_CASE || t) ? 4 : 0)
710 | (start <= last_start ? 2 : 0)
711 | (t != NULL ? 1 : 0))
712 : 8);
713
714 for (;; match_first += incr)
715 {
716 err = REG_NOMATCH;
717 if (match_first < left_lim || right_lim < match_first)
718 goto free_return;
719
720 /* Advance as rapidly as possible through the string, until we
721 find a plausible place to start matching. This may be done
722 with varying efficiency, so there are various possibilities:
723 only the most common of them are specialized, in order to
724 save on code size. We use a switch statement for speed. */
725 switch (match_kind)
726 {
727 case 8:
728 /* No fastmap. */
729 break;
730
731 case 7:
732 /* Fastmap with single-byte translation, match forward. */
733 while (BE (match_first < right_lim, 1)
734 && !fastmap[t[(unsigned char) string[match_first]]])
735 ++match_first;
736 goto forward_match_found_start_or_reached_end;
737
738 case 6:
739 /* Fastmap without translation, match forward. */
740 while (BE (match_first < right_lim, 1)
741 && !fastmap[(unsigned char) string[match_first]])
742 ++match_first;
743
744 forward_match_found_start_or_reached_end:
 /* At the right limit the byte was not tested by the loops
 above; positions at or past LENGTH are treated as byte 0. */
745 if (BE (match_first == right_lim, 0))
746 {
747 ch = match_first >= length
748 ? 0 : (unsigned char) string[match_first];
749 if (!fastmap[t ? t[ch] : ch])
750 goto free_return;
751 }
752 break;
753
754 case 4:
755 case 5:
756 /* Fastmap without multi-byte translation, match backwards. */
757 while (match_first >= left_lim)
758 {
759 ch = match_first >= length
760 ? 0 : (unsigned char) string[match_first];
761 if (fastmap[t ? t[ch] : ch])
762 break;
763 --match_first;
764 }
765 if (match_first < left_lim)
766 goto free_return;
767 break;
768
769 default:
770 /* In this case, we can't determine easily the current byte,
771 since it might be a component byte of a multibyte
772 character. Then we use the constructed buffer instead. */
773 for (;;)
774 {
775 /* If MATCH_FIRST is out of the valid range, reconstruct the
776 buffers. */
777 __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
778 if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
779 {
780 err = re_string_reconstruct (&mctx.input, match_first,
781 eflags);
782 if (BE (err != REG_NOERROR, 0))
783 goto free_return;
784
785 offset = match_first - mctx.input.raw_mbs_idx;
786 }
787 /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
788 Note that MATCH_FIRST must not be smaller than 0. */
789 ch = (match_first >= length
790 ? 0 : re_string_byte_at (&mctx.input, offset));
791 if (fastmap[ch])
792 break;
793 match_first += incr;
794 if (match_first < left_lim || match_first > right_lim)
795 {
796 err = REG_NOMATCH;
797 goto free_return;
798 }
799 }
800 break;
801 }
802
803 /* Reconstruct the buffers so that the matcher can assume that
804 the matching starts from the beginning of the buffer. */
805 err = re_string_reconstruct (&mctx.input, match_first, eflags);
806 if (BE (err != REG_NOERROR, 0))
807 goto free_return;
808
809#ifdef RE_ENABLE_I18N
810 /* Don't consider this char as a possible match start if it is part,
811 yet isn't the head, of a multibyte character. */
812 if (!sb && !re_string_first_byte (&mctx.input, 0))
813 continue;
814#endif
815
816 /* It seems to be appropriate one, then use the matcher. */
817 /* We assume that the matching starts from 0. */
818 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
 /* On a forward search check_matching may advance MATCH_FIRST
 itself; on a backward search it is passed NULL instead. */
819 match_last = check_matching (&mctx, fl_longest_match,
820 start <= last_start ? &match_first : NULL);
821 if (match_last != REG_MISSING)
822 {
823 if (BE (match_last == REG_ERROR, 0))
824 {
825 err = REG_ESPACE;
826 goto free_return;
827 }
828 else
829 {
830 mctx.match_last = match_last;
831 if ((!preg->re_no_sub && nmatch > 1) || dfa->nbackref)
832 {
833 re_dfastate_t *pstate = mctx.state_log[match_last];
834 mctx.last_node = check_halt_state_context (&mctx, pstate,
835 match_last);
836 }
837 if ((!preg->re_no_sub && nmatch > 1 && dfa->has_plural_match)
838 || dfa->nbackref)
839 {
840 err = prune_impossible_nodes (&mctx);
841 if (err == REG_NOERROR)
842 break;
843 if (BE (err != REG_NOMATCH, 0))
844 goto free_return;
 /* Pruning rejected this candidate; keep searching from
 the next start position. */
845 match_last = REG_MISSING;
846 }
847 else
848 break; /* We found a match. */
849 }
850 }
851
852 match_ctx_clean (&mctx);
853 }
854
855#ifdef DEBUG
856 assert (match_last != REG_MISSING);
857 assert (err == REG_NOERROR);
858#endif
859
860 /* Set pmatch[] if we need. */
861 if (nmatch > 0)
862 {
863 Idx reg_idx;
864
865 /* Initialize registers. */
866 for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
867 pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
868
869 /* Set the points where matching start/end. */
870 pmatch[0].rm_so = 0;
871 pmatch[0].rm_eo = mctx.match_last;
872 /* FIXME: This function should fail if mctx.match_last exceeds
873 the maximum possible regoff_t value. We need a new error
874 code REG_OVERFLOW. */
875
876 if (!preg->re_no_sub && nmatch > 1)
877 {
878 err = set_regs (preg, &mctx, nmatch, pmatch,
879 dfa->has_plural_match && dfa->nbackref > 0);
880 if (BE (err != REG_NOERROR, 0))
881 goto free_return;
882 }
883
884 /* At last, add the offset to each of the registers, since we slid
885 the buffers so that we could assume that the matching starts
886 from 0. */
887 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
888 if (pmatch[reg_idx].rm_so != -1)
889 {
890#ifdef RE_ENABLE_I18N
891 if (BE (mctx.input.offsets_needed != 0, 0))
892 {
893 pmatch[reg_idx].rm_so =
894 (pmatch[reg_idx].rm_so == mctx.input.valid_len
895 ? mctx.input.valid_raw_len
896 : mctx.input.offsets[pmatch[reg_idx].rm_so]);
897 pmatch[reg_idx].rm_eo =
898 (pmatch[reg_idx].rm_eo == mctx.input.valid_len
899 ? mctx.input.valid_raw_len
900 : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
901 }
902#else
903 assert (mctx.input.offsets_needed == 0);
904#endif
905 pmatch[reg_idx].rm_so += match_first;
906 pmatch[reg_idx].rm_eo += match_first;
907 }
 /* Registers the pattern has no group for are reported as unset. */
908 for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
909 {
910 pmatch[nmatch + reg_idx].rm_so = -1;
911 pmatch[nmatch + reg_idx].rm_eo = -1;
912 }
913
 /* Where subexp_map redirects a group to a different one, copy that
 other group's offsets into this group's register. */
914 if (dfa->subexp_map)
915 for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
916 if (dfa->subexp_map[reg_idx] != reg_idx)
917 {
918 pmatch[reg_idx + 1].rm_so
919 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
920 pmatch[reg_idx + 1].rm_eo
921 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
922 }
923 }
924
 /* Common exit: release the state log, the match context (freed only
 when the pattern has back-references) and the input buffers. */
925 free_return:
926 re_free (mctx.state_log);
927 if (dfa->nbackref)
928 match_ctx_free (&mctx);
929 re_string_destruct (&mctx.input);
930 return err;
931}
932
/* Replace MCTX->state_log with a sifted copy produced by
 sift_states_backward, working back from MCTX->last_node at
 MCTX->match_last. When the DFA has back-references and sifting
 leaves nothing at index 0, the match end is moved back to the
 previous logged halt state and sifting is retried.
 Returns REG_NOERROR after installing the new log and updating
 MCTX->last_node and MCTX->match_last, REG_NOMATCH when no earlier
 halt state remains, REG_ESPACE when an array cannot be allocated,
 or the error reported by sift_states_backward / merge_state_array. */
933static reg_errcode_t
934internal_function
935prune_impossible_nodes (re_match_context_t *mctx)
936{
937 re_dfa_t *const dfa = mctx->dfa;
938 Idx halt_node, match_last;
939 reg_errcode_t ret;
940 re_dfastate_t **sifted_states;
941 re_dfastate_t **lim_states = NULL;
942 re_sift_context_t sctx;
943#ifdef DEBUG
944 assert (mctx->state_log != NULL);
945#endif
946 match_last = mctx->match_last;
947 halt_node = mctx->last_node;
948 sifted_states = re_xmalloc (re_dfastate_t *, match_last + 1);
949 if (BE (sifted_states == NULL, 0))
950 {
951 ret = REG_ESPACE;
952 goto free_return;
953 }
954 if (dfa->nbackref)
955 {
956 lim_states = re_xmalloc (re_dfastate_t *, match_last + 1);
957 if (BE (lim_states == NULL, 0))
958 {
959 ret = REG_ESPACE;
960 goto free_return;
961 }
962 while (1)
963 {
 /* LIM_STATES is cleared before every sifting attempt. */
964 memset (lim_states, '\0',
965 sizeof (re_dfastate_t *) * (match_last + 1));
966 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
967 match_last);
968 ret = sift_states_backward (mctx, &sctx);
969 re_node_set_free (&sctx.limits);
970 if (BE (ret != REG_NOERROR, 0))
971 goto free_return;
 /* Something survived at index 0: accept this match end. */
972 if (sifted_states[0] != NULL || lim_states[0] != NULL)
973 break;
 /* Otherwise step back to the nearest earlier index whose logged
 state is a halt state, giving up once the index runs out. */
974 do
975 {
976 --match_last;
977 if (! REG_VALID_INDEX (match_last))
978 {
979 ret = REG_NOMATCH;
980 goto free_return;
981 }
982 } while (mctx->state_log[match_last] == NULL
983 || !mctx->state_log[match_last]->halt);
984 halt_node = check_halt_state_context (mctx,
985 mctx->state_log[match_last],
986 match_last);
987 }
988 ret = merge_state_array (dfa, sifted_states, lim_states,
989 match_last + 1);
990 re_free (lim_states);
 /* Cleared so the common exit does not free it a second time. */
991 lim_states = NULL;
992 if (BE (ret != REG_NOERROR, 0))
993 goto free_return;
994 }
995 else
996 {
997 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
998 ret = sift_states_backward (mctx, &sctx);
999 re_node_set_free (&sctx.limits);
1000 if (BE (ret != REG_NOERROR, 0))
1001 goto free_return;
1002 }
 /* Success: the sifted array becomes the state log. SIFTED_STATES is
 cleared so the common exit does not free the installed array. */
1003 re_free (mctx->state_log);
1004 mctx->state_log = sifted_states;
1005 sifted_states = NULL;
1006 mctx->last_node = halt_node;
1007 mctx->match_last = match_last;
1008 ret = REG_NOERROR;
1009 free_return:
1010 re_free (sifted_states);
1011 re_free (lim_states);
1012 return ret;
1013}
1014
1015/* Acquire an initial state and return it.
1016 We must select appropriate initial state depending on the context,
1017 since initial states may have constraints like "\<", "^", etc.. */
1018
1019static inline re_dfastate_t *
1020__attribute ((always_inline)) internal_function
1021acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1022 Idx idx)
1023{
1024 re_dfa_t *const dfa = mctx->dfa;
1025 if (dfa->init_state->has_constraint)
1026 {
1027 unsigned int context;
1028 context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1029 if (IS_WORD_CONTEXT (context))
1030 return dfa->init_state_word;
1031 else if (IS_ORDINARY_CONTEXT (context))
1032 return dfa->init_state;
1033 else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1034 return dfa->init_state_begbuf;
1035 else if (IS_NEWLINE_CONTEXT (context))
1036 return dfa->init_state_nl;
1037 else if (IS_BEGBUF_CONTEXT (context))
1038 {
1039 /* It is relatively rare case, then calculate on demand. */
1040 return re_acquire_state_context (err, dfa,
1041 dfa->init_state->entrance_nodes,
1042 context);
1043 }
1044 else
1045 /* Must not happen? */
1046 return dfa->init_state;
1047 }
1048 else
1049 return dfa->init_state;
1050}
1051
1052/* Check whether the regular expression match input string INPUT or not,
1053 and return the index where the matching end. Return REG_MISSING if
1054 there is no match, and return REG_ERROR in case of an error.
1055 FL_LONGEST_MATCH means we want the POSIX longest matching.
1056 If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1057 next place where we may want to try matching.
1058 Note that the matcher assume that the maching starts from the current
1059 index of the buffer. */
1060
1061static Idx
1062internal_function
1063check_matching (re_match_context_t *mctx, bool fl_longest_match,
1064 Idx *p_match_first)
1065{
1066 re_dfa_t *const dfa = mctx->dfa;
1067 reg_errcode_t err;
1068 Idx match = 0;
1069 Idx match_last = REG_MISSING;
1070 Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1071 re_dfastate_t *cur_state;
1072 bool at_init_state = p_match_first != NULL;
1073 Idx next_start_idx = cur_str_idx;
1074
1075 err = REG_NOERROR;
1076 cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1077 /* An initial state must not be NULL (invalid). */
1078 if (BE (cur_state == NULL, 0))
1079 {
1080 assert (err == REG_ESPACE);
1081 return REG_ERROR;
1082 }
1083
1084 if (mctx->state_log != NULL)
1085 {
1086 mctx->state_log[cur_str_idx] = cur_state;
1087
1088 /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1089 later. E.g. Processing back references. */
1090 if (BE (dfa->nbackref, 0))
1091 {
1092 at_init_state = false;
1093 err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1094 if (BE (err != REG_NOERROR, 0))
1095 return err;
1096
1097 if (cur_state->has_backref)
1098 {
1099 err = transit_state_bkref (mctx, &cur_state->nodes);
1100 if (BE (err != REG_NOERROR, 0))
1101 return err;
1102 }
1103 }
1104 }
1105
1106 /* If the RE accepts NULL string. */
1107 if (BE (cur_state->halt, 0))
1108 {
1109 if (!cur_state->has_constraint
1110 || check_halt_state_context (mctx, cur_state, cur_str_idx))
1111 {
1112 if (!fl_longest_match)
1113 return cur_str_idx;
1114 else
1115 {
1116 match_last = cur_str_idx;
1117 match = 1;
1118 }
1119 }
1120 }
1121
1122 while (!re_string_eoi (&mctx->input))
1123 {
1124 re_dfastate_t *old_state = cur_state;
1125 Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1126
1127 if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1128 || (BE (next_char_idx >= mctx->input.valid_len, 0)
1129 && mctx->input.valid_len < mctx->input.len))
1130 {
1131 err = extend_buffers (mctx);
1132 if (BE (err != REG_NOERROR, 0))
1133 {
1134 assert (err == REG_ESPACE);
1135 return REG_ERROR;
1136 }
1137 }
1138
1139 cur_state = transit_state (&err, mctx, cur_state);
1140 if (mctx->state_log != NULL)
1141 cur_state = merge_state_with_log (&err, mctx, cur_state);
1142
1143 if (cur_state == NULL)
1144 {
1145 /* Reached the invalid state or an error. Try to recover a valid
1146 state using the state log, if available and if we have not
1147 already found a valid (even if not the longest) match. */
1148 if (BE (err != REG_NOERROR, 0))
1149 return REG_ERROR;
1150
1151 if (mctx->state_log == NULL
1152 || (match && !fl_longest_match)
1153 || (cur_state = find_recover_state (&err, mctx)) == NULL)
1154 break;
1155 }
1156
1157 if (BE (at_init_state, 0))
1158 {
1159 if (old_state == cur_state)
1160 next_start_idx = next_char_idx;
1161 else
1162 at_init_state = false;
1163 }
1164
1165 if (cur_state->halt)
1166 {
1167 /* Reached a halt state.
1168 Check the halt state can satisfy the current context. */
1169 if (!cur_state->has_constraint
1170 || check_halt_state_context (mctx, cur_state,
1171 re_string_cur_idx (&mctx->input)))
1172 {
1173 /* We found an appropriate halt state. */
1174 match_last = re_string_cur_idx (&mctx->input);
1175 match = 1;
1176
1177 /* We found a match, do not modify match_first below. */
1178 p_match_first = NULL;
1179 if (!fl_longest_match)
1180 break;
1181 }
1182 }
1183 }
1184
1185 if (p_match_first)
1186 *p_match_first += next_start_idx;
1187
1188 return match_last;
1189}
1190
1191/* Check NODE match the current context. */
1192
1193static bool
1194internal_function
1195check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1196{
1197 re_token_type_t type = dfa->nodes[node].type;
1198 unsigned int constraint = dfa->nodes[node].constraint;
1199 if (type != END_OF_RE)
1200 return false;
1201 if (!constraint)
1202 return true;
1203 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1204 return false;
1205 return true;
1206}
1207
1208/* Check the halt state STATE match the current context.
1209 Return 0 if not match, if the node, STATE has, is a halt node and
1210 match the context, return the node. */
1211
1212static Idx
1213internal_function
1214check_halt_state_context (const re_match_context_t *mctx,
1215 const re_dfastate_t *state, Idx idx)
1216{
1217 Idx i;
1218 unsigned int context;
1219#ifdef DEBUG
1220 assert (state->halt);
1221#endif
1222 context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1223 for (i = 0; i < state->nodes.nelem; ++i)
1224 if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1225 return state->nodes.elems[i];
1226 return 0;
1227}
1228
1229/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1230 corresponding to the DFA).
1231 Return the destination node, and update EPS_VIA_NODES;
1232 return REG_MISSING in case of errors. */
1233
1234static Idx
1235internal_function
1236proceed_next_node (const re_match_context_t *mctx,
1237 Idx nregs, regmatch_t *regs, Idx *pidx, Idx node,
1238 re_node_set *eps_via_nodes, struct re_fail_stack_t *fs)
1239{
1240 re_dfa_t *const dfa = mctx->dfa;
1241 Idx i;
1242 bool ok;
1243 if (IS_EPSILON_NODE (dfa->nodes[node].type))
1244 {
1245 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1246 re_node_set *edests = &dfa->edests[node];
1247 Idx dest_node;
1248 ok = re_node_set_insert (eps_via_nodes, node);
1249 if (BE (! ok, 0))
1250 return REG_ERROR;
1251 /* Pick up a valid destination, or return REG_MISSING if none
1252 is found. */
1253 for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
1254 {
1255 Idx candidate = edests->elems[i];
1256 if (!re_node_set_contains (cur_nodes, candidate))
1257 continue;
1258 if (dest_node == REG_MISSING)
1259 dest_node = candidate;
1260
1261 else
1262 {
1263 /* In order to avoid infinite loop like "(a*)*", return the second
1264 epsilon-transition if the first was already considered. */
1265 if (re_node_set_contains (eps_via_nodes, dest_node))
1266 return candidate;
1267
1268 /* Otherwise, push the second epsilon-transition on the fail stack. */
1269 else if (fs != NULL
1270 && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1271 eps_via_nodes))
1272 return REG_ERROR;
1273
1274 /* We know we are going to exit. */
1275 break;
1276 }
1277 }
1278 return dest_node;
1279 }
1280 else
1281 {
1282 Idx naccepted = 0;
1283 re_token_type_t type = dfa->nodes[node].type;
1284
1285#ifdef RE_ENABLE_I18N
1286 if (dfa->nodes[node].accept_mb)
1287 naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1288 else
1289#endif /* RE_ENABLE_I18N */
1290 if (type == OP_BACK_REF)
1291 {
1292 Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1293 naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1294 if (fs != NULL)
1295 {
1296 if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1297 return REG_MISSING;
1298 else if (naccepted)
1299 {
1300 char *buf = (char *) re_string_get_buffer (&mctx->input);
1301 if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1302 naccepted) != 0)
1303 return REG_MISSING;
1304 }
1305 }
1306
1307 if (naccepted == 0)
1308 {
1309 Idx dest_node;
1310 ok = re_node_set_insert (eps_via_nodes, node);
1311 if (BE (! ok, 0))
1312 return REG_ERROR;
1313 dest_node = dfa->edests[node].elems[0];
1314 if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1315 dest_node))
1316 return dest_node;
1317 }
1318 }
1319
1320 if (naccepted != 0
1321 || check_node_accept (mctx, dfa->nodes + node, *pidx))
1322 {
1323 Idx dest_node = dfa->nexts[node];
1324 *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1325 if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1326 || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1327 dest_node)))
1328 return REG_MISSING;
1329 re_node_set_empty (eps_via_nodes);
1330 return dest_node;
1331 }
1332 }
1333 return REG_MISSING;
1334}
1335
1336static reg_errcode_t
1337internal_function
1338push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1339 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1340{
1341 reg_errcode_t err;
1342 Idx num = fs->num++;
1343 if (fs->num == fs->alloc)
1344 {
1345 struct re_fail_stack_ent_t *new_array =
1346 re_x2realloc (fs->stack, struct re_fail_stack_ent_t, &fs->alloc);
1347 if (new_array == NULL)
1348 return REG_ESPACE;
1349 fs->stack = new_array;
1350 }
1351 fs->stack[num].idx = str_idx;
1352 fs->stack[num].node = dest_node;
1353 fs->stack[num].regs = re_xmalloc (regmatch_t, nregs);
1354 if (fs->stack[num].regs == NULL)
1355 return REG_ESPACE;
1356 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1357 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1358 return err;
1359}
1360
1361static Idx
1362internal_function
1363pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx,
1364 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1365{
1366 Idx num = --fs->num;
1367 assert (REG_VALID_INDEX (num));
1368 *pidx = fs->stack[num].idx;
1369 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1370 re_node_set_free (eps_via_nodes);
1371 re_free (fs->stack[num].regs);
1372 *eps_via_nodes = fs->stack[num].eps_via_nodes;
1373 return fs->stack[num].node;
1374}
1375
1376/* Set the positions where the subexpressions are starts/ends to registers
1377 PMATCH.
1378 Note: We assume that pmatch[0] is already set, and
1379 pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
1380
1381static reg_errcode_t
1382internal_function
1383set_regs (const regex_t *preg, const re_match_context_t *mctx,
1384 size_t nmatch, regmatch_t *pmatch, bool fl_backtrack)
1385{
1386 re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
1387 Idx idx, cur_node;
1388 re_node_set eps_via_nodes;
1389 struct re_fail_stack_t *fs;
1390 struct re_fail_stack_t fs_body = { 0, 2, NULL };
1391 regmatch_t *prev_idx_match;
1392 bool prev_idx_match_malloced = false;
1393
1394#ifdef DEBUG
1395 assert (nmatch > 1);
1396 assert (mctx->state_log != NULL);
1397#endif
1398 if (fl_backtrack)
1399 {
1400 fs = &fs_body;
1401 fs->stack = re_xmalloc (struct re_fail_stack_ent_t, fs->alloc);
1402 if (fs->stack == NULL)
1403 return REG_ESPACE;
1404 }
1405 else
1406 fs = NULL;
1407
1408 cur_node = dfa->init_node;
1409 re_node_set_init_empty (&eps_via_nodes);
1410
1411 if (re_alloc_oversized (nmatch, sizeof (regmatch_t)))
1412 {
1413 free_fail_stack_return (fs);
1414 return REG_ESPACE;
1415 }
1416 if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1417 prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1418 else
1419 {
1420 prev_idx_match = re_malloc (regmatch_t, nmatch);
1421 if (prev_idx_match == NULL)
1422 {
1423 free_fail_stack_return (fs);
1424 return REG_ESPACE;
1425 }
1426 prev_idx_match_malloced = true;
1427 }
1428 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1429
1430 for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1431 {
1432 update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1433
1434 if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1435 {
1436 Idx reg_idx;
1437 if (fs)
1438 {
1439 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1440 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1441 break;
1442 if (reg_idx == nmatch)
1443 {
1444 re_node_set_free (&eps_via_nodes);
1445 if (prev_idx_match_malloced)
1446 re_free (prev_idx_match);
1447 return free_fail_stack_return (fs);
1448 }
1449 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1450 &eps_via_nodes);
1451 }
1452 else
1453 {
1454 re_node_set_free (&eps_via_nodes);
1455 if (prev_idx_match_malloced)
1456 re_free (prev_idx_match);
1457 return REG_NOERROR;
1458 }
1459 }
1460
1461 /* Proceed to next node. */
1462 cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1463 &eps_via_nodes, fs);
1464
1465 if (BE (! REG_VALID_INDEX (cur_node), 0))
1466 {
1467 if (BE (cur_node == REG_ERROR, 0))
1468 {
1469 re_node_set_free (&eps_via_nodes);
1470 if (prev_idx_match_malloced)
1471 re_free (prev_idx_match);
1472 free_fail_stack_return (fs);
1473 return REG_ESPACE;
1474 }
1475 if (fs)
1476 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1477 &eps_via_nodes);
1478 else
1479 {
1480 re_node_set_free (&eps_via_nodes);
1481 if (prev_idx_match_malloced)
1482 re_free (prev_idx_match);
1483 return REG_NOMATCH;
1484 }
1485 }
1486 }
1487 re_node_set_free (&eps_via_nodes);
1488 if (prev_idx_match_malloced)
1489 re_free (prev_idx_match);
1490 return free_fail_stack_return (fs);
1491}
1492
1493static reg_errcode_t
1494internal_function
1495free_fail_stack_return (struct re_fail_stack_t *fs)
1496{
1497 if (fs)
1498 {
1499 Idx fs_idx;
1500 for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1501 {
1502 re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1503 re_free (fs->stack[fs_idx].regs);
1504 }
1505 re_free (fs->stack);
1506 }
1507 return REG_NOERROR;
1508}
1509
1510static void
1511internal_function
1512update_regs (re_dfa_t *dfa, regmatch_t *pmatch, regmatch_t *prev_idx_match,
1513 Idx cur_node, Idx cur_idx, Idx nmatch)
1514{
1515 int type = dfa->nodes[cur_node].type;
1516 if (type == OP_OPEN_SUBEXP)
1517 {
1518 Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1519
1520 /* We are at the first node of this sub expression. */
1521 if (reg_num < nmatch)
1522 {
1523 pmatch[reg_num].rm_so = cur_idx;
1524 pmatch[reg_num].rm_eo = -1;
1525 }
1526 }
1527 else if (type == OP_CLOSE_SUBEXP)
1528 {
1529 Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1530 if (reg_num < nmatch)
1531 {
1532 /* We are at the last node of this sub expression. */
1533 if (pmatch[reg_num].rm_so < cur_idx)
1534 {
1535 pmatch[reg_num].rm_eo = cur_idx;
1536 /* This is a non-empty match or we are not inside an optional
1537 subexpression. Accept this right away. */
1538 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1539 }
1540 else
1541 {
1542 if (dfa->nodes[cur_node].opt_subexp
1543 && prev_idx_match[reg_num].rm_so != -1)
1544 /* We transited through an empty match for an optional
1545 subexpression, like (a?)*, and this is not the subexp's
1546 first match. Copy back the old content of the registers
1547 so that matches of an inner subexpression are undone as
1548 well, like in ((a?))*. */
1549 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1550 else
1551 /* We completed a subexpression, but it may be part of
1552 an optional one, so do not update PREV_IDX_MATCH. */
1553 pmatch[reg_num].rm_eo = cur_idx;
1554 }
1555 }
1556 }
1557}
1558
1559/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1560 and sifts the nodes in each state according to the following rules.
1561 The updated state_log will be written to STATE_LOG.
1562
1563 Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1564 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1565 If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1566 the LAST_NODE, we throw away the node `a'.
1567 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1568 string `s' and transit to `b':
1569 i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1570 away the node `a'.
1571 ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1572 thrown away, we throw away the node `a'.
1573 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1574 i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1575 node `a'.
1576 ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1577 we throw away the node `a'. */
1578
/* True when STATE is non-NULL and its node set contains NODE.  */
#define STATE_NODE_CONTAINS(state,node) \
  (NULL != (state) && re_node_set_contains (&(state)->nodes, node))
1581
1582static reg_errcode_t
1583internal_function
1584sift_states_backward (re_match_context_t *mctx, re_sift_context_t *sctx)
1585{
1586 reg_errcode_t err;
1587 int null_cnt = 0;
1588 Idx str_idx = sctx->last_str_idx;
1589 re_node_set cur_dest;
1590
1591#ifdef DEBUG
1592 assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1593#endif
1594
1595 /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1596 transit to the last_node and the last_node itself. */
1597 err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1598 if (BE (err != REG_NOERROR, 0))
1599 return err;
1600 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1601 if (BE (err != REG_NOERROR, 0))
1602 goto free_return;
1603
1604 /* Then check each states in the state_log. */
1605 while (str_idx > 0)
1606 {
1607 /* Update counters. */
1608 null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1609 if (null_cnt > mctx->max_mb_elem_len)
1610 {
1611 memset (sctx->sifted_states, '\0',
1612 sizeof (re_dfastate_t *) * str_idx);
1613 re_node_set_free (&cur_dest);
1614 return REG_NOERROR;
1615 }
1616 re_node_set_empty (&cur_dest);
1617 --str_idx;
1618
1619 if (mctx->state_log[str_idx])
1620 {
1621 err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1622 if (BE (err != REG_NOERROR, 0))
1623 goto free_return;
1624 }
1625
1626 /* Add all the nodes which satisfy the following conditions:
1627 - It can epsilon transit to a node in CUR_DEST.
1628 - It is in CUR_SRC.
1629 And update state_log. */
1630 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1631 if (BE (err != REG_NOERROR, 0))
1632 goto free_return;
1633 }
1634 err = REG_NOERROR;
1635 free_return:
1636 re_node_set_free (&cur_dest);
1637 return err;
1638}
1639
1640static reg_errcode_t
1641internal_function
1642build_sifted_states (re_match_context_t *mctx, re_sift_context_t *sctx,
1643 Idx str_idx, re_node_set *cur_dest)
1644{
1645 re_dfa_t *const dfa = mctx->dfa;
1646 re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1647 Idx i;
1648
1649 /* Then build the next sifted state.
1650 We build the next sifted state on `cur_dest', and update
1651 `sifted_states[str_idx]' with `cur_dest'.
1652 Note:
1653 `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1654 `cur_src' points the node_set of the old `state_log[str_idx]'
1655 (with the epsilon nodes pre-filtered out). */
1656 for (i = 0; i < cur_src->nelem; i++)
1657 {
1658 Idx prev_node = cur_src->elems[i];
1659 int naccepted = 0;
1660 bool ok;
1661
1662#ifdef DEBUG
1663 re_token_type_t type = dfa->nodes[prev_node].type;
1664 assert (!IS_EPSILON_NODE (type));
1665#endif
1666#ifdef RE_ENABLE_I18N
1667 /* If the node may accept `multi byte'. */
1668 if (dfa->nodes[prev_node].accept_mb)
1669 naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1670 str_idx, sctx->last_str_idx);
1671#endif /* RE_ENABLE_I18N */
1672
1673 /* We don't check backreferences here.
1674 See update_cur_sifted_state(). */
1675 if (!naccepted
1676 && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1677 && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1678 dfa->nexts[prev_node]))
1679 naccepted = 1;
1680
1681 if (naccepted == 0)
1682 continue;
1683
1684 if (sctx->limits.nelem)
1685 {
1686 Idx to_idx = str_idx + naccepted;
1687 if (check_dst_limits (mctx, &sctx->limits,
1688 dfa->nexts[prev_node], to_idx,
1689 prev_node, str_idx))
1690 continue;
1691 }
1692 ok = re_node_set_insert (cur_dest, prev_node);
1693 if (BE (! ok, 0))
1694 return REG_ESPACE;
1695 }
1696
1697 return REG_NOERROR;
1698}
1699
1700/* Helper functions. */
1701
1702static reg_errcode_t
1703internal_function
1704clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1705{
1706 Idx top = mctx->state_log_top;
1707
1708 if (next_state_log_idx >= mctx->input.bufs_len
1709 || (next_state_log_idx >= mctx->input.valid_len
1710 && mctx->input.valid_len < mctx->input.len))
1711 {
1712 reg_errcode_t err;
1713 err = extend_buffers (mctx);
1714 if (BE (err != REG_NOERROR, 0))
1715 return err;
1716 }
1717
1718 if (top < next_state_log_idx)
1719 {
1720 memset (mctx->state_log + top + 1, '\0',
1721 sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1722 mctx->state_log_top = next_state_log_idx;
1723 }
1724 return REG_NOERROR;
1725}
1726
1727static reg_errcode_t
1728internal_function
1729merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst, re_dfastate_t **src,
1730 Idx num)
1731{
1732 Idx st_idx;
1733 reg_errcode_t err;
1734 for (st_idx = 0; st_idx < num; ++st_idx)
1735 {
1736 if (dst[st_idx] == NULL)
1737 dst[st_idx] = src[st_idx];
1738 else if (src[st_idx] != NULL)
1739 {
1740 re_node_set merged_set;
1741 err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1742 &src[st_idx]->nodes);
1743 if (BE (err != REG_NOERROR, 0))
1744 return err;
1745 dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1746 re_node_set_free (&merged_set);
1747 if (BE (err != REG_NOERROR, 0))
1748 return err;
1749 }
1750 }
1751 return REG_NOERROR;
1752}
1753
1754static reg_errcode_t
1755internal_function
1756update_cur_sifted_state (re_match_context_t *mctx, re_sift_context_t *sctx,
1757 Idx str_idx, re_node_set *dest_nodes)
1758{
1759 re_dfa_t *const dfa = mctx->dfa;
1760 reg_errcode_t err;
1761 const re_node_set *candidates;
1762 candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1763 : &mctx->state_log[str_idx]->nodes);
1764
1765 if (dest_nodes->nelem == 0)
1766 sctx->sifted_states[str_idx] = NULL;
1767 else
1768 {
1769 if (candidates)
1770 {
1771 /* At first, add the nodes which can epsilon transit to a node in
1772 DEST_NODE. */
1773 err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1774 if (BE (err != REG_NOERROR, 0))
1775 return err;
1776
1777 /* Then, check the limitations in the current sift_context. */
1778 if (sctx->limits.nelem)
1779 {
1780 err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1781 mctx->bkref_ents, str_idx);
1782 if (BE (err != REG_NOERROR, 0))
1783 return err;
1784 }
1785 }
1786
1787 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1788 if (BE (err != REG_NOERROR, 0))
1789 return err;
1790 }
1791
1792 if (candidates && mctx->state_log[str_idx]->has_backref)
1793 {
1794 err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1795 if (BE (err != REG_NOERROR, 0))
1796 return err;
1797 }
1798 return REG_NOERROR;
1799}
1800
1801static reg_errcode_t
1802internal_function
1803add_epsilon_src_nodes (re_dfa_t *dfa, re_node_set *dest_nodes,
1804 const re_node_set *candidates)
1805{
1806 reg_errcode_t err = REG_NOERROR;
1807 Idx i;
1808
1809 re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1810 if (BE (err != REG_NOERROR, 0))
1811 return err;
1812
1813 if (!state->inveclosure.alloc)
1814 {
1815 err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1816 if (BE (err != REG_NOERROR, 0))
1817 return REG_ESPACE;
1818 for (i = 0; i < dest_nodes->nelem; i++)
1819 re_node_set_merge (&state->inveclosure,
1820 dfa->inveclosures + dest_nodes->elems[i]);
1821 }
1822 return re_node_set_add_intersect (dest_nodes, candidates,
1823 &state->inveclosure);
1824}
1825
1826static reg_errcode_t
1827internal_function
1828sub_epsilon_src_nodes (re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1829 const re_node_set *candidates)
1830{
1831 Idx ecl_idx;
1832 reg_errcode_t err;
1833 re_node_set *inv_eclosure = dfa->inveclosures + node;
1834 re_node_set except_nodes;
1835 re_node_set_init_empty (&except_nodes);
1836 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1837 {
1838 Idx cur_node = inv_eclosure->elems[ecl_idx];
1839 if (cur_node == node)
1840 continue;
1841 if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1842 {
1843 Idx edst1 = dfa->edests[cur_node].elems[0];
1844 Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1845 ? dfa->edests[cur_node].elems[1] : REG_MISSING);
1846 if ((!re_node_set_contains (inv_eclosure, edst1)
1847 && re_node_set_contains (dest_nodes, edst1))
1848 || (REG_VALID_NONZERO_INDEX (edst2)
1849 && !re_node_set_contains (inv_eclosure, edst2)
1850 && re_node_set_contains (dest_nodes, edst2)))
1851 {
1852 err = re_node_set_add_intersect (&except_nodes, candidates,
1853 dfa->inveclosures + cur_node);
1854 if (BE (err != REG_NOERROR, 0))
1855 {
1856 re_node_set_free (&except_nodes);
1857 return err;
1858 }
1859 }
1860 }
1861 }
1862 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1863 {
1864 Idx cur_node = inv_eclosure->elems[ecl_idx];
1865 if (!re_node_set_contains (&except_nodes, cur_node))
1866 {
1867 Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1868 re_node_set_remove_at (dest_nodes, idx);
1869 }
1870 }
1871 re_node_set_free (&except_nodes);
1872 return REG_NOERROR;
1873}
1874
1875static bool
1876internal_function
1877check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1878 Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1879{
1880 re_dfa_t *const dfa = mctx->dfa;
1881 Idx lim_idx, src_pos, dst_pos;
1882
1883 Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1884 Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1885 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1886 {
1887 Idx subexp_idx;
1888 struct re_backref_cache_entry *ent;
1889 ent = mctx->bkref_ents + limits->elems[lim_idx];
1890 subexp_idx = dfa->nodes[ent->node].opr.idx;
1891
1892 dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1893 subexp_idx, dst_node, dst_idx,
1894 dst_bkref_idx);
1895 src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1896 subexp_idx, src_node, src_idx,
1897 src_bkref_idx);
1898
1899 /* In case of:
1900 <src> <dst> ( <subexp> )
1901 ( <subexp> ) <src> <dst>
1902 ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
1903 if (src_pos == dst_pos)
1904 continue; /* This is unrelated limitation. */
1905 else
1906 return true;
1907 }
1908 return false;
1909}
1910
1911static int
1912internal_function
1913check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1914 Idx subexp_idx, Idx from_node, Idx bkref_idx)
1915{
1916 re_dfa_t *const dfa = mctx->dfa;
1917 re_node_set *eclosures = dfa->eclosures + from_node;
1918 Idx node_idx;
1919
1920 /* Else, we are on the boundary: examine the nodes on the epsilon
1921 closure. */
1922 for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1923 {
1924 Idx node = eclosures->elems[node_idx];
1925 switch (dfa->nodes[node].type)
1926 {
1927 case OP_BACK_REF:
1928 if (bkref_idx != REG_MISSING)
1929 {
1930 struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1931 do
1932 {
1933 Idx dst;
1934 int cpos;
1935
1936 if (ent->node != node)
1937 continue;
1938
1939 if (subexp_idx < BITSET_WORD_BITS
1940 && !(ent->eps_reachable_subexps_map
1941 & ((bitset_word) 1 << subexp_idx)))
1942 continue;
1943
1944 /* Recurse trying to reach the OP_OPEN_SUBEXP and
1945 OP_CLOSE_SUBEXP cases below. But, if the
1946 destination node is the same node as the source
1947 node, don't recurse because it would cause an
1948 infinite loop: a regex that exhibits this behavior
1949 is ()\1*\1* */
1950 dst = dfa->edests[node].elems[0];
1951 if (dst == from_node)
1952 {
1953 if (boundaries & 1)
1954 return -1;
1955 else /* if (boundaries & 2) */
1956 return 0;
1957 }
1958
1959 cpos =
1960 check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1961 dst, bkref_idx);
1962 if (cpos == -1 /* && (boundaries & 1) */)
1963 return -1;
1964 if (cpos == 0 && (boundaries & 2))
1965 return 0;
1966
1967 if (subexp_idx < BITSET_WORD_BITS)
1968 ent->eps_reachable_subexps_map &=
1969 ~ ((bitset_word) 1 << subexp_idx);
1970 }
1971 while (ent++->more);
1972 }
1973 break;
1974
1975 case OP_OPEN_SUBEXP:
1976 if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1977 return -1;
1978 break;
1979
1980 case OP_CLOSE_SUBEXP:
1981 if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1982 return 0;
1983 break;
1984
1985 default:
1986 break;
1987 }
1988 }
1989
1990 return (boundaries & 2) ? 1 : 0;
1991}
1992
1993static int
1994internal_function
1995check_dst_limits_calc_pos (const re_match_context_t *mctx,
1996 Idx limit, Idx subexp_idx,
1997 Idx from_node, Idx str_idx, Idx bkref_idx)
1998{
1999 struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2000 int boundaries;
2001
2002 /* If we are outside the range of the subexpression, return -1 or 1. */
2003 if (str_idx < lim->subexp_from)
2004 return -1;
2005
2006 if (lim->subexp_to < str_idx)
2007 return 1;
2008
2009 /* If we are within the subexpression, return 0. */
2010 boundaries = (str_idx == lim->subexp_from);
2011 boundaries |= (str_idx == lim->subexp_to) << 1;
2012 if (boundaries == 0)
2013 return 0;
2014
2015 /* Else, examine epsilon closure. */
2016 return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2017 from_node, bkref_idx);
2018}
2019
2020/* Check the limitations of sub expressions LIMITS, and remove the nodes
2021 which are against limitations from DEST_NODES. */
2022
2023static reg_errcode_t
2024internal_function
2025check_subexp_limits (re_dfa_t *dfa, re_node_set *dest_nodes,
2026 const re_node_set *candidates, re_node_set *limits,
2027 struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2028{
2029 reg_errcode_t err;
2030 Idx node_idx, lim_idx;
2031
2032 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2033 {
2034 Idx subexp_idx;
2035 struct re_backref_cache_entry *ent;
2036 ent = bkref_ents + limits->elems[lim_idx];
2037
2038 if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2039 continue; /* This is unrelated limitation. */
2040
2041 subexp_idx = dfa->nodes[ent->node].opr.idx;
2042 if (ent->subexp_to == str_idx)
2043 {
2044 Idx ops_node = REG_MISSING;
2045 Idx cls_node = REG_MISSING;
2046 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2047 {
2048 Idx node = dest_nodes->elems[node_idx];
2049 re_token_type_t type = dfa->nodes[node].type;
2050 if (type == OP_OPEN_SUBEXP
2051 && subexp_idx == dfa->nodes[node].opr.idx)
2052 ops_node = node;
2053 else if (type == OP_CLOSE_SUBEXP
2054 && subexp_idx == dfa->nodes[node].opr.idx)
2055 cls_node = node;
2056 }
2057
2058 /* Check the limitation of the open subexpression. */
2059 /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
2060 if (REG_VALID_INDEX (ops_node))
2061 {
2062 err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2063 candidates);
2064 if (BE (err != REG_NOERROR, 0))
2065 return err;
2066 }
2067
2068 /* Check the limitation of the close subexpression. */
2069 if (REG_VALID_INDEX (cls_node))
2070 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2071 {
2072 Idx node = dest_nodes->elems[node_idx];
2073 if (!re_node_set_contains (dfa->inveclosures + node,
2074 cls_node)
2075 && !re_node_set_contains (dfa->eclosures + node,
2076 cls_node))
2077 {
2078 /* It is against this limitation.
2079 Remove it form the current sifted state. */
2080 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2081 candidates);
2082 if (BE (err != REG_NOERROR, 0))
2083 return err;
2084 --node_idx;
2085 }
2086 }
2087 }
2088 else /* (ent->subexp_to != str_idx) */
2089 {
2090 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2091 {
2092 Idx node = dest_nodes->elems[node_idx];
2093 re_token_type_t type = dfa->nodes[node].type;
2094 if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2095 {
2096 if (subexp_idx != dfa->nodes[node].opr.idx)
2097 continue;
2098 /* It is against this limitation.
2099 Remove it form the current sifted state. */
2100 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2101 candidates);
2102 if (BE (err != REG_NOERROR, 0))
2103 return err;
2104 }
2105 }
2106 }
2107 }
2108 return REG_NOERROR;
2109}
2110
2111static reg_errcode_t
2112internal_function
2113sift_states_bkref (re_match_context_t *mctx, re_sift_context_t *sctx,
2114 Idx str_idx, const re_node_set *candidates)
2115{
2116 re_dfa_t *const dfa = mctx->dfa;
2117 reg_errcode_t err;
2118 Idx node_idx, node;
2119 re_sift_context_t local_sctx;
2120 Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2121
2122 if (first_idx == REG_MISSING)
2123 return REG_NOERROR;
2124
2125 local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
2126
2127 for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2128 {
2129 Idx enabled_idx;
2130 re_token_type_t type;
2131 struct re_backref_cache_entry *entry;
2132 node = candidates->elems[node_idx];
2133 type = dfa->nodes[node].type;
2134 /* Avoid infinite loop for the REs like "()\1+". */
2135 if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2136 continue;
2137 if (type != OP_BACK_REF)
2138 continue;
2139
2140 entry = mctx->bkref_ents + first_idx;
2141 enabled_idx = first_idx;
2142 do
2143 {
2144 bool ok;
2145 Idx subexp_len, to_idx, dst_node;
2146 re_dfastate_t *cur_state;
2147
2148 if (entry->node != node)
2149 continue;
2150 subexp_len = entry->subexp_to - entry->subexp_from;
2151 to_idx = str_idx + subexp_len;
2152 dst_node = (subexp_len ? dfa->nexts[node]
2153 : dfa->edests[node].elems[0]);
2154
2155 if (to_idx > sctx->last_str_idx
2156 || sctx->sifted_states[to_idx] == NULL
2157 || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2158 || check_dst_limits (mctx, &sctx->limits, node,
2159 str_idx, dst_node, to_idx))
2160 continue;
2161
2162 if (local_sctx.sifted_states == NULL)
2163 {
2164 local_sctx = *sctx;
2165 err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2166 if (BE (err != REG_NOERROR, 0))
2167 goto free_return;
2168 }
2169 local_sctx.last_node = node;
2170 local_sctx.last_str_idx = str_idx;
2171 ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2172 if (BE (! ok, 0))
2173 {
2174 err = REG_ESPACE;
2175 goto free_return;
2176 }
2177 cur_state = local_sctx.sifted_states[str_idx];
2178 err = sift_states_backward (mctx, &local_sctx);
2179 if (BE (err != REG_NOERROR, 0))
2180 goto free_return;
2181 if (sctx->limited_states != NULL)
2182 {
2183 err = merge_state_array (dfa, sctx->limited_states,
2184 local_sctx.sifted_states,
2185 str_idx + 1);
2186 if (BE (err != REG_NOERROR, 0))
2187 goto free_return;
2188 }
2189 local_sctx.sifted_states[str_idx] = cur_state;
2190 re_node_set_remove (&local_sctx.limits, enabled_idx);
2191
2192 /* mctx->bkref_ents may have changed, reload the pointer. */
2193 entry = mctx->bkref_ents + enabled_idx;
2194 }
2195 while (enabled_idx++, entry++->more);
2196 }
2197 err = REG_NOERROR;
2198 free_return:
2199 if (local_sctx.sifted_states != NULL)
2200 {
2201 re_node_set_free (&local_sctx.limits);
2202 }
2203
2204 return err;
2205}
2206
2207
2208#ifdef RE_ENABLE_I18N
2209static int
2210internal_function
2211sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2212 Idx node_idx, Idx str_idx, Idx max_str_idx)
2213{
2214 re_dfa_t *const dfa = mctx->dfa;
2215 int naccepted;
2216 /* Check the node can accept `multi byte'. */
2217 naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2218 if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2219 !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2220 dfa->nexts[node_idx]))
2221 /* The node can't accept the `multi byte', or the
2222 destination was already thrown away, then the node
2223 could't accept the current input `multi byte'. */
2224 naccepted = 0;
2225 /* Otherwise, it is sure that the node could accept
2226 `naccepted' bytes input. */
2227 return naccepted;
2228}
2229#endif /* RE_ENABLE_I18N */
2230
2231
2232/* Functions for state transition. */
2233
2234/* Return the next state to which the current state STATE will transit by
2235 accepting the current input byte, and update STATE_LOG if necessary.
2236 If STATE can accept a multibyte char/collating element/back reference
2237 update the destination of STATE_LOG. */
2238
2239static re_dfastate_t *
2240internal_function
2241transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2242 re_dfastate_t *state)
2243{
2244 re_dfastate_t **trtable;
2245 unsigned char ch;
2246
2247#ifdef RE_ENABLE_I18N
2248 /* If the current state can accept multibyte. */
2249 if (BE (state->accept_mb, 0))
2250 {
2251 *err = transit_state_mb (mctx, state);
2252 if (BE (*err != REG_NOERROR, 0))
2253 return NULL;
2254 }
2255#endif /* RE_ENABLE_I18N */
2256
2257 /* Then decide the next state with the single byte. */
2258#if 0
2259 if (0)
2260 /* don't use transition table */
2261 return transit_state_sb (err, mctx, state);
2262#endif
2263
2264 /* Use transition table */
2265 ch = re_string_fetch_byte (&mctx->input);
2266 for (;;)
2267 {
2268 trtable = state->trtable;
2269 if (BE (trtable != NULL, 1))
2270 return trtable[ch];
2271
2272 trtable = state->word_trtable;
2273 if (BE (trtable != NULL, 1))
2274 {
2275 unsigned int context;
2276 context
2277 = re_string_context_at (&mctx->input,
2278 re_string_cur_idx (&mctx->input) - 1,
2279 mctx->eflags);
2280 if (IS_WORD_CONTEXT (context))
2281 return trtable[ch + SBC_MAX];
2282 else
2283 return trtable[ch];
2284 }
2285
2286 if (!build_trtable (mctx->dfa, state))
2287 {
2288 *err = REG_ESPACE;
2289 return NULL;
2290 }
2291
2292 /* Retry, we now have a transition table. */
2293 }
2294}
2295
2296/* Update the state_log if we need */
2297re_dfastate_t *
2298internal_function
2299merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2300 re_dfastate_t *next_state)
2301{
2302 re_dfa_t *const dfa = mctx->dfa;
2303 Idx cur_idx = re_string_cur_idx (&mctx->input);
2304
2305 if (cur_idx > mctx->state_log_top)
2306 {
2307 mctx->state_log[cur_idx] = next_state;
2308 mctx->state_log_top = cur_idx;
2309 }
2310 else if (mctx->state_log[cur_idx] == 0)
2311 {
2312 mctx->state_log[cur_idx] = next_state;
2313 }
2314 else
2315 {
2316 re_dfastate_t *pstate;
2317 unsigned int context;
2318 re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2319 /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2320 the destination of a multibyte char/collating element/
2321 back reference. Then the next state is the union set of
2322 these destinations and the results of the transition table. */
2323 pstate = mctx->state_log[cur_idx];
2324 log_nodes = pstate->entrance_nodes;
2325 if (next_state != NULL)
2326 {
2327 table_nodes = next_state->entrance_nodes;
2328 *err = re_node_set_init_union (&next_nodes, table_nodes,
2329 log_nodes);
2330 if (BE (*err != REG_NOERROR, 0))
2331 return NULL;
2332 }
2333 else
2334 next_nodes = *log_nodes;
2335 /* Note: We already add the nodes of the initial state,
2336 then we don't need to add them here. */
2337
2338 context = re_string_context_at (&mctx->input,
2339 re_string_cur_idx (&mctx->input) - 1,
2340 mctx->eflags);
2341 next_state = mctx->state_log[cur_idx]
2342 = re_acquire_state_context (err, dfa, &next_nodes, context);
2343 /* We don't need to check errors here, since the return value of
2344 this function is next_state and ERR is already set. */
2345
2346 if (table_nodes != NULL)
2347 re_node_set_free (&next_nodes);
2348 }
2349
2350 if (BE (dfa->nbackref, 0) && next_state != NULL)
2351 {
2352 /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2353 later. We must check them here, since the back references in the
2354 next state might use them. */
2355 *err = check_subexp_matching_top (mctx, &next_state->nodes,
2356 cur_idx);
2357 if (BE (*err != REG_NOERROR, 0))
2358 return NULL;
2359
2360 /* If the next state has back references. */
2361 if (next_state->has_backref)
2362 {
2363 *err = transit_state_bkref (mctx, &next_state->nodes);
2364 if (BE (*err != REG_NOERROR, 0))
2365 return NULL;
2366 next_state = mctx->state_log[cur_idx];
2367 }
2368 }
2369
2370 return next_state;
2371}
2372
2373/* Skip bytes in the input that correspond to part of a
2374 multi-byte match, then look in the log for a state
2375 from which to restart matching. */
2376static re_dfastate_t *
2377internal_function
2378find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2379{
2380 re_dfastate_t *cur_state = NULL;
2381 do
2382 {
2383 Idx max = mctx->state_log_top;
2384 Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2385
2386 do
2387 {
2388 if (++cur_str_idx > max)
2389 return NULL;
2390 re_string_skip_bytes (&mctx->input, 1);
2391 }
2392 while (mctx->state_log[cur_str_idx] == NULL);
2393
2394 cur_state = merge_state_with_log (err, mctx, NULL);
2395 }
2396 while (*err == REG_NOERROR && cur_state == NULL);
2397 return cur_state;
2398}
2399
2400/* Helper functions for transit_state. */
2401
2402/* From the node set CUR_NODES, pick up the nodes whose types are
2403 OP_OPEN_SUBEXP and which have corresponding back references in the regular
2404 expression. And register them to use them later for evaluating the
2405 correspoding back references. */
2406
2407static reg_errcode_t
2408internal_function
2409check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2410 Idx str_idx)
2411{
2412 re_dfa_t *const dfa = mctx->dfa;
2413 Idx node_idx;
2414 reg_errcode_t err;
2415
2416 /* TODO: This isn't efficient.
2417 Because there might be more than one nodes whose types are
2418 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2419 nodes.
2420 E.g. RE: (a){2} */
2421 for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2422 {
2423 Idx node = cur_nodes->elems[node_idx];
2424 if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2425 && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2426 && (dfa->used_bkref_map
2427 & ((bitset_word) 1 << dfa->nodes[node].opr.idx)))
2428 {
2429 err = match_ctx_add_subtop (mctx, node, str_idx);
2430 if (BE (err != REG_NOERROR, 0))
2431 return err;
2432 }
2433 }
2434 return REG_NOERROR;
2435}
2436
#if 0
/* Compute the state reached from STATE by accepting the input byte at the
   current position of MCTX->input, then skip one byte of input.
   When building the node set fails, the error code is stored in *ERR and
   NULL is returned.  (This function is currently compiled out.)  */

static re_dfastate_t *
transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
                  re_dfastate_t *state)
{
  re_dfa_t *const dfa = mctx->dfa;
  const Idx pos = re_string_cur_idx (&mctx->input);
  re_node_set targets;
  re_dfastate_t *result;
  unsigned int ctx;
  Idx n;

  *err = re_node_set_alloc (&targets, state->nodes.nelem + 1);
  if (BE (*err != REG_NOERROR, 0))
    return NULL;

  n = 0;
  while (n < state->nodes.nelem)
    {
      const Idx src = state->nodes.elems[n++];

      if (!check_node_accept (mctx, dfa->nodes + src, pos))
        continue;

      /* SRC accepts the byte: add the epsilon closure of its successor.  */
      *err = re_node_set_merge (&targets, dfa->eclosures + dfa->nexts[src]);
      if (BE (*err != REG_NOERROR, 0))
        {
          re_node_set_free (&targets);
          return NULL;
        }
    }

  ctx = re_string_context_at (&mctx->input, pos, mctx->eflags);
  result = re_acquire_state_context (err, dfa, &targets, ctx);
  /* No error check is needed here: RESULT is what we return, and *ERR
     has already been set by the call above.  */

  re_node_set_free (&targets);
  re_string_skip_bytes (&mctx->input, 1);
  return result;
}
#endif
2478
#ifdef RE_ENABLE_I18N
/* For each node of PSTATE that is flagged accept_mb and accepts one or more
   bytes at the current input position, merge the epsilon closure of the
   node's successor into the state log entry located just past the accepted
   bytes.  MCTX->max_mb_elem_len is raised to the longest length seen.
   Return REG_NOERROR, or the first error encountered.  */

static reg_errcode_t
internal_function
transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
{
  re_dfa_t *const dfa = mctx->dfa;
  Idx n;

  for (n = 0; n < pstate->nodes.nelem; ++n)
    {
      const Idx src = pstate->nodes.elems[n];
      re_node_set merged, *closure;
      re_dfastate_t *prev_state;
      reg_errcode_t status;
      unsigned int ctx;
      int nbytes;
      Idx target;

      if (!dfa->nodes[src].accept_mb)
        continue;

      if (dfa->nodes[src].constraint)
        {
          ctx = re_string_context_at (&mctx->input,
                                      re_string_cur_idx (&mctx->input),
                                      mctx->eflags);
          if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[src].constraint, ctx))
            continue;
        }

      /* Number of bytes SRC accepts here; zero means no match.  */
      nbytes = check_node_accept_bytes (dfa, src, &mctx->input,
                                        re_string_cur_idx (&mctx->input));
      if (nbytes == 0)
        continue;

      target = re_string_cur_idx (&mctx->input) + nbytes;
      if (mctx->max_mb_elem_len < nbytes)
        mctx->max_mb_elem_len = nbytes;

      status = clean_state_log_if_needed (mctx, target);
      if (BE (status != REG_NOERROR, 0))
        return status;
#ifdef DEBUG
      assert (dfa->nexts[src] != REG_MISSING);
#endif
      closure = dfa->eclosures + dfa->nexts[src];

      prev_state = mctx->state_log[target];
      if (prev_state != NULL)
        {
          /* A state is already logged there: work on a freshly built
             union, which is released again below.  */
          status = re_node_set_init_union (&merged,
                                           prev_state->entrance_nodes,
                                           closure);
          if (BE (status != REG_NOERROR, 0))
            return status;
        }
      else
        /* Shallow copy: MERGED borrows CLOSURE's storage, so it must not
           be freed.  */
        merged = *closure;

      ctx = re_string_context_at (&mctx->input, target - 1, mctx->eflags);
      mctx->state_log[target]
        = re_acquire_state_context (&status, dfa, &merged, ctx);
      if (prev_state != NULL)
        re_node_set_free (&merged);
      if (BE (mctx->state_log[target] == NULL && status != REG_NOERROR, 0))
        return status;
    }
  return REG_NOERROR;
}
#endif /* RE_ENABLE_I18N */
2549
2550static reg_errcode_t
2551internal_function
2552transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2553{
2554 re_dfa_t *const dfa = mctx->dfa;
2555 reg_errcode_t err;
2556 Idx i;
2557 Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2558
2559 for (i = 0; i < nodes->nelem; ++i)
2560 {
2561 Idx dest_str_idx, prev_nelem, bkc_idx;
2562 Idx node_idx = nodes->elems[i];
2563 unsigned int context;
2564 const re_token_t *node = dfa->nodes + node_idx;
2565 re_node_set *new_dest_nodes;
2566
2567 /* Check whether `node' is a backreference or not. */
2568 if (node->type != OP_BACK_REF)
2569 continue;
2570
2571 if (node->constraint)
2572 {
2573 context = re_string_context_at (&mctx->input, cur_str_idx,
2574 mctx->eflags);
2575 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2576 continue;
2577 }
2578
2579 /* `node' is a backreference.
2580 Check the substring which the substring matched. */
2581 bkc_idx = mctx->nbkref_ents;
2582 err = get_subexp (mctx, node_idx, cur_str_idx);
2583 if (BE (err != REG_NOERROR, 0))
2584 goto free_return;
2585
2586 /* And add the epsilon closures (which is `new_dest_nodes') of
2587 the backreference to appropriate state_log. */
2588#ifdef DEBUG
2589 assert (dfa->nexts[node_idx] != REG_MISSING);
2590#endif
2591 for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2592 {
2593 Idx subexp_len;
2594 re_dfastate_t *dest_state;
2595 struct re_backref_cache_entry *bkref_ent;
2596 bkref_ent = mctx->bkref_ents + bkc_idx;
2597 if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2598 continue;
2599 subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2600 new_dest_nodes = (subexp_len == 0
2601 ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2602 : dfa->eclosures + dfa->nexts[node_idx]);
2603 dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2604 - bkref_ent->subexp_from);
2605 context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2606 mctx->eflags);
2607 dest_state = mctx->state_log[dest_str_idx];
2608 prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2609 : mctx->state_log[cur_str_idx]->nodes.nelem);
2610 /* Add `new_dest_node' to state_log. */
2611 if (dest_state == NULL)
2612 {
2613 mctx->state_log[dest_str_idx]
2614 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2615 context);
2616 if (BE (mctx->state_log[dest_str_idx] == NULL
2617 && err != REG_NOERROR, 0))
2618 goto free_return;
2619 }
2620 else
2621 {
2622 re_node_set dest_nodes;
2623 err = re_node_set_init_union (&dest_nodes,
2624 dest_state->entrance_nodes,
2625 new_dest_nodes);
2626 if (BE (err != REG_NOERROR, 0))
2627 {
2628 re_node_set_free (&dest_nodes);
2629 goto free_return;
2630 }
2631 mctx->state_log[dest_str_idx]
2632 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2633 re_node_set_free (&dest_nodes);
2634 if (BE (mctx->state_log[dest_str_idx] == NULL
2635 && err != REG_NOERROR, 0))
2636 goto free_return;
2637 }
2638 /* We need to check recursively if the backreference can epsilon
2639 transit. */
2640 if (subexp_len == 0
2641 && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2642 {
2643 err = check_subexp_matching_top (mctx, new_dest_nodes,
2644 cur_str_idx);
2645 if (BE (err != REG_NOERROR, 0))
2646 goto free_return;
2647 err = transit_state_bkref (mctx, new_dest_nodes);
2648 if (BE (err != REG_NOERROR, 0))
2649 goto free_return;
2650 }
2651 }
2652 }
2653 err = REG_NOERROR;
2654 free_return:
2655 return err;
2656}
2657
2658/* Enumerate all the candidates which the backreference BKREF_NODE can match
2659 at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2660 Note that we might collect inappropriate candidates here.
2661 However, the cost of checking them strictly here is too high, then we
2662 delay these checking for prune_impossible_nodes(). */
2663
2664static reg_errcode_t
2665internal_function
2666get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2667{
2668 re_dfa_t *const dfa = mctx->dfa;
2669 Idx subexp_num, sub_top_idx;
2670 const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2671 /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
2672 Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2673 if (cache_idx != REG_MISSING)
2674 {
2675 const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
2676 do
2677 if (entry->node == bkref_node)
2678 return REG_NOERROR; /* We already checked it. */
2679 while (entry++->more);
2680 }
2681
2682 subexp_num = dfa->nodes[bkref_node].opr.idx;
2683
2684 /* For each sub expression */
2685 for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2686 {
2687 reg_errcode_t err;
2688 re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2689 re_sub_match_last_t *sub_last;
2690 Idx sub_last_idx, sl_str, bkref_str_off;
2691
2692 if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2693 continue; /* It isn't related. */
2694
2695 sl_str = sub_top->str_idx;
2696 bkref_str_off = bkref_str_idx;
2697 /* At first, check the last node of sub expressions we already
2698 evaluated. */
2699 for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2700 {
2701 regoff_t sl_str_diff;
2702 sub_last = sub_top->lasts[sub_last_idx];
2703 sl_str_diff = sub_last->str_idx - sl_str;
2704 /* The matched string by the sub expression match with the substring
2705 at the back reference? */
2706 if (sl_str_diff > 0)
2707 {
2708 if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2709 {
2710 /* Not enough chars for a successful match. */
2711 if (bkref_str_off + sl_str_diff > mctx->input.len)
2712 break;
2713
2714 err = clean_state_log_if_needed (mctx,
2715 bkref_str_off
2716 + sl_str_diff);
2717 if (BE (err != REG_NOERROR, 0))
2718 return err;
2719 buf = (const char *) re_string_get_buffer (&mctx->input);
2720 }
2721 if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2722 break; /* We don't need to search this sub expression any more. */
2723 }
2724 bkref_str_off += sl_str_diff;
2725 sl_str += sl_str_diff;
2726 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2727 bkref_str_idx);
2728
2729 /* Reload buf, since the preceding call might have reallocated
2730 the buffer. */
2731 buf = (const char *) re_string_get_buffer (&mctx->input);
2732
2733 if (err == REG_NOMATCH)
2734 continue;
2735 if (BE (err != REG_NOERROR, 0))
2736 return err;
2737 }
2738
2739 if (sub_last_idx < sub_top->nlasts)
2740 continue;
2741 if (sub_last_idx > 0)
2742 ++sl_str;
2743 /* Then, search for the other last nodes of the sub expression. */
2744 for (; sl_str <= bkref_str_idx; ++sl_str)
2745 {
2746 Idx cls_node;
2747 regoff_t sl_str_off;
2748 const re_node_set *nodes;
2749 sl_str_off = sl_str - sub_top->str_idx;
2750 /* The matched string by the sub expression match with the substring
2751 at the back reference? */
2752 if (sl_str_off > 0)
2753 {
2754 if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2755 {
2756 /* If we are at the end of the input, we cannot match. */
2757 if (bkref_str_off >= mctx->input.len)
2758 break;
2759
2760 err = extend_buffers (mctx);
2761 if (BE (err != REG_NOERROR, 0))
2762 return err;
2763
2764 buf = (const char *) re_string_get_buffer (&mctx->input);
2765 }
2766 if (buf [bkref_str_off++] != buf[sl_str - 1])
2767 break; /* We don't need to search this sub expression
2768 any more. */
2769 }
2770 if (mctx->state_log[sl_str] == NULL)
2771 continue;
2772 /* Does this state have a ')' of the sub expression? */
2773 nodes = &mctx->state_log[sl_str]->nodes;
2774 cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP);
2775 if (cls_node == REG_MISSING)
2776 continue; /* No. */
2777 if (sub_top->path == NULL)
2778 {
2779 sub_top->path = re_calloc (state_array_t,
2780 sl_str - sub_top->str_idx + 1);
2781 if (sub_top->path == NULL)
2782 return REG_ESPACE;
2783 }
2784 /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2785 in the current context? */
2786 err = check_arrival (mctx, sub_top->path, sub_top->node,
2787 sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP);
2788 if (err == REG_NOMATCH)
2789 continue;
2790 if (BE (err != REG_NOERROR, 0))
2791 return err;
2792 sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2793 if (BE (sub_last == NULL, 0))
2794 return REG_ESPACE;
2795 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2796 bkref_str_idx);
2797 if (err == REG_NOMATCH)
2798 continue;
2799 }
2800 }
2801 return REG_NOERROR;
2802}
2803
2804/* Helper functions for get_subexp(). */
2805
2806/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2807 If it can arrive, register the sub expression expressed with SUB_TOP
2808 and SUB_LAST. */
2809
2810static reg_errcode_t
2811internal_function
2812get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2813 re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2814{
2815 reg_errcode_t err;
2816 Idx to_idx;
2817 /* Can the subexpression arrive the back reference? */
2818 err = check_arrival (mctx, &sub_last->path, sub_last->node,
2819 sub_last->str_idx, bkref_node, bkref_str, OP_OPEN_SUBEXP);
2820 if (err != REG_NOERROR)
2821 return err;
2822 err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2823 sub_last->str_idx);
2824 if (BE (err != REG_NOERROR, 0))
2825 return err;
2826 to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2827 return clean_state_log_if_needed (mctx, to_idx);
2828}
2829
2830/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2831 Search '(' if FL_OPEN, or search ')' otherwise.
2832 TODO: This function isn't efficient...
2833 Because there might be more than one nodes whose types are
2834 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2835 nodes.
2836 E.g. RE: (a){2} */
2837
2838static Idx
2839internal_function
2840find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2841 Idx subexp_idx, int type)
2842{
2843 Idx cls_idx;
2844 for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2845 {
2846 Idx cls_node = nodes->elems[cls_idx];
2847 const re_token_t *node = dfa->nodes + cls_node;
2848 if (node->type == type
2849 && node->opr.idx == subexp_idx)
2850 return cls_node;
2851 }
2852 return REG_MISSING;
2853}
2854
2855/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2856 LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2857 heavily reused.
2858 Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
2859
2860static reg_errcode_t
2861internal_function
2862check_arrival (re_match_context_t *mctx, state_array_t *path,
2863 Idx top_node, Idx top_str, Idx last_node, Idx last_str,
2864 int type)
2865{
2866 re_dfa_t *const dfa = mctx->dfa;
2867 reg_errcode_t err;
2868 Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2869 re_dfastate_t *cur_state = NULL;
2870 re_node_set *cur_nodes, next_nodes;
2871 re_dfastate_t **backup_state_log;
2872 unsigned int context;
2873
2874 subexp_num = dfa->nodes[top_node].opr.idx;
2875 /* Extend the buffer if we need. */
2876 if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2877 {
2878 re_dfastate_t **new_array;
2879 Idx old_alloc = path->alloc;
2880 Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
2881 if (BE (new_alloc < old_alloc, 0))
2882 return REG_ESPACE;
2883 new_array = re_xrealloc (path->array, re_dfastate_t *, new_alloc);
2884 if (BE (new_array == NULL, 0))
2885 return REG_ESPACE;
2886 path->array = new_array;
2887 path->alloc = new_alloc;
2888 memset (new_array + old_alloc, '\0',
2889 sizeof (re_dfastate_t *) * (new_alloc - old_alloc));
2890 }
2891
2892 str_idx = path->next_idx == 0 ? top_str : path->next_idx;
2893
2894 /* Temporary modify MCTX. */
2895 backup_state_log = mctx->state_log;
2896 backup_cur_idx = mctx->input.cur_idx;
2897 mctx->state_log = path->array;
2898 mctx->input.cur_idx = str_idx;
2899
2900 /* Setup initial node set. */
2901 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2902 if (str_idx == top_str)
2903 {
2904 err = re_node_set_init_1 (&next_nodes, top_node);
2905 if (BE (err != REG_NOERROR, 0))
2906 return err;
2907 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2908 if (BE (err != REG_NOERROR, 0))
2909 {
2910 re_node_set_free (&next_nodes);
2911 return err;
2912 }
2913 }
2914 else
2915 {
2916 cur_state = mctx->state_log[str_idx];
2917 if (cur_state && cur_state->has_backref)
2918 {
2919 err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2920 if (BE ( err != REG_NOERROR, 0))
2921 return err;
2922 }
2923 else
2924 re_node_set_init_empty (&next_nodes);
2925 }
2926 if (str_idx == top_str || (cur_state && cur_state->has_backref))
2927 {
2928 if (next_nodes.nelem)
2929 {
2930 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2931 subexp_num, type);
2932 if (BE ( err != REG_NOERROR, 0))
2933 {
2934 re_node_set_free (&next_nodes);
2935 return err;
2936 }
2937 }
2938 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2939 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2940 {
2941 re_node_set_free (&next_nodes);
2942 return err;
2943 }
2944 mctx->state_log[str_idx] = cur_state;
2945 }
2946
2947 for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2948 {
2949 re_node_set_empty (&next_nodes);
2950 if (mctx->state_log[str_idx + 1])
2951 {
2952 err = re_node_set_merge (&next_nodes,
2953 &mctx->state_log[str_idx + 1]->nodes);
2954 if (BE (err != REG_NOERROR, 0))
2955 {
2956 re_node_set_free (&next_nodes);
2957 return err;
2958 }
2959 }
2960 if (cur_state)
2961 {
2962 err = check_arrival_add_next_nodes (mctx, str_idx,
2963 &cur_state->non_eps_nodes, &next_nodes);
2964 if (BE (err != REG_NOERROR, 0))
2965 {
2966 re_node_set_free (&next_nodes);
2967 return err;
2968 }
2969 }
2970 ++str_idx;
2971 if (next_nodes.nelem)
2972 {
2973 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2974 if (BE (err != REG_NOERROR, 0))
2975 {
2976 re_node_set_free (&next_nodes);
2977 return err;
2978 }
2979 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2980 subexp_num, type);
2981 if (BE ( err != REG_NOERROR, 0))
2982 {
2983 re_node_set_free (&next_nodes);
2984 return err;
2985 }
2986 }
2987 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2988 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2989 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2990 {
2991 re_node_set_free (&next_nodes);
2992 return err;
2993 }
2994 mctx->state_log[str_idx] = cur_state;
2995 null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2996 }
2997 re_node_set_free (&next_nodes);
2998 cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2999 : &mctx->state_log[last_str]->nodes);
3000 path->next_idx = str_idx;
3001
3002 /* Fix MCTX. */
3003 mctx->state_log = backup_state_log;
3004 mctx->input.cur_idx = backup_cur_idx;
3005
3006 /* Then check the current node set has the node LAST_NODE. */
3007 if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3008 return REG_NOERROR;
3009
3010 return REG_NOMATCH;
3011}
3012
3013/* Helper functions for check_arrival. */
3014
3015/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3016 to NEXT_NODES.
3017 TODO: This function is similar to the functions transit_state*(),
3018 however this function has many additional works.
3019 Can't we unify them? */
3020
3021static reg_errcode_t
3022internal_function
3023check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3024 re_node_set *cur_nodes,
3025 re_node_set *next_nodes)
3026{
3027 re_dfa_t *const dfa = mctx->dfa;
3028 bool ok;
3029 Idx cur_idx;
3030 reg_errcode_t err;
3031 re_node_set union_set;
3032 re_node_set_init_empty (&union_set);
3033 for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3034 {
3035 int naccepted = 0;
3036 Idx cur_node = cur_nodes->elems[cur_idx];
3037#ifdef DEBUG
3038 re_token_type_t type = dfa->nodes[cur_node].type;
3039 assert (!IS_EPSILON_NODE (type));
3040#endif
3041#ifdef RE_ENABLE_I18N
3042 /* If the node may accept `multi byte'. */
3043 if (dfa->nodes[cur_node].accept_mb)
3044 {
3045 naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3046 str_idx);
3047 if (naccepted > 1)
3048 {
3049 re_dfastate_t *dest_state;
3050 Idx next_node = dfa->nexts[cur_node];
3051 Idx next_idx = str_idx + naccepted;
3052 dest_state = mctx->state_log[next_idx];
3053 re_node_set_empty (&union_set);
3054 if (dest_state)
3055 {
3056 err = re_node_set_merge (&union_set, &dest_state->nodes);
3057 if (BE (err != REG_NOERROR, 0))
3058 {
3059 re_node_set_free (&union_set);
3060 return err;
3061 }
3062 }
3063 ok = re_node_set_insert (&union_set, next_node);
3064 if (BE (! ok, 0))
3065 {
3066 re_node_set_free (&union_set);
3067 return REG_ESPACE;
3068 }
3069 mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3070 &union_set);
3071 if (BE (mctx->state_log[next_idx] == NULL
3072 && err != REG_NOERROR, 0))
3073 {
3074 re_node_set_free (&union_set);
3075 return err;
3076 }
3077 }
3078 }
3079#endif /* RE_ENABLE_I18N */
3080 if (naccepted
3081 || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3082 {
3083 ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3084 if (BE (! ok, 0))
3085 {
3086 re_node_set_free (&union_set);
3087 return REG_ESPACE;
3088 }
3089 }
3090 }
3091 re_node_set_free (&union_set);
3092 return REG_NOERROR;
3093}
3094
3095/* For all the nodes in CUR_NODES, add the epsilon closures of them to
3096 CUR_NODES, however exclude the nodes which are:
3097 - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3098 - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3099*/
3100
3101static reg_errcode_t
3102internal_function
3103check_arrival_expand_ecl (re_dfa_t *dfa, re_node_set *cur_nodes,
3104 Idx ex_subexp, int type)
3105{
3106 reg_errcode_t err;
3107 Idx idx, outside_node;
3108 re_node_set new_nodes;
3109#ifdef DEBUG
3110 assert (cur_nodes->nelem);
3111#endif
3112 err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3113 if (BE (err != REG_NOERROR, 0))
3114 return err;
3115 /* Create a new node set NEW_NODES with the nodes which are epsilon
3116 closures of the node in CUR_NODES. */
3117
3118 for (idx = 0; idx < cur_nodes->nelem; ++idx)
3119 {
3120 Idx cur_node = cur_nodes->elems[idx];
3121 re_node_set *eclosure = dfa->eclosures + cur_node;
3122 outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3123 if (outside_node == REG_MISSING)
3124 {
3125 /* There are no problematic nodes, just merge them. */
3126 err = re_node_set_merge (&new_nodes, eclosure);
3127 if (BE (err != REG_NOERROR, 0))
3128 {
3129 re_node_set_free (&new_nodes);
3130 return err;
3131 }
3132 }
3133 else
3134 {
3135 /* There are problematic nodes, re-calculate incrementally. */
3136 err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3137 ex_subexp, type);
3138 if (BE (err != REG_NOERROR, 0))
3139 {
3140 re_node_set_free (&new_nodes);
3141 return err;
3142 }
3143 }
3144 }
3145 re_node_set_free (cur_nodes);
3146 *cur_nodes = new_nodes;
3147 return REG_NOERROR;
3148}
3149
3150/* Helper function for check_arrival_expand_ecl.
3151 Check incrementally the epsilon closure of TARGET, and if it isn't
3152 problematic append it to DST_NODES. */
3153
3154static reg_errcode_t
3155internal_function
3156check_arrival_expand_ecl_sub (re_dfa_t *dfa, re_node_set *dst_nodes,
3157 Idx target, Idx ex_subexp, int type)
3158{
3159 Idx cur_node;
3160 for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3161 {
3162 bool ok;
3163
3164 if (dfa->nodes[cur_node].type == type
3165 && dfa->nodes[cur_node].opr.idx == ex_subexp)
3166 {
3167 if (type == OP_CLOSE_SUBEXP)
3168 {
3169 ok = re_node_set_insert (dst_nodes, cur_node);
3170 if (BE (! ok, 0))
3171 return REG_ESPACE;
3172 }
3173 break;
3174 }
3175 ok = re_node_set_insert (dst_nodes, cur_node);
3176 if (BE (! ok, 0))
3177 return REG_ESPACE;
3178 if (dfa->edests[cur_node].nelem == 0)
3179 break;
3180 if (dfa->edests[cur_node].nelem == 2)
3181 {
3182 reg_errcode_t ret =
3183 check_arrival_expand_ecl_sub (dfa, dst_nodes,
3184 dfa->edests[cur_node].elems[1],
3185 ex_subexp, type);
3186 if (BE (ret != REG_NOERROR, 0))
3187 return ret;
3188 }
3189 cur_node = dfa->edests[cur_node].elems[0];
3190 }
3191 return REG_NOERROR;
3192}
3193
3194
3195/* For all the back references in the current state, calculate the
3196 destination of the back references by the appropriate entry
3197 in MCTX->BKREF_ENTS. */
3198
3199static reg_errcode_t
3200internal_function
3201expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3202 Idx cur_str, Idx subexp_num, int type)
3203{
3204 re_dfa_t *const dfa = mctx->dfa;
3205 reg_errcode_t err;
3206 Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3207 struct re_backref_cache_entry *ent;
3208
3209 if (cache_idx_start == REG_MISSING)
3210 return REG_NOERROR;
3211
3212 restart:
3213 ent = mctx->bkref_ents + cache_idx_start;
3214 do
3215 {
3216 Idx to_idx, next_node;
3217
3218 /* Is this entry ENT is appropriate? */
3219 if (!re_node_set_contains (cur_nodes, ent->node))
3220 continue; /* No. */
3221
3222 to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3223 /* Calculate the destination of the back reference, and append it
3224 to MCTX->STATE_LOG. */
3225 if (to_idx == cur_str)
3226 {
3227 /* The backreference did epsilon transit, we must re-check all the
3228 node in the current state. */
3229 re_node_set new_dests;
3230 reg_errcode_t err2, err3;
3231 next_node = dfa->edests[ent->node].elems[0];
3232 if (re_node_set_contains (cur_nodes, next_node))
3233 continue;
3234 err = re_node_set_init_1 (&new_dests, next_node);
3235 err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3236 err3 = re_node_set_merge (cur_nodes, &new_dests);
3237 re_node_set_free (&new_dests);
3238 if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3239 || err3 != REG_NOERROR, 0))
3240 {
3241 err = (err != REG_NOERROR ? err
3242 : (err2 != REG_NOERROR ? err2 : err3));
3243 return err;
3244 }
3245 /* TODO: It is still inefficient... */
3246 goto restart;
3247 }
3248 else
3249 {
3250 re_node_set union_set;
3251 next_node = dfa->nexts[ent->node];
3252 if (mctx->state_log[to_idx])
3253 {
3254 bool ok;
3255 if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3256 next_node))
3257 continue;
3258 err = re_node_set_init_copy (&union_set,
3259 &mctx->state_log[to_idx]->nodes);
3260 ok = re_node_set_insert (&union_set, next_node);
3261 if (BE (err != REG_NOERROR || ! ok, 0))
3262 {
3263 re_node_set_free (&union_set);
3264 err = err != REG_NOERROR ? err : REG_ESPACE;
3265 return err;
3266 }
3267 }
3268 else
3269 {
3270 err = re_node_set_init_1 (&union_set, next_node);
3271 if (BE (err != REG_NOERROR, 0))
3272 return err;
3273 }
3274 mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3275 re_node_set_free (&union_set);
3276 if (BE (mctx->state_log[to_idx] == NULL
3277 && err != REG_NOERROR, 0))
3278 return err;
3279 }
3280 }
3281 while (ent++->more);
3282 return REG_NOERROR;
3283}
3284
3285/* Build transition table for the state.
3286 Return true if successful. */
3287
3288static bool
3289internal_function
3290build_trtable (re_dfa_t *dfa, re_dfastate_t *state)
3291{
3292 reg_errcode_t err;
3293 Idx i, j;
3294 int ch;
3295 bool need_word_trtable = false;
3296 bitset_word elem, mask;
3297 bool dests_node_malloced = false, dest_states_malloced = false;
3298 Idx ndests; /* Number of the destination states from `state'. */
3299 re_dfastate_t **trtable;
3300 re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3301 re_node_set follows, *dests_node;
3302 bitset *dests_ch;
3303 bitset acceptable;
3304
3305 struct dests_alloc
3306 {
3307 re_node_set dests_node[SBC_MAX];
3308 bitset dests_ch[SBC_MAX];
3309 } *dests_alloc;
3310
3311 /* We build DFA states which corresponds to the destination nodes
3312 from `state'. `dests_node[i]' represents the nodes which i-th
3313 destination state contains, and `dests_ch[i]' represents the
3314 characters which i-th destination state accepts. */
3315 if (__libc_use_alloca (sizeof (struct dests_alloc)))
3316 dests_alloc = (struct dests_alloc *) alloca (sizeof dests_alloc[0]);
3317 else
3318 {
3319 dests_alloc = re_malloc (struct dests_alloc, 1);
3320 if (BE (dests_alloc == NULL, 0))
3321 return false;
3322 dests_node_malloced = true;
3323 }
3324 dests_node = dests_alloc->dests_node;
3325 dests_ch = dests_alloc->dests_ch;
3326
3327 /* Initialize transiton table. */
3328 state->word_trtable = state->trtable = NULL;
3329
3330 /* At first, group all nodes belonging to `state' into several
3331 destinations. */
3332 ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3333 if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
3334 {
3335 if (dests_node_malloced)
3336 free (dests_alloc);
3337 if (ndests == 0)
3338 {
3339 state->trtable = re_calloc (re_dfastate_t *, SBC_MAX);
3340 return true;
3341 }
3342 return false;
3343 }
3344
3345 err = re_node_set_alloc (&follows, ndests + 1);
3346 if (BE (err != REG_NOERROR, 0))
3347 goto out_free;
3348
3349 /* Avoid arithmetic overflow in size calculation. */
3350 if (BE (((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX)
3351 / (3 * sizeof (re_dfastate_t *)))
3352 < ndests, 0))
3353 goto out_free;
3354
3355 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
3356 + ndests * 3 * sizeof (re_dfastate_t *)))
3357 dest_states = (re_dfastate_t **)
3358 alloca (ndests * 3 * sizeof (re_dfastate_t *));
3359 else
3360 {
3361 dest_states = (re_dfastate_t **)
3362 malloc (ndests * 3 * sizeof (re_dfastate_t *));
3363 if (BE (dest_states == NULL, 0))
3364 {
3365out_free:
3366 if (dest_states_malloced)
3367 free (dest_states);
3368 re_node_set_free (&follows);
3369 for (i = 0; i < ndests; ++i)
3370 re_node_set_free (dests_node + i);
3371 if (dests_node_malloced)
3372 free (dests_alloc);
3373 return false;
3374 }
3375 dest_states_malloced = true;
3376 }
3377 dest_states_word = dest_states + ndests;
3378 dest_states_nl = dest_states_word + ndests;
3379 bitset_empty (acceptable);
3380
3381 /* Then build the states for all destinations. */
3382 for (i = 0; i < ndests; ++i)
3383 {
3384 Idx next_node;
3385 re_node_set_empty (&follows);
3386 /* Merge the follows of this destination states. */
3387 for (j = 0; j < dests_node[i].nelem; ++j)
3388 {
3389 next_node = dfa->nexts[dests_node[i].elems[j]];
3390 if (next_node != REG_MISSING)
3391 {
3392 err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3393 if (BE (err != REG_NOERROR, 0))
3394 goto out_free;
3395 }
3396 }
3397 dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3398 if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3399 goto out_free;
3400 /* If the new state has context constraint,
3401 build appropriate states for these contexts. */
3402 if (dest_states[i]->has_constraint)
3403 {
3404 dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3405 CONTEXT_WORD);
3406 if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3407 goto out_free;
3408
3409 if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3410 need_word_trtable = true;
3411
3412 dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3413 CONTEXT_NEWLINE);
3414 if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3415 goto out_free;
3416 }
3417 else
3418 {
3419 dest_states_word[i] = dest_states[i];
3420 dest_states_nl[i] = dest_states[i];
3421 }
3422 bitset_merge (acceptable, dests_ch[i]);
3423 }
3424
3425 if (!BE (need_word_trtable, 0))
3426 {
3427 /* We don't care about whether the following character is a word
3428 character, or we are in a single-byte character set so we can
3429 discern by looking at the character code: allocate a
3430 256-entry transition table. */
3431 trtable = state->trtable = re_calloc (re_dfastate_t *, SBC_MAX);
3432 if (BE (trtable == NULL, 0))
3433 goto out_free;
3434
3435 /* For all characters ch...: */
3436 for (i = 0; i < BITSET_WORDS; ++i)
3437 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3438 elem;
3439 mask <<= 1, elem >>= 1, ++ch)
3440 if (BE (elem & 1, 0))
3441 {
3442 /* There must be exactly one destination which accepts
3443 character ch. See group_nodes_into_DFAstates. */
3444 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3445 ;
3446
3447 /* j-th destination accepts the word character ch. */
3448 if (dfa->word_char[i] & mask)
3449 trtable[ch] = dest_states_word[j];
3450 else
3451 trtable[ch] = dest_states[j];
3452 }
3453 }
3454 else
3455 {
3456 /* We care about whether the following character is a word
3457 character, and we are in a multi-byte character set: discern
3458 by looking at the character code: build two 256-entry
3459 transition tables, one starting at trtable[0] and one
3460 starting at trtable[SBC_MAX]. */
3461 trtable = state->word_trtable = re_calloc (re_dfastate_t *, 2 * SBC_MAX);
3462 if (BE (trtable == NULL, 0))
3463 goto out_free;
3464
3465 /* For all characters ch...: */
3466 for (i = 0; i < BITSET_WORDS; ++i)
3467 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3468 elem;
3469 mask <<= 1, elem >>= 1, ++ch)
3470 if (BE (elem & 1, 0))
3471 {
3472 /* There must be exactly one destination which accepts
3473 character ch. See group_nodes_into_DFAstates. */
3474 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3475 ;
3476
3477 /* j-th destination accepts the word character ch. */
3478 trtable[ch] = dest_states[j];
3479 trtable[ch + SBC_MAX] = dest_states_word[j];
3480 }
3481 }
3482
3483 /* new line */
3484 if (bitset_contain (acceptable, NEWLINE_CHAR))
3485 {
3486 /* The current state accepts newline character. */
3487 for (j = 0; j < ndests; ++j)
3488 if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3489 {
3490 /* k-th destination accepts newline character. */
3491 trtable[NEWLINE_CHAR] = dest_states_nl[j];
3492 if (need_word_trtable)
3493 trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3494 /* There must be only one destination which accepts
3495 newline. See group_nodes_into_DFAstates. */
3496 break;
3497 }
3498 }
3499
3500 if (dest_states_malloced)
3501 free (dest_states);
3502
3503 re_node_set_free (&follows);
3504 for (i = 0; i < ndests; ++i)
3505 re_node_set_free (dests_node + i);
3506
3507 if (dests_node_malloced)
3508 free (dests_alloc);
3509
3510 return true;
3511}
3512
3513/* Group all nodes belonging to STATE into several destinations.
3514 Then for all destinations, set the nodes belonging to the destination
3515 to DESTS_NODE[i] and set the characters accepted by the destination
3516 to DESTS_CH[i]. Return the number of destinations, or REG_MISSING
 if building a node set fails; in that case every DESTS_NODE set
 created so far has already been freed. The destinations are kept
 pairwise disjoint: whenever a node's accepted bytes only partly
 overlap an existing destination, that destination is split in two. */
3517
3518static Idx
3519internal_function
3520group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3521 re_node_set *dests_node, bitset *dests_ch)
3522{
3523 reg_errcode_t err;
3524 bool ok;
3525 Idx i, j, k;
3526 Idx ndests; /* Number of the destinations from `state'. */
3527 bitset accepts; /* Characters a node can accept. */
3528 const re_node_set *cur_nodes = &state->nodes;
3529 bitset_empty (accepts);
3530 ndests = 0;
3531
3532 /* For all the nodes belonging to `state', */
3533 for (i = 0; i < cur_nodes->nelem; ++i)
3534 {
3535 re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3536 re_token_type_t type = node->type;
3537 unsigned int constraint = node->constraint;
3538
3539 /* Enumerate all single byte character this node can accept. */
3540 if (type == CHARACTER)
3541 bitset_set (accepts, node->opr.c);
3542 else if (type == SIMPLE_BRACKET)
3543 {
3544 bitset_merge (accepts, node->opr.sbcset);
3545 }
3546 else if (type == OP_PERIOD)
3547 {
3548#ifdef RE_ENABLE_I18N
3549 if (dfa->mb_cur_max > 1)
3550 bitset_merge (accepts, dfa->sb_char);
3551 else
3552#endif
3553 bitset_set_all (accepts);
 /* The syntax bits decide whether '.' may match newline and NUL. */
3554 if (!(dfa->syntax & REG_DOT_NEWLINE))
3555 bitset_clear (accepts, '\n');
3556 if (dfa->syntax & REG_DOT_NOT_NULL)
3557 bitset_clear (accepts, '\0');
3558 }
3559#ifdef RE_ENABLE_I18N
3560 else if (type == OP_UTF8_PERIOD)
3561 {
 /* Fill the first half of ACCEPTS with one bits in a single memset
 when that half is a whole number of bitset words; otherwise merge
 in utf8_sb_map instead. */
3562 if (SBC_MAX / 2 % BITSET_WORD_BITS == 0)
3563 memset (accepts, -1, sizeof accepts / 2);
3564 else
3565 bitset_merge (accepts, utf8_sb_map);
3566 if (!(dfa->syntax & REG_DOT_NEWLINE))
3567 bitset_clear (accepts, '\n');
3568 if (dfa->syntax & REG_DOT_NOT_NULL)
3569 bitset_clear (accepts, '\0');
3570 }
3571#endif
3572 else
 /* Every other node type contributes no single-byte transition. */
3573 continue;
3574
3575 /* Check the `accepts' and sift the characters which are not
3576 match it the context. */
3577 if (constraint)
3578 {
3579 if (constraint & NEXT_NEWLINE_CONSTRAINT)
3580 {
 /* Only the newline byte can survive this constraint. */
3581 bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3582 bitset_empty (accepts);
3583 if (accepts_newline)
3584 bitset_set (accepts, NEWLINE_CHAR);
3585 else
3586 continue;
3587 }
3588 if (constraint & NEXT_ENDBUF_CONSTRAINT)
3589 {
 /* Nothing is accepted under an end-of-buffer constraint, so this
 node adds no destination. */
3590 bitset_empty (accepts);
3591 continue;
3592 }
3593
3594 if (constraint & NEXT_WORD_CONSTRAINT)
3595 {
 /* any_set collects the surviving bits so an empty result can be
 detected without a second pass. */
3596 bitset_word any_set = 0;
3597 if (type == CHARACTER && !node->word_char)
3598 {
3599 bitset_empty (accepts);
3600 continue;
3601 }
3602#ifdef RE_ENABLE_I18N
3603 if (dfa->mb_cur_max > 1)
3604 for (j = 0; j < BITSET_WORDS; ++j)
3605 any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3606 else
3607#endif
3608 for (j = 0; j < BITSET_WORDS; ++j)
3609 any_set |= (accepts[j] &= dfa->word_char[j]);
3610 if (!any_set)
3611 continue;
3612 }
3613 if (constraint & NEXT_NOTWORD_CONSTRAINT)
3614 {
3615 bitset_word any_set = 0;
3616 if (type == CHARACTER && node->word_char)
3617 {
3618 bitset_empty (accepts);
3619 continue;
3620 }
3621#ifdef RE_ENABLE_I18N
3622 if (dfa->mb_cur_max > 1)
3623 for (j = 0; j < BITSET_WORDS; ++j)
3624 any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3625 else
3626#endif
3627 for (j = 0; j < BITSET_WORDS; ++j)
3628 any_set |= (accepts[j] &= ~dfa->word_char[j]);
3629 if (!any_set)
3630 continue;
3631 }
3632 }
3633
3634 /* Then divide `accepts' into DFA states, or create a new
3635 state. Above, we make sure that accepts is not empty. */
3636 for (j = 0; j < ndests; ++j)
3637 {
3638 bitset intersec; /* Intersection sets, see below. */
3639 bitset remains;
3640 /* Flags, see below. */
3641 bitset_word has_intersec, not_subset, not_consumed;
3642
3643 /* Optimization, skip if this state doesn't accept the character. */
3644 if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3645 continue;
3646
3647 /* Enumerate the intersection set of this state and `accepts'. */
3648 has_intersec = 0;
3649 for (k = 0; k < BITSET_WORDS; ++k)
3650 has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3651 /* And skip if the intersection set is empty. */
3652 if (!has_intersec)
3653 continue;
3654
3655 /* Then check if this state is a subset of `accepts'. */
 /* `remains' receives the bytes of destination j that this node does
 not accept; `accepts' is reduced to the bytes not yet assigned to
 any destination. */
3656 not_subset = not_consumed = 0;
3657 for (k = 0; k < BITSET_WORDS; ++k)
3658 {
3659 not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3660 not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3661 }
3662
3663 /* If this state isn't a subset of `accepts', create a
3664 new group state, which has the `remains'. */
3665 if (not_subset)
3666 {
 /* The new destination inherits destination j's nodes but not the
 current node, which is inserted into j only (below). */
3667 bitset_copy (dests_ch[ndests], remains);
3668 bitset_copy (dests_ch[j], intersec);
3669 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3670 if (BE (err != REG_NOERROR, 0))
3671 goto error_return;
3672 ++ndests;
3673 }
3674
3675 /* Put the position in the current group. */
3676 ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3677 if (BE (! ok, 0))
3678 goto error_return;
3679
3680 /* If all characters are consumed, go to next node. */
3681 if (!not_consumed)
3682 break;
3683 }
3684 /* Some characters remain, create a new group. */
 /* j == ndests means the loop above ran to completion without the
 `break', i.e. ACCEPTS still holds unassigned bytes. */
3685 if (j == ndests)
3686 {
3687 bitset_copy (dests_ch[ndests], accepts);
3688 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3689 if (BE (err != REG_NOERROR, 0))
3690 goto error_return;
3691 ++ndests;
3692 bitset_empty (accepts);
3693 }
3694 }
3695 return ndests;
3696 error_return:
 /* Release the node sets built so far before reporting failure. */
3697 for (j = 0; j < ndests; ++j)
3698 re_node_set_free (dests_node + j);
3699 return REG_MISSING;
3700}
3701
3702#ifdef RE_ENABLE_I18N
3703/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3704 Return the number of the bytes the node accepts.
3705 STR_IDX is the current index of the input string.
3706
3707 This function handles the nodes which can accept one character, or
3708 one collating element like '.', '[a-z]', opposite to the other nodes
3709 can only accept one byte.

 A return value of 0 means the node does not accept a multi-byte
 character or collating element at STR_IDX. Only OP_UTF8_PERIOD,
 OP_PERIOD and COMPLEX_BRACKET nodes can yield a non-zero result. */
3710
3711static int
3712internal_function
3713check_node_accept_bytes (re_dfa_t *dfa, Idx node_idx,
3714 const re_string_t *input, Idx str_idx)
3715{
3716 const re_token_t *node = dfa->nodes + node_idx;
3717 int char_len, elem_len;
3718 Idx i;
3719
 /* OP_UTF8_PERIOD: decode the UTF-8 sequence by hand from the raw bytes
 instead of consulting the wide-character information. */
3720 if (BE (node->type == OP_UTF8_PERIOD, 0))
3721 {
3722 unsigned char c = re_string_byte_at (input, str_idx), d;
 /* Bytes below 0xc2 never start a valid multi-byte UTF-8 sequence. */
3723 if (BE (c < 0xc2, 1))
3724 return 0;
3725
 /* At least one byte must follow the lead byte. */
3726 if (str_idx + 2 > input->len)
3727 return 0;
3728
3729 d = re_string_byte_at (input, str_idx + 1);
 /* The lead byte fixes the sequence length; the extra test on the
 second byte D in each branch rejects overlong encodings. */
3730 if (c < 0xe0)
3731 return (d < 0x80 || d > 0xbf) ? 0 : 2;
3732 else if (c < 0xf0)
3733 {
3734 char_len = 3;
3735 if (c == 0xe0 && d < 0xa0)
3736 return 0;
3737 }
3738 else if (c < 0xf8)
3739 {
3740 char_len = 4;
3741 if (c == 0xf0 && d < 0x90)
3742 return 0;
3743 }
3744 else if (c < 0xfc)
3745 {
3746 char_len = 5;
3747 if (c == 0xf8 && d < 0x88)
3748 return 0;
3749 }
3750 else if (c < 0xfe)
3751 {
3752 char_len = 6;
3753 if (c == 0xfc && d < 0x84)
3754 return 0;
3755 }
3756 else
3757 return 0;
3758
3759 if (str_idx + char_len > input->len)
3760 return 0;
3761
 /* Every trailing byte must be a continuation byte (0x80..0xbf). */
3762 for (i = 1; i < char_len; ++i)
3763 {
3764 d = re_string_byte_at (input, str_idx + i);
3765 if (d < 0x80 || d > 0xbf)
3766 return 0;
3767 }
3768 return char_len;
3769 }
3770
3771 char_len = re_string_char_size_at (input, str_idx);
3772 if (node->type == OP_PERIOD)
3773 {
 /* Single bytes are not handled here, so report "no match". */
3774 if (char_len <= 1)
3775 return 0;
3776 /* FIXME: I don't think this if is needed, as both '\n'
3777 and '\0' are char_len == 1. */
3778 /* '.' accepts any one character except the following two cases. */
3779 if ((!(dfa->syntax & REG_DOT_NEWLINE) &&
3780 re_string_byte_at (input, str_idx) == '\n') ||
3781 ((dfa->syntax & REG_DOT_NOT_NULL) &&
3782 re_string_byte_at (input, str_idx) == '\0'))
3783 return 0;
3784 return char_len;
3785 }
3786
3787 elem_len = re_string_elem_size_at (input, str_idx);
 /* Nothing to do unless the input holds a multi-byte character or a
 multi-byte collating element here. */
3788 if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3789 return 0;
3790
3791 if (node->type == COMPLEX_BRACKET)
3792 {
3793 const re_charset_t *cset = node->opr.mbcset;
3794# ifdef _LIBC
3795 const unsigned char *pin
3796 = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3797 Idx j;
3798 uint32_t nrules;
3799# endif /* _LIBC */
3800 int match_len = 0;
 /* Fetch the wide character only when some member list below will
 actually be compared against it. */
3801 wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3802 ? re_string_wchar_at (input, str_idx) : 0);
3803
3804 /* match with multibyte character? */
3805 for (i = 0; i < cset->nmbchars; ++i)
3806 if (wc == cset->mbchars[i])
3807 {
3808 match_len = char_len;
3809 goto check_node_accept_bytes_match;
3810 }
3811 /* match with character_class? */
3812 for (i = 0; i < cset->nchar_classes; ++i)
3813 {
3814 wctype_t wt = cset->char_classes[i];
3815 if (__iswctype (wc, wt))
3816 {
3817 match_len = char_len;
3818 goto check_node_accept_bytes_match;
3819 }
3820 }
3821
3822# ifdef _LIBC
3823 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3824 if (nrules != 0)
3825 {
3826 unsigned int in_collseq = 0;
3827 const int32_t *table, *indirect;
3828 const unsigned char *weights, *extra;
3829 const char *collseqwc;
3830 int32_t idx;
3831 /* This #include defines a local function! */
3832# include <locale/weight.h>
3833
3834 /* match with collating_symbol? */
3835 if (cset->ncoll_syms)
3836 extra = (const unsigned char *)
3837 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3838 for (i = 0; i < cset->ncoll_syms; ++i)
3839 {
3840 const unsigned char *coll_sym = extra + cset->coll_syms[i];
3841 /* Compare the length of input collating element and
3842 the length of current collating element. */
3843 if (*coll_sym != elem_len)
3844 continue;
3845 /* Compare each bytes. */
3846 for (j = 0; j < *coll_sym; j++)
3847 if (pin[j] != coll_sym[1 + j])
3848 break;
3849 if (j == *coll_sym)
3850 {
3851 /* Match if every bytes is equal. */
3852 match_len = j;
3853 goto check_node_accept_bytes_match;
3854 }
3855 }
3856
3857 if (cset->nranges)
3858 {
 /* Look the input up as a wide character when the collating element
 is no longer than the character, otherwise as a byte sequence. */
3859 if (elem_len <= char_len)
3860 {
3861 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3862 in_collseq = __collseq_table_lookup (collseqwc, wc);
3863 }
3864 else
3865 in_collseq = find_collation_sequence_value (pin, elem_len);
3866 }
3867 /* match with range expression? */
3868 for (i = 0; i < cset->nranges; ++i)
3869 if (cset->range_starts[i] <= in_collseq
3870 && in_collseq <= cset->range_ends[i])
3871 {
3872 match_len = elem_len;
3873 goto check_node_accept_bytes_match;
3874 }
3875
3876 /* match with equivalence_class? */
3877 if (cset->nequiv_classes)
3878 {
3879 const unsigned char *cp = pin;
3880 table = (const int32_t *)
3881 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3882 weights = (const unsigned char *)
3883 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3884 extra = (const unsigned char *)
3885 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3886 indirect = (const int32_t *)
3887 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 /* NOTE(review): findidx presumably comes from the <locale/weight.h>
 include above and reads the tables just loaded -- confirm. */
3888 idx = findidx (&cp);
3889 if (idx > 0)
3890 for (i = 0; i < cset->nequiv_classes; ++i)
3891 {
3892 int32_t equiv_class_idx = cset->equiv_classes[i];
3893 size_t weight_len = weights[idx];
 /* Equal weight lengths first, then compare the weight bytes. */
3894 if (weight_len == weights[equiv_class_idx])
3895 {
3896 Idx cnt = 0;
3897 while (cnt <= weight_len
3898 && (weights[equiv_class_idx + 1 + cnt]
3899 == weights[idx + 1 + cnt]))
3900 ++cnt;
3901 if (cnt > weight_len)
3902 {
3903 match_len = elem_len;
3904 goto check_node_accept_bytes_match;
3905 }
3906 }
3907 }
3908 }
3909 }
3910 else
3911# endif /* _LIBC */
3912 {
3913 /* match with range expression? */
 /* cmp_buf holds three NUL-terminated one-character strings at
 offsets 0, 2 and 4: range start, WC, range end. wcscoll then
 tests start <= WC <= end in collation order. */
3914#if __GNUC__ >= 2
3915 wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3916#else
3917 wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3918 cmp_buf[2] = wc;
3919#endif
3920 for (i = 0; i < cset->nranges; ++i)
3921 {
3922 cmp_buf[0] = cset->range_starts[i];
3923 cmp_buf[4] = cset->range_ends[i];
3924 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3925 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3926 {
3927 match_len = char_len;
3928 goto check_node_accept_bytes_match;
3929 }
3930 }
3931 }
 /* Reached by the gotos above on a hit, and by falling through with
 match_len == 0 when nothing matched. A set with non_match inverts
 the outcome: a hit rejects, a miss accepts the longer of the
 character and the collating element. */
3932 check_node_accept_bytes_match:
3933 if (!cset->non_match)
3934 return match_len;
3935 else
3936 {
3937 if (match_len > 0)
3938 return 0;
3939 else
3940 return (elem_len > char_len) ? elem_len : char_len;
3941 }
3942 }
3943 return 0;
3944}
3945
3946# ifdef _LIBC
/* Return the collation sequence value for the MBS_LEN-byte sequence MBS,
 or UINT_MAX when no entry is found. With no collation rules only a
 single byte can be looked up. Otherwise the table obtained through
 _NL_COLLATE_SYMB_EXTRAMB is scanned entry by entry.
 NOTE(review): the entry layout is known here only from the skipping
 steps in the loop below -- confirm against the locale file format. */
3947static unsigned int
3948find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3949{
3950 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3951 if (nrules == 0)
3952 {
3953 if (mbs_len == 1)
3954 {
3955 /* No valid character. Match it as a single byte character. */
3956 const unsigned char *collseq = (const unsigned char *)
3957 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3958 return collseq[mbs[0]];
3959 }
3960 return UINT_MAX;
3961 }
3962 else
3963 {
3964 int32_t idx;
3965 const unsigned char *extra = (const unsigned char *)
3966 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
 /* The table size is taken as the distance to the item that follows
 _NL_COLLATE_SYMB_EXTRAMB. */
3967 int32_t extrasize = (const unsigned char *)
3968 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3969
3970 for (idx = 0; idx < extrasize;)
3971 {
3972 int mbs_cnt;
3973 bool found = false;
3974 int32_t elem_mbs_len;
3975 /* Skip the name of collating element name. */
3976 idx = idx + extra[idx] + 1;
3977 elem_mbs_len = extra[idx++];
3978 if (mbs_len == elem_mbs_len)
3979 {
3980 for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3981 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3982 break;
3983 if (mbs_cnt == elem_mbs_len)
3984 /* Found the entry. */
3985 found = true;
3986 }
 /* Even on a hit the rest of the entry is still stepped over, since
 the value to return is stored after the fields skipped below. */
3987 /* Skip the byte sequence of the collating element. */
3988 idx += elem_mbs_len;
3989 /* Adjust for the alignment. */
3990 idx = (idx + 3) & ~3;
3991 /* Skip the collation sequence value. */
3992 idx += sizeof (uint32_t);
3993 /* Skip the wide char sequence of the collating element. */
3994 idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
3995 /* If we found the entry, return the sequence value. */
3996 if (found)
3997 return *(uint32_t *) (extra + idx);
3998 /* Skip the collation sequence value. */
3999 idx += sizeof (uint32_t);
4000 }
4001 return UINT_MAX;
4002 }
4003}
4004# endif /* _LIBC */
4005#endif /* RE_ENABLE_I18N */
4006
4007/* Check whether the node accepts the byte which is IDX-th
4008 byte of the INPUT. */
4009
4010static bool
4011internal_function
4012check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4013 Idx idx)
4014{
4015 unsigned char ch;
4016 ch = re_string_byte_at (&mctx->input, idx);
4017 switch (node->type)
4018 {
4019 case CHARACTER:
4020 if (node->opr.c != ch)
4021 return false;
4022 break;
4023
4024 case SIMPLE_BRACKET:
4025 if (!bitset_contain (node->opr.sbcset, ch))
4026 return false;
4027 break;
4028
4029#ifdef RE_ENABLE_I18N
4030 case OP_UTF8_PERIOD:
4031 if (ch >= 0x80)
4032 return false;
4033 /* FALLTHROUGH */
4034#endif
4035 case OP_PERIOD:
4036 if ((ch == '\n' && !(mctx->dfa->syntax & REG_DOT_NEWLINE))
4037 || (ch == '\0' && (mctx->dfa->syntax & REG_DOT_NOT_NULL)))
4038 return false;
4039 break;
4040
4041 default:
4042 return false;
4043 }
4044
4045 if (node->constraint)
4046 {
4047 /* The node has constraints. Check whether the current context
4048 satisfies the constraints. */
4049 unsigned int context = re_string_context_at (&mctx->input, idx,
4050 mctx->eflags);
4051 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4052 return false;
4053 }
4054
4055 return true;
4056}
4057
4058/* Extend the buffers, if the buffers have run out. */
4059
4060static reg_errcode_t
4061internal_function
4062extend_buffers (re_match_context_t *mctx)
4063{
4064 reg_errcode_t ret;
4065 re_string_t *pstr = &mctx->input;
4066
4067 /* Double the lengthes of the buffers. */
4068 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4069 if (BE (ret != REG_NOERROR, 0))
4070 return ret;
4071
4072 if (mctx->state_log != NULL)
4073 {
4074 /* And double the length of state_log. */
4075 /* XXX We have no indication of the size of this buffer. If this
4076 allocation fail we have no indication that the state_log array
4077 does not have the right size. */
4078 re_dfastate_t **new_array = re_xrealloc (mctx->state_log, re_dfastate_t *,
4079 pstr->bufs_len + 1);
4080 if (BE (new_array == NULL, 0))
4081 return REG_ESPACE;
4082 mctx->state_log = new_array;
4083 }
4084
4085 /* Then reconstruct the buffers. */
4086 if (pstr->icase)
4087 {
4088#ifdef RE_ENABLE_I18N
4089 if (pstr->mb_cur_max > 1)
4090 {
4091 ret = build_wcs_upper_buffer (pstr);
4092 if (BE (ret != REG_NOERROR, 0))
4093 return ret;
4094 }
4095 else
4096#endif /* RE_ENABLE_I18N */
4097 build_upper_buffer (pstr);
4098 }
4099 else
4100 {
4101#ifdef RE_ENABLE_I18N
4102 if (pstr->mb_cur_max > 1)
4103 build_wcs_buffer (pstr);
4104 else
4105#endif /* RE_ENABLE_I18N */
4106 {
4107 if (pstr->trans != NULL)
4108 re_string_translate_buffer (pstr);
4109 }
4110 }
4111 return REG_NOERROR;
4112}
4113
4114
4115/* Functions for matching context. */
4116
4117/* Initialize MCTX. */
4118
4119static reg_errcode_t
4120internal_function
4121match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4122{
4123 mctx->eflags = eflags;
4124 mctx->match_last = REG_MISSING;
4125 if (n > 0)
4126 {
4127 mctx->bkref_ents = re_xmalloc (struct re_backref_cache_entry, n);
4128 mctx->sub_tops = re_xmalloc (re_sub_match_top_t *, n);
4129 if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4130 return REG_ESPACE;
4131 }
4132 /* Already zero-ed by the caller.
4133 else
4134 mctx->bkref_ents = NULL;
4135 mctx->nbkref_ents = 0;
4136 mctx->nsub_tops = 0; */
4137 mctx->abkref_ents = n;
4138 mctx->max_mb_elem_len = 1;
4139 mctx->asub_tops = n;
4140 return REG_NOERROR;
4141}
4142
4143/* Clean the entries which depend on the current input in MCTX.
4144 This function must be invoked when the matcher changes the start index
4145 of the input, or changes the input string. */
4146
4147static void
4148internal_function
4149match_ctx_clean (re_match_context_t *mctx)
4150{
4151 Idx st_idx;
4152 for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4153 {
4154 Idx sl_idx;
4155 re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4156 for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4157 {
4158 re_sub_match_last_t *last = top->lasts[sl_idx];
4159 re_free (last->path.array);
4160 re_free (last);
4161 }
4162 re_free (top->lasts);
4163 if (top->path)
4164 {
4165 re_free (top->path->array);
4166 re_free (top->path);
4167 }
4168 free (top);
4169 }
4170
4171 mctx->nsub_tops = 0;
4172 mctx->nbkref_ents = 0;
4173}
4174
4175/* Free all the memory associated with MCTX. */
4176
4177static void
4178internal_function
4179match_ctx_free (re_match_context_t *mctx)
4180{
4181 /* First, free all the memory associated with MCTX->SUB_TOPS. */
4182 match_ctx_clean (mctx);
4183 re_free (mctx->sub_tops);
4184 re_free (mctx->bkref_ents);
4185}
4186
4187/* Add a new backreference entry to MCTX.
4188 Note that we assume that caller never call this function with duplicate
4189 entry, and call with STR_IDX which isn't smaller than any existing entry.
4190*/
4191
4192static reg_errcode_t
4193internal_function
4194match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx,
4195 Idx from, Idx to)
4196{
4197 if (mctx->nbkref_ents >= mctx->abkref_ents)
4198 {
4199 struct re_backref_cache_entry* new_entry;
4200 new_entry = re_x2realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4201 &mctx->abkref_ents);
4202 if (BE (new_entry == NULL, 0))
4203 {
4204 re_free (mctx->bkref_ents);
4205 return REG_ESPACE;
4206 }
4207 mctx->bkref_ents = new_entry;
4208 memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4209 (sizeof (struct re_backref_cache_entry)
4210 * (mctx->abkref_ents - mctx->nbkref_ents)));
4211 }
4212 if (mctx->nbkref_ents > 0
4213 && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4214 mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4215
4216 mctx->bkref_ents[mctx->nbkref_ents].node = node;
4217 mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4218 mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4219 mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4220
4221 /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4222 If bit N is clear, means that this entry won't epsilon-transition to
4223 an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
4224 it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4225 such node.
4226
4227 A backreference does not epsilon-transition unless it is empty, so set
4228 to all zeros if FROM != TO. */
4229 mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4230 = (from == to ? -1 : 0);
4231
4232 mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4233 if (mctx->max_mb_elem_len < to - from)
4234 mctx->max_mb_elem_len = to - from;
4235 return REG_NOERROR;
4236}
4237
4238/* Return the first entry with the same str_idx, or REG_MISSING if none is
4239 found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
4240
4241static Idx
4242internal_function
4243search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4244{
4245 Idx left, right, mid, last;
4246 last = right = mctx->nbkref_ents;
4247 for (left = 0; left < right;)
4248 {
4249 mid = (left + right) / 2;
4250 if (mctx->bkref_ents[mid].str_idx < str_idx)
4251 left = mid + 1;
4252 else
4253 right = mid;
4254 }
4255 if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4256 return left;
4257 else
4258 return REG_MISSING;
4259}
4260
4261/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4262 at STR_IDX. */
4263
4264static reg_errcode_t
4265internal_function
4266match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4267{
4268#ifdef DEBUG
4269 assert (mctx->sub_tops != NULL);
4270 assert (mctx->asub_tops > 0);
4271#endif
4272 if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4273 {
4274 Idx new_asub_tops = mctx->asub_tops;
4275 re_sub_match_top_t **new_array = re_x2realloc (mctx->sub_tops,
4276 re_sub_match_top_t *,
4277 &new_asub_tops);
4278 if (BE (new_array == NULL, 0))
4279 return REG_ESPACE;
4280 mctx->sub_tops = new_array;
4281 mctx->asub_tops = new_asub_tops;
4282 }
4283 mctx->sub_tops[mctx->nsub_tops] = re_calloc (re_sub_match_top_t, 1);
4284 if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4285 return REG_ESPACE;
4286 mctx->sub_tops[mctx->nsub_tops]->node = node;
4287 mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4288 return REG_NOERROR;
4289}
4290
4291/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4292 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
4293
4294static re_sub_match_last_t *
4295internal_function
4296match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4297{
4298 re_sub_match_last_t *new_entry;
4299 if (BE (subtop->nlasts == subtop->alasts, 0))
4300 {
4301 Idx new_alasts = subtop->alasts;
4302 re_sub_match_last_t **new_array = re_x2realloc (subtop->lasts,
4303 re_sub_match_last_t *,
4304 &new_alasts);
4305 if (BE (new_array == NULL, 0))
4306 return NULL;
4307 subtop->lasts = new_array;
4308 subtop->alasts = new_alasts;
4309 }
4310 new_entry = re_calloc (re_sub_match_last_t, 1);
4311 if (BE (new_entry != NULL, 1))
4312 {
4313 subtop->lasts[subtop->nlasts] = new_entry;
4314 new_entry->node = node;
4315 new_entry->str_idx = str_idx;
4316 ++subtop->nlasts;
4317 }
4318 return new_entry;
4319}
4320
4321static void
4322internal_function
4323sift_ctx_init (re_sift_context_t *sctx,
4324 re_dfastate_t **sifted_sts,
4325 re_dfastate_t **limited_sts,
4326 Idx last_node, Idx last_str_idx)
4327{
4328 sctx->sifted_states = sifted_sts;
4329 sctx->limited_states = limited_sts;
4330 sctx->last_node = last_node;
4331 sctx->last_str_idx = last_str_idx;
4332 re_node_set_init_empty (&sctx->limits);
4333}
diff --git a/lib/safe-read.c b/lib/safe-read.c
deleted file mode 100644
index 9caf8466..00000000
--- a/lib/safe-read.c
+++ /dev/null
@@ -1,80 +0,0 @@
1/* An interface to read and write that retries after interrupts.
2
3 Copyright (C) 1993, 1994, 1998, 2002, 2003, 2004, 2005 Free Software
4 Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24/* Specification. */
25#ifdef SAFE_WRITE
26# include "safe-write.h"
27#else
28# include "safe-read.h"
29#endif
30
31/* Get ssize_t. */
32#include <sys/types.h>
33#include <unistd.h>
34
35#include <errno.h>
36
37#ifdef EINTR
38# define IS_EINTR(x) ((x) == EINTR)
39#else
40# define IS_EINTR(x) 0
41#endif
42
43#include <limits.h>
44
45#ifdef SAFE_WRITE
46# define safe_rw safe_write
47# define rw write
48#else
49# define safe_rw safe_read
50# define rw read
51# undef const
52# define const /* empty */
53#endif
54
/* Transfer up to COUNT bytes between BUF and descriptor FD with a
   single read (or write) call, starting over whenever the call is
   interrupted.  Yields the number of bytes read(written), zero for EOF,
   or SAFE_READ_ERROR(SAFE_WRITE_ERROR) upon error.  */
size_t
safe_rw (int fd, void const *buf, size_t count)
{
  /* Work around a bug in Tru64 5.1.  Attempting to read more than
     INT_MAX bytes fails with errno == EINVAL.  See
     <http://lists.gnu.org/archive/html/bug-gnu-utils/2002-04/msg00010.html>.
     When decreasing COUNT, keep it block-aligned.  */
  enum { BUGGY_READ_MAXIMUM = INT_MAX & ~8191 };
  ssize_t result;

  while ((result = rw (fd, buf, count)) < 0)
    {
      /* Interrupted: simply issue the call again.  */
      if (IS_EINTR (errno))
        continue;

      /* An oversized request gets one more try with a clamped COUNT;
         any other failure is final.  */
      if (errno == EINVAL && BUGGY_READ_MAXIMUM < count)
        count = BUGGY_READ_MAXIMUM;
      else
        break;
    }

  return result;
}
diff --git a/lib/safe-read.h b/lib/safe-read.h
deleted file mode 100644
index 9f8a59bd..00000000
--- a/lib/safe-read.h
+++ /dev/null
@@ -1,25 +0,0 @@
1/* An interface to read() that retries after interrupts.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18#include <stddef.h>
19
20#define SAFE_READ_ERROR ((size_t) -1)
21
22/* Read up to COUNT bytes at BUF from descriptor FD, retrying if interrupted.
23 Return the actual number of bytes read, zero for EOF, or SAFE_READ_ERROR
24 upon error. */
25extern size_t safe_read (int fd, void *buf, size_t count);
diff --git a/lib/safe-write.c b/lib/safe-write.c
deleted file mode 100644
index 4c375a6c..00000000
--- a/lib/safe-write.c
+++ /dev/null
@@ -1,19 +0,0 @@
1/* An interface to write that retries after interrupts.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18#define SAFE_WRITE
19#include "safe-read.c"
diff --git a/lib/safe-write.h b/lib/safe-write.h
deleted file mode 100644
index c1946362..00000000
--- a/lib/safe-write.h
+++ /dev/null
@@ -1,25 +0,0 @@
1/* An interface to write() that retries after interrupts.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18#include <stddef.h>
19
20#define SAFE_WRITE_ERROR ((size_t) -1)
21
22/* Write up to COUNT bytes at BUF to descriptor FD, retrying if interrupted.
23 Return the actual number of bytes written, zero for EOF, or SAFE_WRITE_ERROR
24 upon error. */
25extern size_t safe_write (int fd, const void *buf, size_t count);
diff --git a/lib/snprintf.c b/lib/snprintf.c
deleted file mode 100644
index 633517de..00000000
--- a/lib/snprintf.c
+++ /dev/null
@@ -1,1023 +0,0 @@
1/*
2 * NOTE: If you change this file, please merge it into rsync, samba, etc.
3 */
4
5/*
6 * Copyright Patrick Powell 1995
7 * This code is based on code written by Patrick Powell (papowell@astart.com)
8 * It may be used for any purpose as long as this notice remains intact
9 * on all source code distributions
10 */
11
12/**************************************************************
13 * Original:
14 * Patrick Powell Tue Apr 11 09:48:21 PDT 1995
15 * A bombproof version of doprnt (dopr) included.
16 * Sigh. This sort of thing is always nasty to deal with. Note that
17 * the version here does not include floating point...
18 *
19 * snprintf() is used instead of sprintf() as it does limit checks
20 * for string length. This covers a nasty loophole.
21 *
22 * The other functions are there to prevent NULL pointers from
23 * causing nasty effects.
24 *
25 * More Recently:
26 * Brandon Long <blong@fiction.net> 9/15/96 for mutt 0.43
27 * This was ugly. It is still ugly. I opted out of floating point
28 * numbers, but the formatter understands just about everything
29 * from the normal C string format, at least as far as I can tell from
30 * the Solaris 2.5 printf(3S) man page.
31 *
32 * Brandon Long <blong@fiction.net> 10/22/97 for mutt 0.87.1
33 * Ok, added some minimal floating point support, which means this
34 * probably requires libm on most operating systems. Don't yet
35 * support the exponent (e,E) and sigfig (g,G). Also, fmtint()
36 * was pretty badly broken, it just wasn't being exercised in ways
37 * which showed it, so that's been fixed. Also, formatted the code
38 * to mutt conventions, and removed dead code left over from the
39 * original. Also, there is now a builtin-test, just compile with:
40 * gcc -DTEST_SNPRINTF -o snprintf snprintf.c -lm
41 * and run snprintf for results.
42 *
43 * Thomas Roessler <roessler@guug.de> 01/27/98 for mutt 0.89i
44 * The PGP code was using unsigned hexadecimal formats.
45 * Unfortunately, unsigned formats simply didn't work.
46 *
47 * Michael Elkins <me@cs.hmc.edu> 03/05/98 for mutt 0.90.8
48 * The original code assumed that both snprintf() and vsnprintf() were
49 * missing. Some systems only have snprintf() but not vsnprintf(), so
50 * the code is now broken down under HAVE_SNPRINTF and HAVE_VSNPRINTF.
51 *
52 * Andrew Tridgell (tridge@samba.org) Oct 1998
53 * fixed handling of %.0f
54 * added test for HAVE_LONG_DOUBLE
55 *
56 * tridge@samba.org, idra@samba.org, April 2001
57 * got rid of fcvt code (twas buggy and made testing harder)
58 * added C99 semantics
59 *
60 * date: 2002/12/19 19:56:31; author: herb; state: Exp; lines: +2 -0
61 * actually print args for %g and %e
62 *
63 * date: 2002/06/03 13:37:52; author: jmcd; state: Exp; lines: +8 -0
64 * Since includes.h isn't included here, VA_COPY has to be defined here. I don't
65 * see any include file that is guaranteed to be here, so I'm defining it
66 * locally. Fixes AIX and Solaris builds.
67 *
68 * date: 2002/06/03 03:07:24; author: tridge; state: Exp; lines: +5 -13
69 * put the ifdef for HAVE_VA_COPY in one place rather than in lots of
70 * functions
71 *
72 * date: 2002/05/17 14:51:22; author: jmcd; state: Exp; lines: +21 -4
73 * Fix usage of va_list passed as an arg. Use __va_copy before using it
74 * when it exists.
75 *
76 * date: 2002/04/16 22:38:04; author: idra; state: Exp; lines: +20 -14
77 * Fix incorrect zpadlen handling in fmtfp.
78 * Thanks to Ollie Oldham <ollie.oldham@metro-optix.com> for spotting it.
79 * few mods to make it easier to compile the tests.
80 * added the "Ollie" test to the floating point ones.
81 *
82 * Martin Pool (mbp@samba.org) April 2003
83 * Remove NO_CONFIG_H so that the test case can be built within a source
84 * tree with less trouble.
85 * Remove unnecessary SAFE_FREE() definition.
86 *
87 * Martin Pool (mbp@samba.org) May 2003
88 * Put in a prototype for dummy_snprintf() to quiet compiler warnings.
89 *
90 * Move #endif to make sure VA_COPY, LDOUBLE, etc are defined even
91 * if the C library has some snprintf functions already.
92 **************************************************************/
93
94#ifndef NO_CONFIG_H
95#include "config.h"
96#else
97#define NULL 0
98#endif
99
100#ifdef TEST_SNPRINTF /* need math library headers for testing */
101
102/* In test mode, we pretend that this system doesn't have any snprintf
103 * functions, regardless of what config.h says. */
104# undef HAVE_SNPRINTF
105# undef HAVE_VSNPRINTF
106# undef HAVE_C99_VSNPRINTF
107# undef HAVE_ASPRINTF
108# undef HAVE_VASPRINTF
109# include <math.h>
110#endif /* TEST_SNPRINTF */
111
112#ifdef HAVE_STRING_H
113#include <string.h>
114#endif
115
116#ifdef HAVE_STRINGS_H
117#include <strings.h>
118#endif
119#ifdef HAVE_CTYPE_H
120#include <ctype.h>
121#endif
122#include <sys/types.h>
123#include <stdarg.h>
124#ifdef HAVE_STDLIB_H
125#include <stdlib.h>
126#endif
127
128#if defined(HAVE_SNPRINTF) && defined(HAVE_VSNPRINTF) && defined(HAVE_C99_VSNPRINTF)
129/* only include stdio.h if we are not re-defining snprintf or vsnprintf */
130#include <stdio.h>
131 /* make the compiler happy with an empty file */
132 void dummy_snprintf(void);
133 void dummy_snprintf(void) {}
134#endif /* HAVE_SNPRINTF, etc */
135
136#ifdef HAVE_LONG_DOUBLE
137#define LDOUBLE long double
138#else
139#define LDOUBLE double
140#endif
141
142#ifdef HAVE_LONG_LONG
143#define LLONG long long
144#else
145#define LLONG long
146#endif
147
148#ifndef VA_COPY
149#ifdef HAVE_VA_COPY
150#define VA_COPY(dest, src) va_copy(dest, src)
151#else
152#ifdef HAVE___VA_COPY
153#define VA_COPY(dest, src) __va_copy(dest, src)
154#else
155#define VA_COPY(dest, src) (dest) = (src)
156#endif
157#endif
158
159/*
160 * dopr(): poor man's version of doprintf
161 */
162
163/* format read states */
164#define DP_S_DEFAULT 0
165#define DP_S_FLAGS 1
166#define DP_S_MIN 2
167#define DP_S_DOT 3
168#define DP_S_MAX 4
169#define DP_S_MOD 5
170#define DP_S_CONV 6
171#define DP_S_DONE 7
172
173/* format flags - Bits */
174#define DP_F_MINUS (1 << 0)
175#define DP_F_PLUS (1 << 1)
176#define DP_F_SPACE (1 << 2)
177#define DP_F_NUM (1 << 3)
178#define DP_F_ZERO (1 << 4)
179#define DP_F_UP (1 << 5)
180#define DP_F_UNSIGNED (1 << 6)
181
182/* Conversion Flags */
183#define DP_C_SHORT 1
184#define DP_C_LONG 2
185#define DP_C_LDOUBLE 3
186#define DP_C_LLONG 4
187
188#define char_to_int(p) ((p)- '0')
189#ifndef MAX
190#define MAX(p,q) (((p) >= (q)) ? (p) : (q))
191#endif
192
193/* yes this really must be a ||. Don't muck with this (tridge) */
194#if !defined(HAVE_VSNPRINTF) || !defined(HAVE_C99_VSNPRINTF)
195
196static size_t dopr(char *buffer, size_t maxlen, const char *format,
197 va_list args_in);
198static void fmtstr(char *buffer, size_t *currlen, size_t maxlen,
199 char *value, int flags, int min, int max);
200static void fmtint(char *buffer, size_t *currlen, size_t maxlen,
201 long value, int base, int min, int max, int flags);
202static void fmtfp(char *buffer, size_t *currlen, size_t maxlen,
203 LDOUBLE fvalue, int min, int max, int flags);
204static void dopr_outch(char *buffer, size_t *currlen, size_t maxlen, char c);
205
206static size_t dopr(char *buffer, size_t maxlen, const char *format, va_list args_in)
207{
208 char ch;
209 LLONG value;
210 LDOUBLE fvalue;
211 char *strvalue;
212 int min;
213 int max;
214 int state;
215 int flags;
216 int cflags;
217 size_t currlen;
218 va_list args;
219
220 VA_COPY(args, args_in);
221
222 state = DP_S_DEFAULT;
223 currlen = flags = cflags = min = 0;
224 max = -1;
225 ch = *format++;
226
227 while (state != DP_S_DONE) {
228 if (ch == '\0')
229 state = DP_S_DONE;
230
231 switch(state) {
232 case DP_S_DEFAULT:
233 if (ch == '%')
234 state = DP_S_FLAGS;
235 else
236 dopr_outch (buffer, &currlen, maxlen, ch);
237 ch = *format++;
238 break;
239 case DP_S_FLAGS:
240 switch (ch) {
241 case '-':
242 flags |= DP_F_MINUS;
243 ch = *format++;
244 break;
245 case '+':
246 flags |= DP_F_PLUS;
247 ch = *format++;
248 break;
249 case ' ':
250 flags |= DP_F_SPACE;
251 ch = *format++;
252 break;
253 case '#':
254 flags |= DP_F_NUM;
255 ch = *format++;
256 break;
257 case '0':
258 flags |= DP_F_ZERO;
259 ch = *format++;
260 break;
261 default:
262 state = DP_S_MIN;
263 break;
264 }
265 break;
266 case DP_S_MIN:
267 if (isdigit((unsigned char)ch)) {
268 min = 10*min + char_to_int (ch);
269 ch = *format++;
270 } else if (ch == '*') {
271 min = va_arg (args, int);
272 ch = *format++;
273 state = DP_S_DOT;
274 } else {
275 state = DP_S_DOT;
276 }
277 break;
278 case DP_S_DOT:
279 if (ch == '.') {
280 state = DP_S_MAX;
281 ch = *format++;
282 } else {
283 state = DP_S_MOD;
284 }
285 break;
286 case DP_S_MAX:
287 if (isdigit((unsigned char)ch)) {
288 if (max < 0)
289 max = 0;
290 max = 10*max + char_to_int (ch);
291 ch = *format++;
292 } else if (ch == '*') {
293 max = va_arg (args, int);
294 ch = *format++;
295 state = DP_S_MOD;
296 } else {
297 state = DP_S_MOD;
298 }
299 break;
300 case DP_S_MOD:
301 switch (ch) {
302 case 'h':
303 cflags = DP_C_SHORT;
304 ch = *format++;
305 break;
306 case 'l':
307 cflags = DP_C_LONG;
308 ch = *format++;
309 if (ch == 'l') { /* It's a long long */
310 cflags = DP_C_LLONG;
311 ch = *format++;
312 }
313 break;
314 case 'L':
315 cflags = DP_C_LDOUBLE;
316 ch = *format++;
317 break;
318 default:
319 break;
320 }
321 state = DP_S_CONV;
322 break;
323 case DP_S_CONV:
324 switch (ch) {
325 case 'd':
326 case 'i':
327 if (cflags == DP_C_SHORT)
328 value = va_arg (args, int);
329 else if (cflags == DP_C_LONG)
330 value = va_arg (args, long int);
331 else if (cflags == DP_C_LLONG)
332 value = va_arg (args, LLONG);
333 else
334 value = va_arg (args, int);
335 fmtint (buffer, &currlen, maxlen, value, 10, min, max, flags);
336 break;
337 case 'o':
338 flags |= DP_F_UNSIGNED;
339 if (cflags == DP_C_SHORT)
340 value = va_arg (args, unsigned int);
341 else if (cflags == DP_C_LONG)
342 value = (long)va_arg (args, unsigned long int);
343 else if (cflags == DP_C_LLONG)
344 value = (long)va_arg (args, unsigned LLONG);
345 else
346 value = (long)va_arg (args, unsigned int);
347 fmtint (buffer, &currlen, maxlen, value, 8, min, max, flags);
348 break;
349 case 'u':
350 flags |= DP_F_UNSIGNED;
351 if (cflags == DP_C_SHORT)
352 value = va_arg (args, unsigned int);
353 else if (cflags == DP_C_LONG)
354 value = (long)va_arg (args, unsigned long int);
355 else if (cflags == DP_C_LLONG)
356 value = (LLONG)va_arg (args, unsigned LLONG);
357 else
358 value = (long)va_arg (args, unsigned int);
359 fmtint (buffer, &currlen, maxlen, value, 10, min, max, flags);
360 break;
361 case 'X':
362 flags |= DP_F_UP;
363 case 'x':
364 flags |= DP_F_UNSIGNED;
365 if (cflags == DP_C_SHORT)
366 value = va_arg (args, unsigned int);
367 else if (cflags == DP_C_LONG)
368 value = (long)va_arg (args, unsigned long int);
369 else if (cflags == DP_C_LLONG)
370 value = (LLONG)va_arg (args, unsigned LLONG);
371 else
372 value = (long)va_arg (args, unsigned int);
373 fmtint (buffer, &currlen, maxlen, value, 16, min, max, flags);
374 break;
375 case 'f':
376 if (cflags == DP_C_LDOUBLE)
377 fvalue = va_arg (args, LDOUBLE);
378 else
379 fvalue = va_arg (args, double);
380 /* um, floating point? */
381 fmtfp (buffer, &currlen, maxlen, fvalue, min, max, flags);
382 break;
383 case 'E':
384 flags |= DP_F_UP;
385 case 'e':
386 if (cflags == DP_C_LDOUBLE)
387 fvalue = va_arg (args, LDOUBLE);
388 else
389 fvalue = va_arg (args, double);
390 fmtfp (buffer, &currlen, maxlen, fvalue, min, max, flags);
391 break;
392 case 'G':
393 flags |= DP_F_UP;
394 case 'g':
395 if (cflags == DP_C_LDOUBLE)
396 fvalue = va_arg (args, LDOUBLE);
397 else
398 fvalue = va_arg (args, double);
399 fmtfp (buffer, &currlen, maxlen, fvalue, min, max, flags);
400 break;
401 case 'c':
402 dopr_outch (buffer, &currlen, maxlen, va_arg (args, int));
403 break;
404 case 's':
405 strvalue = va_arg (args, char *);
406 if (!strvalue) strvalue = "(NULL)";
407 if (max == -1) {
408 max = strlen(strvalue);
409 }
410 if (min > 0 && max >= 0 && min > max) max = min;
411 fmtstr (buffer, &currlen, maxlen, strvalue, flags, min, max);
412 break;
413 case 'p':
414 strvalue = va_arg (args, void *);
415 fmtint (buffer, &currlen, maxlen, (long) strvalue, 16, min, max, flags);
416 break;
417 case 'n':
418 if (cflags == DP_C_SHORT) {
419 short int *num;
420 num = va_arg (args, short int *);
421 *num = currlen;
422 } else if (cflags == DP_C_LONG) {
423 long int *num;
424 num = va_arg (args, long int *);
425 *num = (long int)currlen;
426 } else if (cflags == DP_C_LLONG) {
427 LLONG *num;
428 num = va_arg (args, LLONG *);
429 *num = (LLONG)currlen;
430 } else {
431 int *num;
432 num = va_arg (args, int *);
433 *num = currlen;
434 }
435 break;
436 case '%':
437 dopr_outch (buffer, &currlen, maxlen, ch);
438 break;
439 case 'w':
440 /* not supported yet, treat as next char */
441 ch = *format++;
442 break;
443 default:
444 /* Unknown, skip */
445 break;
446 }
447 ch = *format++;
448 state = DP_S_DEFAULT;
449 flags = cflags = min = 0;
450 max = -1;
451 break;
452 case DP_S_DONE:
453 break;
454 default:
455 /* hmm? */
456 break; /* some picky compilers need this */
457 }
458 }
459 if (maxlen != 0) {
460 if (currlen < maxlen - 1)
461 buffer[currlen] = '\0';
462 else if (maxlen > 0)
463 buffer[maxlen - 1] = '\0';
464 }
465
466 return currlen;
467}
468
469static void fmtstr(char *buffer, size_t *currlen, size_t maxlen,
470 char *value, int flags, int min, int max)
471{
472 int padlen, strln; /* amount to pad */
473 int cnt = 0;
474
475#ifdef DEBUG_SNPRINTF
476 printf("fmtstr min=%d max=%d s=[%s]\n", min, max, value);
477#endif
478 if (value == 0) {
479 value = "<NULL>";
480 }
481
482 for (strln = 0; value[strln]; ++strln); /* strlen */
483 padlen = min - strln;
484 if (padlen < 0)
485 padlen = 0;
486 if (flags & DP_F_MINUS)
487 padlen = -padlen; /* Left Justify */
488
489 while ((padlen > 0) && (cnt < max)) {
490 dopr_outch (buffer, currlen, maxlen, ' ');
491 --padlen;
492 ++cnt;
493 }
494 while (*value && (cnt < max)) {
495 dopr_outch (buffer, currlen, maxlen, *value++);
496 ++cnt;
497 }
498 while ((padlen < 0) && (cnt < max)) {
499 dopr_outch (buffer, currlen, maxlen, ' ');
500 ++padlen;
501 ++cnt;
502 }
503}
504
505/* Have to handle DP_F_NUM (ie 0x and 0 alternates) */
506
507static void fmtint(char *buffer, size_t *currlen, size_t maxlen,
508 long value, int base, int min, int max, int flags)
509{
510 int signvalue = 0;
511 unsigned long uvalue;
512 char convert[20];
513 int place = 0;
514 int spadlen = 0; /* amount to space pad */
515 int zpadlen = 0; /* amount to zero pad */
516 int caps = 0;
517
518 if (max < 0)
519 max = 0;
520
521 uvalue = value;
522
523 if(!(flags & DP_F_UNSIGNED)) {
524 if( value < 0 ) {
525 signvalue = '-';
526 uvalue = -value;
527 } else {
528 if (flags & DP_F_PLUS) /* Do a sign (+/i) */
529 signvalue = '+';
530 else if (flags & DP_F_SPACE)
531 signvalue = ' ';
532 }
533 }
534
535 if (flags & DP_F_UP) caps = 1; /* Should characters be upper case? */
536
537 do {
538 convert[place++] =
539 (caps? "0123456789ABCDEF":"0123456789abcdef")
540 [uvalue % (unsigned)base ];
541 uvalue = (uvalue / (unsigned)base );
542 } while(uvalue && (place < 20));
543 if (place == 20) place--;
544 convert[place] = 0;
545
546 zpadlen = max - place;
547 spadlen = min - MAX (max, place) - (signvalue ? 1 : 0);
548 if (zpadlen < 0) zpadlen = 0;
549 if (spadlen < 0) spadlen = 0;
550 if (flags & DP_F_ZERO) {
551 zpadlen = MAX(zpadlen, spadlen);
552 spadlen = 0;
553 }
554 if (flags & DP_F_MINUS)
555 spadlen = -spadlen; /* Left Justifty */
556
557#ifdef DEBUG_SNPRINTF
558 printf("zpad: %d, spad: %d, min: %d, max: %d, place: %d\n",
559 zpadlen, spadlen, min, max, place);
560#endif
561
562 /* Spaces */
563 while (spadlen > 0) {
564 dopr_outch (buffer, currlen, maxlen, ' ');
565 --spadlen;
566 }
567
568 /* Sign */
569 if (signvalue)
570 dopr_outch (buffer, currlen, maxlen, signvalue);
571
572 /* Zeros */
573 if (zpadlen > 0) {
574 while (zpadlen > 0) {
575 dopr_outch (buffer, currlen, maxlen, '0');
576 --zpadlen;
577 }
578 }
579
580 /* Digits */
581 while (place > 0)
582 dopr_outch (buffer, currlen, maxlen, convert[--place]);
583
584 /* Left Justified spaces */
585 while (spadlen < 0) {
586 dopr_outch (buffer, currlen, maxlen, ' ');
587 ++spadlen;
588 }
589}
590
591static LDOUBLE abs_val(LDOUBLE value)
592{
593 LDOUBLE result = value;
594
595 if (value < 0)
596 result = -value;
597
598 return result;
599}
600
601static LDOUBLE POW10(int exp)
602{
603 LDOUBLE result = 1;
604
605 while (exp) {
606 result *= 10;
607 exp--;
608 }
609
610 return result;
611}
612
613static LLONG ROUND(LDOUBLE value)
614{
615 LLONG intpart;
616
617 intpart = (LLONG)value;
618 value = value - intpart;
619 if (value >= 0.5) intpart++;
620
621 return intpart;
622}
623
/*
 * my_modf(): modf() substitute that does not need the math library.
 * Portable but slow.
 *
 * Splits X0 into an integral part, stored in *IPTR, and the remaining
 * fraction, which is returned.  X0 is scaled down by powers of ten until
 * it converts to a long; the leading digits are peeled off and the rest
 * is handled recursively.  If 100 scaling steps are not enough, both
 * parts are reported as 0.
 */
static double my_modf(double x0, double *iptr)
{
	double scaled = x0;	/* x0 divided by 10^steps */
	double scale = 1.0;	/* 10^steps */
	long whole = 0;
	int steps = 0;

	while (steps < 100) {
		whole = (long)scaled;
		if (whole <= (scaled+1) && whole >= (scaled-1))
			break;
		scaled *= 0.1;
		scale *= 10.0;
		steps++;
	}

	if (steps == 100) {
		/* yikes! the number is beyond what we can handle. What do we do? */
		(*iptr) = 0;
		return 0;
	}

	if (steps == 0) {
		/* x0 itself fits into a long */
		(*iptr) = whole;
		return scaled - (*iptr);
	}

	{
		/* remove the leading digits and split what is left */
		double rest_int;
		double frac;

		frac = my_modf(x0-whole*scale, &rest_int);
		(*iptr) = whole*scale + rest_int;
		return frac;
	}
}
658
659
660static void fmtfp (char *buffer, size_t *currlen, size_t maxlen,
661 LDOUBLE fvalue, int min, int max, int flags)
662{
663 int signvalue = 0;
664 double ufvalue;
665 char iconvert[311];
666 char fconvert[311];
667 int iplace = 0;
668 int fplace = 0;
669 int padlen = 0; /* amount to pad */
670 int zpadlen = 0;
671 int caps = 0;
672 int idx;
673 double intpart;
674 double fracpart;
675 double temp;
676
677 /*
678 * AIX manpage says the default is 0, but Solaris says the default
679 * is 6, and sprintf on AIX defaults to 6
680 */
681 if (max < 0)
682 max = 6;
683
684 ufvalue = abs_val (fvalue);
685
686 if (fvalue < 0) {
687 signvalue = '-';
688 } else {
689 if (flags & DP_F_PLUS) { /* Do a sign (+/i) */
690 signvalue = '+';
691 } else {
692 if (flags & DP_F_SPACE)
693 signvalue = ' ';
694 }
695 }
696
697#if 0
698 if (flags & DP_F_UP) caps = 1; /* Should characters be upper case? */
699#endif
700
701#if 0
702 if (max == 0) ufvalue += 0.5; /* if max = 0 we must round */
703#endif
704
705 /*
706 * Sorry, we only support 16 digits past the decimal because of our
707 * conversion method
708 */
709 if (max > 16)
710 max = 16;
711
712 /* We "cheat" by converting the fractional part to integer by
713 * multiplying by a factor of 10
714 */
715
716 temp = ufvalue;
717 my_modf(temp, &intpart);
718
719 fracpart = ROUND((POW10(max)) * (ufvalue - intpart));
720
721 if (fracpart >= POW10(max)) {
722 intpart++;
723 fracpart -= POW10(max);
724 }
725
726
727 /* Convert integer part */
728 do {
729 temp = intpart*0.1;
730 my_modf(temp, &intpart);
731 idx = (int) ((temp -intpart +0.05)* 10.0);
732 /* idx = (int) (((double)(temp*0.1) -intpart +0.05) *10.0); */
733 /* printf ("%llf, %f, %x\n", temp, intpart, idx); */
734 iconvert[iplace++] =
735 (caps? "0123456789ABCDEF":"0123456789abcdef")[idx];
736 } while (intpart && (iplace < 311));
737 if (iplace == 311) iplace--;
738 iconvert[iplace] = 0;
739
740 /* Convert fractional part */
741 if (fracpart)
742 {
743 do {
744 temp = fracpart*0.1;
745 my_modf(temp, &fracpart);
746 idx = (int) ((temp -fracpart +0.05)* 10.0);
747 /* idx = (int) ((((temp/10) -fracpart) +0.05) *10); */
748 /* printf ("%lf, %lf, %ld\n", temp, fracpart, idx ); */
749 fconvert[fplace++] =
750 (caps? "0123456789ABCDEF":"0123456789abcdef")[idx];
751 } while(fracpart && (fplace < 311));
752 if (fplace == 311) fplace--;
753 }
754 fconvert[fplace] = 0;
755
756 /* -1 for decimal point, another -1 if we are printing a sign */
757 padlen = min - iplace - max - 1 - ((signvalue) ? 1 : 0);
758 zpadlen = max - fplace;
759 if (zpadlen < 0) zpadlen = 0;
760 if (padlen < 0)
761 padlen = 0;
762 if (flags & DP_F_MINUS)
763 padlen = -padlen; /* Left Justifty */
764
765 if ((flags & DP_F_ZERO) && (padlen > 0)) {
766 if (signvalue) {
767 dopr_outch (buffer, currlen, maxlen, signvalue);
768 --padlen;
769 signvalue = 0;
770 }
771 while (padlen > 0) {
772 dopr_outch (buffer, currlen, maxlen, '0');
773 --padlen;
774 }
775 }
776 while (padlen > 0) {
777 dopr_outch (buffer, currlen, maxlen, ' ');
778 --padlen;
779 }
780 if (signvalue)
781 dopr_outch (buffer, currlen, maxlen, signvalue);
782
783 while (iplace > 0)
784 dopr_outch (buffer, currlen, maxlen, iconvert[--iplace]);
785
786#ifdef DEBUG_SNPRINTF
787 printf("fmtfp: fplace=%d zpadlen=%d\n", fplace, zpadlen);
788#endif
789
790 /*
791 * Decimal point. This should probably use locale to find the correct
792 * char to print out.
793 */
794 if (max > 0) {
795 dopr_outch (buffer, currlen, maxlen, '.');
796
797 while (zpadlen > 0) {
798 dopr_outch (buffer, currlen, maxlen, '0');
799 --zpadlen;
800 }
801
802 while (fplace > 0)
803 dopr_outch (buffer, currlen, maxlen, fconvert[--fplace]);
804 }
805
806 while (padlen < 0) {
807 dopr_outch (buffer, currlen, maxlen, ' ');
808 ++padlen;
809 }
810}
811
/*
 * Append C to the output.  The character is stored only while the
 * current length is below MAXLEN; the length counter advances in either
 * case so callers learn how long the full output would have been.
 */
static void dopr_outch(char *buffer, size_t *currlen, size_t maxlen, char c)
{
	size_t pos = *currlen;

	if (pos < maxlen)
		buffer[pos] = c;

	*currlen = pos + 1;
}
819
/*
 * Replacement vsnprintf(): format into STR (at most COUNT bytes) via
 * dopr() and return the length dopr() reports.
 */
int smb_vsnprintf (char *str, size_t count, const char *fmt, va_list args)
{
	size_t needed = dopr(str, count, fmt, args);

	return needed;
}
824#define vsnprintf smb_vsnprintf
825#endif
826
827/* yes this really must be a ||. Don't muck with this (tridge)
828 *
829 * The logic for these two is that we need our own definition if the
830 * OS *either* has no definition of *sprintf, or if it does have one
831 * that doesn't work properly according to the autoconf test.
832 */
833#if !defined(HAVE_SNPRINTF) || !defined(HAVE_C99_VSNPRINTF)
/*
 * Replacement snprintf(): collects the variable arguments and hands them
 * to vsnprintf() (which is either the system function or smb_vsnprintf,
 * depending on the configuration macros above).
 *
 * Returns whatever vsnprintf() returns.  The result is kept in an int,
 * matching vsnprintf()'s return type, so a negative error value is not
 * round-tripped through an unsigned type.
 */
int smb_snprintf(char *str,size_t count,const char *fmt,...)
{
	int ret;
	va_list ap;

	va_start(ap, fmt);
	ret = vsnprintf(str, count, fmt, ap);
	va_end(ap);
	return ret;
}
844#define snprintf smb_snprintf
845#endif
846
847#endif
848
849#ifndef HAVE_VASPRINTF
/*
 * Replacement vasprintf(): format into a freshly malloc()ed string.
 *
 * ptr     receives the allocated, NUL-terminated result; the caller
 *         frees it.
 * format  printf-style format string.
 * ap      arguments; only copies of it are consumed.
 *
 * Returns the length of the result, the negative value of the sizing
 * vsnprintf() call if that fails, or -1 if the allocation fails.  *PTR
 * is only set once the allocation has been attempted.
 *
 * An empty result (length 0) is a valid string and is allocated like
 * any other; previously it returned early and left *PTR unset.
 */
 int vasprintf(char **ptr, const char *format, va_list ap)
{
	int ret;
	va_list ap2;

	/* First pass: measure only. */
	VA_COPY(ap2, ap);
	ret = vsnprintf(NULL, 0, format, ap2);
	va_end(ap2);
	if (ret < 0) return ret;

	(*ptr) = (char *)malloc(ret+1);
	if (!*ptr) return -1;

	/* Second pass: the first copy was consumed, so take a new one. */
	VA_COPY(ap2, ap);
	ret = vsnprintf(*ptr, ret+1, format, ap2);
	va_end(ap2);

	return ret;
}
869#endif
870
871
872#ifndef HAVE_ASPRINTF
/*
 * Replacement asprintf(): variadic front end for vasprintf().
 * *PTR is cleared first, then set by vasprintf(); the return value is
 * the one vasprintf() produced.
 */
 int asprintf(char **ptr, const char *format, ...)
{
	va_list args;
	int length;

	*ptr = NULL;

	va_start(args, format);
	length = vasprintf(ptr, format, args);
	va_end(args);

	return length;
}
885#endif
886
887#ifdef TEST_SNPRINTF
888
889 int sprintf(char *str,const char *fmt,...);
890
/*
 * Compare one snprintf() result with the sprintf() reference.
 * OURS / REF are the two formatted strings, LEN_PROBE is the length
 * reported with a NULL buffer and LEN_FULL the length reported when
 * actually writing.  Prints a message per mismatch and returns the
 * number of mismatches found (0, 1 or 2).
 */
static int report_mismatch(const char *fmt, const char *ours, const char *ref,
			   int len_probe, int len_full)
{
	int bad = 0;

	if (strcmp (ours, ref)) {
		printf("snprintf doesn't match Format: %s\n\tsnprintf = [%s]\n\t sprintf = [%s]\n",
		       fmt, ours, ref);
		bad++;
	}
	if (len_probe != len_full) {
		printf("snprintf l1 != l2 (%d %d) %s\n", len_probe, len_full, fmt);
		bad++;
	}
	return bad;
}

/*
 * Built-in self test (compiled with -DTEST_SNPRINTF): runs a table of
 * floating point, integer and string formats through the replacement
 * snprintf() and the system sprintf() and reports every difference,
 * then probes how many significant digits "%1.1f" handles identically.
 */
 int main (void)
{
	char buf1[1024];
	char buf2[1024];
	char *fp_fmt[] = {
		"%1.1f",
		"%-1.5f",
		"%1.5f",
		"%123.9f",
		"%10.5f",
		"% 10.5f",
		"%+22.9f",
		"%+4.9f",
		"%01.3f",
		"%4f",
		"%3.1f",
		"%3.2f",
		"%.0f",
		"%f",
		"%-16.16f",	/* was missing the '%', so it tested nothing */
		NULL
	};
	double fp_nums[] = { 6442452944.1234, -1.5, 134.21, 91340.2, 341.1234, 203.9, 0.96, 0.996,
			     0.9996, 1.996, 4.136, 5.030201, 0.00205,
			     /* END LIST */ 0};
	char *int_fmt[] = {
		"%-1.5d",
		"%1.5d",
		"%123.9d",
		"%5.5d",
		"%10.5d",
		"% 10.5d",
		"%+22.33d",
		"%01.3d",
		"%4d",
		"%d",
		NULL
	};
	/* int, not long: the values are consumed by %d conversions */
	int int_nums[] = { -1, 134, 91340, 341, 0203, 0};
	char *str_fmt[] = {
		/* the first six entries were missing their leading '%' */
		"%10.5s",
		"%5.10s",
		"%10.1s",
		"%0.10s",
		"%10.0s",
		"%1.10s",
		"%s",
		"%.1s",
		"%.10s",
		"%10s",
		NULL
	};
	char *str_vals[] = {"hello", "a", "", "a longer string", NULL};
	int x, y;
	int fail = 0;
	int num = 0;

	printf ("Testing snprintf format codes against system sprintf...\n");

	/* the value tables are terminated by 0 / NULL, which is not tested */
	for (x = 0; fp_fmt[x] ; x++) {
		for (y = 0; fp_nums[y] != 0 ; y++) {
			int l1 = snprintf(NULL, 0, fp_fmt[x], fp_nums[y]);
			int l2 = snprintf(buf1, sizeof(buf1), fp_fmt[x], fp_nums[y]);
			sprintf (buf2, fp_fmt[x], fp_nums[y]);
			fail += report_mismatch(fp_fmt[x], buf1, buf2, l1, l2);
			num++;
		}
	}

	for (x = 0; int_fmt[x] ; x++) {
		for (y = 0; int_nums[y] != 0 ; y++) {
			int l1 = snprintf(NULL, 0, int_fmt[x], int_nums[y]);
			int l2 = snprintf(buf1, sizeof(buf1), int_fmt[x], int_nums[y]);
			sprintf (buf2, int_fmt[x], int_nums[y]);
			fail += report_mismatch(int_fmt[x], buf1, buf2, l1, l2);
			num++;
		}
	}

	for (x = 0; str_fmt[x] ; x++) {
		for (y = 0; str_vals[y] != 0 ; y++) {
			int l1 = snprintf(NULL, 0, str_fmt[x], str_vals[y]);
			int l2 = snprintf(buf1, sizeof(buf1), str_fmt[x], str_vals[y]);
			sprintf (buf2, str_fmt[x], str_vals[y]);
			fail += report_mismatch(str_fmt[x], buf1, buf2, l1, l2);
			num++;
		}
	}

	printf ("%d tests failed out of %d.\n", fail, num);

	printf("seeing how many digits we support\n");
	{
		double v0 = 0.12345678901234567890123456789012345678901;
		for (x=0; x<100; x++) {
			double p = pow(10, x);
			double r = v0*p;
			snprintf(buf1, sizeof(buf1), "%1.1f", r);
			sprintf(buf2, "%1.1f", r);
			if (strcmp(buf1, buf2)) {
				printf("we seem to support %d digits\n", x-1);
				break;
			}
		}
	}

	return 0;
}
1023#endif /* TEST_SNPRINTF */
diff --git a/lib/stdbool_.h b/lib/stdbool_.h
deleted file mode 100644
index 7cd2da84..00000000
--- a/lib/stdbool_.h
+++ /dev/null
@@ -1,105 +0,0 @@
1/* Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
2 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18#ifndef _STDBOOL_H
19#define _STDBOOL_H
20
21/* ISO C 99 <stdbool.h> for platforms that lack it. */
22
23/* Usage suggestions:
24
25 Programs that use <stdbool.h> should be aware of some limitations
26 and standards compliance issues.
27
28 Standards compliance:
29
30 - <stdbool.h> must be #included before 'bool', 'false', 'true'
31 can be used.
32
33 - You cannot assume that sizeof (bool) == 1.
34
35 - Programs should not undefine the macros bool, true, and false,
36 as C99 lists that as an "obsolescent feature".
37
38 Limitations of this substitute, when used in a C89 environment:
39
40 - <stdbool.h> must be #included before the '_Bool' type can be used.
41
42 - You cannot assume that _Bool is a typedef; it might be a macro.
43
44 - In C99, casts and automatic conversions to '_Bool' or 'bool' are
45 performed in such a way that every nonzero value gets converted
46 to 'true', and zero gets converted to 'false'. This doesn't work
47 with this substitute. With this substitute, only the values 0 and 1
48 give the expected result when converted to '_Bool' or 'bool'.
49
50 Also, it is suggested that programs use 'bool' rather than '_Bool';
51 this isn't required, but 'bool' is more common. */
52
53
54/* 7.16. Boolean type and values */
55
56/* BeOS <sys/socket.h> already #defines false 0, true 1. We use the same
57 definitions below, which is OK. */
58#ifdef __BEOS__
59# include <OS.h> /* defines bool but not _Bool */
60#endif
61
62/* C++ and BeOS have a reliable bool (and _Bool, if it exists).
63 Otherwise, since this file is being compiled, the system
64 <stdbool.h> is not reliable so assume that the system _Bool is not
65 reliable either. Under that assumption, it is tempting to write
66
67 typedef enum { false, true } _Bool;
68
69 so that gdb prints values of type 'bool' symbolically. But if we do
70 this, values of type '_Bool' may promote to 'int' or 'unsigned int'
71 (see ISO C 99 6.7.2.2.(4)); however, '_Bool' must promote to 'int'
72 (see ISO C 99 6.3.1.1.(2)). We could instead try this:
73
74 typedef enum { _Bool_dummy = -1, false, true } _Bool;
75
76 as the negative value ensures that '_Bool' promotes to 'int'.
77 However, this runs into some other problems. First, Sun's C
78 compiler when (__SUNPRO_C < 0x550 || __STDC__ == 1) issues a stupid
79 "warning: _Bool is a keyword in ISO C99". Second, IBM's AIX cc
80 compiler 6.0.0.0 (and presumably other versions) mishandles
81 subscripts involving _Bool (effectively, _Bool promotes to unsigned
82 int in this case), and we need to redefine _Bool in that case.
83 Third, HP-UX 10.20's C compiler lacks <stdbool.h> but has _Bool and
84 mishandles comparisons of _Bool to int (it promotes _Bool to
85 unsigned int).
86
87 The simplest way to work around these problems is to ignore any
88 existing definition of _Bool and use our own. */
89
90#if defined __cplusplus || defined __BEOS__
91# if !@HAVE__BOOL@
92typedef bool _Bool;
93# endif
94#else
95# define _Bool signed char
96#endif
97
98#define bool _Bool
99
100/* The other macros must be usable in preprocessor directives. */
101#define false 0
102#define true 1
103#define __bool_true_false_are_defined 1
104
105#endif /* _STDBOOL_H */
diff --git a/lib/strcase.h b/lib/strcase.h
deleted file mode 100644
index e4207980..00000000
--- a/lib/strcase.h
+++ /dev/null
@@ -1,48 +0,0 @@
1/* Case-insensitive string comparison functions.
2 Copyright (C) 1995-1996, 2001, 2003, 2005 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18#ifndef _STRCASE_H
19#define _STRCASE_H
20
21#include <stddef.h>
22
23
24#ifdef __cplusplus
25extern "C" {
26#endif
27
28
29/* Compare strings S1 and S2, ignoring case, returning less than, equal to or
30 greater than zero if S1 is lexicographically less than, equal to or greater
31 than S2.
32 Note: This function may, in multibyte locales, return 0 for strings of
33 different lengths! */
34extern int strcasecmp (const char *s1, const char *s2);
35
36/* Compare no more than N characters of strings S1 and S2, ignoring case,
37 returning less than, equal to or greater than zero if S1 is
38 lexicographically less than, equal to or greater than S2.
39 Note: This function can not work correctly in multibyte locales. */
40extern int strncasecmp (const char *s1, const char *s2, size_t n);
41
42
43#ifdef __cplusplus
44}
45#endif
46
47
48#endif /* _STRCASE_H */
diff --git a/lib/strtod.c b/lib/strtod.c
deleted file mode 100644
index 7b48754e..00000000
--- a/lib/strtod.c
+++ /dev/null
@@ -1,189 +0,0 @@
1/* Copyright (C) 1991, 1992, 1997, 1999, 2003 Free Software Foundation, Inc.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2, or (at your option)
6 any later version.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software Foundation,
15 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
16
17#ifdef HAVE_CONFIG_H
18# include <config.h>
19#endif
20
21#include <errno.h>
22#ifndef errno
23extern int errno;
24#endif
25
26#include <ctype.h>
27
28#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
29# define IN_CTYPE_DOMAIN(c) 1
30#else
31# define IN_CTYPE_DOMAIN(c) isascii(c)
32#endif
33
34#define ISSPACE(c) (IN_CTYPE_DOMAIN (c) && isspace (c))
35#define ISDIGIT(c) (IN_CTYPE_DOMAIN (c) && isdigit (c))
36#define TOLOWER(c) (IN_CTYPE_DOMAIN (c) ? tolower(c) : (c))
37
38#include <math.h>
39
40#include <float.h>
41#include <stdlib.h>
42#include <string.h>
43
44/* Convert NPTR to a double. If ENDPTR is not NULL, a pointer to the
45 character after the last one used in the number is put in *ENDPTR. */
46double
47strtod (const char *nptr, char **endptr)
48{
49 register const char *s;
50 short int sign;
51
52 /* The number so far. */
53 double num;
54
55 int got_dot; /* Found a decimal point. */
56 int got_digit; /* Seen any digits. */
57
58 /* The exponent of the number. */
59 long int exponent;
60
61 if (nptr == NULL)
62 {
63 errno = EINVAL;
64 goto noconv;
65 }
66
67 s = nptr;
68
69 /* Eat whitespace. */
70 while (ISSPACE (*s))
71 ++s;
72
73 /* Get the sign. */
74 sign = *s == '-' ? -1 : 1;
75 if (*s == '-' || *s == '+')
76 ++s;
77
78 num = 0.0;
79 got_dot = 0;
80 got_digit = 0;
81 exponent = 0;
82 for (;; ++s)
83 {
84 if (ISDIGIT (*s))
85 {
86 got_digit = 1;
87
88 /* Make sure that multiplication by 10 will not overflow. */
89 if (num > DBL_MAX * 0.1)
90 /* The value of the digit doesn't matter, since we have already
91 gotten as many digits as can be represented in a `double'.
92 This doesn't necessarily mean the result will overflow.
93 The exponent may reduce it to within range.
94
95 We just need to record that there was another
96 digit so that we can multiply by 10 later. */
97 ++exponent;
98 else
99 num = (num * 10.0) + (*s - '0');
100
101 /* Keep track of the number of digits after the decimal point.
102 If we just divided by 10 here, we would lose precision. */
103 if (got_dot)
104 --exponent;
105 }
106 else if (!got_dot && *s == '.')
107 /* Record that we have found the decimal point. */
108 got_dot = 1;
109 else
110 /* Any other character terminates the number. */
111 break;
112 }
113
114 if (!got_digit)
115 goto noconv;
116
117 if (TOLOWER (*s) == 'e')
118 {
119 /* Get the exponent specified after the `e' or `E'. */
120 int save = errno;
121 char *end;
122 long int exp;
123
124 errno = 0;
125 ++s;
126 exp = strtol (s, &end, 10);
127 if (errno == ERANGE)
128 {
129 /* The exponent overflowed a `long int'. It is probably a safe
130 assumption that an exponent that cannot be represented by
131 a `long int' exceeds the limits of a `double'. */
132 if (endptr != NULL)
133 *endptr = end;
134 if (exp < 0)
135 goto underflow;
136 else
137 goto overflow;
138 }
139 else if (end == s)
140 /* There was no exponent. Reset END to point to
141 the 'e' or 'E', so *ENDPTR will be set there. */
142 end = (char *) s - 1;
143 errno = save;
144 s = end;
145 exponent += exp;
146 }
147
148 if (endptr != NULL)
149 *endptr = (char *) s;
150
151 if (num == 0.0)
152 return 0.0;
153
154 /* Multiply NUM by 10 to the EXPONENT power,
155 checking for overflow and underflow. */
156
157 if (exponent < 0)
158 {
159 if (num < DBL_MIN * pow (10.0, (double) -exponent))
160 goto underflow;
161 }
162 else if (exponent > 0)
163 {
164 if (num > DBL_MAX * pow (10.0, (double) -exponent))
165 goto overflow;
166 }
167
168 num *= pow (10.0, (double) exponent);
169
170 return num * sign;
171
172overflow:
173 /* Return an overflow error. */
174 errno = ERANGE;
175 return HUGE_VAL * sign;
176
177underflow:
178 /* Return an underflow error. */
179 if (endptr != NULL)
180 *endptr = (char *) nptr;
181 errno = ERANGE;
182 return 0.0;
183
184noconv:
185 /* There was no number. */
186 if (endptr != NULL)
187 *endptr = (char *) nptr;
188 return 0.0;
189}
diff --git a/lib/tests/Makefile.am b/lib/tests/Makefile.am
index 63dee76e..b03f48a8 100644
--- a/lib/tests/Makefile.am
+++ b/lib/tests/Makefile.am
@@ -5,7 +5,7 @@ noinst_PROGRAMS = @EXTRA_TEST@
5TESTS = @EXTRA_TEST@ 5TESTS = @EXTRA_TEST@
6check_PROGRAMS = @EXTRA_TEST@ 6check_PROGRAMS = @EXTRA_TEST@
7 7
8INCLUDES = -I$(top_srcdir)/lib -I$(top_srcdir)/intl -I$(top_srcdir)/plugins 8INCLUDES = -I$(top_srcdir)/lib -I$(top_srcdir)/gl -I$(top_srcdir)/intl -I$(top_srcdir)/plugins
9 9
10EXTRA_PROGRAMS = test_utils test_disk 10EXTRA_PROGRAMS = test_utils test_disk
11 11
diff --git a/lib/unistd--.h b/lib/unistd--.h
deleted file mode 100644
index 1fe6ce8b..00000000
--- a/lib/unistd--.h
+++ /dev/null
@@ -1,28 +0,0 @@
1/* Like unistd.h, but redefine some names to avoid glitches.
2
3 Copyright (C) 2005 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19/* Written by Paul Eggert. */
20
21#include <unistd.h>
22#include "unistd-safer.h"
23
/* Drop any earlier macro definition of pipe, then map the name onto
   pipe_safer.  */
#undef pipe
#define pipe pipe_safer

/* Same treatment for dup, which is mapped onto dup_safer.  */
#undef dup
#define dup dup_safer
diff --git a/lib/unistd-safer.h b/lib/unistd-safer.h
deleted file mode 100644
index f95999d3..00000000
--- a/lib/unistd-safer.h
+++ /dev/null
@@ -1,23 +0,0 @@
1/* Invoke unistd-like functions, but avoid some glitches.
2
3 Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19/* Written by Paul Eggert. */
20
/* Prototypes only; the definitions are not part of this header.
   NOTE(review): the parameter names below are descriptive guesses taken
   from the function names -- confirm against the implementations.  */
int dup_safer (int fd);
int fd_safer (int fd);
int pipe_safer (int fd[2]);
diff --git a/lib/unlocked-io.h b/lib/unlocked-io.h
deleted file mode 100644
index d0093036..00000000
--- a/lib/unlocked-io.h
+++ /dev/null
@@ -1,137 +0,0 @@
1/* Prefer faster, non-thread-safe stdio functions if available.
2
3 Copyright (C) 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19/* Written by Jim Meyering. */
20
#if !defined UNLOCKED_IO_H
# define UNLOCKED_IO_H 1

/* Wrappers for the *_unlocked stdio functions/macros offered by the GNU
   C library and by other C libraries that implement POSIX's optional
   thread-safe functions.

   The ordinary stdio functions are thread-safe; the *_unlocked variants
   are more efficient but not thread-safe, which is acceptable because
   every application in this package is single threaded.

   Code shared with the GNU C library may call the *_unlocked names
   directly, so on hosts that lack a given function its *_unlocked name
   is mapped back onto the ordinary one.

   Each stanza below follows the same pattern: when HAVE_DECL_<NAME>_UNLOCKED
   is zero or undefined, define <name>_unlocked in terms of <name>;
   otherwise redirect <name> to <name>_unlocked.  */

# include <stdio.h>

# if !HAVE_DECL_CLEARERR_UNLOCKED
#  define clearerr_unlocked(x) clearerr (x)
# else
#  undef clearerr
#  define clearerr(x) clearerr_unlocked (x)
# endif

# if !HAVE_DECL_FEOF_UNLOCKED
#  define feof_unlocked(x) feof (x)
# else
#  undef feof
#  define feof(x) feof_unlocked (x)
# endif

# if !HAVE_DECL_FERROR_UNLOCKED
#  define ferror_unlocked(x) ferror (x)
# else
#  undef ferror
#  define ferror(x) ferror_unlocked (x)
# endif

# if !HAVE_DECL_FFLUSH_UNLOCKED
#  define fflush_unlocked(x) fflush (x)
# else
#  undef fflush
#  define fflush(x) fflush_unlocked (x)
# endif

# if !HAVE_DECL_FGETS_UNLOCKED
#  define fgets_unlocked(x,y,z) fgets (x,y,z)
# else
#  undef fgets
#  define fgets(x,y,z) fgets_unlocked (x,y,z)
# endif

# if !HAVE_DECL_FPUTC_UNLOCKED
#  define fputc_unlocked(x,y) fputc (x,y)
# else
#  undef fputc
#  define fputc(x,y) fputc_unlocked (x,y)
# endif

# if !HAVE_DECL_FPUTS_UNLOCKED
#  define fputs_unlocked(x,y) fputs (x,y)
# else
#  undef fputs
#  define fputs(x,y) fputs_unlocked (x,y)
# endif

# if !HAVE_DECL_FREAD_UNLOCKED
#  define fread_unlocked(w,x,y,z) fread (w,x,y,z)
# else
#  undef fread
#  define fread(w,x,y,z) fread_unlocked (w,x,y,z)
# endif

# if !HAVE_DECL_FWRITE_UNLOCKED
#  define fwrite_unlocked(w,x,y,z) fwrite (w,x,y,z)
# else
#  undef fwrite
#  define fwrite(w,x,y,z) fwrite_unlocked (w,x,y,z)
# endif

# if !HAVE_DECL_GETC_UNLOCKED
#  define getc_unlocked(x) getc (x)
# else
#  undef getc
#  define getc(x) getc_unlocked (x)
# endif

# if !HAVE_DECL_GETCHAR_UNLOCKED
#  define getchar_unlocked() getchar ()
# else
#  undef getchar
#  define getchar() getchar_unlocked ()
# endif

# if !HAVE_DECL_PUTC_UNLOCKED
#  define putc_unlocked(x,y) putc (x,y)
# else
#  undef putc
#  define putc(x,y) putc_unlocked (x,y)
# endif

# if !HAVE_DECL_PUTCHAR_UNLOCKED
#  define putchar_unlocked(x) putchar (x)
# else
#  undef putchar
#  define putchar(x) putchar_unlocked (x)
# endif

/* The explicit stream-locking calls are replaced by no-ops;
   ftrylockfile expands to the constant 0.  */
# undef flockfile
# define flockfile(x) ((void) 0)

# undef ftrylockfile
# define ftrylockfile(x) 0

# undef funlockfile
# define funlockfile(x) ((void) 0)

#endif /* UNLOCKED_IO_H */
diff --git a/lib/xalloc-die.c b/lib/xalloc-die.c
deleted file mode 100644
index ff5ac9e6..00000000
--- a/lib/xalloc-die.c
+++ /dev/null
@@ -1,45 +0,0 @@
1/* Report a memory allocation failure and exit.
2
3 Copyright (C) 1997, 1998, 1999, 2000, 2002, 2003, 2004 Free
4 Software Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24#include "xalloc.h"
25
26#include <stdlib.h>
27
28#include "error.h"
29#include "exitfail.h"
30
31#include "gettext.h"
32#define _(msgid) gettext (msgid)
33#define N_(msgid) msgid
34
35void
36xalloc_die (void)
37{
38 error (exit_failure, 0, "%s", _("memory exhausted"));
39
40 /* The `noreturn' cannot be given to error, since it may return if
41 its first argument is 0. To help compilers understand the
42 xalloc_die does not return, call abort. Also, the abort is a
43 safety feature if exit_failure is 0 (which shouldn't happen). */
44 abort ();
45}
diff --git a/lib/xalloc.h b/lib/xalloc.h
deleted file mode 100644
index f80977e3..00000000
--- a/lib/xalloc.h
+++ /dev/null
@@ -1,79 +0,0 @@
1/* xalloc.h -- malloc with out-of-memory checking
2
3 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2003, 2004 Free Software Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
#if !defined XALLOC_H_
# define XALLOC_H_

/* size_t and ptrdiff_t.  */
# include <stddef.h>

# if defined __cplusplus
extern "C" {
# endif

/* Make __attribute__ vanish on compilers that are not known to accept
   it, unless something has already defined it as a macro.  */
# if !defined __attribute__ \
     && (__GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 8) \
         || __STRICT_ANSI__)
#  define __attribute__(x)
# endif

# if !defined ATTRIBUTE_NORETURN
#  define ATTRIBUTE_NORETURN __attribute__ ((__noreturn__))
# endif

/* Called whenever memory is exhausted.  The application must supply a
   definition, either its own or the one from gnulib's xalloc-die
   module.  Call it when the program should die because of a memory
   allocation failure.  */
extern void xalloc_die (void) ATTRIBUTE_NORETURN;

/* Checked allocators.  */
void *xmalloc (size_t s);
void *xzalloc (size_t s);
void *xcalloc (size_t n, size_t s);
void *xnmalloc (size_t n, size_t s);

/* Checked reallocators.  */
void *xrealloc (void *p, size_t s);
void *xnrealloc (void *p, size_t n, size_t s);
void *x2realloc (void *p, size_t *pn);
void *x2nrealloc (void *p, size_t *pn, size_t s);

/* Checked copies.  */
void *xmemdup (void const *p, size_t s);
char *xstrdup (char const *str);

/* Evaluate to 1 when an array of N objects of S bytes each cannot exist
   because the size computation would overflow.  S must be positive and
   N nonnegative.  This is a macro rather than an inline function so
   that it still gives the right answer when SIZE_MAX < N.

   gnulib uses SIZE_MAX to denote an overflowed size, so the cautious
   dividend would be SIZE_MAX - 1.  But malloc (SIZE_MAX) fails on every
   known host where sizeof (ptrdiff_t) <= sizeof (size_t), so on those
   hosts the exactly-SIZE_MAX case is not tested separately; that saves
   a test and branch when S is known to be 1.  */
# define xalloc_oversized(n, s) \
    ((size_t) (sizeof (ptrdiff_t) <= sizeof (size_t) ? -1 : -2) / (s) < (n))

# if defined __cplusplus
}
# endif

#endif /* !XALLOC_H_ */
diff --git a/lib/xmalloc.c b/lib/xmalloc.c
deleted file mode 100644
index 687633c2..00000000
--- a/lib/xmalloc.c
+++ /dev/null
@@ -1,241 +0,0 @@
1/* xmalloc.c -- malloc with out of memory checking
2
3 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24#include "xalloc.h"
25
26#include <stdlib.h>
27#include <string.h>
28
29#ifndef SIZE_MAX
30# define SIZE_MAX ((size_t) -1)
31#endif
32
/* HAVE_GNU_CALLOC is 1 when calloc is known to behave like GNU calloc
   and 0 otherwise.  The distinction matters when the calloc module
   (which defines HAVE_CALLOC and provides the GNU API even on non-GNU
   platforms) is not in use.  */
enum
{
#if defined HAVE_CALLOC || defined __GLIBC__
  HAVE_GNU_CALLOC = 1
#else
  HAVE_GNU_CALLOC = 0
#endif
};
41
/* Return a freshly malloc'd array of N objects of S bytes each.  S must
   be nonzero.  Calls xalloc_die when N * S would overflow, or when
   malloc returns a null pointer for a nonzero N.  */

static inline void *
xnmalloc_inline (size_t n, size_t s)
{
  void *block;

  if (xalloc_oversized (n, s))
    xalloc_die ();

  block = malloc (n * s);
  if (block == NULL && n != 0)
    xalloc_die ();

  return block;
}
53
/* Public entry point for xnmalloc_inline: allocate N objects of S bytes
   each with error checking.  */
void *
xnmalloc (size_t n, size_t s)
{
  void *block = xnmalloc_inline (n, s);
  return block;
}
59
/* Return N bytes of dynamically allocated memory, with error checking;
   this is xnmalloc_inline with an element size of 1.  */

void *
xmalloc (size_t n)
{
  void *block = xnmalloc_inline (n, 1);
  return block;
}
67
/* Resize the allocated block P so that it holds N objects of S bytes
   each.  S must be nonzero.  Calls xalloc_die when N * S would overflow,
   or when realloc returns a null pointer for a nonzero N.  */

static inline void *
xnrealloc_inline (void *p, size_t n, size_t s)
{
  if (xalloc_oversized (n, s))
    xalloc_die ();

  p = realloc (p, n * s);
  if (p == NULL && n != 0)
    xalloc_die ();

  return p;
}
78
/* Public entry point for xnrealloc_inline: resize P to N objects of S
   bytes each with error checking.  */
void *
xnrealloc (void *p, size_t n, size_t s)
{
  void *resized = xnrealloc_inline (p, n, s);
  return resized;
}
84
/* Resize the allocated block P to N bytes, with error checking; this is
   xnrealloc_inline with an element size of 1.  */

void *
xrealloc (void *p, size_t n)
{
  void *resized = xnrealloc_inline (p, n, 1);
  return resized;
}
93
94
95/* If P is null, allocate a block of at least *PN such objects;
96 otherwise, reallocate P so that it contains more than *PN objects
97 each of S bytes. *PN must be nonzero unless P is null, and S must
98 be nonzero. Set *PN to the new number of objects, and return the
99 pointer to the new block. *PN is never set to zero, and the
100 returned pointer is never null.
101
102 Repeated reallocations are guaranteed to make progress, either by
103 allocating an initial block with a nonzero size, or by allocating a
104 larger block.
105
106 In the following implementation, nonzero sizes are doubled so that
107 repeated reallocations have O(N log N) overall cost rather than
108 O(N**2) cost, but the specification for this function does not
109 guarantee that sizes are doubled.
110
111 Here is an example of use:
112
113 int *p = NULL;
114 size_t used = 0;
115 size_t allocated = 0;
116
117 void
118 append_int (int value)
119 {
120 if (used == allocated)
121 p = x2nrealloc (p, &allocated, sizeof *p);
122 p[used++] = value;
123 }
124
125 This causes x2nrealloc to allocate a block of some nonzero size the
126 first time it is called.
127
128 To have finer-grained control over the initial size, set *PN to a
129 nonzero value before calling this function with P == NULL. For
130 example:
131
132 int *p = NULL;
133 size_t used = 0;
134 size_t allocated = 0;
135 size_t allocated1 = 1000;
136
137 void
138 append_int (int value)
139 {
140 if (used == allocated)
141 {
142 p = x2nrealloc (p, &allocated1, sizeof *p);
143 allocated = allocated1;
144 }
145 p[used++] = value;
146 }
147
148 */
149
/* Grow (or initially allocate) an array of objects of S bytes each.

   P   is the current block, or NULL for a first allocation.
   PN  points to the current object count; on return *PN holds the new
       count, which is never zero.
   S   is the size of one object and must be nonzero.

   With P == NULL and *PN == 0 a small default count is chosen; with
   P == NULL and *PN != 0 exactly *PN objects are allocated; otherwise
   the count is doubled.  Calls xalloc_die when the resulting byte count
   cannot be represented in a size_t or the allocation fails.  */
static inline void *
x2nrealloc_inline (void *p, size_t *pn, size_t s)
{
  size_t n = *pn;

  if (! p)
    {
      if (! n)
	{
	  /* The approximate size to use for initial small allocation
	     requests, when the invoking code specifies an old size of
	     zero.  64 bytes is the largest "small" request for the
	     GNU C library malloc.  */
	  enum { DEFAULT_MXFAST = 64 };

	  n = DEFAULT_MXFAST / s;
	  n += !n;
	}
      else if (xalloc_oversized (n, s))
	/* A caller-chosen initial count must not make N * S wrap
	   around: the block would then be smaller than *PN claims.  */
	xalloc_die ();
    }
  else
    {
      /* Refuse to double when 2 * N * S would exceed SIZE_MAX.  */
      if (SIZE_MAX / 2 / s < n)
	xalloc_die ();
      n *= 2;
    }

  *pn = n;
  return xrealloc (p, n * s);
}
179
/* Public entry point for x2nrealloc_inline; see the long comment above
   that function for the contract.  */
void *
x2nrealloc (void *p, size_t *pn, size_t s)
{
  void *grown = x2nrealloc_inline (p, pn, s);
  return grown;
}
185
/* Byte-granular variant of x2nrealloc.  When P is null, allocate a block
   of at least *PN bytes; otherwise reallocate P so that it holds more
   than *PN bytes.  *PN must be nonzero unless P is null.  On return *PN
   is the size of the new block; it is never zero, and the returned
   pointer is never null.  */

void *
x2realloc (void *p, size_t *pn)
{
  void *grown = x2nrealloc_inline (p, pn, 1);
  return grown;
}
197
/* Return S bytes of zero-filled, dynamically allocated memory, with
   error checking.  An xnzalloc (N, S) is not provided because it would
   be the same as xcalloc (N, S).  */

void *
xzalloc (size_t s)
{
  void *block = xmalloc (s);

  memset (block, 0, s);
  return block;
}
207
208/* Allocate zeroed memory for N elements of S bytes, with error
209 checking. S must be nonzero. */
210
211void *
212xcalloc (size_t n, size_t s)
213{
214 void *p;
215 /* Test for overflow, since some calloc implementations don't have
216 proper overflow checks. But omit overflow and size-zero tests if
217 HAVE_GNU_CALLOC, since GNU calloc catches overflow and never
218 returns NULL if successful. */
219 if ((! HAVE_GNU_CALLOC && xalloc_oversized (n, s))
220 || (! (p = calloc (n, s)) && (HAVE_GNU_CALLOC || n != 0)))
221 xalloc_die ();
222 return p;
223}
224
/* Return a newly allocated copy of the S bytes at P, with error
   checking.  An xnmemdup (P, N, S) is not provided because
   xmemdup (P, N * S) works without needing an arithmetic overflow
   check.  */

void *
xmemdup (void const *p, size_t s)
{
  void *copy = xmalloc (s);

  memcpy (copy, p, s);
  return copy;
}
234
/* Return a newly allocated copy of STRING, including its terminating
   null byte.  */

char *
xstrdup (char const *string)
{
  size_t size = strlen (string) + 1;

  return xmemdup (string, size);
}
diff --git a/lib/xstrdup.c b/lib/xstrdup.c
deleted file mode 100644
index 58f18beb..00000000
--- a/lib/xstrdup.c
+++ /dev/null
@@ -1,33 +0,0 @@
1/* xstrdup.c -- copy a string with out of memory checking
2 Copyright (C) 1990, 1996, 1998, 2001, 2003 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
17
18#if HAVE_CONFIG_H
19# include <config.h>
20#endif
21
22/* Specification. */
23#include "xalloc.h"
24
25#include <string.h>
26
/* Return a newly allocated copy of STRING, including its terminating
   null byte.  The copy is made with xmemdup, the cloning function that
   "xalloc.h" declares; the previous call to xclone referred to a name
   that header does not declare.  */

char *
xstrdup (const char *string)
{
  return xmemdup (string, strlen (string) + 1);
}