2001-05-06 00:39:01 +00:00
|
|
|
|
/* Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
|
|
|
|
|
|
*
|
2003-04-05 19:15:35 +00:00
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
2001-05-06 00:39:01 +00:00
|
|
|
|
*
|
2003-04-05 19:15:35 +00:00
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
1997-05-27 23:16:42 +00:00
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
2003-04-05 19:15:35 +00:00
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
|
|
* Lesser General Public License for more details.
|
2001-05-06 00:39:01 +00:00
|
|
|
|
*
|
2003-04-05 19:15:35 +00:00
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
|
|
* License along with this library; if not, write to the Free Software
|
|
|
|
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
1997-05-27 23:16:42 +00:00
|
|
|
|
*/
|
1999-12-12 02:36:16 +00:00
|
|
|
|
|
|
|
|
|
|
|
1997-05-27 23:16:42 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* regex-posix.c -- POSIX regular expression support.
|
|
|
|
|
|
|
|
|
|
|
|
This code was written against Henry Spencer's famous regex package.
|
|
|
|
|
|
The principal reference for POSIX behavior was the man page for this
|
|
|
|
|
|
library, not the 1003.2 document itself. Ergo, other `POSIX'
|
|
|
|
|
|
libraries which do not agree with the Spencer implementation may
|
|
|
|
|
|
produce varying behavior. Sigh. */
|
|
|
|
|
|
|
2003-03-25 23:59:25 +00:00
|
|
|
|
#if HAVE_CONFIG_H
|
|
|
|
|
|
# include <config.h>
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
1997-05-27 23:16:42 +00:00
|
|
|
|
#include <sys/types.h>
|
1997-05-28 17:02:23 +00:00
|
|
|
|
|
2000-04-21 14:16:44 +00:00
|
|
|
|
#include "libguile/_scm.h"
|
1997-05-29 02:20:10 +00:00
|
|
|
|
|
1997-05-28 17:02:23 +00:00
|
|
|
|
/* Supposedly, this file is never compiled unless we know we have
|
|
|
|
|
|
POSIX regular expressions. But we still put this in an #ifdef so
|
|
|
|
|
|
the file is CPP'able (for dependency scanning) even on systems that
|
|
|
|
|
|
don't have a <regex.h> header. */
|
|
|
|
|
|
#ifdef HAVE_REGCOMP
|
1997-08-24 15:33:49 +00:00
|
|
|
|
#ifdef HAVE_REGEX_H
|
1997-05-27 23:16:42 +00:00
|
|
|
|
#include <regex.h>
|
1997-08-24 15:33:49 +00:00
|
|
|
|
#else
|
|
|
|
|
|
#ifdef HAVE_RXPOSIX_H
|
|
|
|
|
|
#include <rxposix.h> /* GNU Rx library */
|
|
|
|
|
|
#else
|
|
|
|
|
|
#ifdef HAVE_RX_RXPOSIX_H
|
|
|
|
|
|
#include <rx/rxposix.h> /* GNU Rx library on Linux */
|
|
|
|
|
|
#endif
|
|
|
|
|
|
#endif
|
|
|
|
|
|
#endif
|
|
|
|
|
|
#endif
|
1997-05-28 17:02:23 +00:00
|
|
|
|
|
2000-04-21 14:16:44 +00:00
|
|
|
|
#include "libguile/smob.h"
|
|
|
|
|
|
#include "libguile/symbols.h"
|
|
|
|
|
|
#include "libguile/vectors.h"
|
|
|
|
|
|
#include "libguile/strports.h"
|
|
|
|
|
|
#include "libguile/ports.h"
|
|
|
|
|
|
#include "libguile/feature.h"
|
|
|
|
|
|
#include "libguile/strings.h"
|
1997-05-27 23:16:42 +00:00
|
|
|
|
|
2000-04-21 14:16:44 +00:00
|
|
|
|
#include "libguile/validate.h"
|
|
|
|
|
|
#include "libguile/regex-posix.h"
|
1997-05-27 23:16:42 +00:00
|
|
|
|
|
1997-06-28 08:50:43 +00:00
|
|
|
|
/* This is defined by some regex libraries and omitted by others. */
|
|
|
|
|
|
#ifndef REG_BASIC
|
|
|
|
|
|
#define REG_BASIC 0
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
2001-06-14 19:50:43 +00:00
|
|
|
|
/* Smob type tag for compiled regexps.  It is assigned in
   scm_init_regex_posix from the value returned by scm_make_smob_type
   and is used when wrapping a regex_t in a new smob.  */
scm_t_bits scm_tc16_regex;
|
1997-05-27 23:16:42 +00:00
|
|
|
|
|
* validate.h
(SCM_NUM2{SIZE,PTRDIFF,SHORT,USHORT,BITS,UBITS,INT,UINT}[_DEF]):
new macros.
* unif.h: type renaming:
scm_array -> scm_array_t
scm_array_dim -> scm_array_dim_t
the old names are deprecated, all in-Guile uses changed.
* tags.h (scm_ubits_t): new typedef, representing unsigned
scm_bits_t.
* stacks.h: type renaming:
scm_info_frame -> scm_info_frame_t
scm_stack -> scm_stack_t
the old names are deprecated, all in-Guile uses changed.
* srcprop.h: type renaming:
scm_srcprops -> scm_srcprops_t
scm_srcprops_chunk -> scm_srcprops_chunk_t
the old names are deprecated, all in-Guile uses changed.
* gsubr.c, procs.c, print.c, ports.c, read.c, rdelim.c, ramap.c,
rw.c, smob.c, sort.c, srcprop.c, stacks.c, strings.c, strop.c,
strorder.c, strports.c, struct.c, symbols.c, unif.c, values.c,
vectors.c, vports.c, weaks.c:
various int/size_t -> size_t/scm_bits_t changes.
* random.h: type renaming:
scm_rstate -> scm_rstate_t
scm_rng -> scm_rng_t
scm_i_rstate -> scm_i_rstate_t
the old names are deprecated, all in-Guile uses changed.
* procs.h: type renaming:
scm_subr_entry -> scm_subr_entry_t
the old name is deprecated, all in-Guile uses changed.
* options.h (scm_option_t.val): unsigned long -> scm_bits_t.
type renaming:
scm_option -> scm_option_t
the old name is deprecated, all in-Guile uses changed.
* objects.c: various long -> scm_bits_t changes.
(scm_i_make_class_object): flags: unsigned long -> scm_ubits_t
* numbers.h (SCM_FIXNUM_BIT): deprecated, renamed to
SCM_I_FIXNUM_BIT.
* num2integral.i.c: new file, multiply included by numbers.c, used
to "templatize" the various integral <-> num conversion routines.
* numbers.c (scm_mkbig, scm_big2num, scm_adjbig, scm_normbig,
scm_copybig, scm_2ulong2big, scm_dbl2big, scm_big2dbl):
deprecated.
(scm_i_mkbig, scm_i_big2inum, scm_i_adjbig, scm_i_normbig,
scm_i_copybig, scm_i_short2big, scm_i_ushort2big, scm_i_int2big,
scm_i_uint2big, scm_i_long2big, scm_i_ulong2big, scm_i_bits2big,
scm_i_ubits2big, scm_i_size2big, scm_i_ptrdiff2big,
scm_i_long_long2big, scm_i_ulong_long2big, scm_i_dbl2big,
scm_i_big2dbl, scm_short2num, scm_ushort2num, scm_int2num,
scm_uint2num, scm_bits2num, scm_ubits2num, scm_size2num,
scm_ptrdiff2num, scm_num2short, scm_num2ushort, scm_num2int,
scm_num2uint, scm_num2bits, scm_num2ubits, scm_num2ptrdiff,
scm_num2size): new functions.
* modules.c (scm_module_reverse_lookup): i, n: int -> scm_bits_t.x
* load.c: change int -> size_t in various places (where the
variable is used to store a string length).
(search-path): call scm_done_free, not scm_done_malloc.
* list.c (scm_ilength): return a scm_bits_t, not long.
some other {int,long} -> scm_bits_t changes.
* hashtab.c: various [u]int -> scm_bits_t changes.
scm_ihashx_closure -> scm_ihashx_closure_t (and made a typedef).
(scm_ihashx): n: uint -> scm_bits_t
use scm_bits2num instead of scm_ulong2num.
* gsubr.c: various int -> scm_bits_t changes.
* gh_data.c (gh_scm2double): no loss of precision any more.
* gh.h (gh_str2scm): len: int -> size_t
(gh_{get,set}_substr): start: int -> scm_bits_t,
len: int -> size_t
(gh_<num>2scm): n: int -> scm_bits_t
(gh_*vector_length): return scm_[u]size_t, not unsigned long.
(gh_length): return scm_bits_t, not unsigned long.
* fports.h: type renaming:
scm_fport -> scm_fport_t
the old name is deprecated, all in-Guile uses changed.
* fports.c (fport_fill_input): count: int -> scm_bits_t
(fport_flush): init_size, remaining, count: int -> scm_bits_t
* debug.h (scm_lookup_cstr, scm_lookup_soft, scm_evstr): removed
those prototypes, as the functions they prototype don't exist.
* fports.c (default_buffer_size): int -> size_t
(scm_fport_buffer_add): read_size, write_size: int -> scm_bits_t
default_size: int -> size_t
(scm_setvbuf): csize: int -> scm_bits_t
* fluids.c (n_fluids): int -> scm_bits_t
(grow_fluids): old_length, i: int -> scm_bits_t
(next_fluid_num, scm_fluid_ref, scm_fluid_set_x): n: int ->
scm_bits_t
(scm_c_with_fluids): flen, vlen: int -> scm_bits_t
* filesys.c (s_scm_open_fdes): changed calls to SCM_NUM2LONG to
the new and shiny SCM_NUM2INT.
* extensions.c: extension -> extension_t (and made a typedef).
* eval.h (SCM_IFRAME): cast to scm_bits_t, not int. just so
there are no nasty surprises if/when the various deeply magic tag
bits move somewhere else.
* eval.c: changed the locals used to store results of SCM_IFRAME,
scm_ilength and such to be of type scm_bits_t (and not int/long).
(iqq): depth, edepth: int -> scm_bits_t
(scm_eval_stack): int -> scm_bits_t
(SCM_CEVAL): various vars are not scm_bits_t instead of int.
(check_map_args, scm_map, scm_for_each): len: long -> scm_bits_t
i: int -> scm_bits_t
* environments.c: changed the many calls to scm_ulong2num to
scm_ubits2num.
(import_environment_fold): proc_as_ul: ulong -> scm_ubits_t
* dynwind.c (scm_dowinds): delta: long -> scm_bits_t
* debug.h: type renaming:
scm_debug_info -> scm_debug_info_t
scm_debug_frame -> scm_debug_frame_t
the old names are deprecated, all in-Guile uses changed.
(scm_debug_eframe_size): int -> scm_bits_t
* debug.c (scm_init_debug): use scm_c_define instead of the
deprecated scm_define.
* continuations.h: type renaming:
scm_contregs -> scm_contregs_t
the old name is deprecated, all in-Guile uses changed.
(scm_contregs_t.num_stack_items): size_t -> scm_bits_t
(scm_contregs_t.num_stack_items): ulong -> scm_ubits_t
* continuations.c (scm_make_continuation): change the type of
stack_size form long to scm_bits_t.
* ports.h: type renaming:
scm_port_rw_active -> scm_port_rw_active_t (and made a typedef)
scm_port -> scm_port_t
scm_ptob_descriptor -> scm_ptob_descriptor_t
the old names are deprecated, all in-Guile uses changed.
(scm_port_t.entry): int -> scm_bits_t.
(scm_port_t.line_number): int -> long.
(scm_port_t.putback_buf_size): int -> size_t.
* __scm.h (long_long, ulong_long): deprecated (they pollute the
global namespace and have little value besides that).
(SCM_BITS_LENGTH): new, is the bit size of scm_bits_t (i.e. of an
SCM handle).
(ifdef spaghetti): include sys/types.h and sys/stdtypes.h, if they
exist (for size_t & ptrdiff_t)
(scm_sizet): deprecated.
* Makefile.am (noinst_HEADERS): add num2integral.i.c
2001-05-24 00:50:51 +00:00
|
|
|
|
static size_t
|
2000-12-08 17:32:56 +00:00
|
|
|
|
regex_free (SCM obj)
|
1997-05-27 23:16:42 +00:00
|
|
|
|
{
|
|
|
|
|
|
regfree (SCM_RGX (obj));
|
* gc.h, gc.c (scm_gc_sweep): Issue deprecation warning when
non-zero is returned from a port or smob free function.
(scm_malloc, scm_realloc, scm_strndup, scm_strdup,
scm_gc_register_collectable_memory,
scm_gc_unregister_collectable_memory, scm_gc_malloc,
scm_gc_realloc, scm_gc_free, scm_gc_strndup, scm_gc_strdup): New.
* backtrace.c, continuations.c, convert.i.c, coop-threads.c,
debug-malloc.c, dynl.c, environments.c, environments.h,
extensions.c, filesys.c, fports.c, gc.c, gc.h, gh_data.c, goops.c,
guardians.c, hooks.c, init.c, keywords.c, load.c, numbers.c,
ports.c, posix.c, procs.c, rdelim.c, regex-posix.c, root.c,
smob.c, stime.c, strings.c, struct.c, struct.h, symbols.c, unif.c,
vectors.c, weaks.c: Use scm_gc_malloc/scm_malloc and
scm_gc_free/free instead of scm_must_malloc and scm_must_free, as
appropriate. Return zero from smob and port free functions.
* debug-malloc.c (scm_malloc_reregister): Handle "old == NULL".
* fports.c (scm_setvbuf): Reset read buffer to saved values when
it is pointing to the putback buffer.
2002-02-11 18:06:50 +00:00
|
|
|
|
scm_gc_free (SCM_RGX (obj), sizeof(regex_t), "regex");
|
|
|
|
|
|
return 0;
|
1997-05-27 23:16:42 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Error key `regular-expression-syntax'; passed as the key argument to
   scm_error by make-regexp and regexp-exec when the regex library
   reports a failure.  */
SCM_SYMBOL (scm_regexp_error_key, "regular-expression-syntax");
|
|
|
|
|
|
|
1998-09-30 10:14:59 +00:00
|
|
|
|
static char *
|
1999-07-19 07:52:51 +00:00
|
|
|
|
scm_regexp_error_msg (int regerrno, regex_t *rx)
|
1997-05-27 23:16:42 +00:00
|
|
|
|
{
|
|
|
|
|
|
SCM errmsg;
|
|
|
|
|
|
int l;
|
|
|
|
|
|
|
|
|
|
|
|
/* FIXME: must we wrap any external calls in SCM_DEFER_INTS...SCM_ALLOW_INTS?
|
|
|
|
|
|
Or are these only necessary when a SCM object may be left in an
|
|
|
|
|
|
undetermined state (half-formed)? If the latter then I believe we
|
|
|
|
|
|
may do without the critical section code. -twp */
|
|
|
|
|
|
|
|
|
|
|
|
/* We could simply make errmsg a char pointer, and allocate space with
|
|
|
|
|
|
malloc. But since we are about to pass the pointer to scm_error, which
|
|
|
|
|
|
never returns, we would never have the opportunity to free it. Creating
|
|
|
|
|
|
it as a SCM object means that the system will GC it at some point. */
|
|
|
|
|
|
|
|
|
|
|
|
errmsg = scm_make_string (SCM_MAKINUM (80), SCM_UNDEFINED);
|
|
|
|
|
|
SCM_DEFER_INTS;
|
2000-09-22 17:17:55 +00:00
|
|
|
|
l = regerror (regerrno, rx, SCM_STRING_CHARS (errmsg), 80);
|
1997-05-27 23:16:42 +00:00
|
|
|
|
if (l > 80)
|
|
|
|
|
|
{
|
|
|
|
|
|
errmsg = scm_make_string (SCM_MAKINUM (l), SCM_UNDEFINED);
|
2000-09-26 18:37:26 +00:00
|
|
|
|
regerror (regerrno, rx, SCM_STRING_CHARS (errmsg), l);
|
1997-05-27 23:16:42 +00:00
|
|
|
|
}
|
|
|
|
|
|
SCM_ALLOW_INTS;
|
2000-09-22 17:17:55 +00:00
|
|
|
|
return SCM_STRING_CHARS (errmsg);
|
1997-05-27 23:16:42 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
2001-05-06 00:39:01 +00:00
|
|
|
|
/* Scheme predicate `regexp?': tests OBJ with SCM_RGXP and converts the
   result to a Scheme boolean.  */
SCM_DEFINE (scm_regexp_p, "regexp?", 1, 0, 0,
            (SCM obj),
	    "Return @code{#t} if @var{obj} is a compiled regular expression,\n"
	    "or @code{#f} otherwise.")
#define FUNC_NAME s_scm_regexp_p
{
  int is_compiled_regexp = SCM_RGXP (obj);

  return scm_from_bool (is_compiled_regexp);
}
#undef FUNC_NAME
|
1997-05-27 23:16:42 +00:00
|
|
|
|
|
2001-05-06 00:39:01 +00:00
|
|
|
|
/* Scheme procedure `make-regexp': compile the string PAT with regcomp
   and return it wrapped in a regexp smob.

   FLAGS is the rest-argument list of compilation flags.  Compilation
   starts from REG_EXTENDED; a flag equal to REG_BASIC clears
   REG_EXTENDED, and every other flag is OR-ed into the set.
   REG_NOSUB is always masked out before calling regcomp.

   If regcomp fails, a `regular-expression-syntax' error is signalled
   with the regerror text as its message, after releasing the regex_t
   that was allocated for the attempt.  */
SCM_DEFINE (scm_make_regexp, "make-regexp", 1, 0, 1,
            (SCM pat, SCM flags),
	    "Compile the regular expression described by @var{pat}, and\n"
	    "return the compiled regexp structure. If @var{pat} does not\n"
	    "describe a legal regular expression, @code{make-regexp} throws\n"
	    "a @code{regular-expression-syntax} error.\n"
	    "\n"
	    "The @var{flags} arguments change the behavior of the compiled\n"
	    "regular expression. The following flags may be supplied:\n"
	    "\n"
	    "@table @code\n"
	    "@item regexp/icase\n"
	    "Consider uppercase and lowercase letters to be the same when\n"
	    "matching.\n"
	    "@item regexp/newline\n"
	    "If a newline appears in the target string, then permit the\n"
	    "@samp{^} and @samp{$} operators to match immediately after or\n"
	    "immediately before the newline, respectively. Also, the\n"
	    "@samp{.} and @samp{[^...]} operators will never match a newline\n"
	    "character. The intent of this flag is to treat the target\n"
	    "string as a buffer containing many lines of text, and the\n"
	    "regular expression as a pattern that may match a single one of\n"
	    "those lines.\n"
	    "@item regexp/basic\n"
	    "Compile a basic (``obsolete'') regexp instead of the extended\n"
	    "(``modern'') regexps that are the default. Basic regexps do\n"
	    "not consider @samp{|}, @samp{+} or @samp{?} to be special\n"
	    "characters, and require the @samp{@{...@}} and @samp{(...)}\n"
	    "metacharacters to be backslash-escaped (@pxref{Backslash\n"
	    "Escapes}). There are several other differences between basic\n"
	    "and extended regular expressions, but these are the most\n"
	    "significant.\n"
	    "@item regexp/extended\n"
	    "Compile an extended regular expression rather than a basic\n"
	    "regexp. This is the default behavior; this flag will not\n"
	    "usually be needed. If a call to @code{make-regexp} includes\n"
	    "both @code{regexp/basic} and @code{regexp/extended} flags, the\n"
	    "one which comes last will override the earlier one.\n"
	    "@end table")
#define FUNC_NAME s_scm_make_regexp
{
  SCM flag;
  regex_t *rx;
  int status, cflags;

  SCM_VALIDATE_STRING (1, pat);
  SCM_VALIDATE_REST_ARGUMENT (flags);

  /* Examine list of regexp flags.  If REG_BASIC is supplied, then
     turn off REG_EXTENDED flag (on by default).  */
  cflags = REG_EXTENDED;
  flag = flags;
  while (!SCM_NULLP (flag))
    {
      if (SCM_INUM (SCM_CAR (flag)) == REG_BASIC)
	cflags &= ~REG_EXTENDED;
      else
	cflags |= SCM_INUM (SCM_CAR (flag));
      flag = SCM_CDR (flag);
    }

  rx = scm_gc_malloc (sizeof (regex_t), "regex");
  status = regcomp (rx, SCM_STRING_CHARS (pat),
		    /* Make sure they're not passing REG_NOSUB;
                       regexp-exec assumes we're getting match data.  */
		    cflags & ~REG_NOSUB);
  if (status != 0)
    {
      /* Fetch the message first: regerror is given RX, so RX must
         still be allocated at that point.  */
      char *errmsg = scm_regexp_error_msg (status, rx);

      /* scm_error does not return and RX was never wrapped in a smob,
         so nothing else would ever release it.  Free it here with the
         same size and tag used for the allocation above.  */
      scm_gc_free (rx, sizeof (regex_t), "regex");

      scm_error (scm_regexp_error_key,
		 FUNC_NAME,
		 errmsg,
		 SCM_BOOL_F,
		 SCM_BOOL_F);
      /* never returns */
    }
  SCM_RETURN_NEWSMOB (scm_tc16_regex, rx);
}
#undef FUNC_NAME
|
1997-05-27 23:16:42 +00:00
|
|
|
|
|
2001-05-06 00:39:01 +00:00
|
|
|
|
/* Scheme procedure `regexp-exec': run the compiled regexp RX over STR
   with regexec, starting START characters into the string (0 when
   START is not supplied) and passing FLAGS as the regexec eflags
   (0 when not supplied).

   On a match the result is a vector with one more element than the
   number of match slots: element 0 is STR itself, and element I+1 is a
   pair (start . end) for slot I, with both positions shifted by the
   start offset so that they index into STR.  A slot that regexec left
   unset is reported as (-1 . -1).  When nothing matches the result is
   #f.  Any regexec status other than success or REG_NOMATCH raises a
   `regular-expression-syntax' error.  */
SCM_DEFINE (scm_regexp_exec, "regexp-exec", 2, 2, 0,
            (SCM rx, SCM str, SCM start, SCM flags),
	    "Match the compiled regular expression @var{rx} against\n"
	    "@code{str}. If the optional integer @var{start} argument is\n"
	    "provided, begin matching from that position in the string.\n"
	    "Return a match structure describing the results of the match,\n"
	    "or @code{#f} if no match could be found.\n"
	    "\n"
	    "The @var{flags} arguments change the matching behavior.\n"
	    "The following flags may be supplied:\n"
	    "\n"
	    "@table @code\n"
	    "@item regexp/notbol\n"
	    "Operator @samp{^} always fails (unless @code{regexp/newline}\n"
	    "is used). Use this when the beginning of the string should\n"
	    "not be considered the beginning of a line.\n"
	    "@item regexp/noteol\n"
	    "Operator @samp{$} always fails (unless @code{regexp/newline}\n"
	    "is used). Use this when the end of the string should not be\n"
	    "considered the end of a line.\n"
	    "@end table")
#define FUNC_NAME s_scm_regexp_exec
{
  SCM result = SCM_BOOL_F;
  regmatch_t *spans;
  int status, nmatches, offset;

  SCM_VALIDATE_RGXP (1, rx);
  SCM_VALIDATE_STRING (2, str);
  SCM_VALIDATE_INUM_DEF_COPY (3, start, 0, offset);
  SCM_ASSERT_RANGE (3, start, offset >= 0 && offset <= SCM_STRING_LENGTH (str));
  if (SCM_UNBNDP (flags))
    flags = SCM_INUM0;
  SCM_VALIDATE_INUM (4, flags);

  /* re_nsub counts only the parenthesised subexpressions; one extra
     slot is needed for the match of the regexp as a whole.  */
  nmatches = SCM_RGX (rx)->re_nsub + 1;

  SCM_DEFER_INTS;
  spans = scm_malloc (sizeof (regmatch_t) * nmatches);
  status = regexec (SCM_RGX (rx), SCM_STRING_CHARS (str) + offset,
		    nmatches, spans,
		    SCM_INUM (flags));
  if (status == 0)
    {
      int idx;

      /* One cell more than the number of slots: cell 0 holds the
         string that was matched against.  */
      result = scm_c_make_vector (nmatches + 1, SCM_UNSPECIFIED);
      SCM_VECTOR_SET (result, 0, str);

      for (idx = 0; idx < nmatches; idx++)
	{
	  SCM span;

	  if (spans[idx].rm_so == -1)
	    span = scm_cons (SCM_MAKINUM (-1), SCM_MAKINUM (-1));
	  else
	    /* regexec saw the string starting at OFFSET, so shift the
	       positions back into STR's own coordinates.  */
	    span = scm_cons (scm_long2num (spans[idx].rm_so + offset),
			     scm_long2num (spans[idx].rm_eo + offset));

	  SCM_VECTOR_SET (result, idx + 1, span);
	}
    }
  free (spans);
  SCM_ALLOW_INTS;

  if (status != 0 && status != REG_NOMATCH)
    scm_error (scm_regexp_error_key,
	       FUNC_NAME,
	       scm_regexp_error_msg (status, SCM_RGX (rx)),
	       SCM_BOOL_F,
	       SCM_BOOL_F);

  return result;
}
#undef FUNC_NAME
|
1997-05-27 23:16:42 +00:00
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
|
scm_init_regex_posix ()
|
|
|
|
|
|
{
|
2000-12-08 17:32:56 +00:00
|
|
|
|
scm_tc16_regex = scm_make_smob_type ("regexp", sizeof (regex_t));
|
|
|
|
|
|
scm_set_smob_free (scm_tc16_regex, regex_free);
|
1997-06-24 05:30:28 +00:00
|
|
|
|
|
|
|
|
|
|
/* Compilation flags. */
|
2001-05-15 14:57:22 +00:00
|
|
|
|
scm_c_define ("regexp/basic", scm_long2num (REG_BASIC));
|
|
|
|
|
|
scm_c_define ("regexp/extended", scm_long2num (REG_EXTENDED));
|
|
|
|
|
|
scm_c_define ("regexp/icase", scm_long2num (REG_ICASE));
|
|
|
|
|
|
scm_c_define ("regexp/newline", scm_long2num (REG_NEWLINE));
|
1997-06-24 05:30:28 +00:00
|
|
|
|
|
|
|
|
|
|
/* Execution flags. */
|
2001-05-15 14:57:22 +00:00
|
|
|
|
scm_c_define ("regexp/notbol", scm_long2num (REG_NOTBOL));
|
|
|
|
|
|
scm_c_define ("regexp/noteol", scm_long2num (REG_NOTEOL));
|
1997-06-24 05:30:28 +00:00
|
|
|
|
|
2000-04-21 14:16:44 +00:00
|
|
|
|
#include "libguile/regex-posix.x"
|
1997-05-29 02:47:40 +00:00
|
|
|
|
|
|
|
|
|
|
scm_add_feature ("regex");
|
1997-05-27 23:16:42 +00:00
|
|
|
|
}
|
2000-03-19 19:01:16 +00:00
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
Local Variables:
|
|
|
|
|
|
c-file-style: "gnu"
|
|
|
|
|
|
End:
|
|
|
|
|
|
*/
|