module Pcre: sig end
Exceptions
|
exception BadPattern of (string * int)
BadPattern (msg, pos) gets raised when the regular expression is
malformed. The reason is in msg, the position of the error in the
pattern in pos.

exception BadUTF8
BadUTF8 gets raised when a UTF8 string being matched is invalid.

exception BadUTF8Offset
BadUTF8Offset gets raised when a UTF8 string being matched with offset
is invalid.

exception MatchLimit
MatchLimit gets raised when the maximum allowed number of match
attempts with backtracking or recursion is reached during matching.
ALL FUNCTIONS CALLING THE MATCHING ENGINE MAY RAISE IT!!!

exception InternalError of string
InternalError msg gets raised when the C-library exhibits undefined
behaviour. The reason is in msg.

exception Backtrack
Backtrack is used in callout functions to force backtracking.
Compilation and runtime flags and their conversion functions
|
type icflag
type irflag
type cflag =
[ `ANCHORED
| `CASELESS
| `DOLLAR_ENDONLY
| `DOTALL
| `EXTENDED
| `EXTRA
| `MULTILINE
| `NO_AUTO_CAPTURE
| `NO_UTF8_CHECK
| `UNGREEDY
| `UTF8 ]

val cflags : cflag list -> icflag
cflags cflag_list converts a list of compilation flags to
their internal representation.

val cflag_list : icflag -> cflag list
cflag_list cflags converts internal representation of
compilation flags to a list.

type rflag =
[ `ANCHORED | `NOTBOL | `NOTEMPTY | `NOTEOL ]

val rflags : rflag list -> irflag
rflags rflag_list converts a list of runtime flags to
their internal representation.

val rflag_list : irflag -> rflag list
rflag_list rflags converts internal representation of
runtime flags to a list.
Information on the PCRE-configuration (build-time options)
|
val version : string
Version of the PCRE-C-library
val config_utf8 : bool
val config_newline : char
val config_link_size : int
val config_match_limit : int
val config_stackrecurse : bool
Information on patterns
|
type firstbyte_info =
[ `ANCHORED | `Char of char | `Start_only ]
type study_stat =
[ `Not_studied | `Optimal | `Studied ]
type regexp
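val options : regexp -> icflag
options regexp
Returns compilation flags of regexp.

val size : regexp -> int
size regexp
Returns memory size of regexp.

val studysize : regexp -> int
studysize regexp
Returns memory size of study information of regexp.

val capturecount : regexp -> int
capturecount regexp
Returns number of capturing subpatterns in regexp.

val backrefmax : regexp -> int
backrefmax regexp
Returns number of highest backreference in regexp.

val namecount : regexp -> int
namecount regexp
Returns number of named subpatterns in regexp.

val nameentrysize : regexp -> int
nameentrysize regexp
Returns size of longest name of named subpatterns in regexp + 3.

val firstbyte : regexp -> firstbyte_info
firstbyte regexp
Returns firstbyte info on regexp.

val firsttable : regexp -> string option
firsttable regexp
Returns some 256-bit (32-byte) fixed set table in form of a string for
regexp if available, None otherwise.

val lastliteral : regexp -> char option
lastliteral regexp
Returns some last matching character of regexp if available, None
otherwise.

val study_stat : regexp -> study_stat
study_stat regexp
Returns study status of regexp.

val get_stringnumber : regexp -> string -> int
get_stringnumber rex name
Returns the index of the named substring name in regular expression
rex. This index can then be used with get_substring.
Raises Invalid_argument if there is no such named substring.

val get_match_limit : regexp -> int option
get_match_limit rex
Returns some match limit of regular expression rex or None.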
Compilation of patterns
|
type
chtables
val maketables : unit -> chtables
val regexp : ?study:bool ->
?limit:int ->
?iflags:icflag ->
?flags:cflag list -> ?chtables:chtables -> string -> regexp
regexp ?study ?limit ?iflags ?flags ?chtables pattern compiles
pattern with flags when given, with iflags otherwise, and
with char tables chtables. If study is true, then the resulting
regular expression will be studied. If limit is specified, this
sets a limit to the amount of recursion and backtracking (only lower
than the builtin default!). If this limit is exceeded, MatchLimit
will be raised during matching.
For detailed documentation on how you can specify PERL-style regular
expressions (= patterns), please consult the PCRE-documentation
("man pcrepattern") or PERL-manuals.
See also www.perl.com
study : default = true
limit : default = no extra limit other than default
iflags : default = no extra flags
flags : default = ignored
chtables : default = builtin char tables

val quote : string -> string
quote str
Returns the quoted string of str.
Subpattern extraction
|
type
substrings
val get_subject : substrings -> string
get_subject substrings
Returns the subject string of substrings.

val num_of_subs : substrings -> int
num_of_subs substrings
Returns number of strings in substrings (whole match inclusive).

val get_substring : substrings -> int -> string
get_substring substrings n
Returns the nth substring (0 is whole match) of substrings or the
empty string if the corresponding subpattern did not capture a
substring.
Raises Invalid_argument if n is not in the range of the number of
substrings.

val get_substring_ofs : substrings -> int -> int * int
get_substring_ofs substrings n
Returns the offset tuple of the nth substring of substrings
(0 is whole match).
Raises Invalid_argument if n is not in the range of the number
of substrings.
Raises Not_found if the corresponding subpattern did not capture
a substring.

val get_substrings : ?full_match:bool -> substrings -> string array
get_substrings ?full_match substrings
Returns the array of substrings in substrings. It includes the full
match at index 0 when full_match is true, the captured substrings only
when it is false. If a subpattern did not capture a substring, the
empty string is returned in the corresponding position instead.
full_match : default = true

val get_named_substring : regexp -> string -> substrings -> string
get_named_substring rex name substrings
Returns the named substring name in regular expression rex and
substrings.
Raises Invalid_argument if there is no such named substring.

val get_named_substring_ofs : regexp -> string -> substrings -> int * int
get_named_substring_ofs rex name substrings
Returns the offset tuple of the named substring name in regular
expression rex and substrings.
Raises Invalid_argument if there is no such named substring.
Raises Not_found if the corresponding subpattern did not capture
a substring.
Callouts
|
type callout =
substrings -> int -> int -> int -> int -> int -> unit
Callout functions have the form:
callout substrings match_start current_position
capture_top capture_last callout_number
They are indicated in patterns as "(?Cn)" where "n" is a
callout_number ranging from 0 to 255. Substrings captured so far
are accessible as usual via substrings. You will have to consider
capture_top and capture_last to know about the current state of
valid substrings.
By raising exception Backtrack within a callout function, the user
can force the pattern matching engine to backtrack to other possible
solutions. Other exceptions will terminate matching immediately and
return control to OCaml.
Matching of patterns and subpattern extraction
|
val pcre_exec : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string -> ?pos:int -> ?callout:callout -> string -> int array
pcre_exec ?iflags ?flags ?rex ?pat ?pos ?callout subj
Returns an array of offsets that describe the position of matched
subpatterns in the string subj starting at position pos with pattern
pat when given, regular expression rex otherwise. The array also
contains additional workspace needed by the match engine. Uses flags
when given, the precompiled iflags otherwise. Callouts are handled by
callout.
Raises Not_found if pattern does not match.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts

val exec : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string -> ?pos:int -> ?callout:callout -> string -> substrings
exec ?iflags ?flags ?rex ?pat ?pos ?callout subj
Returns substring information on string subj starting at position pos
with pattern pat when given, regular expression rex otherwise. Uses
flags when given, the precompiled iflags otherwise. Callouts are
handled by callout.
Raises Not_found if pattern does not match.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts

val exec_all : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int -> ?callout:callout -> string -> substrings array
exec_all ?iflags ?flags ?rex ?pat ?pos ?callout subj
Returns an array of substring information of all matching substrings
in string subj starting at position pos with pattern pat when given,
regular expression rex otherwise. Uses flags when given, the
precompiled iflags otherwise. Callouts are handled by callout.
Raises Not_found if pattern does not match.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts

val next_match : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int -> ?callout:callout -> substrings -> substrings
next_match ?iflags ?flags ?rex ?pat ?pos ?callout substrs
Returns substring information on the match that follows on the last
match denoted by substrs, jumping over pos characters (also
backwards!), using pattern pat when given, regular expression rex
otherwise. Uses flags when given, the precompiled iflags otherwise.
Callouts are handled by callout.
Raises Not_found if pattern does not match.
Raises Invalid_argument if pos lets matching start outside of
the subject string.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts

val extract : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?full_match:bool -> ?callout:callout -> string -> string array
extract ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj
Returns the array of substrings that match subj starting at
position pos, using pattern pat when given, regular expression
rex otherwise. Uses flags when given, the precompiled iflags
otherwise. It includes the full match at index 0 when full_match
is true, the captured substrings only when it is false. Callouts
are handled by callout.
Raises Not_found if pattern does not match.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
full_match : default = true
callout : default = ignore callouts

val extract_all : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?full_match:bool -> ?callout:callout -> string -> string array array
extract_all ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj
Returns an array of arrays of all matching substrings that match subj
starting at position pos, using pattern pat when given,
regular expression rex otherwise. Uses flags when given, the
precompiled iflags otherwise. It includes the full match at index
0 of the extracted string arrays when full_match is true, the
captured substrings only when it is false. Callouts are handled by
callout.
Raises Not_found if pattern does not match.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
full_match : default = true
callout : default = ignore callouts

val pmatch : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string -> ?pos:int -> ?callout:callout -> string -> bool
pmatch ?iflags ?flags ?rex ?pat ?pos ?callout subj
Returns true if subj is matched by pattern pat when given, regular
expression rex otherwise, starting at position pos. Uses flags when
given, the precompiled iflags otherwise. Callouts are handled by
callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
String substitution
|
type
substitution
val subst : string -> substitution
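subst str converts the string str representing a
substitution pattern to the internal representation.
The contents of the substitution string str can be normal text
mixed with any of the following (mostly as in PERL):
- $[0-9]+ - a "$" immediately followed by an arbitrary number
- $& - the whole matched pattern
- $` - the text before the match
- $' - the text after the match
- $+ - the last group that matched
- $$ - a single "$"
- $! - delimiter which does not appear in the substitution.
  Can be used to part "$[0-9]+" from an immediately
  following other number.

val replace : ?iflags:irflag ->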
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?itempl:substitution ->
?templ:string -> ?callout:callout -> string -> string
replace ?iflags ?flags ?rex ?pat ?pos ?itempl ?templ ?callout subj
replaces all substrings of subj matching pattern pat when given,
regular expression rex otherwise, starting at position pos with
the substitution string templ when given, itempl otherwise. Uses
flags when given, the precompiled iflags otherwise. Callouts
are handled by callout.
Raises Failure if there are backreferences to nonexistent subpatterns.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
itempl : default = empty string
templ : default = ignored
callout : default = ignore callouts

val qreplace : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int -> ?templ:string -> ?callout:callout -> string -> string
qreplace ?iflags ?flags ?rex ?pat ?pos ?templ ?callout subj
replaces all substrings of subj matching pattern pat when given,
regular expression rex otherwise, starting at position pos
with the string templ. Uses flags when given, the precompiled
iflags otherwise. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
templ : default = ignored
callout : default = ignore callouts

val substitute_substrings : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?callout:callout ->
subst:(substrings -> string) -> string -> string
substitute_substrings ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj
replaces all substrings of subj matching pattern pat when given,
regular expression rex otherwise, starting at position pos
with the result of function subst applied to the substrings
of the match. Uses flags when given, the precompiled iflags
otherwise. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts

val substitute : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?callout:callout -> subst:(string -> string) -> string -> string
substitute ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj
replaces all substrings of subj matching pattern pat when given,
regular expression rex otherwise, starting at position pos with
the result of function subst applied to the match. Uses flags
when given, the precompiled iflags otherwise. Callouts are handled
by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts

val replace_first : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?itempl:substitution ->
?templ:string -> ?callout:callout -> string -> string
replace_first ?iflags ?flags ?rex ?pat ?pos ?itempl ?templ ?callout subj
replaces the first substring of subj matching pattern pat when
given, regular expression rex otherwise, starting at position
pos with the substitution string templ when given, itempl
otherwise. Uses flags when given, the precompiled iflags
otherwise. Callouts are handled by callout.
Raises Failure if there are backreferences to nonexistent subpatterns.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
itempl : default = empty string
templ : default = ignored
callout : default = ignore callouts

val qreplace_first : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int -> ?templ:string -> ?callout:callout -> string -> string
qreplace_first ?iflags ?flags ?rex ?pat ?pos ?templ ?callout subj
replaces the first substring of subj matching pattern pat when
given, regular expression rex otherwise, starting at position pos
with the string templ. Uses flags when given, the precompiled
iflags otherwise. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
templ : default = ignored
callout : default = ignore callouts

val substitute_substrings_first : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?callout:callout ->
subst:(substrings -> string) -> string -> string
substitute_substrings_first
?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj
replaces the first substring of subj matching pattern pat when
given, regular expression rex otherwise, starting at position
pos with the result of function subst applied to the substrings
of the match. Uses flags when given, the precompiled iflags
otherwise. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts

val substitute_first : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?callout:callout -> subst:(string -> string) -> string -> string
substitute_first ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj
replaces the first substring of subj matching pattern pat when
given, regular expression rex otherwise, starting at position
pos with the result of function subst applied to the match. Uses
flags when given, the precompiled iflags otherwise. Callouts
are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
Splitting
|
val split : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int -> ?max:int -> ?callout:callout -> string -> string list
split ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj
splits subj into a list of at most max strings, using as delimiter
pattern pat when given, regular expression rex otherwise, starting at
position pos. Uses flags when given, the precompiled iflags
otherwise. If max is zero, trailing empty fields are stripped. If
it is negative, it is treated as arbitrarily large. If neither pat
nor rex are specified, leading whitespace will be stripped! Should
behave exactly as in PERL. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
max : default = 0
callout : default = ignore callouts

val asplit : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int -> ?max:int -> ?callout:callout -> string -> string array
asplit ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj
same as Pcre.split but returns an array instead of a list.

type split_result =
| Text of string  (* Text part of split string *)
| Delim of string  (* Delimiter part of split string *)
| Group of int * string  (* Subgroup of matched delimiter (subgroup_nr, subgroup_str) *)
| NoGroup  (* Unmatched subgroup *)
Result of a Pcre.full_split
val full_split : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?max:int -> ?callout:callout -> string -> split_result list
full_split ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj
splits subj into a list of at most max elements of type
"split_result", using as delimiter pattern pat when given, regular
expression rex otherwise, starting at position pos. Uses flags when
given, the precompiled iflags otherwise. If max is zero, trailing
empty fields are stripped. If it is negative, it is treated as
arbitrarily large. Should behave exactly as in PERL. Callouts are
handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
max : default = 0
callout : default = ignore callouts
Additional convenience functions
|
val foreach_line : ?ic:Pervasives.in_channel -> (string -> unit) -> unit
foreach_line ?ic f applies f to each line in inchannel ic until
the end-of-file is reached.
ic : default = stdin

val foreach_file : string list -> (string -> Pervasives.in_channel -> unit) -> unit
foreach_file filenames f opens each file in the list filenames
for input and applies f to each filename and the corresponding
channel. Channels are closed after each operation (even when
exceptions occur - they get reraised afterwards!).
UNSAFE STUFF - USE WITH CAUTION!
|
val unsafe_pcre_exec : irflag ->
regexp ->
int -> string -> int -> int array -> callout option -> unit
unsafe_pcre_exec flags rex pos subject subgroup_offsets offset_vector.
You should read the C-source to know what happens.
If you do not understand it - don't use this function!

val make_ovector : regexp -> int * int array
make_ovector regexp calculates the tuple (subgroups2, ovector)
which is the number of subgroup offsets and the offset array.