10 KiB
module regex
Contents
Constants
const v_regex_version = '1.0 alpha' // regex module version
const max_code_len = 256 // default small base code len for the regex programs
const max_quantifier = 1073741824 // default max repetitions allowed for the quantifiers = 2^30
const spaces = [` `, `\t`, `\n`, `\r`, `\v`, `\f`]
space chars (here only the Western ones!) TODO: manage all the spaces from Unicode
const new_line_list = [`\n`, `\r`]
new line chars, for now only '\n' and '\r'
const no_match_found = -1
Results
const compile_ok = 0 // the regex string compiled, all ok
Errors
const err_char_unknown = -2 // the char used is unknown to the system
const err_undefined = -3 // the compiler symbol is undefined
const err_internal_error = -4 // Bug in the regex system!!
const err_cc_alloc_overflow = -5 // memory for char class full!!
const err_syntax_error = -6 // syntax error in regex compiling
const err_groups_overflow = -7 // max number of groups reached
const err_groups_max_nested = -8 // max number of nested groups reached
const err_group_not_balanced = -9 // group not balanced
const err_group_qm_notation = -10 // group invalid notation
const err_invalid_or_with_cc = -11 // invalid or on two consecutive char class
const err_neg_group_quantifier = -12 // negation groups can not have quantifier
const err_consecutive_dots = -13
const f_nl = 0x00000001 // end the match when find a new line symbol
const f_ms = 0x00000002 // match true only if the match is at the start of the string
const f_me = 0x00000004 // match true only if the match is at the end of the string
const f_efm = 0x00000100 // exit on first token matched, used by search
const f_bin = 0x00000200 // work only on bytes, ignore utf-8
const f_src = 0x00020000
behaviour modifier flags
new
fn new() RE
new create a RE of small size, usually sufficient for ordinary use
regex_base
fn regex_base(pattern string) (RE, int, int)
regex_base returns a regex object (RE) generated from pattern string and detailed information in re_err, err_pos, if an error occurred.
regex_opt
fn regex_opt(pattern string) !RE
regex_opt creates a new RE object from the RE pattern string
FnLog
type FnLog = fn (string)
Log function prototype
FnReplace
type FnReplace = fn (re RE, in_txt string, start int, end int) string
Type of function used for custom replace. in_txt: source text; start: index of the start of the match in in_txt; end: index of the end of the match in in_txt. The match is in in_txt[start..end].
FnValidator
type FnValidator = fn (u8) bool
RE
struct RE {
pub mut:
prog []Token
prog_len int // regex program len
// char classes storage
cc []CharClass // char class list
cc_index int // index
// groups
group_count int // number of groups in this regex struct
groups []int // groups index results
group_max_nested int = 3 // max nested group
group_max int = 8 // max allowed number of different groups
state_list []StateObj
group_csave_flag bool // flag to enable continuous saving
group_csave []int //= []int{} // groups continuous save list
group_map map[string]int // groups names map
group_stack []int
group_data []int
// flags
flag int // flag for optional parameters
// Debug/log
debug int // enable in order to have the unroll of the code 0 = NO_DEBUG, 1 = LIGHT 2 = VERBOSE
log_func FnLog = simple_log // log function, can be customized by the user
query string // query string
}
compile_opt
fn (mut re RE) compile_opt(pattern string) !
compile_opt compile RE pattern string
find
fn (mut re RE) find(in_txt string) (int, int)
find try to find the first match in the input string
find_all
fn (mut re RE) find_all(in_txt string) []int
find_all find all the non overlapping occurrences of the match pattern and return the start and end index of the match
Usage:
blurb := 'foobar boo steelbar toolbox foot tooooot'
mut re := regex.regex_opt('f|t[eo]+')?
res := re.find_all(blurb) // [0, 3, 12, 15, 20, 23, 28, 31, 33, 39]
find_all_str
fn (mut re RE) find_all_str(in_txt string) []string
find_all_str find all the non overlapping occurrences of the match pattern, return a string list
find_from
fn (mut re RE) find_from(in_txt string, start int) (int, int)
find_from tries to find the first match in the input string, starting from the start index
get_code
fn (re &RE) get_code() string
get_code return the compiled code as regex string, note: may be different from the source!
get_group_bounds_by_id
fn (re &RE) get_group_bounds_by_id(group_id int) (int, int)
get_group_bounds_by_id gets a group's boundaries by its id
get_group_bounds_by_name
fn (re &RE) get_group_bounds_by_name(group_name string) (int, int)
get_group_bounds_by_name get a group boundaries by its name
get_group_by_id
fn (re &RE) get_group_by_id(in_txt string, group_id int) string
get_group_by_id get a group string by its id
get_group_by_name
fn (re &RE) get_group_by_name(in_txt string, group_name string) string
get_group_by_name gets a group string by its name
get_group_list
fn (re &RE) get_group_list() []Re_group
get_group_list return a list of Re_group for the found groups
get_query
fn (re &RE) get_query() string
get_query return a string with a reconstruction of the query starting from the regex program code
match_base
fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int)
match_string
fn (re &RE) match_string(in_txt string) (int, int)
match_string Match the pattern with the in_txt string
matches_string
fn (re &RE) matches_string(in_txt string) bool
matches_string Checks if the pattern matches the in_txt string
replace
fn (mut re RE) replace(in_txt string, repl_str string) string
replace return a string where the matches are replaced with the repl_str string, this function supports groups in the replace string
replace_by_fn
fn (mut re RE) replace_by_fn(in_txt string, repl_fn FnReplace) string
replace_by_fn return a string where the matches are replaced with the string from the repl_fn callback function
replace_n
fn (mut re RE) replace_n(in_txt string, repl_str string, count int) string
replace_n returns a string where the first count matches are replaced with the repl_str string. If count is > 0, the replacement begins from the start of the string toward the end; if count is < 0, the replacement begins from the end of the string toward the start; if count is 0, nothing is replaced.
replace_simple
fn (mut re RE) replace_simple(in_txt string, repl string) string
replace_simple return a string where the matches are replaced with the replace string
reset
fn (mut re RE) reset()
Reset RE object
split
fn (mut re RE) split(in_txt string) []string
split returns the sections of string around the regex
Usage:
blurb := 'foobar boo steelbar toolbox foot tooooot'
mut re := regex.regex_opt('f|t[eo]+')?
res := re.split(blurb) // ['bar boo s', 'lbar ', 'lbox ', 't ', 't']
Re_group
struct Re_group {
pub:
start int = -1
end int = -1
}