...
This commit is contained in:
188
lib/core/texttools/regext/matcher.v
Normal file
188
lib/core/texttools/regext/matcher.v
Normal file
@@ -0,0 +1,188 @@
|
||||
module regext
|
||||
|
||||
import regex
|
||||
|
||||
// MatcherArgs carries the pattern lists consumed by new().
// It is tagged @[params] so the fields can be supplied as named arguments.
@[params]
pub struct MatcherArgs {
pub mut:
	// regex patterns: a string is included when it matches any of them
	regex []string
	// regex patterns: a string is excluded when it matches any of them
	regex_ignore []string
	// wildcard patterns (* = any sequence): included when any of them matches
	filter []string
	// wildcard patterns: excluded when any of them matches
	filter_ignore []string
}
|
||||
|
||||
// Matcher tests strings against compiled include and exclude patterns.
pub struct Matcher {
mut:
	// compiled include patterns that came from regex strings
	regex_include []regex.RE
	// compiled include patterns that came from wildcard filters
	filter_include []regex.RE
	// compiled exclude patterns
	regex_exclude []regex.RE
}
|
||||
|
||||
// new builds a Matcher from the pattern lists in `args_`.
//
// Parameters (all optional):
// - regex: regex patterns; include when any matches (e.g. r'.*\.v$')
// - regex_ignore: regex patterns; exclude when any matches
// - filter: wildcard patterns; include when any matches (e.g. '*.txt', 'test*', 'config')
// - filter_ignore: wildcard patterns; exclude when any matches
//
// Wildcard patterns are converted with wildcard_to_regex() before compilation.
// `regex` and `filter` are kept in separate include lists, while `regex_ignore`
// and `filter_ignore` are merged into one exclude list.
//
// Logic applied later by Matcher.match():
// - If both regex and filter patterns are provided, BOTH groups must match (AND logic)
// - Within a group, any single pattern may match (OR logic)
// - Exclude patterns take precedence over include patterns
//
// Returns the Matcher, or an error naming the offending pattern when
// regex.regex_opt() fails to compile it.
//
// Examples:
//   m := regext.new(regex: [r'.*\.v$'])!
//   m := regext.new(filter: ['*.txt'], filter_ignore: ['*.bak'])!
//   m := regext.new(regex: [r'.*test.*'], regex_ignore: [r'.*_test\.v$'])!
pub fn new(args_ MatcherArgs) !Matcher {
	mut regex_include := []regex.RE{}
	mut filter_include := []regex.RE{}
	mut regex_exclude := []regex.RE{}

	// Include patterns given directly as regex.
	for pattern in args_.regex {
		mut re := regex.regex_opt(pattern) or {
			return error("cannot create regex for:'${pattern}'")
		}
		regex_include << re
	}

	// Include patterns given as wildcards; kept apart from the regex includes
	// so that match() can require a hit in each group.
	for wildcard in args_.filter {
		converted := wildcard_to_regex(wildcard)
		mut re := regex.regex_opt(converted) or {
			return error("cannot create regex from filter:'${wildcard}'")
		}
		filter_include << re
	}

	// Exclude patterns given directly as regex.
	for pattern in args_.regex_ignore {
		mut re := regex.regex_opt(pattern) or {
			return error("cannot create ignore regex for:'${pattern}'")
		}
		regex_exclude << re
	}

	// Exclude patterns given as wildcards go into the same exclude list.
	for wildcard in args_.filter_ignore {
		converted := wildcard_to_regex(wildcard)
		mut re := regex.regex_opt(converted) or {
			return error("cannot create ignore regex from filter:'${wildcard}'")
		}
		regex_exclude << re
	}

	return Matcher{
		regex_include:  regex_include
		filter_include: filter_include
		regex_exclude:  regex_exclude
	}
}
|
||||
|
||||
// match reports whether `text` passes the include patterns and is not hit by
// any exclude pattern.
//
// Evaluation order:
// 1. If regex include patterns exist, `text` must match at least one of them.
// 2. If filter include patterns exist, `text` must match at least one of them.
//    When both groups exist, both conditions apply (AND between groups,
//    OR within a group).
// 3. If `text` matches any exclude pattern, the result is false.
// 4. Otherwise the result is true. A matcher without any patterns therefore
//    accepts every string.
//
// Examples:
//   m := regext.new(regex: [r'.*\.v$'])!
//   m.match('file.v') // true
//   m.match('file.txt') // false
//
//   m2 := regext.new(filter: ['*.txt'], filter_ignore: ['*.bak'])!
//   m2.match('readme.txt') // true
//   m2.match('backup.bak') // false
//
//   m3 := regext.new(filter: ['src*'], regex: [r'.*\.v$'])!
//   m3.match('src/main.v') // true (matches both)
//   m3.match('src/config.txt') // false (doesn't match regex)
//   m3.match('main.v') // false (doesn't match filter)
pub fn (m Matcher) match(text string) bool {
	// Regex include group: skipped when empty, otherwise one hit is required.
	if m.regex_include.len > 0 {
		mut regex_matched := false
		for re in m.regex_include {
			if re.matches_string(text) {
				regex_matched = true
				break
			}
		}
		if !regex_matched {
			return false
		}
	}

	// Filter include group: same rule, evaluated independently of the regex group.
	if m.filter_include.len > 0 {
		mut filter_matched := false
		for re in m.filter_include {
			if re.matches_string(text) {
				filter_matched = true
				break
			}
		}
		if !filter_matched {
			return false
		}
	}

	// Excludes win over includes; an empty exclude list rejects nothing.
	for re in m.regex_exclude {
		if re.matches_string(text) {
			return false
		}
	}

	return true
}
|
||||
232
lib/core/texttools/regext/matcher_test.v
Normal file
232
lib/core/texttools/regext/matcher_test.v
Normal file
@@ -0,0 +1,232 @@
|
||||
module regext
|
||||
|
||||
// A matcher built without any patterns accepts every input, including ''.
fn test_matcher_no_constraints() {
	matcher := new()!
	assert matcher.match('file.txt')
	assert matcher.match('anything.v')
	assert matcher.match('')
	assert matcher.match('test-123_file.log')
}
|
||||
|
||||
// A single include regex anchored with `$` accepts only names ending in '.v'.
fn test_matcher_regex_include_single() {
	matcher := new(regex: [r'.*\.v$'])!
	assert matcher.match('file.v')
	assert matcher.match('test.v')
	assert matcher.match('main.v')
	assert !matcher.match('file.txt')
	assert !matcher.match('image.png')
	assert !matcher.match('file.v.bak')
}
|
||||
|
||||
// Several include regexes are OR-ed: matching either one is enough.
fn test_matcher_regex_include_multiple() {
	matcher := new(regex: [r'.*\.v$', r'.*\.txt$'])!
	assert matcher.match('file.v')
	assert matcher.match('readme.txt')
	assert matcher.match('main.v')
	assert matcher.match('notes.txt')
	assert !matcher.match('image.png')
	assert !matcher.match('archive.tar.gz')
}
|
||||
|
||||
// With only an ignore regex, everything passes except names ending in '_test.v'.
fn test_matcher_regex_ignore_single() {
	matcher := new(regex_ignore: [r'.*_test\.v$'])!
	assert matcher.match('main.v')
	assert matcher.match('helper.v')
	assert !matcher.match('file_test.v')
	// 'test_file.v' does not end with '_test.v', so it is not excluded
	assert matcher.match('test_file.v')
	assert matcher.match('test_helper.txt')
}
|
||||
|
||||
// Several ignore regexes are OR-ed: matching any one of them excludes the name.
fn test_matcher_regex_ignore_multiple() {
	matcher := new(regex_ignore: [r'.*_test\.v$', r'.*\.bak$'])!
	assert matcher.match('main.v')
	assert !matcher.match('file_test.v')
	assert !matcher.match('backup.bak')
	assert !matcher.match('old_backup.bak')
	assert matcher.match('readme.txt')
	assert !matcher.match('test_data.bak')
}
|
||||
|
||||
// Include '.v' files but drop those ending in '_test.v'.
fn test_matcher_regex_include_and_exclude() {
	matcher := new(regex: [r'.*\.v$'], regex_ignore: [r'.*_test\.v$'])!
	assert matcher.match('main.v')
	assert matcher.match('helper.v')
	assert !matcher.match('file_test.v')
	assert !matcher.match('image.png')
	assert matcher.match('test_helper.v')
	assert !matcher.match('utils_test.v')
}
|
||||
|
||||
// Leading wildcard: '*.txt' accepts any name ending in '.txt', including the
// bare '.txt', but not 'txt' without the dot.
fn test_matcher_filter_wildcard_start() {
	matcher := new(filter: ['*.txt'])!
	assert matcher.match('readme.txt')
	assert matcher.match('config.txt')
	assert matcher.match('notes.txt')
	assert !matcher.match('file.v')
	assert matcher.match('.txt')
	assert !matcher.match('txt')
}
|
||||
|
||||
// Trailing wildcard: 'test*' accepts names that start with 'test'.
fn test_matcher_filter_wildcard_end() {
	matcher := new(filter: ['test*'])!
	assert matcher.match('test_file.v')
	assert matcher.match('test')
	assert matcher.match('test.txt')
	assert !matcher.match('file_test.v')
	assert matcher.match('testing.v')
}
|
||||
|
||||
// A filter without any '*' is expected to match as a substring anywhere in
// the name (see 'reconfigure.py').
fn test_matcher_filter_substring() {
	matcher := new(filter: ['config'])!
	assert matcher.match('config.txt')
	assert matcher.match('my_config_file.v')
	assert matcher.match('config')
	assert matcher.match('reconfigure.py')
	assert !matcher.match('settings.txt')
}
|
||||
|
||||
// Several wildcard filters are OR-ed: matching any one is enough.
fn test_matcher_filter_multiple() {
	matcher := new(filter: ['*.v', '*.txt', 'config*'])!
	assert matcher.match('main.v')
	assert matcher.match('readme.txt')
	assert matcher.match('config.yaml')
	assert matcher.match('configuration.json')
	assert !matcher.match('image.png')
}
|
||||
|
||||
// Include '*.v' but exclude any '.v' name containing 'test' ('*test*.v'),
// whether 'test' is a prefix or a suffix of the base name.
fn test_matcher_filter_with_exclude() {
	matcher := new(filter: ['*.v'], filter_ignore: ['*test*.v'])!
	assert matcher.match('main.v')
	assert matcher.match('helper.v')
	assert !matcher.match('helper_test.v')
	assert !matcher.match('file.txt')
	// excluded as well: 'test' appears at the start of the name
	assert !matcher.match('test_helper.v')
}
|
||||
|
||||
// Include everything ('*'), then drop '.bak' files and names containing '_old.'.
fn test_matcher_filter_ignore_multiple() {
	matcher := new(filter: ['*'], filter_ignore: ['*.bak', '*_old.*'])!
	assert matcher.match('file.txt')
	assert matcher.match('main.v')
	assert !matcher.match('backup.bak')
	assert !matcher.match('config_old.v')
	assert !matcher.match('data_old.txt')
	assert matcher.match('readme.md')
}
|
||||
|
||||
// All four pattern lists at once: the name must match the regex AND the
// filter, and must not match either ignore pattern.
fn test_matcher_complex_combined() {
	matcher := new(
		regex:         [r'.*\.(v|go|rs)$']
		regex_ignore:  [r'.*test.*']
		filter:        ['src*']
		filter_ignore: ['*_generated.*']
	)!
	assert matcher.match('src/main.v')
	assert matcher.match('src/helper.go')
	assert matcher.match('src/lib.rs')
	assert !matcher.match('src/main_test.v')
	assert !matcher.match('src/main_generated.rs')
	assert !matcher.match('main.v')
	assert !matcher.match('test/helper.v')
}
|
||||
|
||||
// Empty input string: rejected by an include regex that needs '.v',
// accepted by a matcher without any patterns.
fn test_matcher_empty_patterns() {
	with_regex := new(regex: [r'.*\.v$'])!
	assert !with_regex.match('')

	unconstrained := new()!
	assert unconstrained.match('')
}
|
||||
|
||||
// Regex metacharacters inside a wildcard filter ('[', ']', '.') are expected
// to be taken literally, so 'test1' must not match 'test[1]'.
fn test_matcher_special_characters_in_wildcard() {
	matcher := new(filter: ['*.test[1].v'])!
	assert matcher.match('file.test[1].v')
	assert matcher.match('main.test[1].v')
	assert !matcher.match('file.test1.v')
}
|
||||
|
||||
// Matching is case-sensitive: only the exact spelling 'Main' is accepted,
// anywhere in the string.
fn test_matcher_case_sensitive() {
	matcher := new(regex: [r'.*Main.*'])!
	assert matcher.match('Main.v')
	assert !matcher.match('main.v')
	assert !matcher.match('MAIN.v')
	// 'Main' appears after the lowercase prefix
	assert matcher.match('main_Main.txt')
}
|
||||
|
||||
// When the same pattern is used for include and exclude, the exclude wins.
fn test_matcher_exclude_takes_precedence() {
	matcher := new(regex: [r'.*\.v$'], regex_ignore: [r'.*\.v$'])!
	// matches include and exclude -> rejected
	assert !matcher.match('file.v')
	// never matched the include in the first place
	assert !matcher.match('file.txt')
}
|
||||
|
||||
// With only an exclude pattern, every name passes unless it ends in '.bak'.
fn test_matcher_only_exclude_allows_everything_except() {
	matcher := new(regex_ignore: [r'.*\.bak$'])!
	assert matcher.match('main.v')
	assert matcher.match('file.txt')
	assert matcher.match('config.py')
	assert !matcher.match('backup.bak')
	assert !matcher.match('old.bak')
}
|
||||
|
||||
// An alternation group for extensions combined with a second regex that
// accepts any name containing 'Makefile'.
fn test_matcher_complex_regex_patterns() {
	matcher := new(regex: [r'.*\.(go|v|rs)$', r'.*Makefile.*'])!
	assert matcher.match('main.go')
	assert matcher.match('main.v')
	assert matcher.match('lib.rs')
	assert matcher.match('Makefile')
	assert matcher.match('Makefile.bak')
	assert !matcher.match('main.py')
}
|
||||
|
||||
// Filters with several wildcards each; either filter may match.
fn test_matcher_wildcard_combinations() {
	matcher := new(filter: ['src/*test*.v', '*_helper.*'])!
	assert matcher.match('src/main_test.v')
	assert matcher.match('src/test_utils.v')
	assert matcher.match('utils_helper.js')
	assert !matcher.match('src/main.v')
	assert matcher.match('test_helper.go')
}
|
||||
|
||||
// Exclude names that start with a literal dot; a dot elsewhere is fine.
fn test_matcher_edge_case_dot_files() {
	matcher := new(regex_ignore: [r'^\..*'])!
	assert !matcher.match('.env')
	assert !matcher.match('.gitignore')
	assert matcher.match('file.dotfile')
	assert matcher.match('main.v')
}
|
||||
|
||||
// Double extensions must match in full: '.gz' or '.tar' alone is not enough.
fn test_matcher_multiple_extensions() {
	matcher := new(filter: ['*.tar.gz', '*.tar.bz2'])!
	assert matcher.match('archive.tar.gz')
	assert matcher.match('backup.tar.bz2')
	assert !matcher.match('file.gz')
	assert !matcher.match('file.tar')
}
|
||||
|
||||
// Path-like input: accept '.v' files whose path contains 'src/'.
fn test_matcher_path_like_strings() {
	matcher := new(regex: [r'.*src/.*\.v$'])!
	assert matcher.match('src/main.v')
	assert matcher.match('src/utils/helper.v')
	assert !matcher.match('test/main.v')
	assert !matcher.match('src/config.txt')
}
|
||||
|
||||
// Filter and regex includes together are AND-ed (both must match), and a
// regex_ignore pattern is applied on top of that.
fn test_matcher_filter_ignore_with_regex() {
	matcher := new(
		filter:       ['src*']
		regex:        [r'.*\.v$']
		regex_ignore: [r'.*_temp.*']
	)!
	assert matcher.match('src/main.v')
	assert matcher.match('src/helper.v')
	assert !matcher.match('src/main_temp.v')
	// fails the .*\.v$ regex
	assert !matcher.match('src/config.txt')
	// fails the src* filter
	assert !matcher.match('main.v')
}
|
||||
@@ -1,8 +1,5 @@
|
||||
# regex
|
||||
|
||||
## basic regex utilities
|
||||
|
||||
### escape_regex_chars
|
||||
## escape_regex_chars
|
||||
|
||||
Escapes special regex metacharacters in a string to make it safe for use in regex patterns.
|
||||
|
||||
@@ -97,11 +94,3 @@ mut text_out2 := ri.replace(text: text, dedent: true) or { panic(err) }
|
||||
ri.replace_in_dir(path:"/tmp/mypath",extensions:["md"])!
|
||||
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
Run regex conversion tests:
|
||||
|
||||
```bash
|
||||
vtest ~/code/github/incubaid/herolib/lib/core/texttools/regext/regex_convert_test.v
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user