This commit is contained in:
2025-11-24 07:09:54 +01:00
parent 9343772bc5
commit 803828e808
8 changed files with 546 additions and 150 deletions

View File

@@ -0,0 +1,188 @@
module regext
import regex
// MatcherArgs carries the four optional pattern lists accepted by new().
// `regex*` entries are regex patterns compiled as given; `filter*` entries are
// wildcard patterns that new() converts with wildcard_to_regex() first.
@[params]
pub struct MatcherArgs {
pub mut:
// Include the string if it matches any of these regex patterns
regex []string
// Exclude the string if it matches any of these regex patterns
regex_ignore []string
// Include the string if it matches any of these wildcard patterns (* = any sequence)
filter []string
// Exclude the string if it matches any of these wildcard patterns
filter_ignore []string
}
// Matcher holds the compiled include/exclude patterns that match() tests strings against.
// Instances are built by new(); the fields are module-private.
pub struct Matcher {
mut:
// Compiled from MatcherArgs.regex
regex_include []regex.RE
// Compiled from MatcherArgs.filter (after wildcard-to-regex conversion)
filter_include []regex.RE
// Compiled from both MatcherArgs.regex_ignore and MatcherArgs.filter_ignore
regex_exclude []regex.RE
}
// new builds a Matcher by compiling every pattern supplied in `args_`.
//
// Parameters (all optional):
// - regex: include if the string matches any regex pattern (e.g. `r'.*\.v$'`)
// - regex_ignore: exclude if the string matches any regex pattern
// - filter: include if the string matches any wildcard pattern
//   (e.g. `'*.txt'`, `'test*'`, `'config'`); converted via wildcard_to_regex()
// - filter_ignore: exclude if the string matches any wildcard pattern
//
// Logic applied later by match():
// - If both regex and filter patterns are provided, BOTH groups must match (AND logic)
// - Within one group, any single pattern is enough (OR logic)
// - Exclude patterns take precedence over include patterns
//
// Errors:
// - Returns an error as soon as one pattern fails to compile. The message names
//   the offending pattern and appends the reason reported by the regex compiler.
//
// Examples (called from outside this module):
// `m := regext.new(regex: [r'.*\.v$'])!`
// `m := regext.new(filter: ['*.txt'], filter_ignore: ['*.bak'])!`
// `m := regext.new(regex: [r'.*test.*'], regex_ignore: [r'.*_test\.v$'])!`
pub fn new(args_ MatcherArgs) !Matcher {
	mut regex_include := []regex.RE{}
	mut filter_include := []regex.RE{}
	mut regex_exclude := []regex.RE{}

	// Regex include patterns are compiled as given.
	for regexstr in args_.regex {
		mut re := regex.regex_opt(regexstr) or {
			return error("cannot create regex for:'${regexstr}': ${err.msg()}")
		}
		regex_include << re
	}

	// Wildcard include patterns are kept in their own list so that match()
	// can require a hit in both groups when both are present.
	for filter_pattern in args_.filter {
		regex_pattern := wildcard_to_regex(filter_pattern)
		mut re := regex.regex_opt(regex_pattern) or {
			return error("cannot create regex from filter:'${filter_pattern}': ${err.msg()}")
		}
		filter_include << re
	}

	// Regex and wildcard excludes share one list: a hit on either kind rejects.
	for regexstr in args_.regex_ignore {
		mut re := regex.regex_opt(regexstr) or {
			return error("cannot create ignore regex for:'${regexstr}': ${err.msg()}")
		}
		regex_exclude << re
	}
	for filter_pattern in args_.filter_ignore {
		regex_pattern := wildcard_to_regex(filter_pattern)
		mut re := regex.regex_opt(regex_pattern) or {
			return error("cannot create ignore regex from filter:'${filter_pattern}': ${err.msg()}")
		}
		regex_exclude << re
	}

	return Matcher{
		regex_include:  regex_include
		filter_include: filter_include
		regex_exclude:  regex_exclude
	}
}
// match reports whether `text` satisfies the include patterns and hits no exclude pattern.
//
// Logic:
// - A non-empty regex include list requires `text` to match at least one of its patterns
// - A non-empty filter include list requires `text` to match at least one of its patterns
// - When both lists are non-empty, both requirements apply (AND between groups, OR within)
// - An empty include list imposes no requirement
// - Finally, a match on any exclude pattern rejects `text`, even if the includes matched
//
// Returns true when `text` is accepted, false otherwise.
//
// Examples (called from outside this module):
// `m := regext.new(regex: [r'.*\.v$'])!`
// `m.match('file.v')   // true`
// `m.match('file.txt') // false`
//
// `m2 := regext.new(filter: ['*.txt'], filter_ignore: ['*.bak'])!`
// `m2.match('readme.txt') // true`
// `m2.match('backup.bak') // false`
//
// `m3 := regext.new(filter: ['src*'], regex: [r'.*\.v$'])!`
// `m3.match('src/main.v')     // true  (matches both groups)`
// `m3.match('src/config.txt') // false (does not match the regex)`
// `m3.match('main.v')         // false (does not match the filter)`
pub fn (m Matcher) match(text string) bool {
	// Each include group only constrains the result when it is non-empty.
	if m.regex_include.len > 0 && !matcher_any_pattern_matches(m.regex_include, text) {
		return false
	}
	if m.filter_include.len > 0 && !matcher_any_pattern_matches(m.filter_include, text) {
		return false
	}
	// Excludes are checked last so they override any include hit.
	return !matcher_any_pattern_matches(m.regex_exclude, text)
}

// matcher_any_pattern_matches reports whether `text` matches at least one of
// `patterns`; an empty list yields false.
fn matcher_any_pattern_matches(patterns []regex.RE, text string) bool {
	for re in patterns {
		if re.matches_string(text) {
			return true
		}
	}
	return false
}

View File

@@ -0,0 +1,232 @@
module regext
// A matcher built without any patterns is expected to accept every string,
// including the empty one.
fn test_matcher_no_constraints() {
	matcher := new()!
	for candidate in ['file.txt', 'anything.v', '', 'test-123_file.log'] {
		assert matcher.match(candidate), 'expected a match for "${candidate}"'
	}
}
// A single regex include pattern accepts only strings ending in `.v`.
fn test_matcher_regex_include_single() {
	matcher := new(regex: [r'.*\.v$'])!
	for accepted in ['file.v', 'test.v', 'main.v'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	for rejected in ['file.txt', 'image.png', 'file.v.bak'] {
		assert !matcher.match(rejected), 'expected no match for "${rejected}"'
	}
}
// Several regex include patterns are OR-ed: matching either one is enough.
fn test_matcher_regex_include_multiple() {
	matcher := new(regex: [r'.*\.v$', r'.*\.txt$'])!
	for accepted in ['file.v', 'readme.txt', 'main.v', 'notes.txt'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	for rejected in ['image.png', 'archive.tar.gz'] {
		assert !matcher.match(rejected), 'expected no match for "${rejected}"'
	}
}
// With only an exclude regex, everything passes except strings ending in `_test.v`.
fn test_matcher_regex_ignore_single() {
	matcher := new(regex_ignore: [r'.*_test\.v$'])!
	// 'test_file.v' starts with "test" but does not end with "_test.v".
	for accepted in ['main.v', 'helper.v', 'test_file.v', 'test_helper.txt'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	assert !matcher.match('file_test.v')
}
// Several exclude regexes: a hit on any one of them rejects the string.
fn test_matcher_regex_ignore_multiple() {
	matcher := new(regex_ignore: [r'.*_test\.v$', r'.*\.bak$'])!
	for accepted in ['main.v', 'readme.txt'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	for rejected in ['file_test.v', 'backup.bak', 'old_backup.bak', 'test_data.bak'] {
		assert !matcher.match(rejected), 'expected no match for "${rejected}"'
	}
}
// Include `.v` files but drop those ending in `_test.v`.
fn test_matcher_regex_include_and_exclude() {
	matcher := new(regex: [r'.*\.v$'], regex_ignore: [r'.*_test\.v$'])!
	for accepted in ['main.v', 'helper.v', 'test_helper.v'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	for rejected in ['file_test.v', 'image.png', 'utils_test.v'] {
		assert !matcher.match(rejected), 'expected no match for "${rejected}"'
	}
}
// Leading-wildcard filter `*.txt`: the `*` may be empty ('.txt' passes) but the
// literal dot is required ('txt' fails).
fn test_matcher_filter_wildcard_start() {
	matcher := new(filter: ['*.txt'])!
	for accepted in ['readme.txt', 'config.txt', 'notes.txt', '.txt'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	for rejected in ['file.v', 'txt'] {
		assert !matcher.match(rejected), 'expected no match for "${rejected}"'
	}
}
// Trailing-wildcard filter `test*`: the string has to begin with "test".
fn test_matcher_filter_wildcard_end() {
	matcher := new(filter: ['test*'])!
	for accepted in ['test_file.v', 'test', 'test.txt', 'testing.v'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	assert !matcher.match('file_test.v')
}
// A filter without any `*` is expected to behave as a substring search.
fn test_matcher_filter_substring() {
	matcher := new(filter: ['config'])!
	for accepted in ['config.txt', 'my_config_file.v', 'config', 'reconfigure.py'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	assert !matcher.match('settings.txt')
}
// Several wildcard filters are OR-ed together.
fn test_matcher_filter_multiple() {
	matcher := new(filter: ['*.v', '*.txt', 'config*'])!
	for accepted in ['main.v', 'readme.txt', 'config.yaml', 'configuration.json'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	assert !matcher.match('image.png')
}
// Include `*.v` but exclude any `.v` name containing "test" (`*test*.v`),
// wherever "test" appears in the name.
fn test_matcher_filter_with_exclude() {
	matcher := new(filter: ['*.v'], filter_ignore: ['*test*.v'])!
	for accepted in ['main.v', 'helper.v'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	for rejected in ['helper_test.v', 'file.txt', 'test_helper.v'] {
		assert !matcher.match(rejected), 'expected no match for "${rejected}"'
	}
}
// Catch-all include `*` combined with two wildcard excludes.
fn test_matcher_filter_ignore_multiple() {
	matcher := new(filter: ['*'], filter_ignore: ['*.bak', '*_old.*'])!
	for accepted in ['file.txt', 'main.v', 'readme.md'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	for rejected in ['backup.bak', 'config_old.v', 'data_old.txt'] {
		assert !matcher.match(rejected), 'expected no match for "${rejected}"'
	}
}
// All four pattern kinds at once: the regex AND the filter must match, and
// neither the regex exclude nor the wildcard exclude may hit.
fn test_matcher_complex_combined() {
	matcher := new(
		regex:         [r'.*\.(v|go|rs)$']
		regex_ignore:  [r'.*test.*']
		filter:        ['src*']
		filter_ignore: ['*_generated.*']
	)!
	for accepted in ['src/main.v', 'src/helper.go', 'src/lib.rs'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	for rejected in ['src/main_test.v', 'src/main_generated.rs', 'main.v', 'test/helper.v'] {
		assert !matcher.match(rejected), 'expected no match for "${rejected}"'
	}
}
// The empty string fails a `.v` include regex but passes an unconstrained matcher.
fn test_matcher_empty_patterns() {
	only_v := new(regex: [r'.*\.v$'])!
	assert !only_v.match('')

	unconstrained := new()!
	assert unconstrained.match('')
}
// Square brackets inside a wildcard filter are expected to be taken literally,
// not as a regex character class.
fn test_matcher_special_characters_in_wildcard() {
	matcher := new(filter: ['*.test[1].v'])!
	for accepted in ['file.test[1].v', 'main.test[1].v'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	assert !matcher.match('file.test1.v')
}
// Matching is case-sensitive: only the exact spelling 'Main' counts, anywhere
// in the string.
fn test_matcher_case_sensitive() {
	matcher := new(regex: [r'.*Main.*'])!
	for accepted in ['Main.v', 'main_Main.txt'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	for rejected in ['main.v', 'MAIN.v'] {
		assert !matcher.match(rejected), 'expected no match for "${rejected}"'
	}
}
// When the same pattern is both included and excluded, the exclude wins.
fn test_matcher_exclude_takes_precedence() {
	matcher := new(regex: [r'.*\.v$'], regex_ignore: [r'.*\.v$'])!
	for rejected in ['file.v', 'file.txt'] {
		assert !matcher.match(rejected), 'expected no match for "${rejected}"'
	}
}
// With no include patterns, everything is accepted except what the exclude hits.
fn test_matcher_only_exclude_allows_everything_except() {
	matcher := new(regex_ignore: [r'.*\.bak$'])!
	for accepted in ['main.v', 'file.txt', 'config.py'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	for rejected in ['backup.bak', 'old.bak'] {
		assert !matcher.match(rejected), 'expected no match for "${rejected}"'
	}
}
// Alternation inside one regex plus a second, unrelated include regex.
fn test_matcher_complex_regex_patterns() {
	matcher := new(regex: [r'.*\.(go|v|rs)$', r'.*Makefile.*'])!
	for accepted in ['main.go', 'main.v', 'lib.rs', 'Makefile', 'Makefile.bak'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	assert !matcher.match('main.py')
}
// Filters with several `*` placeholders, one of them containing a path separator.
fn test_matcher_wildcard_combinations() {
	matcher := new(filter: ['src/*test*.v', '*_helper.*'])!
	for accepted in ['src/main_test.v', 'src/test_utils.v', 'utils_helper.js', 'test_helper.go'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	assert !matcher.match('src/main.v')
}
// Exclude names that begin with a dot; a dot later in the name is fine.
fn test_matcher_edge_case_dot_files() {
	matcher := new(regex_ignore: [r'^\..*'])!
	for rejected in ['.env', '.gitignore'] {
		assert !matcher.match(rejected), 'expected no match for "${rejected}"'
	}
	for accepted in ['file.dotfile', 'main.v'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
}
// Double extensions must match in full; a partial suffix is not enough.
fn test_matcher_multiple_extensions() {
	matcher := new(filter: ['*.tar.gz', '*.tar.bz2'])!
	for accepted in ['archive.tar.gz', 'backup.tar.bz2'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	for rejected in ['file.gz', 'file.tar'] {
		assert !matcher.match(rejected), 'expected no match for "${rejected}"'
	}
}
// Regex over path-like input: a `.v` file located somewhere under `src/`.
fn test_matcher_path_like_strings() {
	matcher := new(regex: [r'.*src/.*\.v$'])!
	for accepted in ['src/main.v', 'src/utils/helper.v'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	for rejected in ['test/main.v', 'src/config.txt'] {
		assert !matcher.match(rejected), 'expected no match for "${rejected}"'
	}
}
// Filter and regex includes together use AND logic, with a regex exclude on top.
fn test_matcher_filter_ignore_with_regex() {
	matcher := new(
		filter:       ['src*']
		regex:        [r'.*\.v$']
		regex_ignore: [r'.*_temp.*']
	)!
	for accepted in ['src/main.v', 'src/helper.v'] {
		assert matcher.match(accepted), 'expected a match for "${accepted}"'
	}
	// 'src/main_temp.v' hits the exclude, 'src/config.txt' misses the regex,
	// and 'main.v' misses the `src*` filter.
	for rejected in ['src/main_temp.v', 'src/config.txt', 'main.v'] {
		assert !matcher.match(rejected), 'expected no match for "${rejected}"'
	}
}

View File

@@ -1,8 +1,5 @@
# regex
## basic regex utilities
### escape_regex_chars
## escape_regex_chars
Escapes special regex metacharacters in a string to make it safe for use in regex patterns.
@@ -97,11 +94,3 @@ mut text_out2 := ri.replace(text: text, dedent: true) or { panic(err) }
ri.replace_in_dir(path:"/tmp/mypath",extensions:["md"])!
```
## Testing
Run regex conversion tests:
```bash
vtest ~/code/github/incubaid/herolib/lib/core/texttools/regext/regex_convert_test.v
```