- Implement level-scoped .gitignore/.heroignore matching - Rewrite directory walker to use new ignore matcher - Replace filemap parser with robust header-based logic - Support `FILE`, `FILECHANGE`, and legacy header formats - Add extensive tests for new parsing and ignore features
119 lines
3.2 KiB
V
119 lines
3.2 KiB
V
module codewalker
|
|
|
|
// A minimal gitignore-like matcher used by CodeWalker.
// Supported pattern forms:
// - Directory patterns ending with '/': ignore paths located under a directory of that name
// - Extension patterns such as '*.pyc' (any '*.<ext>')
// - Plain substrings and '*' wildcards
// - Lines starting with '#' are comments; empty lines are skipped
// Negation ('!pattern') is not supported, for simplicity.
|
|
|
|
// Built-in ignore patterns in .gitignore syntax, one pattern per '\n'-separated line.
// gitignore_matcher_new loads this text as global rules.
const default_gitignore = '__pycache__/\n*.py[cod]\n*.so\n.Python\nbuild/\ndevelop-eggs/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\n.env\n.venv\nvenv/\n.tox/\n.nox/\n.coverage\n.coveragerc\ncoverage.xml\n*.cover\n*.gem\n*.pyc\n.cache\n.pytest_cache/\n.mypy_cache/\n.hypothesis/\n'
|
|
|
|
// IgnoreRule pairs a single ignore pattern with the directory it is scoped to.
struct IgnoreRule {
	// Directory of the ignore file that supplied the pattern, relative to the
	// source root; '' means the rule is global.
	base string
	// The pattern text, e.g. 'build/' or '*.pyc'.
	pattern string
}
|
|
|
|
// IgnoreMatcher holds the ignore rules consulted by `is_ignored`.
pub struct IgnoreMatcher {
pub mut:
	// Rules in insertion order; `is_ignored` returns on the first one that matches.
	rules []IgnoreRule
}
|
|
|
|
// gitignore_matcher_new returns an IgnoreMatcher that already contains the
// built-in `default_gitignore` patterns as global (empty-base) rules.
pub fn gitignore_matcher_new() IgnoreMatcher {
	mut matcher := IgnoreMatcher{}
	// Same call `add_content` makes: the empty base marks the rules as global.
	matcher.add_content_with_base('', default_gitignore)
	return matcher
}
|
|
|
|
// add_content registers raw .gitignore-style `content` as global rules,
// i.e. rules whose base is the empty string.
pub fn (mut m IgnoreMatcher) add_content(content string) {
	global_base := ''
	m.add_content_with_base(global_base, content)
}
|
|
|
|
// add_content_with_base registers raw .gitignore/.heroignore-style `content`
// as rules scoped to the directory `base_rel`.
//
// `base_rel` is normalised to forward slashes, stripped of leading and trailing
// '/', and lower-cased. Each line of `content` is trimmed; blank lines and
// lines starting with '#' are skipped, every other line becomes one rule.
pub fn (mut m IgnoreMatcher) add_content_with_base(base_rel string, content string) {
	scope := base_rel.replace('\\', '/').trim('/').to_lower()
	for entry in content.split_into_lines() {
		text := entry.trim_space()
		is_blank := text.len == 0
		is_comment := text.starts_with('#')
		if !is_blank && !is_comment {
			m.rules << IgnoreRule{
				base:    scope
				pattern: text
			}
		}
	}
}
|
|
|
|
// ignore_glob_contains reports whether the literal pieces of `pattern`
// (the text between its '*' characters) all occur in `text`, in order and
// without overlapping. The match is not anchored to either end of `text`,
// and a '*' may span '/' separators.
fn ignore_glob_contains(text string, pattern string) bool {
	mut cursor := 0
	for piece in pattern.split('*') {
		if piece == '' {
			continue
		}
		pos := text.index_after(piece, cursor) or { return false }
		cursor = pos + piece.len
	}
	return true
}

// is_ignored reports whether `relpath` is excluded by at least one rule.
//
// `relpath` is normalised to forward slashes, stripped of leading '/', and
// lower-cased, so matching is case-insensitive. A rule with a non-empty base
// only applies to paths strictly below that base and is matched against the
// remainder of the path. Rules are tried in insertion order; the first match wins.
//
// Pattern forms, checked in this order:
//   - 'name/'   directory: matches when `name` is a complete segment (or run
//               of segments) of the path; `name` may contain '*'
//   - '*.ext'   suffix: matches when the path ends with '.ext'; multi-dot
//               suffixes such as '*.tar.gz' are compared in full
//   - 'a*b'     wildcard: the literal pieces must occur in order
//   - anything else: plain substring match
// Character classes such as '[cod]' are compared literally, and negation
// ('!') is not interpreted.
pub fn (m IgnoreMatcher) is_ignored(relpath string) bool {
	path_low := relpath.replace('\\', '/').trim_left('/').to_lower()
	for rule in m.rules {
		pat := rule.pattern.replace('\\', '/').trim_space().to_lower()
		if pat == '' {
			continue
		}

		// Rebase the path onto the directory that owns the rule.
		mut sub := path_low
		if rule.base != '' {
			prefix := rule.base + '/'
			if !sub.starts_with(prefix) {
				// Either outside the base or the base directory itself;
				// scoped rules apply only to entries under the base.
				continue
			}
			sub = sub[prefix.len..]
		}

		// Directory pattern (relative to base).
		if pat.ends_with('/') {
			dirpat := pat.trim('/')
			if dirpat == '' {
				// A pattern made only of slashes names no directory.
				continue
			}
			// Wrapping both sides in '/' turns "is a whole segment" into a
			// containment test. It covers the directory entry itself at any
			// depth as well as everything below it.
			if ignore_glob_contains('/' + sub + '/', '/' + dirpat + '/') {
				return true
			}
			continue
		}

		// Suffix pattern '*.ext'. The whole text after '*' is compared, so
		// '*.tar.gz' does not degrade into '*.gz'. Patterns with a further
		// '*' are left to the wildcard branch below.
		if pat.starts_with('*.') {
			suffix := pat[1..]
			if !suffix.contains('*') {
				if sub.ends_with(suffix) {
					return true
				}
				continue
			}
		}

		// Wildcard '*' anywhere: sequential containment of the literal pieces.
		if pat.contains('*') {
			if ignore_glob_contains(sub, pat) {
				return true
			}
			continue
		}

		// Fallback: substring match on the sub-path.
		if sub.contains(pat) {
			return true
		}
	}
	return false
}
|