diff --git a/lib/ai/codewalker/factory.v b/lib/ai/codewalker/factory.v index 809a0957..84f27bdf 100644 --- a/lib/ai/codewalker/factory.v +++ b/lib/ai/codewalker/factory.v @@ -1,14 +1,24 @@ module codewalker -// new creates a CodeWalker instance with default ignore patterns -pub fn new() CodeWalker { - return CodeWalker{ - scoped_ignore: ScopedIgnore{} - } +@[params] +pub struct FileMapArgs { +pub mut: + path string + content string + content_read bool = true // If false, file content not read from disk + // Include if matches any wildcard pattern (* = any sequence) + filter []string + // Exclude if matches any wildcard pattern + filter_ignore []string } -// filemap creates FileMap from path or content (convenience function) +// filemap creates FileMap from path or content string pub fn filemap(args FileMapArgs) !FileMap { - mut cw := new() - return cw.filemap_get(args) + if args.path != '' { + return filemap_get_from_path(args.path, args.content_read)! + } else if args.content != '' { + return filemap_get_from_content(args.content)! 
+ } else { + return error('Either path or content must be provided') + } } diff --git a/lib/ai/codewalker/codewalker_test.v b/lib/ai/codewalker/filemap_test.v similarity index 100% rename from lib/ai/codewalker/codewalker_test.v rename to lib/ai/codewalker/filemap_test.v diff --git a/lib/ai/codewalker/ignore.v b/lib/ai/codewalker/ignore.v index 0ec477f2..af45da7f 100644 --- a/lib/ai/codewalker/ignore.v +++ b/lib/ai/codewalker/ignore.v @@ -1,5 +1,7 @@ module codewalker +import arrays + // Default ignore patterns based on .gitignore conventions const default_gitignore = ' .git/ @@ -45,3 +47,11 @@ Thumbs.db *.temp *.log ' + +pub fn find_ignore_patterns() []string { + mut patterns := default_gitignore.split_into_lines() + patterns.sort() + patterns = arrays.uniq(patterns) + + return patterns +} diff --git a/lib/ai/codewalker/model.v b/lib/ai/codewalker/model.v index d7ad0f24..7cf77e08 100644 --- a/lib/ai/codewalker/model.v +++ b/lib/ai/codewalker/model.v @@ -14,61 +14,3 @@ pub: category string filename string } - -// ScopedIgnore handles directory-scoped .gitignore/.heroignore patterns -pub struct ScopedIgnore { -pub mut: - // Map of directory -> list of patterns - // Empty string key for root level patterns - patterns map[string][]string -} - -// Add patterns for a specific directory scope -pub fn (mut si ScopedIgnore) add_for_scope(scope string, patterns_text string) { - mut scope_key := scope - if scope == '' { - scope_key = '/' - } - - if scope_key !in si.patterns { - si.patterns[scope_key] = []string{} - } - - for line in patterns_text.split_into_lines() { - line_trimmed := line.trim_space() - if line_trimmed != '' && !line_trimmed.starts_with('#') { - si.patterns[scope_key] << gitignore_pattern_to_regex(line_trimmed) - } - } -} - -// Check if a relative path should be ignored -pub fn (si ScopedIgnore) is_ignored(relpath string) bool { - // Check all scopes that could apply to this path - path_parts := relpath.split('/') - - // Check root level patterns - if '/' 
in si.patterns { - for pattern in si.patterns['/'] { - if relpath.match_regex(pattern) { // Use match_regex here - return true - } - } - } - - // Check directory-scoped patterns - for i := 0; i < path_parts.len; i++ { - scope := path_parts[..i].join('/') - if scope != '' && scope in si.patterns { - // Check if remaining path matches patterns in this scope - remaining := path_parts[i..].join('/') - for pattern in si.patterns[scope] { - if remaining.match_regex(pattern) { - return true - } - } - } - } - - return false -} diff --git a/lib/ai/codewalker/codewalker.v b/lib/ai/codewalker/tools.v similarity index 79% rename from lib/ai/codewalker/codewalker.v rename to lib/ai/codewalker/tools.v index 5b899380..79799b89 100644 --- a/lib/ai/codewalker/codewalker.v +++ b/lib/ai/codewalker/tools.v @@ -2,38 +2,8 @@ module codewalker import incubaid.herolib.core.pathlib -// CodeWalker walks directories and parses file content -pub struct CodeWalker { -pub mut: - scoped_ignore ScopedIgnore -} - -@[params] -pub struct FileMapArgs { -pub mut: - path string - content string - content_read bool = true // If false, file content not read from disk -} - -// parse extracts FileMap from formatted content string -pub fn (mut cw CodeWalker) parse(content string) !FileMap { - return cw.filemap_get_from_content(content) -} - -// filemap_get creates FileMap from path or content string -pub fn (mut cw CodeWalker) filemap_get(args FileMapArgs) !FileMap { - if args.path != '' { - return cw.filemap_get_from_path(args.path, args.content_read)! - } else if args.content != '' { - return cw.filemap_get_from_content(args.content)! 
- } else { - return error('Either path or content must be provided') - } -} - // filemap_get_from_path reads directory and creates FileMap, respecting ignore patterns -fn (mut cw CodeWalker) filemap_get_from_path(path string, content_read bool) !FileMap { +fn filemap_get_from_path(path string, content_read bool) !FileMap { mut dir := pathlib.get(path) if !dir.exists() || !dir.is_dir() { return error('Directory "${path}" does not exist') @@ -43,17 +13,7 @@ fn (mut cw CodeWalker) filemap_get_from_path(path string, content_read bool) !Fi source: path } - // Load .gitignore and .heroignore files first to build scoped ignores - cw.scoped_ignore = ScopedIgnore{} - cw.load_ignore_files(path)! - - // Combine default patterns with custom ignore patterns - mut ignore_patterns := get_default_ignore_patterns() - - // Add any root-level custom patterns - if '/' in cw.scoped_ignore.patterns { - ignore_patterns << cw.scoped_ignore.patterns['/'] - } + // List all files using pathlib with both default and custom ignore patterns mut file_list := dir.list( diff --git a/lib/core/pathlib/path_tools.v b/lib/core/pathlib/path_tools.v index b3bf9cd7..23e7942d 100644 --- a/lib/core/pathlib/path_tools.v +++ b/lib/core/pathlib/path_tools.v @@ -2,6 +2,7 @@ module pathlib import os import incubaid.herolib.core.texttools +import incubaid.herolib.core.texttools.regext import time import crypto.md5 import rand @@ -292,6 +293,69 @@ pub fn (path Path) parent_find(tofind string) !Path { return path2.parent_find(tofind) } +// parent_find_advanced walks up the directory tree, collecting all items that match tofind +// pattern until it encounters an item matching the stop pattern. +// Both tofind and stop use matcher filter format supporting wildcards: +// - '*.txt' matches any .txt file +// - 'src*' matches anything starting with 'src' +// - '.git' matches exactly '.git' +// - '*test*' matches anything containing 'test' +// +// Returns all found paths before hitting the stop condition. 
+// If stop is never found, continues until reaching filesystem root. +// +// Examples: +// // Find all 'test_*.v' files until reaching '.git' directory +// tests := my_path.parent_find_advanced('test_*.v', '.git')! +// +// // Find any 'Makefile*' until hitting 'node_modules' +// makefiles := my_path.parent_find_advanced('Makefile*', 'node_modules')! +// +// // Find '*.md' files until reaching '.git' +// docs := my_path.parent_find_advanced('*.md', '.git')! +pub fn (path Path) parent_find_advanced(tofind string, stop string) ![]Path { + // Start from current path or its parent if it's a file + mut search_path := path + if search_path.is_file() { + search_path = search_path.parent()! + } + + // Create matchers from filter patterns + tofind_matcher := regext.new(filter: [tofind])! + stop_matcher := regext.new(filter: [stop])! + + mut found_paths := []Path{} + mut current := search_path + + for { + // List contents of current directory + mut items := os.ls(current.path) or { []string{} } + + // Check each item in the directory + for item in items { + // Check if this is the stop pattern - if yes, halt and return + if stop_matcher.match(item) { + return found_paths + } + + // Check if this matches what we're looking for + if tofind_matcher.match(item) { + full_path := os.join_path(current.path, item) + mut found_path := get(full_path) + if found_path.exists() { + found_paths << found_path + } + } + } + + // Try to move to parent directory + current = current.parent() or { + // Reached filesystem root, return what we found + return found_paths + } + } +} + // delete pub fn (mut path Path) rm() ! { return path.delete()