codewalker

This commit is contained in:
2025-11-24 05:48:13 +01:00
parent 4402cba8ac
commit d282a5dc95
29 changed files with 1412 additions and 453 deletions

142
lib/ai/codewalker/README.md Normal file
View File

@@ -0,0 +1,142 @@
# CodeWalker Module
Parse directories or formatted strings into file maps with automatic ignore pattern support.
## Features
- 📂 Walk directories recursively and build file maps
- 🚫 Respect `.gitignore` and `.heroignore` ignore patterns with directory scoping
- 📝 Parse custom `===FILE:name===` format into file maps
- 📦 Export/write file maps to disk
- 🛡️ Robust, defensive header parsing (tolerates extra spaces, any number of `=` characters, and upper/lower-case keywords)
## Quick Start
### From Directory Path
```v
import incubaid.herolib.lib.ai.codewalker
mut cw := codewalker.new()
mut fm := cw.filemap_get(path: '/path/to/project')!
// Iterate files
for path, content in fm.content {
println('${path}: ${content.len} bytes')
}
```
### From Formatted String
```v
content_str := '
===FILE:main.v===
fn main() {
println("Hello!")
}
===FILE:utils/helper.v===
pub fn help() {}
===END===
'
mut cw := codewalker.new()
mut fm := cw.parse(content_str)!
println(fm.get('main.v')!)
```
## FileMap Operations
```v
// Get file content
content := fm.get('path/to/file.txt')!
// Set/modify file
fm.set('new/file.txt', 'content here')
// Find files by prefix
files := fm.find('src/')
// Export to directory
fm.export('/output/dir')!
// Write updates to directory
fm.write('/project/dir')!
// Convert back to formatted string
text := fm.content()
```
## File Format
### Full Files
```
===FILE:path/to/file.txt===
File content here
Can span multiple lines
===END===
```
### Partial Content (for future morphing)
```
===FILECHANGE:src/models.v===
struct User {
id int
}
===END===
```
### Both Together
```
===FILE:main.v===
fn main() {}
===FILECHANGE:utils.v===
fn helper() {}
===END===
```
## Parsing Robustness
Parser handles variations:
```
===FILE:name.txt=== // Standard
= = FILE : name.txt = = // Extra spaces
===file:name.txt=== // Lowercase
==FILE:name.txt== // Different = count
```
## Error Handling
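Recoverable parse problems (a stray `END` marker, content outside any block) are collected in `FileMap.errors` instead of aborting the parse; a header with an empty filename is returned as an error: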
```v
mut fm := cw.filemap_get(content: str)!
if fm.errors.len > 0 {
for err in fm.errors {
println('Line ${err.linenr}: ${err.message}')
}
}
```
## Ignore Patterns
- Respects `.gitignore` and `.heroignore` in any directory
- Patterns are scoped to the directory that contains them
- Default patterns include `.git/`, `node_modules/`, `*.pyc`, etc.
- Use `/` suffix for directory patterns: `dist/`
- Use `*` for wildcards: `*.log`
- Lines starting with `#` are comments
Example `.heroignore`:
```
build/
*.tmp
.env
__pycache__/
```

View File

@@ -0,0 +1,212 @@
module codewalker
import incubaid.herolib.core.pathlib
// CodeWalker builds FileMap values, either by walking a directory on disk or by
// parsing a formatted `===FILE:name===` string.
pub struct CodeWalker {
pub mut:
// Ignore rules consulted by filemap_get_from_path. Patterns found in
// .gitignore/.heroignore files during a walk are appended to this matcher.
ignorematcher IgnoreMatcher
}
// FileMapArgs selects the input for filemap_get: a directory path or a formatted
// content string. filemap_get checks `path` first, so it wins when both are set.
@[params]
pub struct FileMapArgs {
pub mut:
path string // directory to walk
content string // formatted `===FILE:name===` text to parse
content_read bool = true // If false, files are listed with empty content instead of being read from disk
}
// parse converts a formatted `===FILE:name===` string into a FileMap.
// It forwards to filemap_get_from_content and propagates any error it returns.
pub fn (mut cw CodeWalker) parse(content string) !FileMap {
	parsed := cw.filemap_get_from_content(content)!
	return parsed
}
// filemap_get builds a FileMap from whichever input is set in args.
// A non-empty `path` takes precedence and is walked as a directory; otherwise a
// non-empty `content` string is parsed. Returns an error when both are empty.
pub fn (mut cw CodeWalker) filemap_get(args FileMapArgs) !FileMap {
	if args.path != '' {
		return cw.filemap_get_from_path(args.path, args.content_read)!
	}
	if args.content == '' {
		return error('Either path or content must be provided')
	}
	return cw.filemap_get_from_content(args.content)!
}
// filemap_get_from_path lists the entries of `path` and returns a FileMap keyed by
// path relative to `path`.
// Ignore files (.gitignore/.heroignore) are loaded first, each scoped to the
// directory it lives in, and are themselves left out of the result.
// When content_read is false every kept file is stored with empty content.
// Returns an error if `path` is missing or not a directory, or if listing,
// relativising or reading a kept file fails.
fn (mut cw CodeWalker) filemap_get_from_path(path string, content_read bool) !FileMap {
	mut root := pathlib.get(path)
	if !(root.exists() && root.is_dir()) {
		return error('Directory "${path}" does not exist')
	}
	mut listing := root.list(ignore_default: false)!
	mut result := FileMap{
		source: path
	}
	// Pass 1: register every ignore file before any path is tested against the matcher.
	for mut entry in listing.paths {
		if !entry.is_file() {
			continue
		}
		entry_name := entry.name()
		if entry_name != '.gitignore' && entry_name != '.heroignore' {
			continue
		}
		// Unreadable or empty ignore files are skipped silently.
		rules := entry.read() or { '' }
		if rules == '' {
			continue
		}
		rel := entry.path_relative(path) or { '' }
		// An ignore file at the top level has no '/' in its relative path -> global scope.
		mut base_rel := ''
		if rel.contains('/') {
			base_rel = rel.all_before_last('/')
		}
		cw.ignorematcher.add_content_with_base(base_rel, rules)
	}
	// Pass 2: collect every regular file that is not an ignore file and not ignored.
	for mut entry in listing.paths {
		if !entry.is_file() {
			continue
		}
		entry_name := entry.name()
		if entry_name == '.gitignore' || entry_name == '.heroignore' {
			continue
		}
		relpath := entry.path_relative(path)!
		if cw.ignorematcher.is_ignored(relpath) {
			continue
		}
		mut text := ''
		if content_read {
			text = entry.read()!
		}
		result.content[relpath] = text
	}
	return result
}
// parse_header extracts the block kind and filename from a marker line.
//
// The surrounding fence is stripped as any mix of `=`, spaces and tabs, so
// `===FILE:a.txt===`, `==file: a.txt ==` and `= = FILE : a.txt = =` all yield
// (BlockKind.file, 'a.txt'). The keyword before the colon is matched
// case-insensitively.
//
// Returns (BlockKind.end, '') for an END marker and also for any line that is not
// a recognisable FILE/FILECHANGE header (no `=`, nothing but fence characters,
// no colon, unknown keyword).
// Returns an error when a colon is present but nothing follows it.
fn parse_header(line string) !(BlockKind, string) {
	cleaned := line.trim_space()
	// Must have = and content
	if !cleaned.contains('=') {
		return BlockKind.end, ''
	}
	// Strip the fence on both sides. Using one cutset (instead of trimming `=`
	// and whitespace in separate steps) also removes spaced-out fences like `= =`.
	content := cleaned.trim('= \t')
	if content.len == 0 || content.to_lower() == 'end' {
		return BlockKind.end, ''
	}
	// Parse FILE or FILECHANGE
	if content.contains(':') {
		kind_str := content.all_before(':').to_lower().trim_space()
		filename := content.all_after(':').trim_space()
		if filename.len < 1 {
			return error('Invalid filename: empty after colon')
		}
		match kind_str {
			'file' { return BlockKind.file, filename }
			'filechange' { return BlockKind.filechange, filename }
			else { return BlockKind.end, '' }
		}
	}
	return BlockKind.end, ''
}
// filemap_get_from_content parses a string in `===FILE:name===` format into a FileMap.
//
// A line is treated as a marker only when, once trimmed, it is at least two
// characters long and both starts and ends with `=`; such lines are handed to
// parse_header. Every other line is plain content and is appended verbatim to the
// currently open block. FILE blocks land in `content`, FILECHANGE blocks in
// `content_change`. A block is closed by an END marker, by the next header, or by
// the end of input.
//
// Recoverable problems (END without an open block, content outside any block) are
// appended to `errors` with their 1-based line number. An error returned by
// parse_header aborts the parse and is propagated.
fn (mut cw CodeWalker) filemap_get_from_content(content string) !FileMap {
	mut fm := FileMap{}
	mut current_kind := BlockKind.end
	mut filename := ''
	mut block := []string{}
	mut had_any_block := false
	mut linenr := 0
	for line in content.split_into_lines() {
		linenr += 1
		line_trimmed := line.trim_space()
		// parse_header answers BlockKind.end for anything it does not recognise, so
		// ordinary content (e.g. `x = 5` or `if a == b:`) must never reach it.
		is_marker := line_trimmed.len > 1 && line_trimmed.starts_with('=')
			&& line_trimmed.ends_with('=')
		if !is_marker {
			if filename != '' {
				block << line
			} else if line_trimmed.len > 0 {
				msg := if had_any_block {
					"Content outside of any FILE block: '${line}'"
				} else {
					"Content before first FILE block: '${line}'"
				}
				fm.errors << FMError{
					message:  msg
					linenr:   linenr
					category: 'parse'
				}
			}
			continue
		}
		kind, name := parse_header(line_trimmed)!
		// Any marker closes the block that is currently open, if there is one.
		was_open := filename != ''
		if was_open {
			match current_kind {
				.file { fm.content[filename] = block.join_lines() }
				.filechange { fm.content_change[filename] = block.join_lines() }
				else {}
			}
			filename = ''
			block = []string{}
			current_kind = .end
		}
		match kind {
			.end {
				if !was_open {
					msg := if had_any_block {
						'Unexpected END marker without active block'
					} else {
						'END found before any FILE block'
					}
					fm.errors << FMError{
						message:  msg
						linenr:   linenr
						category: 'parse'
					}
				}
			}
			.file, .filechange {
				filename = name
				current_kind = kind
				had_any_block = true
			}
		}
	}
	// Input may end without a closing END; keep what was collected.
	if filename != '' {
		match current_kind {
			.file { fm.content[filename] = block.join_lines() }
			.filechange { fm.content_change[filename] = block.join_lines() }
			else {}
		}
	}
	return fm
}

View File

@@ -0,0 +1,253 @@
module codewalker
import os
import incubaid.herolib.core.pathlib
// A single FILE block closed by END yields one entry with its lines joined by '\n'.
fn test_parse_basic() {
	mut walker := new()
	input := ['===FILE:file1.txt===', 'line1', 'line2', '===END==='].join('\n')
	parsed := walker.parse(input)!
	assert parsed.content.len == 1
	assert parsed.content['file1.txt'] == 'line1\nline2'
}
// A new FILE header implicitly closes the previous block.
fn test_parse_multiple_files() {
	mut walker := new()
	input := '===FILE:file1.txt===\nline1\n===FILE:file2.txt===\nlineA\nlineB\n===END==='
	parsed := walker.parse(input)!
	expected := {
		'file1.txt': 'line1'
		'file2.txt': 'lineA\nlineB'
	}
	assert parsed.content.len == expected.len
	for name, body in expected {
		assert parsed.content[name] == body
	}
}
// A header immediately followed by END still registers the file, with empty content.
fn test_parse_empty_file_block() {
	mut walker := new()
	parsed := walker.parse('===FILE:empty.txt===\n===END===')!
	assert parsed.content.len == 1
	assert parsed.content['empty.txt'] == ''
}
// An END marker directly followed by a new FILE header starts a fresh block.
// Both headers carry extra spaces inside the fence to exercise tolerant parsing.
// (The second header previously lacked the `FILE:` keyword, which parse_header
// does not recognise as a file header.)
fn test_parse_consecutive_end_and_file() {
	mut cw := new()
	test_content := '===FILE:file1.txt ===\ncontent1\n===END===\n=== FILE: file2.txt ===\ncontent2\n===END==='
	fm := cw.parse(test_content)!
	assert fm.content.len == 2
	assert fm.content['file1.txt'] == 'content1'
	assert fm.content['file2.txt'] == 'content2'
}
// Text ahead of the first header is reported as a parse error but does not stop
// the following block from being parsed. A line made only of `=` acts as END.
fn test_parse_content_before_first_file_block() {
	mut cw := new()
	test_content := 'unexpected content\n===FILE:file1.txt===\ncontent\n====='
	fm := cw.parse(test_content)!
	assert fm.content.len == 1
	assert fm.content['file1.txt'] == 'content'
	// Parse problems are collected on the returned FileMap, not on the CodeWalker.
	assert fm.errors.len > 0
	assert fm.errors[0].message.contains('Content before first FILE block')
	assert fm.errors[0].linenr == 1
}
// Trailing text after the closing END must not leak into the parsed file.
fn test_parse_content_after_end() {
	mut walker := new()
	input := '===FILE:file1.txt===\ncontent\n===END===\nmore unexpected content'
	parsed := walker.parse(input)!
	assert parsed.content.len == 1
	assert parsed.content['file1.txt'] == 'content'
}
// A FILE header with nothing after the colon is the one condition parse_header
// reports as an error, which aborts the whole parse.
fn test_parse_invalid_filename_line() {
	mut cw := new()
	test_content := '===FILE:===\ncontent\n===END==='
	cw.parse(test_content) or {
		assert err.msg().contains('Invalid filename: empty after colon')
		return
	}
	assert false // Should have errored
}
// Input that stops without an END marker still flushes the open block.
fn test_parse_file_ending_without_end() {
	mut walker := new()
	input := ['===FILE:file1.txt===', 'line1', 'line2'].join('\n')
	parsed := walker.parse(input)!
	assert parsed.content.len == 1
	assert parsed.content['file1.txt'] == 'line1\nline2'
}
// An empty string parses to an empty FileMap without raising an error.
fn test_parse_empty_content() {
	mut walker := new()
	parsed := walker.parse('')!
	assert parsed.content.len == 0
}
// A lone END marker has nothing to close. The parser records this in
// FileMap.errors instead of returning an error.
fn test_parse_only_end_at_start() {
	mut cw := new()
	fm := cw.parse('===END===')!
	assert fm.content.len == 0
	assert fm.errors.len == 1
	assert fm.errors[0].message.contains('END found before any FILE block')
	assert fm.errors[0].linenr == 1
}
// FILE and FILECHANGE blocks for the same name are stored in separate maps.
fn test_parse_mixed_file_and_filechange() {
	// new() returns a plain CodeWalker, so there is no Result to unwrap with `!`.
	mut cw2 := new()
	test_content2 := '===FILE:file.txt===\nfull\n===FILECHANGE:file.txt===\npartial\n===END==='
	fm2 := cw2.parse(test_content2)!
	assert fm2.content.len == 1
	assert fm2.content_change.len == 1
	assert fm2.content['file.txt'] == 'full'
	assert fm2.content_change['file.txt'] == 'partial'
}
// An empty block sandwiched between two non-empty ones is kept as an empty file.
fn test_parse_empty_block_between_files() {
	mut walker := new()
	input := '===FILE:file1.txt===\ncontent1\n===FILE:file2.txt===\n===END===\n===FILE:file3.txt===\ncontent3\n===END==='
	parsed := walker.parse(input)!
	expected := {
		'file1.txt': 'content1'
		'file2.txt': ''
		'file3.txt': 'content3'
	}
	assert parsed.content.len == 3
	for name, body in expected {
		assert parsed.content[name] == body
	}
}
// Several consecutive empty blocks are each registered with empty content.
fn test_parse_multiple_empty_blocks() {
	mut walker := new()
	input := '===FILE:file1.txt===\n===END===\n===FILE:file2.txt===\n===END===\n===FILE:file3.txt===\ncontent3\n===END==='
	parsed := walker.parse(input)!
	expected := {
		'file1.txt': ''
		'file2.txt': ''
		'file3.txt': 'content3'
	}
	assert parsed.content.len == 3
	for name, body in expected {
		assert parsed.content[name] == body
	}
}
// END is a reserved marker and never opens a block: a second END right after a
// closed block has nothing to close and is recorded in FileMap.errors.
fn test_parse_filename_end_reserved() {
	mut cw := new()
	test_content := '===FILE:file1.txt===\ncontent1\n===END===\n===END==='
	fm := cw.parse(test_content)!
	assert fm.content.len == 1
	assert 'END' !in fm.content.keys()
	assert fm.errors.any(it.message.contains('Unexpected END marker without active block'))
}
// export() materialises every entry under the destination directory, and write()
// overwrites a file that already exists there.
fn test_filemap_export_and_write() ! {
	// Scratch directory used as the FileMap's nominal source
	mut src_dir := pathlib.get_dir(
		path:   os.join_path(os.temp_dir(), 'cw_test')
		create: true
		empty:  true
	)!
	defer {
		src_dir.delete() or {}
	}
	// In-memory map with one nested and one top-level file
	mut files := FileMap{
		source: src_dir.path
	}
	files.set('a/b.txt', 'hello')
	files.set('c.txt', 'world')
	// Fresh output directory
	mut out_dir := pathlib.get_dir(
		path:   os.join_path(os.temp_dir(), 'cw_out')
		create: true
		empty:  true
	)!
	defer {
		out_dir.delete() or {}
	}
	files.export(out_dir.path)!
	mut nested := pathlib.get_file(path: os.join_path(out_dir.path, 'a/b.txt'))!
	mut top := pathlib.get_file(path: os.join_path(out_dir.path, 'c.txt'))!
	assert nested.read()! == 'hello'
	assert top.read()! == 'world'
	// Change one entry and push it out again via write()
	files.set('a/b.txt', 'hello2')
	files.write(out_dir.path)!
	assert nested.read()! == 'hello2'
}
// content() renders FILE and FILECHANGE headers plus a terminating END marker.
fn test_filemap_content_roundtrip() {
	mut files := FileMap{}
	files.set('x.txt', 'X')
	files.content_change['y.txt'] = 'Y'
	rendered := files.content()
	for marker in ['===FILE:x.txt===', '===FILECHANGE:y.txt===', '===END==='] {
		assert rendered.contains(marker)
	}
}
// A .heroignore inside `sub/` only affects paths below `sub/`; an identically
// named directory under a sibling must stay visible.
fn test_ignore_level_scoped() ! {
	// Temporary tree root
	mut base_dir := pathlib.get_dir(
		path:   os.join_path(os.temp_dir(), 'cw_ign_lvl')
		create: true
		empty:  true
	)!
	defer { base_dir.delete() or {} }
	// sub/ carries its own ignore file
	mut scoped := pathlib.get_dir(path: os.join_path(base_dir.path, 'sub'), create: true)!
	mut ignore_file := pathlib.get_file(path: os.join_path(scoped.path, '.heroignore'), create: true)!
	ignore_file.write('dist/\n')!
	// sub/dist/a.txt -> expected to be ignored
	mut scoped_dist := pathlib.get_dir(path: os.join_path(scoped.path, 'dist'), create: true)!
	mut hidden := pathlib.get_file(path: os.join_path(scoped_dist.path, 'a.txt'), create: true)!
	hidden.write('A')!
	// sub2/dist/b.txt -> outside the scope of sub/.heroignore
	mut sibling := pathlib.get_dir(path: os.join_path(base_dir.path, 'sub2'), create: true)!
	mut sibling_dist := pathlib.get_dir(path: os.join_path(sibling.path, 'dist'), create: true)!
	mut visible := pathlib.get_file(path: os.join_path(sibling_dist.path, 'b.txt'), create: true)!
	visible.write('B')!
	// sub/ok.txt -> ordinary file next to the ignore file
	mut plain := pathlib.get_file(path: os.join_path(scoped.path, 'ok.txt'), create: true)!
	plain.write('OK')!
	mut walker := new()
	mut walked := walker.filemap_get(path: base_dir.path)!
	assert 'sub/dist/a.txt' !in walked.content.keys()
	assert walked.content['sub/ok.txt'] == 'OK'
	assert walked.content['sub2/dist/b.txt'] == 'B'
}
// A directory pattern in the top-level .gitignore applies at any depth below it.
fn test_ignore_level_scoped_gitignore() ! {
	mut base_dir := pathlib.get_dir(
		path:   os.join_path(os.temp_dir(), 'cw_ign_git')
		create: true
		empty:  true
	)!
	defer { base_dir.delete() or {} }
	// Top-level ignore file excluding every logs/ directory
	mut ignore_file := pathlib.get_file(path: os.join_path(base_dir.path, '.gitignore'), create: true)!
	ignore_file.write('logs/\n')!
	// svc/logs/out.txt -> nested below an ignored directory
	mut service := pathlib.get_dir(path: os.join_path(base_dir.path, 'svc'), create: true)!
	mut log_dir := pathlib.get_dir(path: os.join_path(service.path, 'logs'), create: true)!
	mut log_file := pathlib.get_file(path: os.join_path(log_dir.path, 'out.txt'), create: true)!
	log_file.write('ignored')!
	// svc/app.txt -> ordinary file that must be kept
	mut app_file := pathlib.get_file(path: os.join_path(service.path, 'app.txt'), create: true)!
	app_file.write('app')!
	mut walker := new()
	mut walked := walker.filemap_get(path: base_dir.path)!
	assert 'svc/logs/out.txt' !in walked.content.keys()
	assert walked.content['svc/app.txt'] == 'app'
}
// Legacy bare-filename headers (`===file1.txt===`, no `FILE:` keyword) are not
// recognised as file headers: parse_header classifies them as END, so no file is
// produced and the problems are recorded in FileMap.errors.
fn test_parse_filename_end_reserved_legacy() {
	mut cw := new()
	test_content := '===file1.txt===\ncontent1\n===END===\n===END===\ncontent2\n===END==='
	fm := cw.parse(test_content)!
	assert fm.content.len == 0
	assert fm.errors.len > 0
	assert fm.errors[0].message.contains('END found before any FILE block')
}

View File

@@ -0,0 +1,14 @@
module codewalker
// new returns a CodeWalker whose ignore matcher is pre-loaded with the module's
// default ignore patterns.
pub fn new() CodeWalker {
	return CodeWalker{
		ignorematcher: gitignore_matcher_new()
	}
}
// filemap is a one-shot convenience wrapper: it creates a default CodeWalker and
// returns the result of filemap_get for the given args.
pub fn filemap(args FileMapArgs) !FileMap {
	mut walker := new()
	result := walker.filemap_get(args)!
	return result
}

View File

@@ -0,0 +1,79 @@
module codewalker
import incubaid.herolib.core.pathlib
// FileMap holds a set of files keyed by relative path, as produced by walking a
// directory or by parsing `===FILE:name===` formatted text.
pub struct FileMap {
pub mut:
source string // Source path or origin (set to the walked directory; left empty when parsing a string)
content map[string]string // Full file content by path (FILE blocks)
content_change map[string]string // Partial/change content by path (FILECHANGE blocks)
errors []FMError // Parse errors encountered
}
// content renders the map back into the textual block format: one
// `===FILE:path===` section per full file, then one `===FILECHANGE:path===`
// section per change entry, closed by a single `===END===` line.
pub fn (mut fm FileMap) content() string {
	mut lines := []string{}
	for name, body in fm.content {
		lines << ['===FILE:${name}===', body]
	}
	for name, body in fm.content_change {
		lines << ['===FILECHANGE:${name}===', body]
	}
	lines << '===END==='
	return lines.join_lines()
}
// export writes every FILE entry below the destination directory `path`,
// creating files as needed. FILECHANGE entries are not written.
//
// Keys can come from parsed text, so they are validated first: an absolute key
// or one containing a `..` component would land outside `path` and is rejected.
// Validation runs over all keys before anything is written, so a bad entry does
// not leave a partially exported tree.
// Returns an error for an unsafe key or when creating/writing a file fails.
pub fn (mut fm FileMap) export(path string) ! {
	for filepath, _ in fm.content {
		normalized := filepath.replace('\\', '/')
		if normalized.starts_with('/') || '..' in normalized.split('/') {
			return error('Refusing to export "${filepath}": path escapes the destination directory')
		}
	}
	for filepath, filecontent in fm.content {
		dest := '${path}/${filepath}'
		mut filepathtowrite := pathlib.get_file(path: dest, create: true)!
		filepathtowrite.write(filecontent)!
	}
}
// WriteParams is a parameter struct for write-style operations.
// NOTE(review): no function visible in this file takes it yet (write() accepts a
// plain path) - presumably reserved for post-write formatting/testing; confirm.
@[params]
pub struct WriteParams {
path string
v_test bool = true
v_format bool = true
python_test bool
}
// write creates or overwrites every FILE entry below the destination directory.
// It performs exactly the same work as export, so it simply forwards to it.
pub fn (mut fm FileMap) write(path string) ! {
	fm.export(path)!
}
// get returns the full content stored for relpath.
// Returns an error when no FILE entry exists under that key.
pub fn (fm FileMap) get(relpath string) !string {
	if relpath !in fm.content {
		return error('File not found: ${relpath}')
	}
	return fm.content[relpath]
}
// set stores file content by path, replacing any existing FILE entry for relpath.
// Only the `content` map is touched; `content_change` is left as is.
pub fn (mut fm FileMap) set(relpath string, content string) {
fm.content[relpath] = content
}
// delete removes the FILE entry for relpath from the `content` map.
// A FILECHANGE entry with the same key in `content_change` is not removed.
pub fn (mut fm FileMap) delete(relpath string) {
fm.content.delete(relpath)
}
// find lists the keys of `content` that begin with the given prefix, in map order.
// The comparison is a plain string prefix test, so 'src' also matches 'src2/x'.
pub fn (fm FileMap) find(path string) []string {
	return fm.content.keys().filter(it.starts_with(path))
}

154
lib/ai/codewalker/ignore.v Normal file
View File

@@ -0,0 +1,154 @@
module codewalker
// Default ignore patterns based on .gitignore conventions.
// One pattern per line; the text is fed to IgnoreMatcher.add_content by
// gitignore_matcher_new, so every pattern applies globally (empty base).
// Note: is_ignored compares `*.`-prefixed patterns as literal suffixes and does
// not expand `[...]` character classes, so `*.py[cod]` matches nothing by itself;
// `*.pyc` is listed separately below.
const default_gitignore = '
.git/
.svn/
.hg/
.bzr/
node_modules/
__pycache__/
*.py[cod]
*.so
.Python
build/
develop-eggs/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
.env
.venv
venv/
.tox/
.nox/
.coverage
.coveragerc
coverage.xml
*.cover
*.gem
*.pyc
.cache
.pytest_cache/
.mypy_cache/
.hypothesis/
.DS_Store
Thumbs.db
*.tmp
*.temp
*.log
'
// IgnoreRule is a single ignore pattern together with the directory it is scoped to.
struct IgnoreRule {
base string // Directory where ignore file was found, relative to the walk root; '' means the rule applies everywhere
pattern string // Ignore pattern, stored as written in the ignore file (trimmed)
}
// IgnoreMatcher checks if paths should be ignored.
// It is a flat list of rules; is_ignored returns true as soon as any rule matches.
pub struct IgnoreMatcher {
pub mut:
rules []IgnoreRule // appended to by add_content / add_content_with_base
}
// gitignore_matcher_new returns a matcher seeded with the built-in default
// patterns, registered globally (empty base directory).
pub fn gitignore_matcher_new() IgnoreMatcher {
	mut matcher := IgnoreMatcher{}
	matcher.add_content_with_base('', default_gitignore)
	return matcher
}
// add_content adds global (root-scoped) ignore patterns.
// `content` is ignore-file text, one pattern per line; it is registered with an
// empty base so the patterns are tested against every path.
pub fn (mut m IgnoreMatcher) add_content(content string) {
m.add_content_with_base('', content)
}
// add_content_with_base registers the patterns in `content` (one per line) so that
// they only apply to paths below `base_rel`.
// The base is normalised to forward slashes, stripped of leading/trailing '/',
// and lower-cased. Blank lines and lines starting with '#' are skipped.
pub fn (mut m IgnoreMatcher) add_content_with_base(base_rel string, content string) {
	scope := base_rel.replace('\\', '/').trim('/').to_lower()
	patterns := content.split_into_lines().map(it.trim_space()).filter(it.len > 0
		&& !it.starts_with('#'))
	for pattern in patterns {
		m.rules << IgnoreRule{
			base:    scope
			pattern: pattern
		}
	}
}
// is_ignored reports whether relpath matches any registered ignore rule.
//
// relpath is normalised to forward slashes, stripped of a leading '/', and
// compared case-insensitively. A rule with a non-empty base is only tested
// against paths below that base, using the remainder of the path.
//
// Supported pattern forms, checked in this order:
//   - `name/`   directory: matches when `name` is a leading or inner directory part
//   - `*.ext`   suffix: matches when the path ends with everything after the `*`
//               (`*.tar.gz` requires `.tar.gz`, not merely `.gz`)
//   - `a*b`     wildcard: the literal parts must occur in order somewhere in the path
//   - `name`    plain: matches a whole path component (or component sequence),
//               never a substring; a leading `/` anchors it to the rule's base
// `!` negation and `[...]` character classes are not interpreted.
pub fn (m IgnoreMatcher) is_ignored(relpath string) bool {
	path_low := relpath.replace('\\', '/').trim_left('/').to_lower()
	for rule in m.rules {
		pat := rule.pattern.replace('\\', '/').trim_space().to_lower()
		if pat == '' {
			continue
		}
		// Scope pattern to base directory: the rule only sees what lies below its base.
		mut sub := path_low
		if rule.base != '' {
			if !sub.starts_with(rule.base + '/') {
				continue
			}
			sub = sub[(rule.base.len + 1)..]
		}
		// Directory pattern
		if pat.ends_with('/') {
			dirpat := pat.trim('/')
			// A pattern made only of slashes names no directory.
			if dirpat == '' {
				continue
			}
			if sub == dirpat || sub.starts_with(dirpat + '/') || sub.contains('/' + dirpat + '/') {
				return true
			}
			continue
		}
		// Suffix pattern: compare against the full text after the `*` so that
		// multi-dot patterns such as `*.min.js` do not swallow every `.js` file.
		if pat.starts_with('*.') && !pat[1..].contains('*') {
			if sub.ends_with(pat[1..]) {
				return true
			}
			continue
		}
		// Wildcard matching: every literal part must appear, in order.
		if pat.contains('*') {
			mut idx := 0
			mut ok := true
			for part in pat.split('*') {
				if part == '' {
					continue
				}
				pos := sub.index_after(part, idx) or { -1 }
				if pos == -1 {
					ok = false
					break
				}
				idx = pos + part.len
			}
			if ok {
				return true
			}
			continue
		}
		// Plain pattern: match on component boundaries so `.env` does not hit
		// `.envoy.yaml` or `my.env.bak`.
		anchored := pat.starts_with('/')
		name := pat.trim_left('/')
		if name == '' {
			continue
		}
		if sub == name || sub.starts_with(name + '/') {
			return true
		}
		if !anchored && (sub.ends_with('/' + name) || sub.contains('/' + name + '/')) {
			return true
		}
	}
	return false
}

16
lib/ai/codewalker/model.v Normal file
View File

@@ -0,0 +1,16 @@
module codewalker
// BlockKind defines the type of block in parsed content.
pub enum BlockKind {
file // `===FILE:name===` header: full file content follows
filechange // `===FILECHANGE:name===` header: partial/change content follows
end // `===END===` marker; parse_header also returns this for lines it does not recognise as a header
}
// FMError describes one problem recorded in FileMap.errors while parsing.
pub struct FMError {
pub:
message string // human-readable description
linenr int // 1-based line number in the parsed input
category string // error class; the parser in this module sets 'parse'
filename string // NOTE(review): not set by the parser code visible here - presumably the affected file; confirm
}