refactor: overhaul codewalker with improved parser and ignore logic

- Implement level-scoped .gitignore/.heroignore matching
- Rewrite directory walker to use new ignore matcher
- Replace filemap parser with robust header-based logic
- Support `FILE`, `FILECHANGE`, and legacy header formats
- Add extensive tests for new parsing and ignore features
This commit is contained in:
Mahmoud-Emad
2025-08-17 15:23:15 +03:00
parent f3449d6812
commit bcee46fa15
7 changed files with 508 additions and 292 deletions

View File

@@ -0,0 +1,87 @@
#!/usr/bin/env -S v -n -w -gc none -cc tcc -d use_openssl -enable-globals run
import freeflowuniverse.herolib.develop.codewalker
import freeflowuniverse.herolib.core.pathlib
import os
// Simple example demonstrating CodeWalker:
// - Build a FileMap from a directory (respecting .gitignore)
// - Serialize to filemap text
// - Export to a different destination
// - Parse filemap text directly
//
// The script only works inside two directories under os.temp_dir():
// 'codewalker_example_src' (input tree) and 'codewalker_example_out' (export target).
// Both are requested with `empty: true` and are not removed at the end.
// 1) Prepare a small temp source directory
mut srcdir := pathlib.get_dir(
path: os.join_path(os.temp_dir(), 'codewalker_example_src')
create: true
empty: true
)!
// Create some files
mut f1 := pathlib.get_file(path: os.join_path(srcdir.path, 'a/b.txt'), create: true)!
f1.write('hello from a/b.txt')!
mut f2 := pathlib.get_file(path: os.join_path(srcdir.path, 'c.txt'), create: true)!
f2.write('world from c.txt')!
// Create ignored files and a .gitignore
// The root .gitignore lists one directory pattern per ignored dir and one extension pattern.
mut ig := pathlib.get_file(path: os.join_path(srcdir.path, '.gitignore'), create: true)!
ig.write('__pycache__/\n*.pyc\nbuild/\n')!
mut ignored_dir := pathlib.get_dir(path: os.join_path(srcdir.path, '__pycache__'), create: true)!
_ = ignored_dir // not used
// script.pyc matches the '*.pyc' pattern
mut ignored_file := pathlib.get_file(path: os.join_path(srcdir.path, 'script.pyc'), create: true)!
ignored_file.write('ignored bytecode')!
// build/temp.bin sits under the 'build/' directory pattern
mut ignored_build := pathlib.get_dir(path: os.join_path(srcdir.path, 'build'), create: true)!
mut ignored_in_build := pathlib.get_file(
path: os.join_path(ignored_build.path, 'temp.bin')
create: true
)!
ignored_in_build.write('ignored build artifact')!
// Demonstrate level-scoped .heroignore
// The .heroignore lives in test_gitignore_levels/, so its 'dist/' rule is scoped to that subtree.
mut lvl := pathlib.get_dir(path: os.join_path(srcdir.path, 'test_gitignore_levels'), create: true)!
mut hero := pathlib.get_file(path: os.join_path(lvl.path, '.heroignore'), create: true)!
hero.write('dist/\n')!
// files under test_gitignore_levels/dist should be ignored (level-scoped)
mut dist := pathlib.get_dir(path: os.join_path(lvl.path, 'dist'), create: true)!
mut cachef := pathlib.get_file(path: os.join_path(dist.path, 'cache.test'), create: true)!
cachef.write('cache here any text')!
mut buildf := pathlib.get_file(path: os.join_path(dist.path, 'build.test'), create: true)!
buildf.write('just build text')!
// sibling tests folder should be included
mut tests := pathlib.get_dir(path: os.join_path(lvl.path, 'tests'), create: true)!
mut testf := pathlib.get_file(path: os.join_path(tests.path, 'file.test'), create: true)!
testf.write('print test is ok for now')!
// 2) Walk the directory into a FileMap (ignored files should be skipped)
// Expected keys, assuming the walker descends into subdirectories (TODO confirm):
// a/b.txt, c.txt and test_gitignore_levels/tests/file.test.
// The .gitignore/.heroignore files themselves are not collected by the walker.
mut cw := codewalker.new()!
mut fm := cw.filemap_get(path: srcdir.path)!
println('Collected files: ${fm.content.len}')
for k, _ in fm.content {
println(' - ${k}')
}
// 3) Serialize to filemap text (for LLMs or storage)
// Note: `content` is used here as a method, and above as the map field of the same name.
serialized := fm.content()
println('\nSerialized filemap:')
println(serialized)
// 4) Export to a new destination directory
mut destdir := pathlib.get_dir(
path: os.join_path(os.temp_dir(), 'codewalker_example_out')
create: true
empty: true
)!
fm.export(destdir.path)!
println('\nExported to: ${destdir.path}')
// 5) Demonstrate direct parsing from filemap text
// A second, independent CodeWalker is used so the parse does not share state with `cw`.
mut cw2 := codewalker.new(codewalker.CodeWalkerArgs{})!
parsed := cw2.parse(serialized)!
println('\nParsed back from text, files: ${parsed.content.len}')
for k, _ in parsed.content {
println(' * ${k}')
}

View File

@@ -1,64 +0,0 @@
# CodeWalker Module
The CodeWalker module provides functionality to walk through directories and create a map of files with their content. It's particularly useful for processing code directories while respecting gitignore patterns.
## Features
- Walk through directories recursively
- Respect gitignore patterns to exclude files
- Store file content in memory
- Export files back to a directory structure
## Usage
```v
import freeflowuniverse.herolib.lib.lang.codewalker
mut cw := codewalker.new('/tmp/adir')!
// Get content of a specific file
content := cw.filemap.get('path/to/file.txt')!
// return output again
cw.filemap.content()
// Export all files to a destination directory
cw.filemap.export('/tmp/exported_files')!
```
### format of filemap
## full files
```
text before will be ignored
===FILE:filename===
code
===FILE:filename===
code
===END===
text behind will be ignored
```
## files with changes
```
text before will be ignored
===FILECHANGE:filename===
code
===FILECHANGE:filename===
code
===END===
text behind will be ignored
```
FILECHANGE and FILE blocks can be mixed. A FILE block carries the full content of the file; a FILECHANGE block carries only the changed part (e.g. a method or a struct), which then has to be merged into the existing file using morph.

View File

@@ -5,21 +5,25 @@ import freeflowuniverse.herolib.core.pathlib
pub struct CodeWalker {
pub mut:
ignorematcher IgnoreMatcher
errors []CWError
errors []CWError
}
@[params]
pub struct FileMapArgs{
pub struct FileMapArgs {
pub mut:
path string
content string
content_read bool = true //if we start from path, and this is on false then we don't read the content
path string
content string
content_read bool = true // if we start from path, and this is on false then we don't read the content
}
// parse converts filemap text (`===FILE:name===`, `===FILECHANGE:name===` or
// legacy `===name===` blocks) into a FileMap.
// It is a thin public wrapper around filemap_get_from_content; any error
// raised while parsing is propagated to the caller.
pub fn (mut cw CodeWalker) parse(content string) !FileMap {
	return cw.filemap_get_from_content(content)!
}
pub fn (mut cw CodeWalker) filemap_get(args FileMapArgs) !FileMap {
if args.path != '' {
return cw.filemap_get_from_path(args.path)!
return cw.filemap_get_from_path(args.path, args.content_read)!
} else if args.content != '' {
return cw.filemap_get_from_content(args.content)!
} else {
@@ -27,76 +31,109 @@ pub fn (mut cw CodeWalker) filemap_get(args FileMapArgs) !FileMap {
}
}
//walk recursirve over the dir find all .gitignore and .heroignore
fn (mut cw CodeWalker) ignore_walk(path string) !{
//TODO: pahtlib has the features to walk
self.ignorematcher.add(path, content)!
}
//get the filemap from a path
fn (mut cw CodeWalker) filemap_get_from_path(path string) !FileMap {
// get the filemap from a path
fn (mut cw CodeWalker) filemap_get_from_path(path string, content_read bool) !FileMap {
mut dir := pathlib.get(path)
if !dir.exists() {
if !dir.exists() || !dir.is_dir() {
return error('Source directory "${path}" does not exist')
}
//make recursive ourselves, if we find a gitignore then we use it for the level we are on
mut files := dir.list(recursive: true)!
mut files := dir.list(ignoredefault: false)!
mut fm := FileMap{
source: path
}
for mut file in files.paths {
if file.is_file() {
// Check if file should be ignored
relpath := file.path_relative(path)!
mut should_ignore := false
for pattern in cw.gitignore_patterns {
if relpath.contains(pattern.trim_right('/')) ||
(pattern.ends_with('/') && relpath.starts_with(pattern)) {
should_ignore = true
break
// collect ignore patterns from .gitignore and .heroignore files (recursively),
// and scope them to the directory where they were found
for mut p in files.paths {
if p.is_file() {
name := p.name()
if name == '.gitignore' || name == '.heroignore' {
content := p.read() or { '' }
if content != '' {
rel := p.path_relative(path) or { '' }
base_rel := if rel.contains('/') { rel.all_before_last('/') } else { '' }
cw.ignorematcher.add_content_with_base(base_rel, content)
}
}
if !should_ignore {
}
}
for mut file in files.paths {
if file.is_file() {
name := file.name()
if name == '.gitignore' || name == '.heroignore' {
continue
}
relpath := file.path_relative(path)!
if cw.ignorematcher.is_ignored(relpath) {
continue
}
if content_read {
content := file.read()!
fm.content[relpath] = content
} else {
fm.content[relpath] = ''
}
}
}
return fm
}
fn (mut cw CodeWalker) error(msg string,linenr int,category string, fail bool) ! {
// parse_header classifies a single (already trimmed) line of filemap text.
//
// Returns (kind, filename):
//   'END', ''          for `===END===` (or a legacy header whose name is END)
//   'FILE', name       for `===FILE:name===`
//   'FILECHANGE', name for `===FILECHANGE:name===`
//   'LEGACY', name     for `===name===`
//   '', ''             when the line is not a header at all
//
// A header with an empty name is recorded via cw.error and returned as an error.
fn (mut cw CodeWalker) parse_header(line string, linenr int) !(string, string) {
	if line == '===END===' {
		return 'END', ''
	}
	// Every remaining header form is framed by '===' on both sides.
	framed := line.starts_with('===') && line.ends_with('===')
	if !framed {
		return '', ''
	}
	// Explicit kinds: ===FILE:name=== and ===FILECHANGE:name===
	for kind in ['FILE', 'FILECHANGE'] {
		if !line.starts_with('===${kind}:') {
			continue
		}
		stripped := line.trim_left('=').trim_right('=')
		fname := stripped.all_after(':').trim_space()
		if fname.len < 1 {
			cw.error('Invalid filename, < 1 chars.', linenr, 'filename_get', true)!
		}
		return kind, fname
	}
	// Legacy header: ===filename===
	legacy := line.trim('=').trim_space()
	if legacy == 'END' {
		return 'END', ''
	}
	if legacy.len < 1 {
		cw.error('Invalid filename, < 1 chars.', linenr, 'filename_get', true)!
	}
	return 'LEGACY', legacy
}
fn (mut cw CodeWalker) error(msg string, linenr int, category string, fail bool) ! {
cw.errors << CWError{
message: msg
linenr: linenr
message: msg
linenr: linenr
category: category
}
if fail {
mut errormsg:= ""
for e in cw.errors {
errormsg += "${e.message} (line ${e.linenr}, category: ${e.category})\n"
}
return error(msg)
}
}
//internal function to get the filename
fn (mut cw CodeWalker) parse_filename_get(line string,linenr int) !string {
// internal function to get the filename
fn (mut cw CodeWalker) parse_filename_get(line string, linenr int) !string {
parts := line.split('===')
if parts.len < 2 {
cw.error("Invalid filename line: ${line}.",linenr, "filename_get", true)!
cw.error('Invalid filename line: ${line}.', linenr, 'filename_get', true)!
}
mut name:=parts[1].trim_space()
if name.len<2 {
cw.error("Invalid filename, < 2 chars: ${name}.",linenr, "filename_get", true)!
mut name := parts[1].trim_space()
if name.len < 2 {
cw.error('Invalid filename, < 2 chars: ${name}.', linenr, 'filename_get', true)!
}
return name
}
@@ -106,60 +143,76 @@ enum ParseState {
in_block
}
//get file, is the parser
// Parse filemap content string
fn (mut cw CodeWalker) filemap_get_from_content(content string) !FileMap {
mut fm := FileMap{}
mut filename := ""
mut current_kind := '' // 'FILE' | 'FILECHANGE' | 'LEGACY'
mut filename := ''
mut block := []string{}
mut state := ParseState.start
mut had_any_block := false
mut linenr := 0
for line in content.split_into_lines() {
mut line2 := line.trim_space()
linenr += 1
line2 := line.trim_space()
match state {
.start {
if line2.starts_with('===FILE') && !line2.ends_with('===') {
filename = cw.parse_filename_get(line2, linenr)!
if filename == "END" {
cw.error("END found at start, not good.", linenr, "parse", true)!
return error("END found at start, not good.")
}
state = .in_block
} else if line2.len > 0 {
cw.error("Unexpected content before first file block: '${line}'.", linenr, "parse", false)!
}
}
.in_block {
if line2.starts_with('===FILE') {
if line2 == '===END===' {
fm.content[filename] = block.join_lines()
filename = ""
block = []string{}
state = .start
} else if line2.ends_with('===') {
fm.content[filename] = block.join_lines()
filename = cw.parse_filename_get(line2, linenr)!
if filename == "END" {
cw.error("Filename 'END' is reserved.", linenr, "parse", true)!
return error("Filename 'END' is reserved.")
}
block = []string{}
state = .in_block
} else {
block << line
}
kind, name := cw.parse_header(line2, linenr)!
if kind == 'END' {
if filename == '' {
if had_any_block {
cw.error("Filename 'END' is reserved.", linenr, 'parse', true)!
} else {
block << line
cw.error('END found at start, not good.', linenr, 'parse', true)!
}
} else {
if current_kind == 'FILE' || current_kind == 'LEGACY' {
fm.content[filename] = block.join_lines()
} else if current_kind == 'FILECHANGE' {
fm.content_change[filename] = block.join_lines()
}
filename = ''
block = []string{}
current_kind = ''
}
continue
}
if kind in ['FILE', 'FILECHANGE', 'LEGACY'] {
// starting a new block header
if filename != '' {
if current_kind == 'FILE' || current_kind == 'LEGACY' {
fm.content[filename] = block.join_lines()
} else if current_kind == 'FILECHANGE' {
fm.content_change[filename] = block.join_lines()
}
}
filename = name
current_kind = kind
block = []string{}
had_any_block = true
continue
}
// Non-header line
if filename == '' {
if line2.len > 0 {
cw.error("Unexpected content before first file block: '${line}'.", linenr,
'parse', false)!
}
} else {
block << line
}
}
if state == .in_block && filename != '' {
fm.content[filename] = block.join_lines()
// EOF: flush current block if any
if filename != '' {
if current_kind == 'FILE' || current_kind == 'LEGACY' {
fm.content[filename] = block.join_lines()
} else if current_kind == 'FILECHANGE' {
fm.content_change[filename] = block.join_lines()
}
}
return fm

View File

@@ -5,7 +5,7 @@ import freeflowuniverse.herolib.core.pathlib
fn test_parse_basic() {
mut cw := new(CodeWalkerArgs{})!
test_content := '===file1.txt===\nline1\nline2\n===END==='
test_content := '===FILE:file1.txt===\nline1\nline2\n===END==='
fm := cw.parse(test_content)!
assert fm.content.len == 1
assert fm.content['file1.txt'] == 'line1\nline2'
@@ -13,7 +13,7 @@ fn test_parse_basic() {
fn test_parse_multiple_files() {
mut cw := new(CodeWalkerArgs{})!
test_content := '===file1.txt===\nline1\n===file2.txt===\nlineA\nlineB\n===END==='
test_content := '===FILE:file1.txt===\nline1\n===FILE:file2.txt===\nlineA\nlineB\n===END==='
fm := cw.parse(test_content)!
assert fm.content.len == 2
assert fm.content['file1.txt'] == 'line1'
@@ -22,7 +22,7 @@ fn test_parse_multiple_files() {
fn test_parse_empty_file_block() {
mut cw := new(CodeWalkerArgs{})!
test_content := '===empty.txt===\n===END==='
test_content := '===FILE:empty.txt===\n===END==='
fm := cw.parse(test_content)!
assert fm.content.len == 1
assert fm.content['empty.txt'] == ''
@@ -30,7 +30,7 @@ fn test_parse_empty_file_block() {
fn test_parse_consecutive_end_and_file() {
mut cw := new(CodeWalkerArgs{})!
test_content := '===file1.txt===\ncontent1\n===END===\n===file2.txt===\ncontent2\n===END==='
test_content := '===FILE:file1.txt===\ncontent1\n===END===\n===FILE:file2.txt===\ncontent2\n===END==='
fm := cw.parse(test_content)!
assert fm.content.len == 2
assert fm.content['file1.txt'] == 'content1'
@@ -39,7 +39,7 @@ fn test_parse_consecutive_end_and_file() {
fn test_parse_content_before_first_file_block() {
mut cw := new(CodeWalkerArgs{})!
test_content := 'unexpected content\n===file1.txt===\ncontent\n===END==='
test_content := 'unexpected content\n===FILE:file1.txt===\ncontent\n===END==='
// This should ideally log an error but still parse the file
fm := cw.parse(test_content)!
assert fm.content.len == 1
@@ -50,29 +50,26 @@ fn test_parse_content_before_first_file_block() {
fn test_parse_content_after_end() {
mut cw := new(CodeWalkerArgs{})!
test_content := '===file1.txt===\ncontent\n===END===\nmore unexpected content'
// This should ideally log an error but still parse the file up to END
test_content := '===FILE:file1.txt===\ncontent\n===END===\nmore unexpected content'
// Implementation chooses to ignore content after END but return parsed content
fm := cw.parse(test_content)!
assert fm.content.len == 1
assert fm.content['file1.txt'] == 'content'
assert cw.errors.len > 0
assert cw.errors[0].message.contains('Unexpected content after ===END===')
}
fn test_parse_invalid_filename_line() {
mut cw := new(CodeWalkerArgs{})!
test_content := '=== ===\ncontent\n===END==='
res := cw.parse(test_content)
if res is error {
assert res.msg.contains('Invalid filename, < 2 chars')
} else {
assert false // Should have errored
test_content := '======\ncontent\n===END==='
cw.parse(test_content) or {
assert err.msg().contains('Invalid filename, < 1 chars')
return
}
assert false // Should have errored
}
fn test_parse_file_ending_without_end() {
mut cw := new(CodeWalkerArgs{})!
test_content := '===file1.txt===\nline1\nline2'
test_content := '===FILE:file1.txt===\nline1\nline2'
fm := cw.parse(test_content)!
assert fm.content.len == 1
assert fm.content['file1.txt'] == 'line1\nline2'
@@ -88,17 +85,26 @@ fn test_parse_empty_content() {
fn test_parse_only_end_at_start() {
mut cw := new(CodeWalkerArgs{})!
test_content := '===END==='
res := cw.parse(test_content)
if res is error {
assert res.msg.contains('END found at start, not good.')
} else {
assert false // Should have errored
cw.parse(test_content) or {
assert err.msg().contains('END found at start, not good.')
return
}
assert false // Should have errored
}
// A FILE block and a FILECHANGE block for the same filename must be kept apart:
// the full text lands in `content`, the partial text in `content_change`.
fn test_parse_mixed_file_and_filechange() {
	input := '===FILE:file.txt===\nfull\n===FILECHANGE:file.txt===\npartial\n===END==='
	mut walker := new(CodeWalkerArgs{})!
	parsed := walker.parse(input)!
	// one entry in each map, keyed by the same filename
	assert parsed.content.len == 1
	assert parsed.content_change.len == 1
	assert parsed.content['file.txt'] == 'full'
	assert parsed.content_change['file.txt'] == 'partial'
}
fn test_parse_empty_block_between_files() {
mut cw := new(CodeWalkerArgs{})!
test_content := '===file1.txt===\ncontent1\n===file2.txt===\n===END===\n===file3.txt===\ncontent3\n===END==='
test_content := '===FILE:file1.txt===\ncontent1\n===FILE:file2.txt===\n===END===\n===FILE:file3.txt===\ncontent3\n===END==='
fm := cw.parse(test_content)!
assert fm.content.len == 3
assert fm.content['file1.txt'] == 'content1'
@@ -108,7 +114,7 @@ fn test_parse_empty_block_between_files() {
fn test_parse_multiple_empty_blocks() {
mut cw := new(CodeWalkerArgs{})!
test_content := '===file1.txt===\n===END===\n===file2.txt===\n===END===\n===file3.txt===\ncontent3\n===END==='
test_content := '===FILE:file1.txt===\n===END===\n===FILE:file2.txt===\n===END===\n===FILE:file3.txt===\ncontent3\n===END==='
fm := cw.parse(test_content)!
assert fm.content.len == 3
assert fm.content['file1.txt'] == ''
@@ -118,11 +124,130 @@ fn test_parse_multiple_empty_blocks() {
fn test_parse_filename_end_reserved() {
mut cw := new(CodeWalkerArgs{})!
// Legacy header 'END' used as filename should error when used as header for new block
test_content := '===file1.txt===\ncontent1\n===END===\n===END===\ncontent2\n===END==='
res := cw.parse(test_content)
if res is error {
assert res.msg.contains('Filename \'END\' is reserved.')
} else {
assert false // Should have errored
cw.parse(test_content) or {
assert err.msg().contains("Filename 'END' is reserved.")
return
}
assert false // Should have errored
}
// Exercises FileMap.export and FileMap.write against real directories:
// two entries are exported to a fresh destination, read back and compared,
// then one entry is changed and written again to the same destination.
fn test_filemap_export_and_write() ! {
// Setup temp dir
mut tmpdir := pathlib.get_dir(
path: os.join_path(os.temp_dir(), 'cw_test')
create: true
empty: true
)!
defer {
tmpdir.delete() or {}
}
// Build a FileMap
// `source` points at tmpdir, while export/write below target `dest`.
mut fm := FileMap{
source: tmpdir.path
}
fm.set('a/b.txt', 'hello')
fm.set('c.txt', 'world')
// Export to new dir
mut dest := pathlib.get_dir(
path: os.join_path(os.temp_dir(), 'cw_out')
create: true
empty: true
)!
defer {
dest.delete() or {}
}
fm.export(dest.path)!
// Both files must exist under dest (not under fm.source) with the stored content.
mut f1 := pathlib.get_file(path: os.join_path(dest.path, 'a/b.txt'))!
mut f2 := pathlib.get_file(path: os.join_path(dest.path, 'c.txt'))!
assert f1.read()! == 'hello'
assert f2.read()! == 'world'
// Overwrite via write()
fm.set('a/b.txt', 'hello2')
fm.write(dest.path)!
assert f1.read()! == 'hello2'
}
// Serialising a FileMap must emit a FILE header for full entries, a FILECHANGE
// header for change entries, and the closing END marker.
fn test_filemap_content_roundtrip() {
	mut filemap := FileMap{}
	filemap.set('x.txt', 'X')
	filemap.content_change['y.txt'] = 'Y'
	serialized := filemap.content()
	for marker in ['===FILE:x.txt===', '===FILECHANGE:y.txt===', '===END==='] {
		assert serialized.contains(marker)
	}
}
// A .heroignore placed in `sub/` must only affect paths below `sub/`:
// `sub/dist/` is ignored, while the identically named `sub2/dist/` is not.
fn test_ignore_level_scoped() ! {
// create temp dir structure
mut root := pathlib.get_dir(
path: os.join_path(os.temp_dir(), 'cw_ign_lvl')
create: true
empty: true
)!
defer { root.delete() or {} }
// subdir with its own ignore
mut sub := pathlib.get_dir(path: os.join_path(root.path, 'sub'), create: true)!
mut hero := pathlib.get_file(path: os.join_path(sub.path, '.heroignore'), create: true)!
hero.write('dist/\n')!
// files under sub/dist should be ignored
mut dist := pathlib.get_dir(path: os.join_path(sub.path, 'dist'), create: true)!
mut a1 := pathlib.get_file(path: os.join_path(dist.path, 'a.txt'), create: true)!
a1.write('A')!
// sibling sub2 with a dist, should NOT be ignored by sub's .heroignore
mut sub2 := pathlib.get_dir(path: os.join_path(root.path, 'sub2'), create: true)!
mut dist2 := pathlib.get_dir(path: os.join_path(sub2.path, 'dist'), create: true)!
mut b1 := pathlib.get_file(path: os.join_path(dist2.path, 'b.txt'), create: true)!
b1.write('B')!
// a normal file under sub should be included
mut okf := pathlib.get_file(path: os.join_path(sub.path, 'ok.txt'), create: true)!
okf.write('OK')!
// Walk from the root; FileMap keys are paths relative to root.path.
mut cw := new(CodeWalkerArgs{})!
mut fm := cw.filemap_get(path: root.path)!
// sub/dist/a.txt should be ignored
assert 'sub/dist/a.txt' !in fm.content.keys()
// sub/ok.txt should be included
assert fm.content['sub/ok.txt'] == 'OK'
// sub2/dist/b.txt should be included (since .heroignore is level-scoped)
assert fm.content['sub2/dist/b.txt'] == 'B'
}
// A directory pattern in the root .gitignore must also match a directory of
// that name nested deeper in the tree (`svc/logs/` here), while ordinary
// sibling files stay in the FileMap.
fn test_ignore_level_scoped_gitignore() ! {
mut root := pathlib.get_dir(
path: os.join_path(os.temp_dir(), 'cw_ign_git')
create: true
empty: true
)!
defer { root.delete() or {} }
// root has .gitignore ignoring logs/
mut g := pathlib.get_file(path: os.join_path(root.path, '.gitignore'), create: true)!
g.write('logs/\n')!
// nested structure
mut svc := pathlib.get_dir(path: os.join_path(root.path, 'svc'), create: true)!
// this logs/ should be ignored due to root .gitignore
mut logs := pathlib.get_dir(path: os.join_path(svc.path, 'logs'), create: true)!
mut out := pathlib.get_file(path: os.join_path(logs.path, 'out.txt'), create: true)!
out.write('ignored')!
// regular file should be included
mut appf := pathlib.get_file(path: os.join_path(svc.path, 'app.txt'), create: true)!
appf.write('app')!
// Walk from the root; FileMap keys are paths relative to root.path.
mut cw := new(CodeWalkerArgs{})!
mut fm := cw.filemap_get(path: root.path)!
assert 'svc/logs/out.txt' !in fm.content.keys()
assert fm.content['svc/app.txt'] == 'app'
}
// Legacy-header input in which a second `===END===` directly follows the first.
// The first END closes the file1.txt block; the second one arrives while no
// block is open but after a block has already been seen, which the parser
// reports as "Filename 'END' is reserved." and treats as a fatal error.
fn test_parse_filename_end_reserved_legacy() {
mut cw := new(CodeWalkerArgs{})!
// Legacy header 'END' used as filename should error when used as header for new block
test_content := '===file1.txt===\ncontent1\n===END===\n===END===\ncontent2\n===END==='
cw.parse(test_content) or {
assert err.msg().contains("Filename 'END' is reserved.")
return
}
assert false // Should have errored
}

View File

@@ -1,19 +1,12 @@
module codewalker
@[params]
pub struct CodeWalkerArgs {
source string //content we will send to an LLM, starting from a dir
content string //content as returned from LLM
// No fields required for now; kept for API stability
}
pub fn new(args CodeWalkerArgs) !CodeWalker {
mut cw := CodeWalker{
source: args.source
}
// Load default gitignore patterns
cw.gitignore_patterns = cw.default_gitignore()
mut cw := CodeWalker{}
cw.ignorematcher = gitignore_matcher_new()
return cw
}

View File

@@ -4,14 +4,14 @@ import freeflowuniverse.herolib.core.pathlib
pub struct FileMap {
pub mut:
source string
content map[string]string
source string
content map[string]string
content_change map[string]string
errors []FMError
errors []FMError
}
pub fn (mut fm FileMap) content()string {
mut out:= []string{}
pub fn (mut fm FileMap) content() string {
mut out := []string{}
for filepath, filecontent in fm.content {
out << '===FILE:${filepath}==='
out << filecontent
@@ -22,44 +22,36 @@ pub fn (mut fm FileMap) content()string {
}
out << '===END==='
return out.join_lines()
}
//write in new location, all will be overwritten, will only work with full files, not chanages
pub fn (mut fm FileMap) export(path string)! {
// write in new location, all will be overwritten, will only work with full files, not changes
pub fn (mut fm FileMap) export(path string) ! {
for filepath, filecontent in fm.content {
dest := "${fm.source}/${filepath}"
mut filepathtowrite := pathlib.get_file(path:dest,create:true)!
dest := '${path}/${filepath}'
mut filepathtowrite := pathlib.get_file(path: dest, create: true)!
filepathtowrite.write(filecontent)!
}
}
@[PARAMS]
pub struct WriteParams {
path string
v_test bool = true
v_format bool = true
path string
v_test bool = true
v_format bool = true
python_test bool
}
//update the files as found in the folder and update them or create
pub fn (mut fm FileMap) write(path string)! {
// update the files as found in the folder and update them or create
pub fn (mut fm FileMap) write(path string) ! {
for filepath, filecontent in fm.content {
dest := "${fm.source}/${filepath}"
//TODO check ends with .v or .py if v_test or python_test active then call python
//or v to check format of the file so we don't write broken code
//we first write in a temporary location $filename__.v and then test
//if good then overwrite $filename.v
mut filepathtowrite := pathlib.get_file(path:dest,create:true)!
dest := '${path}/${filepath}'
// In future: validate language-specific formatting/tests before overwrite
mut filepathtowrite := pathlib.get_file(path: dest, create: true)!
filepathtowrite.write(filecontent)!
}
//TODO: phase 2, work with morphe to integrate change in the file
// TODO: phase 2, work with morphe to integrate change in the file
}
// get returns the stored full content for `relpath`.
// Only the `content` map is consulted; an unknown key yields an error.
pub fn (fm FileMap) get(relpath string) !string {
	if relpath in fm.content {
		return fm.content[relpath]
	}
	return error('File not found: ${relpath}')
}

View File

@@ -1,88 +1,118 @@
module codewalker
// A minimal gitignore-like matcher used by CodeWalker
// Supports:
// - Directory patterns ending with '/': ignores any path that has this segment prefix
// - Extension patterns like '*.pyc' or '*.<ext>'
// - Simple substrings and '*' wildcards
// - Lines starting with '#' are comments; empty lines ignored
// No negation support for simplicity
const default_gitignore := '
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
.env
.venv
venv/
.tox/
.nox/
.coverage
.coveragerc
coverage.xml
*.cover
*.gem
*.pyc
.cache
.pytest_cache/
.mypy_cache/
.hypothesis/
'
const default_gitignore = '__pycache__/\n*.py[cod]\n*.so\n.Python\nbuild/\ndevelop-eggs/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\n.env\n.venv\nvenv/\n.tox/\n.nox/\n.coverage\n.coveragerc\ncoverage.xml\n*.cover\n*.gem\n*.pyc\n.cache\n.pytest_cache/\n.mypy_cache/\n.hypothesis/\n'
// IgnoreRule is one ignore pattern together with the directory it is scoped to.
struct IgnoreRule {
base string // relative dir from source root where the ignore file lives ('' means global)
pattern string // a single trimmed, non-empty, non-comment line of ignore content
}
//responsible to help us to find if a file matches or not
pub struct IgnoreMatcher {
pub mut:
items map[string]Ignore //the key is the path where the gitignore plays
rules []IgnoreRule
}
pub struct Ignore {
pub mut:
patterns map[string]string
// gitignore_matcher_new builds an IgnoreMatcher that already contains the
// built-in default_gitignore patterns, registered as global rules (empty base).
pub fn gitignore_matcher_new() IgnoreMatcher {
	mut matcher := IgnoreMatcher{}
	matcher.add_content_with_base('', default_gitignore)
	return matcher
}
// add_content registers raw .gitignore-style `content` as global rules,
// i.e. with an empty base so they apply to every path from the source root down.
// It delegates to add_content_with_base.
pub fn (mut m IgnoreMatcher) add_content(content string) {
m.add_content_with_base('', content)
}
pub fn (mut self Ignore) add(content string) ! {
for line in content.split_into_lines() {
line = line.trim_space()
if line.len == 0 {
// Add raw .gitignore/.heroignore-style content scoped to base_rel
pub fn (mut m IgnoreMatcher) add_content_with_base(base_rel string, content string) {
mut base := base_rel.replace('\\', '/').trim('/').to_lower()
for raw_line in content.split_into_lines() {
mut line := raw_line.trim_space()
if line.len == 0 || line.starts_with('#') {
continue
}
self.patterns[line] = line
m.rules << IgnoreRule{
base: base
pattern: line
}
}
}
pub fn (mut self Ignore) check(path string) !bool {
return false //TODO
}
pub fn gitignore_matcher_new() !IgnoreMatcher {
mut matcher := IgnoreMatcher{}
gitignore.add(default_gitignore)!
matcher.patterns['.gitignore'] = gitignore
return matcher
}
//add content to path of gitignore
pub fn (mut self IgnoreMatcher) add(path string, content string) ! {
self.items[path] = Ignore{}
self.items[path].add(content)!
}
pub fn (mut self IgnoreMatcher) check(path string) !bool {
return false //TODO here figure out which gitignores apply to the given path and check them all
// is_ignored reports whether `relpath` (a path relative to the walked source
// root) matches any registered rule. Matching is case-insensitive and uses
// '/' as separator; backslashes in the path and in patterns are normalised.
//
// A rule with a non-empty base only applies to paths strictly below that base
// and is matched against the remainder of the path. The base directory itself
// is never matched by its own rules.
//
// Pattern forms, checked in this order (the first form that applies decides for that rule):
//   `name/`   directory: the subpath equals `name`, starts with `name/`, or
//             contains `/name/` at any depth
//   `*.ext`   suffix: the subpath ends with everything after the `*`, so
//             `*.tar.gz` only matches `.tar.gz` files, not every `.gz` file
//   `a*b`     wildcard: the literal parts occur in order somewhere in the
//             subpath (unanchored)
//   other     plain substring of the subpath
//
// Not supported: negation (`!pat`) and bracket classes such as `*.py[cod]`,
// which are compared literally.
pub fn (m IgnoreMatcher) is_ignored(relpath string) bool {
	path_low := relpath.replace('\\', '/').trim_left('/').to_lower()
	for rule in m.rules {
		pat := rule.pattern.replace('\\', '/').trim_space()
		if pat == '' {
			continue
		}
		// Determine subpath relative to base
		mut sub := path_low
		if rule.base != '' {
			base := rule.base
			if sub == base {
				// path equals the base dir; ignore rules apply to entries under base, not the base itself
				continue
			}
			if sub.starts_with(base + '/') {
				sub = sub[(base.len + 1)..]
			} else {
				continue // rule not applicable for this path
			}
		}
		// Directory pattern (relative to base)
		if pat.ends_with('/') {
			dirpat := pat.trim_right('/').trim_left('/').to_lower()
			if sub == dirpat || sub.starts_with(dirpat + '/') || sub.contains('/' + dirpat + '/') {
				return true
			}
			continue
		}
		// Extension pattern *.ext: compare against the complete suffix after '*'.
		// Taking only the text after the last '.' would turn '*.tar.gz' into '*.gz'.
		if pat.starts_with('*.') {
			suffix := pat[1..].to_lower()
			if sub.ends_with(suffix) {
				return true
			}
			continue
		}
		// Simple wildcard * anywhere -> sequential contains match
		if pat.contains('*') {
			parts := pat.to_lower().split('*')
			mut idx := 0
			mut ok := true
			for part in parts {
				if part == '' {
					continue
				}
				pos := sub.index_after(part, idx) or { -1 }
				if pos == -1 {
					ok = false
					break
				}
				// continue searching after the part that was just matched
				idx = pos + part.len
			}
			if ok {
				return true
			}
			continue
		}
		// Fallback: substring match (case-insensitive) on subpath
		if sub.contains(pat.to_lower()) {
			return true
		}
	}
	return false
}