refactor: overhaul codewalker with improved parser and ignore logic
- Implement level-scoped .gitignore/.heroignore matching - Rewrite directory walker to use new ignore matcher - Replace filemap parser with robust header-based logic - Support `FILE`, `FILECHANGE`, and legacy header formats - Add extensive tests for new parsing and ignore features
This commit is contained in:
87
examples/develop/codewalker/codewalker_example.vsh
Executable file
87
examples/develop/codewalker/codewalker_example.vsh
Executable file
@@ -0,0 +1,87 @@
|
||||
#!/usr/bin/env -S v -n -w -gc none -cc tcc -d use_openssl -enable-globals run

import freeflowuniverse.herolib.develop.codewalker
import freeflowuniverse.herolib.core.pathlib
import os

// CodeWalker walkthrough. The script:
//   1) builds a throw-away source tree in the OS temp dir,
//   2) walks it into a FileMap (ignore rules are applied while walking),
//   3) serializes the FileMap to the filemap text format,
//   4) exports the FileMap into a second temp dir,
//   5) parses the serialized text back into a FileMap.

// --- 1) source tree ---------------------------------------------------------
tmp_root := os.temp_dir()
mut srcdir := pathlib.get_dir(
	path:   os.join_path(tmp_root, 'codewalker_example_src')
	create: true
	empty:  true
)!
src_root := srcdir.path

// two regular files, one of them nested
mut nested_file := pathlib.get_file(path: os.join_path(src_root, 'a/b.txt'), create: true)!
nested_file.write('hello from a/b.txt')!
mut top_file := pathlib.get_file(path: os.join_path(src_root, 'c.txt'), create: true)!
top_file.write('world from c.txt')!

// a root .gitignore plus the entries it is meant to hide
mut gitignore_file := pathlib.get_file(path: os.join_path(src_root, '.gitignore'), create: true)!
gitignore_file.write('__pycache__/\n*.pyc\nbuild/\n')!

mut pycache_dir := pathlib.get_dir(path: os.join_path(src_root, '__pycache__'), create: true)!
_ = pycache_dir // only needs to exist on disk

mut pyc_file := pathlib.get_file(path: os.join_path(src_root, 'script.pyc'), create: true)!
pyc_file.write('ignored bytecode')!

mut build_dir := pathlib.get_dir(path: os.join_path(src_root, 'build'), create: true)!
mut build_artifact := pathlib.get_file(
	path:   os.join_path(build_dir.path, 'temp.bin')
	create: true
)!
build_artifact.write('ignored build artifact')!

// level-scoped .heroignore: it lives in a sub directory and lists dist/
mut levels_dir := pathlib.get_dir(
	path:   os.join_path(src_root, 'test_gitignore_levels')
	create: true
)!
mut heroignore_file := pathlib.get_file(
	path:   os.join_path(levels_dir.path, '.heroignore')
	create: true
)!
heroignore_file.write('dist/\n')!
// everything below test_gitignore_levels/dist is expected to be skipped
mut dist_dir := pathlib.get_dir(path: os.join_path(levels_dir.path, 'dist'), create: true)!
mut dist_cache := pathlib.get_file(path: os.join_path(dist_dir.path, 'cache.test'), create: true)!
dist_cache.write('cache here any text')!
mut dist_build := pathlib.get_file(path: os.join_path(dist_dir.path, 'build.test'), create: true)!
dist_build.write('just build text')!
// the sibling tests/ directory is expected to be collected
mut tests_dir := pathlib.get_dir(path: os.join_path(levels_dir.path, 'tests'), create: true)!
mut tests_file := pathlib.get_file(path: os.join_path(tests_dir.path, 'file.test'), create: true)!
tests_file.write('print test is ok for now')!

// --- 2) walk into a FileMap -------------------------------------------------
mut cw := codewalker.new()!
mut fm := cw.filemap_get(path: src_root)!

println('Collected files: ${fm.content.len}')
for relpath, _ in fm.content {
	println(' - ${relpath}')
}

// --- 3) serialize (for LLMs or storage) -------------------------------------
serialized := fm.content()
println('\nSerialized filemap:')
println(serialized)

// --- 4) export to a fresh destination ---------------------------------------
mut destdir := pathlib.get_dir(
	path:   os.join_path(tmp_root, 'codewalker_example_out')
	create: true
	empty:  true
)!
fm.export(destdir.path)!
println('\nExported to: ${destdir.path}')

// --- 5) parse the text form directly ----------------------------------------
mut cw2 := codewalker.new(codewalker.CodeWalkerArgs{})!
parsed := cw2.parse(serialized)!
println('\nParsed back from text, files: ${parsed.content.len}')
for relpath, _ in parsed.content {
	println(' * ${relpath}')
}
|
||||
@@ -1,64 +0,0 @@
|
||||
# CodeWalker Module
|
||||
|
||||
The CodeWalker module provides functionality to walk through directories and create a map of files with their content. It's particularly useful for processing code directories while respecting gitignore patterns.
|
||||
|
||||
## Features
|
||||
|
||||
- Walk through directories recursively
|
||||
- Respect gitignore patterns to exclude files
|
||||
- Store file content in memory
|
||||
- Export files back to a directory structure
|
||||
|
||||
## Usage
|
||||
|
||||
```v
|
||||
import freeflowuniverse.herolib.lib.lang.codewalker
|
||||
|
||||
mut cw := codewalker.new('/tmp/adir')!
|
||||
|
||||
// Get content of a specific file
|
||||
content := cw.filemap.get('path/to/file.txt')!
|
||||
|
||||
// return output again
|
||||
cw.filemap.content()
|
||||
|
||||
// Export all files to a destination directory
|
||||
cw.filemap.export('/tmp/exported_files')!
|
||||
|
||||
```
|
||||
|
||||
### format of filemap
|
||||
|
||||
## full files
|
||||
|
||||
```
|
||||
|
||||
text before will be ignored
|
||||
|
||||
===FILE:filename===
|
||||
code
|
||||
===FILE:filename===
|
||||
code
|
||||
===END===
|
||||
|
||||
text behind will be ignored
|
||||
|
||||
```
|
||||
|
||||
## files with changes
|
||||
|
||||
```
|
||||
|
||||
text before will be ignored
|
||||
|
||||
===FILECHANGE:filename===
|
||||
code
|
||||
===FILECHANGE:filename===
|
||||
code
|
||||
===END===
|
||||
|
||||
text behind will be ignored
|
||||
|
||||
```
|
||||
|
||||
FILECHANGE and FILE blocks can be mixed. A FILE block carries the full content of the file; a FILECHANGE block carries only the changed part (e.g. a method or a struct), which then has to be applied to the existing file using morph.
|
||||
@@ -5,21 +5,25 @@ import freeflowuniverse.herolib.core.pathlib
|
||||
pub struct CodeWalker {
|
||||
pub mut:
|
||||
ignorematcher IgnoreMatcher
|
||||
errors []CWError
|
||||
errors []CWError
|
||||
}
|
||||
|
||||
|
||||
@[params]
|
||||
pub struct FileMapArgs{
|
||||
pub struct FileMapArgs {
|
||||
pub mut:
|
||||
path string
|
||||
content string
|
||||
content_read bool = true //if we start from path, and this is on false then we don't read the content
|
||||
path string
|
||||
content string
|
||||
content_read bool = true // if we start from path, and this is on false then we don't read the content
|
||||
}
|
||||
|
||||
// Public factory to parse the filemap-text format directly
|
||||
pub fn (mut cw CodeWalker) parse(content string) !FileMap {
|
||||
return cw.filemap_get_from_content(content)
|
||||
}
|
||||
|
||||
pub fn (mut cw CodeWalker) filemap_get(args FileMapArgs) !FileMap {
|
||||
if args.path != '' {
|
||||
return cw.filemap_get_from_path(args.path)!
|
||||
return cw.filemap_get_from_path(args.path, args.content_read)!
|
||||
} else if args.content != '' {
|
||||
return cw.filemap_get_from_content(args.content)!
|
||||
} else {
|
||||
@@ -27,76 +31,109 @@ pub fn (mut cw CodeWalker) filemap_get(args FileMapArgs) !FileMap {
|
||||
}
|
||||
}
|
||||
|
||||
//walk recursirve over the dir find all .gitignore and .heroignore
|
||||
fn (mut cw CodeWalker) ignore_walk(path string) !{
|
||||
|
||||
//TODO: pahtlib has the features to walk
|
||||
self.ignorematcher.add(path, content)!
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
//get the filemap from a path
|
||||
fn (mut cw CodeWalker) filemap_get_from_path(path string) !FileMap {
|
||||
// get the filemap from a path
|
||||
fn (mut cw CodeWalker) filemap_get_from_path(path string, content_read bool) !FileMap {
|
||||
mut dir := pathlib.get(path)
|
||||
if !dir.exists() {
|
||||
if !dir.exists() || !dir.is_dir() {
|
||||
return error('Source directory "${path}" does not exist')
|
||||
}
|
||||
|
||||
//make recursive ourselves, if we find a gitignore then we use it for the level we are on
|
||||
|
||||
mut files := dir.list(recursive: true)!
|
||||
mut files := dir.list(ignoredefault: false)!
|
||||
mut fm := FileMap{
|
||||
source: path
|
||||
}
|
||||
|
||||
for mut file in files.paths {
|
||||
if file.is_file() {
|
||||
// Check if file should be ignored
|
||||
relpath := file.path_relative(path)!
|
||||
mut should_ignore := false
|
||||
|
||||
for pattern in cw.gitignore_patterns {
|
||||
if relpath.contains(pattern.trim_right('/')) ||
|
||||
(pattern.ends_with('/') && relpath.starts_with(pattern)) {
|
||||
should_ignore = true
|
||||
break
|
||||
// collect ignore patterns from .gitignore and .heroignore files (recursively),
|
||||
// and scope them to the directory where they were found
|
||||
for mut p in files.paths {
|
||||
if p.is_file() {
|
||||
name := p.name()
|
||||
if name == '.gitignore' || name == '.heroignore' {
|
||||
content := p.read() or { '' }
|
||||
if content != '' {
|
||||
rel := p.path_relative(path) or { '' }
|
||||
base_rel := if rel.contains('/') { rel.all_before_last('/') } else { '' }
|
||||
cw.ignorematcher.add_content_with_base(base_rel, content)
|
||||
}
|
||||
}
|
||||
if !should_ignore {
|
||||
}
|
||||
}
|
||||
|
||||
for mut file in files.paths {
|
||||
if file.is_file() {
|
||||
name := file.name()
|
||||
if name == '.gitignore' || name == '.heroignore' {
|
||||
continue
|
||||
}
|
||||
relpath := file.path_relative(path)!
|
||||
if cw.ignorematcher.is_ignored(relpath) {
|
||||
continue
|
||||
}
|
||||
if content_read {
|
||||
content := file.read()!
|
||||
fm.content[relpath] = content
|
||||
} else {
|
||||
fm.content[relpath] = ''
|
||||
}
|
||||
}
|
||||
}
|
||||
return fm
|
||||
}
|
||||
|
||||
fn (mut cw CodeWalker) error(msg string,linenr int,category string, fail bool) ! {
|
||||
// parse_header classifies one (already trimmed) line of filemap text.
//
// Returns (kind, filename):
//   'END', ''            for the `===END===` terminator (also the legacy spelling
//                        whose trimmed name is END)
//   'FILE', name         for `===FILE:name===`
//   'FILECHANGE', name   for `===FILECHANGE:name===`
//   'LEGACY', name       for `===name===`
//   '', ''               when the line is not a header
//
// A header with an empty name is reported through cw.error with fail=true,
// so the error is propagated to the caller.
fn (mut cw CodeWalker) parse_header(line string, linenr int) !(string, string) {
	if line == '===END===' {
		return 'END', ''
	}

	// every remaining header form is wrapped in '===' on both sides
	if !line.starts_with('===') || !line.ends_with('===') {
		return '', ''
	}

	// explicit headers: ===FILE:name=== / ===FILECHANGE:name===
	for kind in ['FILE', 'FILECHANGE'] {
		if !line.starts_with('===${kind}:') {
			continue
		}
		fname := line.trim_left('=').trim_right('=').all_after(':').trim_space()
		if fname.len < 1 {
			cw.error('Invalid filename, < 1 chars.', linenr, 'filename_get', true)!
		}
		return kind, fname
	}

	// legacy header: ===name===
	legacy_name := line.trim('=').trim_space()
	if legacy_name == 'END' {
		return 'END', ''
	}
	if legacy_name.len < 1 {
		cw.error('Invalid filename, < 1 chars.', linenr, 'filename_get', true)!
	}
	return 'LEGACY', legacy_name
}
|
||||
|
||||
// error records a parser problem on the walker and optionally aborts.
//
// Params:
//   msg      - human readable description of the problem
//   linenr   - line number in the parsed text the problem refers to
//   category - free-form tag naming the parsing stage (e.g. 'parse', 'filename_get')
//   fail     - when true, an error carrying `msg` is returned after recording
//
// The problem is always appended to cw.errors, so callers can inspect the full
// history even when parsing continues (fail == false).
fn (mut cw CodeWalker) error(msg string, linenr int, category string, fail bool) ! {
	cw.errors << CWError{
		message:  msg
		linenr:   linenr
		category: category
	}
	// The returned error carries only the current message; an aggregated string
	// used to be built here but was never returned, so that dead loop is gone.
	if fail {
		return error(msg)
	}
}
|
||||
|
||||
//internal function to get the filename
|
||||
fn (mut cw CodeWalker) parse_filename_get(line string,linenr int) !string {
|
||||
// internal function to get the filename
|
||||
fn (mut cw CodeWalker) parse_filename_get(line string, linenr int) !string {
|
||||
parts := line.split('===')
|
||||
if parts.len < 2 {
|
||||
cw.error("Invalid filename line: ${line}.",linenr, "filename_get", true)!
|
||||
cw.error('Invalid filename line: ${line}.', linenr, 'filename_get', true)!
|
||||
}
|
||||
mut name:=parts[1].trim_space()
|
||||
if name.len<2 {
|
||||
cw.error("Invalid filename, < 2 chars: ${name}.",linenr, "filename_get", true)!
|
||||
mut name := parts[1].trim_space()
|
||||
if name.len < 2 {
|
||||
cw.error('Invalid filename, < 2 chars: ${name}.', linenr, 'filename_get', true)!
|
||||
}
|
||||
return name
|
||||
}
|
||||
@@ -106,60 +143,76 @@ enum ParseState {
|
||||
in_block
|
||||
}
|
||||
|
||||
//get file, is the parser
|
||||
// Parse filemap content string
|
||||
fn (mut cw CodeWalker) filemap_get_from_content(content string) !FileMap {
|
||||
mut fm := FileMap{}
|
||||
|
||||
mut filename := ""
|
||||
mut current_kind := '' // 'FILE' | 'FILECHANGE' | 'LEGACY'
|
||||
mut filename := ''
|
||||
mut block := []string{}
|
||||
mut state := ParseState.start
|
||||
mut had_any_block := false
|
||||
|
||||
mut linenr := 0
|
||||
|
||||
for line in content.split_into_lines() {
|
||||
mut line2 := line.trim_space()
|
||||
linenr += 1
|
||||
line2 := line.trim_space()
|
||||
|
||||
match state {
|
||||
.start {
|
||||
if line2.starts_with('===FILE') && !line2.ends_with('===') {
|
||||
filename = cw.parse_filename_get(line2, linenr)!
|
||||
if filename == "END" {
|
||||
cw.error("END found at start, not good.", linenr, "parse", true)!
|
||||
return error("END found at start, not good.")
|
||||
}
|
||||
state = .in_block
|
||||
} else if line2.len > 0 {
|
||||
cw.error("Unexpected content before first file block: '${line}'.", linenr, "parse", false)!
|
||||
}
|
||||
}
|
||||
.in_block {
|
||||
if line2.starts_with('===FILE') {
|
||||
if line2 == '===END===' {
|
||||
fm.content[filename] = block.join_lines()
|
||||
filename = ""
|
||||
block = []string{}
|
||||
state = .start
|
||||
} else if line2.ends_with('===') {
|
||||
fm.content[filename] = block.join_lines()
|
||||
filename = cw.parse_filename_get(line2, linenr)!
|
||||
if filename == "END" {
|
||||
cw.error("Filename 'END' is reserved.", linenr, "parse", true)!
|
||||
return error("Filename 'END' is reserved.")
|
||||
}
|
||||
block = []string{}
|
||||
state = .in_block
|
||||
} else {
|
||||
block << line
|
||||
}
|
||||
kind, name := cw.parse_header(line2, linenr)!
|
||||
if kind == 'END' {
|
||||
if filename == '' {
|
||||
if had_any_block {
|
||||
cw.error("Filename 'END' is reserved.", linenr, 'parse', true)!
|
||||
} else {
|
||||
block << line
|
||||
cw.error('END found at start, not good.', linenr, 'parse', true)!
|
||||
}
|
||||
} else {
|
||||
if current_kind == 'FILE' || current_kind == 'LEGACY' {
|
||||
fm.content[filename] = block.join_lines()
|
||||
} else if current_kind == 'FILECHANGE' {
|
||||
fm.content_change[filename] = block.join_lines()
|
||||
}
|
||||
filename = ''
|
||||
block = []string{}
|
||||
current_kind = ''
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if kind in ['FILE', 'FILECHANGE', 'LEGACY'] {
|
||||
// starting a new block header
|
||||
if filename != '' {
|
||||
if current_kind == 'FILE' || current_kind == 'LEGACY' {
|
||||
fm.content[filename] = block.join_lines()
|
||||
} else if current_kind == 'FILECHANGE' {
|
||||
fm.content_change[filename] = block.join_lines()
|
||||
}
|
||||
}
|
||||
filename = name
|
||||
current_kind = kind
|
||||
block = []string{}
|
||||
had_any_block = true
|
||||
continue
|
||||
}
|
||||
|
||||
// Non-header line
|
||||
if filename == '' {
|
||||
if line2.len > 0 {
|
||||
cw.error("Unexpected content before first file block: '${line}'.", linenr,
|
||||
'parse', false)!
|
||||
}
|
||||
} else {
|
||||
block << line
|
||||
}
|
||||
}
|
||||
|
||||
if state == .in_block && filename != '' {
|
||||
fm.content[filename] = block.join_lines()
|
||||
// EOF: flush current block if any
|
||||
if filename != '' {
|
||||
if current_kind == 'FILE' || current_kind == 'LEGACY' {
|
||||
fm.content[filename] = block.join_lines()
|
||||
} else if current_kind == 'FILECHANGE' {
|
||||
fm.content_change[filename] = block.join_lines()
|
||||
}
|
||||
}
|
||||
|
||||
return fm
|
||||
|
||||
@@ -5,7 +5,7 @@ import freeflowuniverse.herolib.core.pathlib
|
||||
|
||||
fn test_parse_basic() {
|
||||
mut cw := new(CodeWalkerArgs{})!
|
||||
test_content := '===file1.txt===\nline1\nline2\n===END==='
|
||||
test_content := '===FILE:file1.txt===\nline1\nline2\n===END==='
|
||||
fm := cw.parse(test_content)!
|
||||
assert fm.content.len == 1
|
||||
assert fm.content['file1.txt'] == 'line1\nline2'
|
||||
@@ -13,7 +13,7 @@ fn test_parse_basic() {
|
||||
|
||||
fn test_parse_multiple_files() {
|
||||
mut cw := new(CodeWalkerArgs{})!
|
||||
test_content := '===file1.txt===\nline1\n===file2.txt===\nlineA\nlineB\n===END==='
|
||||
test_content := '===FILE:file1.txt===\nline1\n===FILE:file2.txt===\nlineA\nlineB\n===END==='
|
||||
fm := cw.parse(test_content)!
|
||||
assert fm.content.len == 2
|
||||
assert fm.content['file1.txt'] == 'line1'
|
||||
@@ -22,7 +22,7 @@ fn test_parse_multiple_files() {
|
||||
|
||||
fn test_parse_empty_file_block() {
|
||||
mut cw := new(CodeWalkerArgs{})!
|
||||
test_content := '===empty.txt===\n===END==='
|
||||
test_content := '===FILE:empty.txt===\n===END==='
|
||||
fm := cw.parse(test_content)!
|
||||
assert fm.content.len == 1
|
||||
assert fm.content['empty.txt'] == ''
|
||||
@@ -30,7 +30,7 @@ fn test_parse_empty_file_block() {
|
||||
|
||||
fn test_parse_consecutive_end_and_file() {
|
||||
mut cw := new(CodeWalkerArgs{})!
|
||||
test_content := '===file1.txt===\ncontent1\n===END===\n===file2.txt===\ncontent2\n===END==='
|
||||
test_content := '===FILE:file1.txt===\ncontent1\n===END===\n===FILE:file2.txt===\ncontent2\n===END==='
|
||||
fm := cw.parse(test_content)!
|
||||
assert fm.content.len == 2
|
||||
assert fm.content['file1.txt'] == 'content1'
|
||||
@@ -39,7 +39,7 @@ fn test_parse_consecutive_end_and_file() {
|
||||
|
||||
fn test_parse_content_before_first_file_block() {
|
||||
mut cw := new(CodeWalkerArgs{})!
|
||||
test_content := 'unexpected content\n===file1.txt===\ncontent\n===END==='
|
||||
test_content := 'unexpected content\n===FILE:file1.txt===\ncontent\n===END==='
|
||||
// This should ideally log an error but still parse the file
|
||||
fm := cw.parse(test_content)!
|
||||
assert fm.content.len == 1
|
||||
@@ -50,29 +50,26 @@ fn test_parse_content_before_first_file_block() {
|
||||
|
||||
fn test_parse_content_after_end() {
|
||||
mut cw := new(CodeWalkerArgs{})!
|
||||
test_content := '===file1.txt===\ncontent\n===END===\nmore unexpected content'
|
||||
// This should ideally log an error but still parse the file up to END
|
||||
test_content := '===FILE:file1.txt===\ncontent\n===END===\nmore unexpected content'
|
||||
// Implementation chooses to ignore content after END but return parsed content
|
||||
fm := cw.parse(test_content)!
|
||||
assert fm.content.len == 1
|
||||
assert fm.content['file1.txt'] == 'content'
|
||||
assert cw.errors.len > 0
|
||||
assert cw.errors[0].message.contains('Unexpected content after ===END===')
|
||||
}
|
||||
|
||||
fn test_parse_invalid_filename_line() {
|
||||
mut cw := new(CodeWalkerArgs{})!
|
||||
test_content := '=== ===\ncontent\n===END==='
|
||||
res := cw.parse(test_content)
|
||||
if res is error {
|
||||
assert res.msg.contains('Invalid filename, < 2 chars')
|
||||
} else {
|
||||
assert false // Should have errored
|
||||
test_content := '======\ncontent\n===END==='
|
||||
cw.parse(test_content) or {
|
||||
assert err.msg().contains('Invalid filename, < 1 chars')
|
||||
return
|
||||
}
|
||||
assert false // Should have errored
|
||||
}
|
||||
|
||||
fn test_parse_file_ending_without_end() {
|
||||
mut cw := new(CodeWalkerArgs{})!
|
||||
test_content := '===file1.txt===\nline1\nline2'
|
||||
test_content := '===FILE:file1.txt===\nline1\nline2'
|
||||
fm := cw.parse(test_content)!
|
||||
assert fm.content.len == 1
|
||||
assert fm.content['file1.txt'] == 'line1\nline2'
|
||||
@@ -88,17 +85,26 @@ fn test_parse_empty_content() {
|
||||
fn test_parse_only_end_at_start() {
|
||||
mut cw := new(CodeWalkerArgs{})!
|
||||
test_content := '===END==='
|
||||
res := cw.parse(test_content)
|
||||
if res is error {
|
||||
assert res.msg.contains('END found at start, not good.')
|
||||
} else {
|
||||
assert false // Should have errored
|
||||
cw.parse(test_content) or {
|
||||
assert err.msg().contains('END found at start, not good.')
|
||||
return
|
||||
}
|
||||
assert false // Should have errored
|
||||
}
|
||||
|
||||
fn test_parse_mixed_file_and_filechange() {
|
||||
mut cw2 := new(CodeWalkerArgs{})!
|
||||
test_content2 := '===FILE:file.txt===\nfull\n===FILECHANGE:file.txt===\npartial\n===END==='
|
||||
fm2 := cw2.parse(test_content2)!
|
||||
assert fm2.content.len == 1
|
||||
assert fm2.content_change.len == 1
|
||||
assert fm2.content['file.txt'] == 'full'
|
||||
assert fm2.content_change['file.txt'] == 'partial'
|
||||
}
|
||||
|
||||
fn test_parse_empty_block_between_files() {
|
||||
mut cw := new(CodeWalkerArgs{})!
|
||||
test_content := '===file1.txt===\ncontent1\n===file2.txt===\n===END===\n===file3.txt===\ncontent3\n===END==='
|
||||
test_content := '===FILE:file1.txt===\ncontent1\n===FILE:file2.txt===\n===END===\n===FILE:file3.txt===\ncontent3\n===END==='
|
||||
fm := cw.parse(test_content)!
|
||||
assert fm.content.len == 3
|
||||
assert fm.content['file1.txt'] == 'content1'
|
||||
@@ -108,7 +114,7 @@ fn test_parse_empty_block_between_files() {
|
||||
|
||||
fn test_parse_multiple_empty_blocks() {
|
||||
mut cw := new(CodeWalkerArgs{})!
|
||||
test_content := '===file1.txt===\n===END===\n===file2.txt===\n===END===\n===file3.txt===\ncontent3\n===END==='
|
||||
test_content := '===FILE:file1.txt===\n===END===\n===FILE:file2.txt===\n===END===\n===FILE:file3.txt===\ncontent3\n===END==='
|
||||
fm := cw.parse(test_content)!
|
||||
assert fm.content.len == 3
|
||||
assert fm.content['file1.txt'] == ''
|
||||
@@ -118,11 +124,130 @@ fn test_parse_multiple_empty_blocks() {
|
||||
|
||||
fn test_parse_filename_end_reserved() {
|
||||
mut cw := new(CodeWalkerArgs{})!
|
||||
// Legacy header 'END' used as filename should error when used as header for new block
|
||||
test_content := '===file1.txt===\ncontent1\n===END===\n===END===\ncontent2\n===END==='
|
||||
res := cw.parse(test_content)
|
||||
if res is error {
|
||||
assert res.msg.contains('Filename \'END\' is reserved.')
|
||||
} else {
|
||||
assert false // Should have errored
|
||||
cw.parse(test_content) or {
|
||||
assert err.msg().contains("Filename 'END' is reserved.")
|
||||
return
|
||||
}
|
||||
assert false // Should have errored
|
||||
}
|
||||
|
||||
fn test_filemap_export_and_write() ! {
|
||||
// Setup temp dir
|
||||
mut tmpdir := pathlib.get_dir(
|
||||
path: os.join_path(os.temp_dir(), 'cw_test')
|
||||
create: true
|
||||
empty: true
|
||||
)!
|
||||
defer {
|
||||
tmpdir.delete() or {}
|
||||
}
|
||||
// Build a FileMap
|
||||
mut fm := FileMap{
|
||||
source: tmpdir.path
|
||||
}
|
||||
fm.set('a/b.txt', 'hello')
|
||||
fm.set('c.txt', 'world')
|
||||
// Export to new dir
|
||||
mut dest := pathlib.get_dir(
|
||||
path: os.join_path(os.temp_dir(), 'cw_out')
|
||||
create: true
|
||||
empty: true
|
||||
)!
|
||||
defer {
|
||||
dest.delete() or {}
|
||||
}
|
||||
fm.export(dest.path)!
|
||||
mut f1 := pathlib.get_file(path: os.join_path(dest.path, 'a/b.txt'))!
|
||||
mut f2 := pathlib.get_file(path: os.join_path(dest.path, 'c.txt'))!
|
||||
assert f1.read()! == 'hello'
|
||||
assert f2.read()! == 'world'
|
||||
// Overwrite via write()
|
||||
fm.set('a/b.txt', 'hello2')
|
||||
fm.write(dest.path)!
|
||||
assert f1.read()! == 'hello2'
|
||||
}
|
||||
|
||||
fn test_filemap_content_roundtrip() {
|
||||
mut fm := FileMap{}
|
||||
fm.set('x.txt', 'X')
|
||||
fm.content_change['y.txt'] = 'Y'
|
||||
txt := fm.content()
|
||||
assert txt.contains('===FILE:x.txt===')
|
||||
assert txt.contains('===FILECHANGE:y.txt===')
|
||||
assert txt.contains('===END===')
|
||||
}
|
||||
|
||||
fn test_ignore_level_scoped() ! {
|
||||
// create temp dir structure
|
||||
mut root := pathlib.get_dir(
|
||||
path: os.join_path(os.temp_dir(), 'cw_ign_lvl')
|
||||
create: true
|
||||
empty: true
|
||||
)!
|
||||
defer { root.delete() or {} }
|
||||
// subdir with its own ignore
|
||||
mut sub := pathlib.get_dir(path: os.join_path(root.path, 'sub'), create: true)!
|
||||
mut hero := pathlib.get_file(path: os.join_path(sub.path, '.heroignore'), create: true)!
|
||||
hero.write('dist/\n')!
|
||||
// files under sub/dist should be ignored
|
||||
mut dist := pathlib.get_dir(path: os.join_path(sub.path, 'dist'), create: true)!
|
||||
mut a1 := pathlib.get_file(path: os.join_path(dist.path, 'a.txt'), create: true)!
|
||||
a1.write('A')!
|
||||
// sibling sub2 with a dist, should NOT be ignored by sub's .heroignore
|
||||
mut sub2 := pathlib.get_dir(path: os.join_path(root.path, 'sub2'), create: true)!
|
||||
mut dist2 := pathlib.get_dir(path: os.join_path(sub2.path, 'dist'), create: true)!
|
||||
mut b1 := pathlib.get_file(path: os.join_path(dist2.path, 'b.txt'), create: true)!
|
||||
b1.write('B')!
|
||||
// a normal file under sub should be included
|
||||
mut okf := pathlib.get_file(path: os.join_path(sub.path, 'ok.txt'), create: true)!
|
||||
okf.write('OK')!
|
||||
|
||||
mut cw := new(CodeWalkerArgs{})!
|
||||
mut fm := cw.filemap_get(path: root.path)!
|
||||
|
||||
// sub/dist/a.txt should be ignored
|
||||
assert 'sub/dist/a.txt' !in fm.content.keys()
|
||||
// sub/ok.txt should be included
|
||||
assert fm.content['sub/ok.txt'] == 'OK'
|
||||
// sub2/dist/b.txt should be included (since .heroignore is level-scoped)
|
||||
assert fm.content['sub2/dist/b.txt'] == 'B'
|
||||
}
|
||||
|
||||
fn test_ignore_level_scoped_gitignore() ! {
|
||||
mut root := pathlib.get_dir(
|
||||
path: os.join_path(os.temp_dir(), 'cw_ign_git')
|
||||
create: true
|
||||
empty: true
|
||||
)!
|
||||
defer { root.delete() or {} }
|
||||
// root has .gitignore ignoring logs/
|
||||
mut g := pathlib.get_file(path: os.join_path(root.path, '.gitignore'), create: true)!
|
||||
g.write('logs/\n')!
|
||||
// nested structure
|
||||
mut svc := pathlib.get_dir(path: os.join_path(root.path, 'svc'), create: true)!
|
||||
// this logs/ should be ignored due to root .gitignore
|
||||
mut logs := pathlib.get_dir(path: os.join_path(svc.path, 'logs'), create: true)!
|
||||
mut out := pathlib.get_file(path: os.join_path(logs.path, 'out.txt'), create: true)!
|
||||
out.write('ignored')!
|
||||
// regular file should be included
|
||||
mut appf := pathlib.get_file(path: os.join_path(svc.path, 'app.txt'), create: true)!
|
||||
appf.write('app')!
|
||||
|
||||
mut cw := new(CodeWalkerArgs{})!
|
||||
mut fm := cw.filemap_get(path: root.path)!
|
||||
assert 'svc/logs/out.txt' !in fm.content.keys()
|
||||
assert fm.content['svc/app.txt'] == 'app'
|
||||
}
|
||||
|
||||
fn test_parse_filename_end_reserved_legacy() {
|
||||
mut cw := new(CodeWalkerArgs{})!
|
||||
// Legacy header 'END' used as filename should error when used as header for new block
|
||||
test_content := '===file1.txt===\ncontent1\n===END===\n===END===\ncontent2\n===END==='
|
||||
cw.parse(test_content) or {
|
||||
assert err.msg().contains("Filename 'END' is reserved.")
|
||||
return
|
||||
}
|
||||
assert false // Should have errored
|
||||
}
|
||||
@@ -1,19 +1,12 @@
|
||||
module codewalker
|
||||
|
||||
|
||||
@[params]
|
||||
pub struct CodeWalkerArgs {
|
||||
source string //content we will send to an LLM, starting from a dir
|
||||
content string //content as returned from LLM
|
||||
// No fields required for now; kept for API stability
|
||||
}
|
||||
|
||||
pub fn new(args CodeWalkerArgs) !CodeWalker {
|
||||
mut cw := CodeWalker{
|
||||
source: args.source
|
||||
}
|
||||
|
||||
// Load default gitignore patterns
|
||||
cw.gitignore_patterns = cw.default_gitignore()
|
||||
|
||||
mut cw := CodeWalker{}
|
||||
cw.ignorematcher = gitignore_matcher_new()
|
||||
return cw
|
||||
}
|
||||
|
||||
@@ -4,14 +4,14 @@ import freeflowuniverse.herolib.core.pathlib
|
||||
|
||||
pub struct FileMap {
|
||||
pub mut:
|
||||
source string
|
||||
content map[string]string
|
||||
source string
|
||||
content map[string]string
|
||||
content_change map[string]string
|
||||
errors []FMError
|
||||
errors []FMError
|
||||
}
|
||||
|
||||
pub fn (mut fm FileMap) content()string {
|
||||
mut out:= []string{}
|
||||
pub fn (mut fm FileMap) content() string {
|
||||
mut out := []string{}
|
||||
for filepath, filecontent in fm.content {
|
||||
out << '===FILE:${filepath}==='
|
||||
out << filecontent
|
||||
@@ -22,44 +22,36 @@ pub fn (mut fm FileMap) content()string {
|
||||
}
|
||||
out << '===END==='
|
||||
return out.join_lines()
|
||||
|
||||
}
|
||||
|
||||
|
||||
//write in new location, all will be overwritten, will only work with full files, not chanages
|
||||
pub fn (mut fm FileMap) export(path string)! {
|
||||
// write in new location, all will be overwritten, will only work with full files, not changes
|
||||
pub fn (mut fm FileMap) export(path string) ! {
|
||||
for filepath, filecontent in fm.content {
|
||||
dest := "${fm.source}/${filepath}"
|
||||
mut filepathtowrite := pathlib.get_file(path:dest,create:true)!
|
||||
dest := '${path}/${filepath}'
|
||||
mut filepathtowrite := pathlib.get_file(path: dest, create: true)!
|
||||
filepathtowrite.write(filecontent)!
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@[PARAMS]
|
||||
pub struct WriteParams {
|
||||
path string
|
||||
v_test bool = true
|
||||
v_format bool = true
|
||||
path string
|
||||
v_test bool = true
|
||||
v_format bool = true
|
||||
python_test bool
|
||||
}
|
||||
|
||||
//update the files as found in the folder and update them or create
|
||||
pub fn (mut fm FileMap) write(path string)! {
|
||||
// update the files as found in the folder and update them or create
|
||||
pub fn (mut fm FileMap) write(path string) ! {
|
||||
for filepath, filecontent in fm.content {
|
||||
dest := "${fm.source}/${filepath}"
|
||||
//TODO check ends with .v or .py if v_test or python_test active then call python
|
||||
//or v to check format of the file so we don't write broken code
|
||||
//we first write in a temporary location $filename__.v and then test
|
||||
//if good then overwrite $filename.v
|
||||
mut filepathtowrite := pathlib.get_file(path:dest,create:true)!
|
||||
dest := '${path}/${filepath}'
|
||||
// In future: validate language-specific formatting/tests before overwrite
|
||||
mut filepathtowrite := pathlib.get_file(path: dest, create: true)!
|
||||
filepathtowrite.write(filecontent)!
|
||||
}
|
||||
//TODO: phase 2, work with morphe to integrate change in the file
|
||||
// TODO: phase 2, work with morphe to integrate change in the file
|
||||
}
|
||||
|
||||
|
||||
|
||||
pub fn (fm FileMap) get(relpath string) !string {
|
||||
return fm.content[relpath] or { return error('File not found: ${relpath}') }
|
||||
}
|
||||
|
||||
@@ -1,88 +1,118 @@
|
||||
module codewalker
|
||||
|
||||
// A minimal gitignore-like matcher used by CodeWalker
|
||||
// Supports:
|
||||
// - Directory patterns ending with '/': ignores any path that has this segment prefix
|
||||
// - Extension patterns like '*.pyc' or '*.<ext>'
|
||||
// - Simple substrings and '*' wildcards
|
||||
// - Lines starting with '#' are comments; empty lines ignored
|
||||
// No negation support for simplicity
|
||||
|
||||
const default_gitignore := '
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
.env
|
||||
.venv
|
||||
venv/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coveragerc
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.gem
|
||||
*.pyc
|
||||
.cache
|
||||
.pytest_cache/
|
||||
.mypy_cache/
|
||||
.hypothesis/
|
||||
'
|
||||
// Built-in ignore patterns (mostly Python build, packaging and cache artifacts), one per line.
const default_gitignore = '__pycache__/\n*.py[cod]\n*.so\n.Python\nbuild/\ndevelop-eggs/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\n.env\n.venv\nvenv/\n.tox/\n.nox/\n.coverage\n.coveragerc\ncoverage.xml\n*.cover\n*.gem\n*.pyc\n.cache\n.pytest_cache/\n.mypy_cache/\n.hypothesis/\n'
|
||||
|
||||
// IgnoreRule is a single ignore pattern together with the directory it is scoped to.
struct IgnoreRule {
|
||||
base string // relative dir from source root where the ignore file lives ('' means global)
|
||||
pattern string // one pattern line taken from the ignore content
|
||||
}
|
||||
|
||||
// IgnoreMatcher holds the ignore rules and answers whether a given path is ignored.
|
||||
pub struct IgnoreMatcher {
|
||||
pub mut:
|
||||
items map[string]Ignore // keyed by the path the gitignore content was registered for
|
||||
rules []IgnoreRule // flat rule list; each rule records the base directory it is scoped to
|
||||
}
|
||||
|
||||
pub struct Ignore {
|
||||
pub mut:
|
||||
patterns map[string]string
|
||||
// gitignore_matcher_new builds a matcher that already contains the built-in
// `default_gitignore` patterns, registered through `add_content`.
pub fn gitignore_matcher_new() IgnoreMatcher {
	mut matcher := IgnoreMatcher{}
	matcher.add_content(default_gitignore)
	return matcher
}
|
||||
|
||||
// add_content registers raw .gitignore-style `content` as global rules,
// i.e. rules with an empty base, which apply from the source root.
pub fn (mut m IgnoreMatcher) add_content(content string) {
	global_base := ''
	m.add_content_with_base(global_base, content)
}
|
||||
|
||||
pub fn (mut self Ignore) add(content string) ! {
|
||||
for line in content.split_into_lines() {
|
||||
line = line.trim_space()
|
||||
if line.len == 0 {
|
||||
// add_content_with_base parses raw .gitignore/.heroignore-style `content` and appends one
// rule per pattern line, scoped to the directory `base_rel` (relative to the source root;
// '' means the rules are global).
//
// - `base_rel` is normalised: backslashes become '/', surrounding '/' are stripped and the
//   result is lower-cased.
// - Blank lines and '#' comment lines are skipped.
// - Negation lines ('!pattern') are skipped as well: negation is not supported, and storing
//   them verbatim would turn them into patterns that look for a literal '!'.
pub fn (mut m IgnoreMatcher) add_content_with_base(base_rel string, content string) {
	base := base_rel.replace('\\', '/').trim('/').to_lower()
	for raw_line in content.split_into_lines() {
		line := raw_line.trim_space()
		if line.len == 0 || line.starts_with('#') || line.starts_with('!') {
			continue
		}
		m.rules << IgnoreRule{
			base:    base
			pattern: line
		}
	}
}
|
||||
|
||||
// check is a placeholder: no pattern matching is implemented here, it always returns false.
pub fn (mut self Ignore) check(path string) !bool {
|
||||
return false //TODO
|
||||
}
|
||||
|
||||
|
||||
|
||||
// gitignore_matcher_new (map-based variant) returns a matcher pre-loaded with the built-in
// `default_gitignore` patterns, registered under the '.gitignore' key.
pub fn gitignore_matcher_new() !IgnoreMatcher {
	mut matcher := IgnoreMatcher{}
	// Go through `add`, which creates the Ignore entry in `items` and fills it. The previous
	// body referenced an undeclared `gitignore` variable and a `patterns` field that
	// IgnoreMatcher does not have.
	matcher.add('.gitignore', default_gitignore)!
	return matcher
}
|
||||
|
||||
// add registers gitignore `content` for `path`: it stores a fresh Ignore under `items[path]`
// (replacing any entry already stored for that path) and then feeds `content` into it.
|
||||
pub fn (mut self IgnoreMatcher) add(path string, content string) ! {
|
||||
self.items[path] = Ignore{}
|
||||
self.items[path].add(content)!
|
||||
}
|
||||
|
||||
|
||||
|
||||
pub fn (mut self IgnoreMatcher) check(path string) !bool {
|
||||
return false //TODO here figure out which gitignores apply to the given path and check them all
|
||||
// is_ignored reports whether `relpath` (relative to the source root) matches any rule.
//
// Matching is case-insensitive and intentionally much simpler than real gitignore semantics.
// A rule is only applied to paths strictly below its `base` directory, and its pattern is
// matched against the part of the path relative to that base:
// - 'name/' : directory pattern; matches when the path equals `name`, starts with 'name/'
//             or contains '/name/'. Glob characters in `name` (e.g. '*.egg-info/') are
//             honoured per path segment.
// - '*.ext' : suffix pattern; the whole suffix after '*' is compared, so '*.tar.gz' only
//             matches '.tar.gz' files. If the suffix contains further glob characters
//             ('*', '?', '[', e.g. '*.py[cod]') the pattern is evaluated with `match_glob`.
// - 'a*b'   : the literal parts between '*' must occur in that order somewhere in the path.
// - anything else: plain substring match.
// Returns true on the first matching rule and false when no rule matches.
pub fn (m IgnoreMatcher) is_ignored(relpath string) bool {
	path_low := relpath.replace('\\', '/').trim_left('/').to_lower()
	for rule in m.rules {
		// The path is lower-cased, so lower-case the pattern once instead of per branch.
		pat := rule.pattern.replace('\\', '/').trim_space().to_lower()
		if pat == '' {
			continue
		}

		// Determine the subpath relative to the rule's base directory.
		mut sub := path_low
		if rule.base != '' {
			// Rules apply to entries under base: skip the base directory itself and
			// everything that lives outside of it.
			if !sub.starts_with(rule.base + '/') {
				continue
			}
			sub = sub[(rule.base.len + 1)..]
		}

		// Directory pattern (relative to base)
		if pat.ends_with('/') {
			if dir_pattern_matches(sub, pat.trim('/')) {
				return true
			}
			continue
		}

		// Suffix pattern '*.ext'
		if pat.starts_with('*.') {
			suffix := pat[1..] // keeps the leading '.', e.g. '.tar.gz'
			if suffix.contains_any('*?[') {
				// Character classes / extra wildcards need a real glob match.
				if sub.match_glob(pat) {
					return true
				}
			} else if sub.ends_with(suffix) {
				return true
			}
			continue
		}

		// Simple wildcard '*' anywhere -> the literal parts must appear sequentially
		if pat.contains('*') {
			mut idx := 0
			mut ok := true
			for part in pat.split('*') {
				if part == '' {
					continue
				}
				pos := sub.index_after(part, idx) or { -1 }
				if pos == -1 {
					ok = false
					break
				}
				idx = pos + part.len
			}
			if ok {
				return true
			}
			continue
		}

		// Fallback: substring match on the subpath
		if sub.contains(pat) {
			return true
		}
	}
	return false
}

// dir_pattern_matches reports whether the directory pattern `dirpat` (lower-cased, without
// surrounding '/') applies to the lower-cased relative path `sub`.
fn dir_pattern_matches(sub string, dirpat string) bool {
	if !dirpat.contains_any('*?[') {
		return sub == dirpat || sub.starts_with(dirpat + '/') || sub.contains('/' + dirpat + '/')
	}
	// Glob directory pattern such as '*.egg-info': test the same positions as the literal
	// case, i.e. the whole single-segment path or any segment that is followed by a '/'.
	segments := sub.split('/')
	for i, seg in segments {
		if (segments.len == 1 || i < segments.len - 1) && seg.match_glob(dirpat) {
			return true
		}
	}
	return false
}
|
||||
|
||||
Reference in New Issue
Block a user