refactor: overhaul codewalker with improved parser and ignore logic

- Implement level-scoped .gitignore/.heroignore matching
- Rewrite directory walker to use new ignore matcher
- Replace filemap parser with robust header-based logic
- Support `FILE`, `FILECHANGE`, and legacy header formats
- Add extensive tests for new parsing and ignore features
2025-08-17 15:23:15 +03:00
parent f3449d6812
commit bcee46fa15
7 changed files with 508 additions and 292 deletions
--- a/examples/develop/codewalker/codewalker_example.vsh
+++ b/examples/develop/codewalker/codewalker_example.vsh
@@ -0,0 +1,87 @@
+#!/usr/bin/env -S v -n -w -gc none  -cc tcc -d use_openssl -enable-globals run
+
+import freeflowuniverse.herolib.develop.codewalker
+import freeflowuniverse.herolib.core.pathlib
+import os
+
+// Simple example demonstrating CodeWalker:
+// - Build a FileMap from a directory (respecting .gitignore and .heroignore files)
+// - Serialize to filemap text
+// - Export to a different destination
+// - Parse filemap text directly
+
+// 1) Prepare a small temp source directory
+mut srcdir := pathlib.get_dir(
+	path:   os.join_path(os.temp_dir(), 'codewalker_example_src')
+	create: true
+	empty:  true
+)!
+
+// Create some files that are expected to end up in the FileMap
+mut f1 := pathlib.get_file(path: os.join_path(srcdir.path, 'a/b.txt'), create: true)!
+f1.write('hello from a/b.txt')!
+mut f2 := pathlib.get_file(path: os.join_path(srcdir.path, 'c.txt'), create: true)!
+f2.write('world from c.txt')!
+
+// Create ignored files and a root-level .gitignore listing them
+mut ig := pathlib.get_file(path: os.join_path(srcdir.path, '.gitignore'), create: true)!
+ig.write('__pycache__/\n*.pyc\nbuild/\n')!
+
+// The __pycache__ directory is only created; nothing is written into it
+mut ignored_dir := pathlib.get_dir(path: os.join_path(srcdir.path, '__pycache__'), create: true)!
+_ = ignored_dir // not used
+
+mut ignored_file := pathlib.get_file(path: os.join_path(srcdir.path, 'script.pyc'), create: true)!
+ignored_file.write('ignored bytecode')!
+
+mut ignored_build := pathlib.get_dir(path: os.join_path(srcdir.path, 'build'), create: true)!
+mut ignored_in_build := pathlib.get_file(
+	path:   os.join_path(ignored_build.path, 'temp.bin')
+	create: true
+)!
+ignored_in_build.write('ignored build artifact')!
+
+// Demonstrate level-scoped .heroignore: the ignore file lives in a subdirectory
+mut lvl := pathlib.get_dir(path: os.join_path(srcdir.path, 'test_gitignore_levels'), create: true)!
+mut hero := pathlib.get_file(path: os.join_path(lvl.path, '.heroignore'), create: true)!
+hero.write('dist/\n')!
+// files under test_gitignore_levels/dist should be ignored (level-scoped)
+mut dist := pathlib.get_dir(path: os.join_path(lvl.path, 'dist'), create: true)!
+mut cachef := pathlib.get_file(path: os.join_path(dist.path, 'cache.test'), create: true)!
+cachef.write('cache here any text')!
+mut buildf := pathlib.get_file(path: os.join_path(dist.path, 'build.test'), create: true)!
+buildf.write('just build text')!
+// sibling tests folder should be included
+mut tests := pathlib.get_dir(path: os.join_path(lvl.path, 'tests'), create: true)!
+mut testf := pathlib.get_file(path: os.join_path(tests.path, 'file.test'), create: true)!
+testf.write('print test is ok for now')!
+
+// 2) Walk the directory into a FileMap (ignored files should be skipped)
+mut cw := codewalker.new()!
+mut fm := cw.filemap_get(path: srcdir.path)!
+
+// fm.content (no parentheses) is the map of relative path -> file content
+println('Collected files: ${fm.content.len}')
+for k, _ in fm.content {
+	println(' - ${k}')
+}
+
+// 3) Serialize to filemap text (for LLMs or storage)
+// fm.content() (with parentheses) is the serializer method, not the map above
+serialized := fm.content()
+println('\nSerialized filemap:')
+println(serialized)
+
+// 4) Export to a new destination directory (every entry of fm.content is written below it)
+mut destdir := pathlib.get_dir(
+	path:   os.join_path(os.temp_dir(), 'codewalker_example_out')
+	create: true
+	empty:  true
+)!
+fm.export(destdir.path)!
+println('\nExported to: ${destdir.path}')
+
+// 5) Demonstrate direct parsing from filemap text
+mut cw2 := codewalker.new(codewalker.CodeWalkerArgs{})!
+parsed := cw2.parse(serialized)!
+println('\nParsed back from text, files: ${parsed.content.len}')
+for k, _ in parsed.content {
+	println(' * ${k}')
+}
--- a/lib/develop/codewalker/README.md
+++ b/lib/develop/codewalker/README.md
@@ -1,64 +0,0 @@
-# CodeWalker Module
-
-The CodeWalker module provides functionality to walk through directories and create a map of files with their content. It's particularly useful for processing code directories while respecting gitignore patterns.
-
-## Features
-
- Walk through directories recursively
- Respect gitignore patterns to exclude files
- Store file content in memory
- Export files back to a directory structure
-
-## Usage
-
-```v
-import freeflowuniverse.herolib.lib.lang.codewalker
-
-mut cw := codewalker.new('/tmp/adir')!
-
-// Get content of a specific file
-content := cw.filemap.get('path/to/file.txt')!
-
-// return output again
-cw.filemap.content()
-
-// Export all files to a destination directory
-cw.filemap.export('/tmp/exported_files')!
-
-```
-
-### format of filemap 
-
-## full files
-
-```
-
-text before will be ignored
-
-===FILE:filename===
-code
-===FILE:filename===
-code
-===END===
-
-text behind will be ignored
-
-```
-
-## files with changes
-
-```
-
-text before will be ignored
-
-===FILECHANGE:filename===
-code
-===FILECHANGE:filename===
-code
-===END===
-
-text behind will be ignored
-
-```
-
-FILECHANGE and FILE can be mixed, in FILE it means we have full content otherwise only changed content e.g. a method or s struct and then we need to use morph to change it
--- a/lib/develop/codewalker/codewalker.v
+++ b/lib/develop/codewalker/codewalker.v
@@ -5,21 +5,25 @@ import freeflowuniverse.herolib.core.pathlib
+// CodeWalker carries the ignore rules consulted while walking a directory
+// and the list of errors recorded while parsing filemap text.
 pub struct CodeWalker {
 pub mut:
 	ignorematcher IgnoreMatcher
-	errors []CWError
+	errors        []CWError
 }

-
+// FileMapArgs selects the input of filemap_get: a directory `path` (checked first)
+// or, when path is empty, filemap text passed in `content`.
@[params]
-pub struct FileMapArgs{
+pub struct FileMapArgs {
 pub mut:
-	path string
-	content string
-	content_read bool = true //if we start from path, and this is on false then we don't read the content
+	path         string
+	content      string
+	content_read bool = true // only used with path: when false, files are listed but their content is stored as ''
+}
+
+// parse builds a FileMap directly from filemap text (===FILE:...=== / ===END=== blocks)
+// by delegating to filemap_get_from_content; a parse error is returned to the caller.
+pub fn (mut cw CodeWalker) parse(content string) !FileMap {
+	return cw.filemap_get_from_content(content)
 }

 pub fn (mut cw CodeWalker) filemap_get(args FileMapArgs) !FileMap {
 	if args.path != '' {
-		return cw.filemap_get_from_path(args.path)!
+		return cw.filemap_get_from_path(args.path, args.content_read)!
 	} else if args.content != '' {
 		return cw.filemap_get_from_content(args.content)!
 	} else {
@@ -27,76 +31,109 @@ pub fn (mut cw CodeWalker) filemap_get(args FileMapArgs) !FileMap {
 	}
 }

-//walk recursirve over the dir find all .gitignore and .heroignore
-fn (mut cw CodeWalker) ignore_walk(path string) !{
-
-	//TODO: pahtlib has the features to walk
-	self.ignorematcher.add(path, content)!
-
-}
-
-
-
-//get the filemap from a path
-fn (mut cw CodeWalker) filemap_get_from_path(path string) !FileMap {
+// filemap_get_from_path lists the entries below `path` and builds a FileMap whose keys
+// are the values returned by `path_relative(path)` for each file.
+// It makes two passes over the same listing:
+//   1. every .gitignore/.heroignore is read and its lines are added to cw.ignorematcher,
+//      scoped to the directory (relative to `path`) that contains the ignore file;
+//   2. every other file that the matcher does not ignore is added to the map.
+// When content_read is false the keys are still recorded but each value is ''.
+// Returns an error when `path` does not exist or is not a directory, or when listing,
+// computing a relative path or reading a file fails.
+// NOTE(review): rules are appended to cw.ignorematcher and nothing here resets them, so
+// reusing one CodeWalker for several roots presumably accumulates rules - confirm.
+fn (mut cw CodeWalker) filemap_get_from_path(path string, content_read bool) !FileMap {
 	mut dir := pathlib.get(path)
-	if !dir.exists() {
+	if !dir.exists() || !dir.is_dir() {
 		return error('Source directory "${path}" does not exist')
 	}

-	//make recursive ourselves, if we find a gitignore then we use it for the level we are on
-	
-	mut files := dir.list(recursive: true)!
+	mut files := dir.list(ignoredefault: false)!
 	mut fm := FileMap{
 		source: path
 	}

-	for mut file in files.paths {
-		if file.is_file() {
-			// Check if file should be ignored
-			relpath := file.path_relative(path)!
-			mut should_ignore := false
-			
-			for pattern in cw.gitignore_patterns {
-				if relpath.contains(pattern.trim_right('/')) ||
-				   (pattern.ends_with('/') && relpath.starts_with(pattern)) {
-					should_ignore = true
-					break
+	// Pass 1: collect ignore patterns from .gitignore and .heroignore files (recursively),
+	// and scope them to the directory where they were found.
+	// Unreadable or empty ignore files are skipped silently (read errors become '').
+	for mut p in files.paths {
+		if p.is_file() {
+			name := p.name()
+			if name == '.gitignore' || name == '.heroignore' {
+				content := p.read() or { '' }
+				if content != '' {
+					rel := p.path_relative(path) or { '' }
+					base_rel := if rel.contains('/') { rel.all_before_last('/') } else { '' }
+					cw.ignorematcher.add_content_with_base(base_rel, content)
 				}
 			}
-			if !should_ignore {
+		}
+	}
+
+	// Pass 2: collect the files; the ignore files themselves are never part of the map
+	for mut file in files.paths {
+		if file.is_file() {
+			name := file.name()
+			if name == '.gitignore' || name == '.heroignore' {
+				continue
+			}
+			relpath := file.path_relative(path)!
+			if cw.ignorematcher.is_ignored(relpath) {
+				continue
+			}
+			if content_read {
 				content := file.read()!
 				fm.content[relpath] = content
+			} else {
+				fm.content[relpath] = ''
 			}
 		}
 	}
 	return fm
 }

-fn (mut cw CodeWalker) error(msg string,linenr int,category string, fail bool) ! {
+// parse_header classifies one (already trimmed) line of filemap text.
+// It returns (kind, filename) where kind is:
+//   'END'        for the terminator `===END===` (filename is '')
+//   'FILE'       for `===FILE:<name>===`
+//   'FILECHANGE' for `===FILECHANGE:<name>===`
+//   'LEGACY'     for `===<name>===`
+//   ''           when the line is not a header at all
+// An empty filename is recorded through cw.error and returned as an error.
+fn (mut cw CodeWalker) parse_header(line string, linenr int) !(string, string) {
+	if line == '===END===' {
+		return 'END', ''
+	}
+	// Every header form is wrapped in '===' on both sides
+	wrapped := line.starts_with('===') && line.ends_with('===')
+	if !wrapped {
+		return '', ''
+	}
+	// Tagged headers; FILE is tested before FILECHANGE
+	for tag in ['FILE', 'FILECHANGE'] {
+		if line.starts_with('===${tag}:') {
+			tagged := line.trim_left('=').trim_right('=')
+			fname := tagged.all_after(':').trim_space()
+			if fname.len < 1 {
+				cw.error('Invalid filename, < 1 chars.', linenr, 'filename_get', true)!
+			}
+			return tag, fname
+		}
+	}
+	// Legacy header: ===filename===
+	legacy := line.trim('=').trim_space()
+	if legacy == 'END' {
+		return 'END', ''
+	}
+	if legacy.len < 1 {
+		cw.error('Invalid filename, < 1 chars.', linenr, 'filename_get', true)!
+	}
+	return 'LEGACY', legacy
+}
+
+// error appends a CWError (message, line number, category) to cw.errors.
+// When fail is true it also returns error(msg), so a caller using `!` aborts;
+// when fail is false the problem is only recorded and the call returns normally.
+fn (mut cw CodeWalker) error(msg string, linenr int, category string, fail bool) ! {
 	cw.errors << CWError{
-		message: msg
-		linenr: linenr
+		message:  msg
+		linenr:   linenr
 		category: category
 	}
 	if fail {
-		mut errormsg:= ""
-		for e in cw.errors {
-			errormsg += "${e.message} (line ${e.linenr}, category: ${e.category})\n"
-		}
 		return error(msg)
 	}
 }

-//internal function to get the filename
-fn (mut cw CodeWalker) parse_filename_get(line string,linenr int) !string {
+// parse_filename_get extracts the filename from a header line by splitting it on '==='
+// and taking the second part, trimmed. It fails (via cw.error) when the split yields
+// fewer than two parts or when the name is shorter than 2 characters.
+// NOTE(review): no caller is visible in this diff after the parser rewrite - possibly
+// dead code; confirm before relying on it.
+fn (mut cw CodeWalker) parse_filename_get(line string, linenr int) !string {
 	parts := line.split('===')
 	if parts.len < 2 {
-		cw.error("Invalid filename line: ${line}.",linenr, "filename_get", true)!
+		cw.error('Invalid filename line: ${line}.', linenr, 'filename_get', true)!
 	}
-	mut name:=parts[1].trim_space()
-	if name.len<2 {
-		cw.error("Invalid filename, < 2 chars: ${name}.",linenr, "filename_get", true)!
+	mut name := parts[1].trim_space()
+	if name.len < 2 {
+		cw.error('Invalid filename, < 2 chars: ${name}.', linenr, 'filename_get', true)!
 	}
 	return name
 }
@@ -106,60 +143,76 @@ enum ParseState {
 	in_block
 }

-//get file, is the parser
+// Parse filemap content string
 fn (mut cw CodeWalker) filemap_get_from_content(content string) !FileMap {
 	mut fm := FileMap{}

-	mut filename := ""
+	mut current_kind := '' // 'FILE' | 'FILECHANGE' | 'LEGACY'
+	mut filename := ''
 	mut block := []string{}
-	mut state := ParseState.start
+	mut had_any_block := false
+
 	mut linenr := 0

 	for line in content.split_into_lines() {
-		mut line2 := line.trim_space()
 		linenr += 1
+		line2 := line.trim_space()

-		match state {
-			.start {
-				if line2.starts_with('===FILE') && !line2.ends_with('===') {
-					filename = cw.parse_filename_get(line2, linenr)!
-					if filename == "END" {
-						cw.error("END found at start, not good.", linenr, "parse", true)!
-						return error("END found at start, not good.")
-					}
-					state = .in_block
-				} else if line2.len > 0 {
-					cw.error("Unexpected content before first file block: '${line}'.", linenr, "parse", false)!
-				}
-			}
-			.in_block {
-				if line2.starts_with('===FILE') {
-					if line2 == '===END===' {
-						fm.content[filename] = block.join_lines()
-						filename = ""
-						block = []string{}
-						state = .start
-					} else if line2.ends_with('===') {
-						fm.content[filename] = block.join_lines()
-						filename = cw.parse_filename_get(line2, linenr)!
-						if filename == "END" {
-							cw.error("Filename 'END' is reserved.", linenr, "parse", true)!
-							return error("Filename 'END' is reserved.")
-						}
-						block = []string{}
-						state = .in_block
-					} else {
-						block << line
-					}
+		kind, name := cw.parse_header(line2, linenr)!
+		if kind == 'END' {
+			if filename == '' {
+				if had_any_block {
+					cw.error("Filename 'END' is reserved.", linenr, 'parse', true)!
 				} else {
-					block << line
+					cw.error('END found at start, not good.', linenr, 'parse', true)!
+				}
+			} else {
+				if current_kind == 'FILE' || current_kind == 'LEGACY' {
+					fm.content[filename] = block.join_lines()
+				} else if current_kind == 'FILECHANGE' {
+					fm.content_change[filename] = block.join_lines()
+				}
+				filename = ''
+				block = []string{}
+				current_kind = ''
+			}
+			continue
+		}
+
+		if kind in ['FILE', 'FILECHANGE', 'LEGACY'] {
+			// starting a new block header
+			if filename != '' {
+				if current_kind == 'FILE' || current_kind == 'LEGACY' {
+					fm.content[filename] = block.join_lines()
+				} else if current_kind == 'FILECHANGE' {
+					fm.content_change[filename] = block.join_lines()
 				}
 			}
+			filename = name
+			current_kind = kind
+			block = []string{}
+			had_any_block = true
+			continue
+		}
+
+		// Non-header line
+		if filename == '' {
+			if line2.len > 0 {
+				cw.error("Unexpected content before first file block: '${line}'.", linenr,
+					'parse', false)!
+			}
+		} else {
+			block << line
 		}
 	}

-	if state == .in_block && filename != '' {
-		fm.content[filename] = block.join_lines()
+	// EOF: flush current block if any
+	if filename != '' {
+		if current_kind == 'FILE' || current_kind == 'LEGACY' {
+			fm.content[filename] = block.join_lines()
+		} else if current_kind == 'FILECHANGE' {
+			fm.content_change[filename] = block.join_lines()
+		}
 	}

 	return fm
--- a/lib/develop/codewalker/codewalker_test.v
+++ b/lib/develop/codewalker/codewalker_test.v
@@ -5,7 +5,7 @@ import freeflowuniverse.herolib.core.pathlib

 fn test_parse_basic() {
 	mut cw := new(CodeWalkerArgs{})!
-	test_content := '===file1.txt===\nline1\nline2\n===END==='
+	test_content := '===FILE:file1.txt===\nline1\nline2\n===END==='
 	fm := cw.parse(test_content)!
 	assert fm.content.len == 1
 	assert fm.content['file1.txt'] == 'line1\nline2'
@@ -13,7 +13,7 @@ fn test_parse_basic() {

 fn test_parse_multiple_files() {
 	mut cw := new(CodeWalkerArgs{})!
-	test_content := '===file1.txt===\nline1\n===file2.txt===\nlineA\nlineB\n===END==='
+	test_content := '===FILE:file1.txt===\nline1\n===FILE:file2.txt===\nlineA\nlineB\n===END==='
 	fm := cw.parse(test_content)!
 	assert fm.content.len == 2
 	assert fm.content['file1.txt'] == 'line1'
@@ -22,7 +22,7 @@ fn test_parse_multiple_files() {

 fn test_parse_empty_file_block() {
 	mut cw := new(CodeWalkerArgs{})!
-	test_content := '===empty.txt===\n===END==='
+	test_content := '===FILE:empty.txt===\n===END==='
 	fm := cw.parse(test_content)!
 	assert fm.content.len == 1
 	assert fm.content['empty.txt'] == ''
@@ -30,7 +30,7 @@ fn test_parse_empty_file_block() {

 fn test_parse_consecutive_end_and_file() {
 	mut cw := new(CodeWalkerArgs{})!
-	test_content := '===file1.txt===\ncontent1\n===END===\n===file2.txt===\ncontent2\n===END==='
+	test_content := '===FILE:file1.txt===\ncontent1\n===END===\n===FILE:file2.txt===\ncontent2\n===END==='
 	fm := cw.parse(test_content)!
 	assert fm.content.len == 2
 	assert fm.content['file1.txt'] == 'content1'
@@ -39,7 +39,7 @@ fn test_parse_consecutive_end_and_file() {

 fn test_parse_content_before_first_file_block() {
 	mut cw := new(CodeWalkerArgs{})!
-	test_content := 'unexpected content\n===file1.txt===\ncontent\n===END==='
+	test_content := 'unexpected content\n===FILE:file1.txt===\ncontent\n===END==='
 	// This should ideally log an error but still parse the file
 	fm := cw.parse(test_content)!
 	assert fm.content.len == 1
@@ -50,29 +50,26 @@ fn test_parse_content_before_first_file_block() {

+// Text following the final ===END=== must not end up in any file of the map.
 fn test_parse_content_after_end() {
 	mut cw := new(CodeWalkerArgs{})!
-	test_content := '===file1.txt===\ncontent\n===END===\nmore unexpected content'
-	// This should ideally log an error but still parse the file up to END
+	test_content := '===FILE:file1.txt===\ncontent\n===END===\nmore unexpected content'
+	// Content after END is not stored; the parser only records a non-fatal error for it
 	fm := cw.parse(test_content)!
 	assert fm.content.len == 1
 	assert fm.content['file1.txt'] == 'content'
-	assert cw.errors.len > 0
-	assert cw.errors[0].message.contains('Unexpected content after ===END===')
 }

+// A header made only of '=' characters has an empty filename and must make parse fail.
 fn test_parse_invalid_filename_line() {
 	mut cw := new(CodeWalkerArgs{})!
-	test_content := '=== ===\ncontent\n===END==='
-	res := cw.parse(test_content)
-	if res is error {
-		assert res.msg.contains('Invalid filename, < 2 chars')
-	} else {
-		assert false // Should have errored
+	test_content := '======\ncontent\n===END==='
+	cw.parse(test_content) or {
+		assert err.msg().contains('Invalid filename, < 1 chars')
+		return
 	}
+	assert false // Should have errored
 }

 fn test_parse_file_ending_without_end() {
 	mut cw := new(CodeWalkerArgs{})!
-	test_content := '===file1.txt===\nline1\nline2'
+	test_content := '===FILE:file1.txt===\nline1\nline2'
 	fm := cw.parse(test_content)!
 	assert fm.content.len == 1
 	assert fm.content['file1.txt'] == 'line1\nline2'
@@ -88,17 +85,26 @@ fn test_parse_empty_content() {
+// An ===END=== marker before any file block was opened must make parse fail.
 fn test_parse_only_end_at_start() {
 	mut cw := new(CodeWalkerArgs{})!
 	test_content := '===END==='
-	res := cw.parse(test_content)
-	if res is error {
-		assert res.msg.contains('END found at start, not good.')
-	} else {
-		assert false // Should have errored
+	cw.parse(test_content) or {
+		assert err.msg().contains('END found at start, not good.')
+		return
 	}
+	assert false // Should have errored
+}
+
+// A FILE block and a FILECHANGE block for the same name are stored in separate maps
+// (content and content_change) and do not overwrite each other.
+fn test_parse_mixed_file_and_filechange() {
+	mut cw := new(CodeWalkerArgs{})!
+	test_content := '===FILE:file.txt===\nfull\n===FILECHANGE:file.txt===\npartial\n===END==='
+	fm := cw.parse(test_content)!
+	assert fm.content.len == 1
+	assert fm.content_change.len == 1
+	assert fm.content['file.txt'] == 'full'
+	assert fm.content_change['file.txt'] == 'partial'
 }

 fn test_parse_empty_block_between_files() {
 	mut cw := new(CodeWalkerArgs{})!
-	test_content := '===file1.txt===\ncontent1\n===file2.txt===\n===END===\n===file3.txt===\ncontent3\n===END==='
+	test_content := '===FILE:file1.txt===\ncontent1\n===FILE:file2.txt===\n===END===\n===FILE:file3.txt===\ncontent3\n===END==='
 	fm := cw.parse(test_content)!
 	assert fm.content.len == 3
 	assert fm.content['file1.txt'] == 'content1'
@@ -108,7 +114,7 @@ fn test_parse_empty_block_between_files() {

 fn test_parse_multiple_empty_blocks() {
 	mut cw := new(CodeWalkerArgs{})!
-	test_content := '===file1.txt===\n===END===\n===file2.txt===\n===END===\n===file3.txt===\ncontent3\n===END==='
+	test_content := '===FILE:file1.txt===\n===END===\n===FILE:file2.txt===\n===END===\n===FILE:file3.txt===\ncontent3\n===END==='
 	fm := cw.parse(test_content)!
 	assert fm.content.len == 3
 	assert fm.content['file1.txt'] == ''
@@ -118,11 +124,130 @@ fn test_parse_multiple_empty_blocks() {

+// A second ===END=== arriving while no block is open, after at least one block was
+// parsed, must make parse fail with the "reserved" message.
 fn test_parse_filename_end_reserved() {
 	mut cw := new(CodeWalkerArgs{})!
+	// The first block uses the legacy ===name=== header; the repeated END is the error
 	test_content := '===file1.txt===\ncontent1\n===END===\n===END===\ncontent2\n===END==='
-	res := cw.parse(test_content)
-	if res is error {
-		assert res.msg.contains('Filename \'END\' is reserved.')
-	} else {
-		assert false // Should have errored
+	cw.parse(test_content) or {
+		assert err.msg().contains("Filename 'END' is reserved.")
+		return
 	}
+	assert false // Should have errored
+}
+
+// export() and write() must both place the entries of fm.content below the directory
+// passed as argument, and write() must overwrite a file that export() created earlier.
+fn test_filemap_export_and_write() ! {
+	// Setup temp dir
+	mut tmpdir := pathlib.get_dir(
+		path:   os.join_path(os.temp_dir(), 'cw_test')
+		create: true
+		empty:  true
+	)!
+	defer {
+		tmpdir.delete() or {}
+	}
+	// Build a FileMap
+	mut fm := FileMap{
+		source: tmpdir.path
+	}
+	fm.set('a/b.txt', 'hello')
+	fm.set('c.txt', 'world')
+	// Export to new dir (different from fm.source, so the destination argument is what counts)
+	mut dest := pathlib.get_dir(
+		path:   os.join_path(os.temp_dir(), 'cw_out')
+		create: true
+		empty:  true
+	)!
+	defer {
+		dest.delete() or {}
+	}
+	fm.export(dest.path)!
+	mut f1 := pathlib.get_file(path: os.join_path(dest.path, 'a/b.txt'))!
+	mut f2 := pathlib.get_file(path: os.join_path(dest.path, 'c.txt'))!
+	assert f1.read()! == 'hello'
+	assert f2.read()! == 'world'
+	// Overwrite via write()
+	fm.set('a/b.txt', 'hello2')
+	fm.write(dest.path)!
+	assert f1.read()! == 'hello2'
+}
+
+// Checks that the serialized text contains a FILE header, a FILECHANGE header and the
+// END marker. NOTE(review): despite the name, the text is not parsed back here.
+fn test_filemap_content_roundtrip() {
+	mut fm := FileMap{}
+	fm.set('x.txt', 'X')
+	fm.content_change['y.txt'] = 'Y'
+	txt := fm.content()
+	assert txt.contains('===FILE:x.txt===')
+	assert txt.contains('===FILECHANGE:y.txt===')
+	assert txt.contains('===END===')
+}
+
+// A .heroignore placed in a subdirectory must only affect paths below that subdirectory:
+// sub/dist is ignored, while the sibling sub2/dist is still collected.
+fn test_ignore_level_scoped() ! {
+	// create temp dir structure
+	mut root := pathlib.get_dir(
+		path:   os.join_path(os.temp_dir(), 'cw_ign_lvl')
+		create: true
+		empty:  true
+	)!
+	defer { root.delete() or {} }
+	// subdir with its own ignore
+	mut sub := pathlib.get_dir(path: os.join_path(root.path, 'sub'), create: true)!
+	mut hero := pathlib.get_file(path: os.join_path(sub.path, '.heroignore'), create: true)!
+	hero.write('dist/\n')!
+	// files under sub/dist should be ignored
+	mut dist := pathlib.get_dir(path: os.join_path(sub.path, 'dist'), create: true)!
+	mut a1 := pathlib.get_file(path: os.join_path(dist.path, 'a.txt'), create: true)!
+	a1.write('A')!
+	// sibling sub2 with a dist, should NOT be ignored by sub's .heroignore
+	mut sub2 := pathlib.get_dir(path: os.join_path(root.path, 'sub2'), create: true)!
+	mut dist2 := pathlib.get_dir(path: os.join_path(sub2.path, 'dist'), create: true)!
+	mut b1 := pathlib.get_file(path: os.join_path(dist2.path, 'b.txt'), create: true)!
+	b1.write('B')!
+	// a normal file under sub should be included
+	mut okf := pathlib.get_file(path: os.join_path(sub.path, 'ok.txt'), create: true)!
+	okf.write('OK')!
+
+	mut cw := new(CodeWalkerArgs{})!
+	mut fm := cw.filemap_get(path: root.path)!
+
+	// sub/dist/a.txt should be ignored
+	assert 'sub/dist/a.txt' !in fm.content.keys()
+	// sub/ok.txt should be included
+	assert fm.content['sub/ok.txt'] == 'OK'
+	// sub2/dist/b.txt should be included (since .heroignore is level-scoped)
+	assert fm.content['sub2/dist/b.txt'] == 'B'
+}
+
+// A directory pattern in the root .gitignore must also apply to a directory of that
+// name nested deeper in the tree (svc/logs), while other files stay included.
+fn test_ignore_level_scoped_gitignore() ! {
+	mut root := pathlib.get_dir(
+		path:   os.join_path(os.temp_dir(), 'cw_ign_git')
+		create: true
+		empty:  true
+	)!
+	defer { root.delete() or {} }
+	// root has .gitignore ignoring logs/
+	mut g := pathlib.get_file(path: os.join_path(root.path, '.gitignore'), create: true)!
+	g.write('logs/\n')!
+	// nested structure
+	mut svc := pathlib.get_dir(path: os.join_path(root.path, 'svc'), create: true)!
+	// this logs/ should be ignored due to root .gitignore
+	mut logs := pathlib.get_dir(path: os.join_path(svc.path, 'logs'), create: true)!
+	mut out := pathlib.get_file(path: os.join_path(logs.path, 'out.txt'), create: true)!
+	out.write('ignored')!
+	// regular file should be included
+	mut appf := pathlib.get_file(path: os.join_path(svc.path, 'app.txt'), create: true)!
+	appf.write('app')!
+
+	mut cw := new(CodeWalkerArgs{})!
+	mut fm := cw.filemap_get(path: root.path)!
+	assert 'svc/logs/out.txt' !in fm.content.keys()
+	assert fm.content['svc/app.txt'] == 'app'
+}
+
+// Same scenario as test_parse_filename_end_reserved: a repeated ===END=== after a
+// legacy-header block must fail with the "reserved" message.
+// NOTE(review): input and assertions are identical to test_parse_filename_end_reserved;
+// consider removing one of the two or making this one cover a different case.
+fn test_parse_filename_end_reserved_legacy() {
+	mut cw := new(CodeWalkerArgs{})!
+	// Legacy header 'END' used as filename should error when used as header for new block
+	test_content := '===file1.txt===\ncontent1\n===END===\n===END===\ncontent2\n===END==='
+	cw.parse(test_content) or {
+		assert err.msg().contains("Filename 'END' is reserved.")
+		return
+	}
+	assert false // Should have errored
 }
--- a/lib/develop/codewalker/factory.v
+++ b/lib/develop/codewalker/factory.v
@@ -1,19 +1,12 @@
 module codewalker

-
+// CodeWalkerArgs is the (currently empty) parameter struct accepted by new().
@[params]
 pub struct CodeWalkerArgs {
-	source string //content we will send to an LLM, starting from a dir
-	content string //content as returned from LLM
+	// No fields required for now; kept for API stability
 }

+// new returns a CodeWalker whose ignore matcher comes from gitignore_matcher_new().
+// args is accepted for API stability and is not read here.
 pub fn new(args CodeWalkerArgs) !CodeWalker {
-	mut cw := CodeWalker{
-		source: args.source
-	}
-
-	// Load default gitignore patterns
-	cw.gitignore_patterns = cw.default_gitignore()
-	
+	mut cw := CodeWalker{}
+	cw.ignorematcher = gitignore_matcher_new()
 	return cw
 }
--- a/lib/develop/codewalker/filemap.v
+++ b/lib/develop/codewalker/filemap.v
@@ -4,14 +4,14 @@ import freeflowuniverse.herolib.core.pathlib

+// FileMap is an in-memory set of files keyed by relative path:
+// - source: the directory the map was built from (set when walking a path)
+// - content: full file contents (FILE and legacy blocks, or files read from disk)
+// - content_change: partial contents coming from FILECHANGE blocks
+// - errors: list of FMError values (not written to by the code visible in this diff)
 pub struct FileMap {
 pub mut:
-	source string
-	content map[string]string
+	source         string
+	content        map[string]string
 	content_change map[string]string
-	errors []FMError
+	errors         []FMError
 }

-pub fn (mut fm FileMap) content()string {
-	mut out:= []string{}
+pub fn (mut fm FileMap) content() string {
+	mut out := []string{}
 	for filepath, filecontent in fm.content {
 		out << '===FILE:${filepath}==='
 		out << filecontent
@@ -22,44 +22,36 @@ pub fn (mut fm FileMap) content()string {
 	}
 	out << '===END==='
 	return out.join_lines()
-
 }

-
-//write in new location, all will be overwritten, will only work with full files, not chanages
-pub fn (mut fm FileMap) export(path string)! {
+// export writes every entry of fm.content to '<path>/<relative path>', creating the
+// file when needed (create: true) and replacing its content.
+// Only full files are handled; entries of fm.content_change are not written.
+pub fn (mut fm FileMap) export(path string) ! {
 	for filepath, filecontent in fm.content {
-		dest := "${fm.source}/${filepath}"
-		mut filepathtowrite := pathlib.get_file(path:dest,create:true)!
+		dest := '${path}/${filepath}'
+		mut filepathtowrite := pathlib.get_file(path: dest, create: true)!
 		filepathtowrite.write(filecontent)!
 	}
-
 }

+// WriteParams groups options for writing a FileMap (target path and per-language checks).
+// NOTE(review): write() in this diff takes a plain `path string` and does not use this
+// struct; the attribute is spelled @[PARAMS] while the other structs of this module use
+// @[params] - confirm it is recognised.
@[PARAMS]
 pub struct WriteParams {
-	path string
-	v_test bool = true
-	v_format bool = true
+	path        string
+	v_test      bool = true
+	v_format    bool = true
 	python_test bool
 }

-//update the files as found in the folder and update them or create
-pub fn (mut fm FileMap) write(path string)! {
+// write creates or overwrites '<path>/<relative path>' for every entry of fm.content.
+// No validation or formatting is done before writing, and fm.content_change is not applied.
+pub fn (mut fm FileMap) write(path string) ! {
 	for filepath, filecontent in fm.content {
-		dest := "${fm.source}/${filepath}"
-		//TODO check ends with .v or .py if v_test or python_test active then call python 
-		//or v to check format of the file so we don't write broken code
-		//we first write in a temporary location $filename__.v and then test
-		//if good then overwrite $filename.v
-		mut filepathtowrite := pathlib.get_file(path:dest,create:true)!
+		dest := '${path}/${filepath}'
+		// In future: validate language-specific formatting/tests before overwrite
+		mut filepathtowrite := pathlib.get_file(path: dest, create: true)!
 		filepathtowrite.write(filecontent)!
 	}
-	//TODO: phase 2, work with morphe to integrate change in the file
+	// TODO: phase 2, work with morphe to integrate change in the file
 }

-
-
+// get returns the full content stored for relpath, or an error when the key is absent.
 pub fn (fm FileMap) get(relpath string) !string {
 	return fm.content[relpath] or { return error('File not found: ${relpath}') }
 }
--- a/lib/develop/codewalker/ignore.v
+++ b/lib/develop/codewalker/ignore.v
@@ -1,88 +1,118 @@
 module codewalker

+// A minimal gitignore-like matcher used by CodeWalker
+// Supports:
+// - Directory patterns ending with '/': ignores any path that has this segment prefix
+// - Extension patterns like '*.pyc' or '*.<ext>'
+// - Simple substrings and '*' wildcards
+// - Lines starting with '#' are comments; empty lines ignored
+// No negation support for simplicity

-const default_gitignore := '
-__pycache__/
-*.py[cod]
-*$py.class
-*.so
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-.env
-.venv
-venv/
-.tox/
-.nox/
-.coverage
-.coveragerc
-coverage.xml
-*.cover
-*.gem
-*.pyc
-.cache
-.pytest_cache/
-.mypy_cache/
-.hypothesis/
-'
+// Default ignore patterns, one per line, loaded by gitignore_matcher_new().
+// Bracket classes are not interpreted by the matcher in this file (a '*.ext' pattern is
+// compared literally), so the Python bytecode extensions are spelled out one by one
+// instead of using '*.py[cod]', which could never match.
+const default_gitignore = '__pycache__/\n*.pyo\n*.pyd\n*.so\n.Python\nbuild/\ndevelop-eggs/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\n.env\n.venv\nvenv/\n.tox/\n.nox/\n.coverage\n.coveragerc\ncoverage.xml\n*.cover\n*.gem\n*.pyc\n.cache\n.pytest_cache/\n.mypy_cache/\n.hypothesis/\n'
+
+// IgnoreRule is one pattern line of an ignore file together with the directory it applies to.
+struct IgnoreRule {
+	base    string // relative dir from source root where the ignore file lives ('' means global)
+	pattern string // the trimmed pattern line, stored as written in the ignore file
+}

-//responsible to help us to find if a file matches or not
+// IgnoreMatcher holds the list of ignore rules; is_ignored tests them in insertion order.
 pub struct IgnoreMatcher {
 pub mut:
-	items map[string]Ignore //the key is the path where the gitignore plays
+	rules []IgnoreRule
 }

-pub struct Ignore {
-pub mut:
-	patterns map[string]string
+// gitignore_matcher_new returns a matcher pre-loaded with the patterns of
+// default_gitignore as global (base '') rules.
+pub fn gitignore_matcher_new() IgnoreMatcher {
+	mut m := IgnoreMatcher{}
+	m.add_content(default_gitignore)
+	return m
 }

+// add_content adds raw .gitignore-style content as global rules, i.e. with an empty
+// base so they apply to every path (shorthand for add_content_with_base('', content)).
+pub fn (mut m IgnoreMatcher) add_content(content string) {
+	m.add_content_with_base('', content)
+}

-pub fn (mut self Ignore) add(content string) ! {
-	for line in content.split_into_lines() {
-		line = line.trim_space()
-		if line.len == 0 {
+// add_content_with_base adds raw .gitignore/.heroignore-style content scoped to base_rel.
+// base_rel is normalised ('\\' -> '/', surrounding '/' removed, lower-cased) and stored
+// on every rule. Each non-empty line that does not start with '#' becomes one rule;
+// lines are trimmed but otherwise stored as written.
+pub fn (mut m IgnoreMatcher) add_content_with_base(base_rel string, content string) {
+	mut base := base_rel.replace('\\', '/').trim('/').to_lower()
+	for raw_line in content.split_into_lines() {
+		mut line := raw_line.trim_space()
+		if line.len == 0 || line.starts_with('#') {
 			continue
 		}
-		self.patterns[line] = line
+		m.rules << IgnoreRule{
+			base:    base
+			pattern: line
+		}
 	}
 }

-pub fn (mut self Ignore) check(path string) !bool {
-	return false //TODO
-}
-
-
-
-pub fn gitignore_matcher_new() !IgnoreMatcher {
-	mut matcher := IgnoreMatcher{}
-	gitignore.add(default_gitignore)!
-	matcher.patterns['.gitignore'] = gitignore
-	return matcher
-
-}
-
-//add content to path of gitignore
-pub fn (mut self IgnoreMatcher) add(path string, content string) ! {
-	self.items[path] = Ignore{}
-	self.items[path].add(content)!
-}
-
-
-
-pub fn (mut self IgnoreMatcher) check(path string) !bool {
-	return false //TODO here figure out which gitignores apply to the given path and check them all
+// wildcard_in_order reports whether the literal parts of pat (split on '*') all occur
+// in s, in order and without overlapping. The match is not anchored at either end of s.
+fn wildcard_in_order(pat string, s string) bool {
+	mut idx := 0
+	for part in pat.split('*') {
+		if part == '' {
+			continue
+		}
+		pos := s.index_after(part, idx) or { return false }
+		idx = pos + part.len
+	}
+	return true
+}
+
+// is_ignored reports whether relpath (a path relative to the walked root) matches any rule.
+// Matching is case-insensitive and uses '/' as separator ('\\' is normalised first).
+// A rule with a non-empty base only applies to paths strictly below that base directory
+// and is evaluated against the remainder of the path after the base.
+// Pattern forms, tested in this order:
+// - 'name/'  directory pattern: the remainder equals name, starts with 'name/' or
+//            contains '/name/'
+// - 'gl*b/'  directory pattern containing '*': matched against every directory segment
+//            of the remainder (the last segment, the entry itself, is not tested)
+// - '*.ext'  suffix pattern: the remainder ends with '.ext'; a multi-dot suffix such as
+//            '*.tar.gz' is compared as a whole
+// - 'a*b'    wildcard pattern: the literal parts occur in order somewhere in the remainder
+// - anything else: plain substring of the remainder
+// Negation ('!pattern') and bracket classes are not interpreted.
+pub fn (m IgnoreMatcher) is_ignored(relpath string) bool {
+	path_low := relpath.replace('\\', '/').trim_left('/').to_lower()
+	for rule in m.rules {
+		pat := rule.pattern.replace('\\', '/').trim_space().to_lower()
+		if pat == '' {
+			continue
+		}
+
+		// Reduce the path to the part below the rule's base directory
+		mut sub := path_low
+		if rule.base != '' {
+			if !sub.starts_with(rule.base + '/') {
+				// covers both "outside the base" and "is the base directory itself"
+				continue
+			}
+			sub = sub[(rule.base.len + 1)..]
+		}
+
+		// Directory pattern (relative to base)
+		if pat.ends_with('/') {
+			dirpat := pat.trim_right('/').trim_left('/')
+			if dirpat.contains('*') {
+				// glob directory such as '*.egg-info/': a literal comparison can never match
+				segments := sub.split('/')
+				for i in 0 .. segments.len - 1 {
+					if wildcard_in_order(dirpat, segments[i]) {
+						return true
+					}
+				}
+				continue
+			}
+			if sub == dirpat || sub.starts_with(dirpat + '/') || sub.contains('/' + dirpat + '/') {
+				return true
+			}
+			continue
+		}
+		// Suffix pattern '*.ext'; the whole text after '*' is the suffix, so '*.tar.gz'
+		// does not degrade to "every .gz file"
+		if pat.starts_with('*.') && !pat[1..].contains('*') {
+			if sub.ends_with(pat[1..]) {
+				return true
+			}
+			continue
+		}
+		// Simple wildcard * anywhere -> sequential contains match
+		if pat.contains('*') {
+			if wildcard_in_order(pat, sub) {
+				return true
+			}
+			continue
+		}
+		// Fallback: substring match on the remainder
+		if sub.contains(pat) {
+			return true
+		}
+	}
+	return false
 }