// ListArgs selects which directory entries `Path.list` returns.
//
// All patterns are tested against the entry name (the last path element),
// not against the full path. `regex` and `filter` are merged into a single
// include list, `regex_ignore` and `filter_ignore` into a single exclude list.
// An entry is returned when no exclude pattern matches and either the include
// list is empty or at least one include pattern matches.
// NOTE(review): whether a pattern has to cover the whole name is decided by
// `regex.RE.matches_string`; the README examples end their patterns with `$` - confirm.
@[params]
pub struct ListArgs {
pub mut:
	// Include if matches any regex pattern (OR logic); empty means "include everything"
	regex []string
	// Exclude if matches any regex pattern
	regex_ignore []string
	// Include if matches any wildcard pattern (* = any sequence).
	// A pattern without `*` is treated as "name contains this text".
	filter []string
	// Exclude if matches any wildcard pattern (same wildcard rules as `filter`)
	filter_ignore []string
	// Traverse directories recursively
	recursive bool = true
	// Ignore entries whose name starts with . or _
	ignore_default bool = true
	// Include symlinks (they are skipped when false)
	include_links bool
	// Return only directories
	dirs_only bool
	// Return only files
	files_only bool
}

// PathList is the result of a list operation.
pub struct PathList {
pub mut:
	// Root directory where listing started
	root string
	// Found paths
	paths []Path
}
//notice the r in front of string, this is regex for all files ending with .v -// e.g. -// -// ``` -// please note links are ignored for walking over dirstructure (for files and dirs) +// Examples: +// dir.list(regex: [r'.*\.v$'], recursive: true)! +// dir.list(filter: ['*.txt', 'config*'], filter_ignore: ['*.bak'])! +// dir.list(regex: [r'.*test.*'], regex_ignore: [r'.*_test\.v$'])! pub fn (mut path Path) list(args_ ListArgs) !PathList { - // $if debug { - // console.print_header(' list: ${args_}') - // } mut r := []regex.RE{} + + // Add regex patterns for regexstr in args_.regex { mut re := regex.regex_opt(regexstr) or { return error("cannot create regex for:'${regexstr}'") } - // console.print_debug(re.get_query()) r << re } + + // Convert wildcard filters to regex and add + for filter_pattern in args_.filter { + regex_pattern := regext.wildcard_to_regex(filter_pattern) + mut re := regex.regex_opt(regex_pattern) or { + return error("cannot create regex from filter:'${filter_pattern}'") + } + r << re + } + + mut r_ignore := []regex.RE{} + + // Add regex ignore patterns + for regexstr in args_.regex_ignore { + mut re := regex.regex_opt(regexstr) or { + return error("cannot create ignore regex for:'${regexstr}'") + } + r_ignore << re + } + + // Convert wildcard ignore filters to regex and add + for filter_pattern in args_.filter_ignore { + regex_pattern := regext.wildcard_to_regex(filter_pattern) + mut re := regex.regex_opt(regex_pattern) or { + return error("cannot create ignore regex from filter:'${filter_pattern}'") + } + r_ignore << re + } + mut args := ListArgsInternal{ regex: r + regex_ignore: r_ignore recursive: args_.recursive ignore_default: args_.ignore_default dirs_only: args_.dirs_only @@ -72,9 +113,10 @@ pub fn (mut path Path) list(args_ ListArgs) !PathList { @[params] pub struct ListArgsInternal { mut: - regex []regex.RE // only put files in which follow one of the regexes + regex []regex.RE + regex_ignore []regex.RE recursive bool = true - 
ignore_default bool = true // ignore files starting with . and _ + ignore_default bool = true dirs_only bool files_only bool include_links bool @@ -85,7 +127,6 @@ fn (mut path Path) list_internal(args ListArgsInternal) ![]Path { path.check() if !path.is_dir() && (!path.is_dir_link() || !args.include_links) { - // return error('Path must be directory or link to directory') return []Path{} } if debug { @@ -94,27 +135,33 @@ fn (mut path Path) list_internal(args ListArgsInternal) ![]Path { mut ls_result := os.ls(path.path) or { []string{} } ls_result.sort() mut all_list := []Path{} + for item in ls_result { if debug { console.print_stdout(' - ${item}') } p := os.join_path(path.path, item) mut new_path := get(p) - // Check for dir and linkdir + + // Check for broken symlinks if !new_path.exists() { - // to deal with broken link continue } + + // Skip symlinks if not included if new_path.is_link() && !args.include_links { continue } + + // Skip hidden/underscore files if ignore_default if args.ignore_default { if item.starts_with('_') || item.starts_with('.') { continue } } + + // Process directories if new_path.is_dir() || (new_path.is_dir_link() && args.include_links) { - // If recusrive if args.recursive { mut rec_list := new_path.list_internal(args)! 
// copy copies every path held in the list to `dest`.
// Stops at, and returns, the first error reported by `Path.copy`.
pub fn (mut pathlist PathList) copy(dest string) ! {
	for idx in 0 .. pathlist.paths.len {
		pathlist.paths[idx].copy(dest: dest)!
	}
}

// delete removes every path held in the list.
// Stops at, and returns, the first error reported by `Path.delete`.
pub fn (mut pathlist PathList) delete() ! {
	for idx in 0 .. pathlist.paths.len {
		pathlist.paths[idx].delete()!
	}
}
### 3.2 Simple String-Based Filtering

```v
import incubaid.herolib.core.pathlib

mut dir := pathlib.get('/some/project')

// Include files/dirs containing string in name
// (a filter without `*` is a substring match)
mut config_files := dir.list(
    filter: ['config']
)!

// Multiple filter patterns (OR logic)
mut important := dir.list(
    filter: ['main', 'core', 'config'],
    recursive: true
)!

// Exclude files containing certain strings
mut no_backups := dir.list(
    filter_ignore: ['.bak', '.tmp', '.backup']
)!

// Combine include and exclude filters
mut python_but_no_cache := dir.list(
    filter: ['.py'],
    filter_ignore: ['__pycache__', '.pyc']
)!
```
## 4. Path Operations on Lists

```v
mut pathlist := dir.list(regex: [r'.*\.tmp$'])!

// Copy all matching files to destination first
pathlist.copy('/backup/location')!

// Then delete the originals (deleting first would leave nothing to copy)
pathlist.delete()!
```
-- Automatically expands ~ to home directory -- Creates parent directories as needed -- Provides proper error handling with V's result type -- Checks path existence and type -- Handles both absolute and relative paths +### 6.1 Basic Scanner Usage -## Path Object Structure +```v +import incubaid.herolib.core.pathlib +import incubaid.herolib.data.paramsparser + +// Define a filter function (return true to continue processing) +fn my_filter(mut path pathlib.Path, mut params paramsparser.Params) !bool { + // Skip files larger than 1MB + size := path.size()! + return size < 1_000_000 +} + +// Define an executor function (process the file) +fn my_executor(mut path pathlib.Path, mut params paramsparser.Params) !paramsparser.Params { + if path.is_file() { + content := path.read()! + println('Processing: ${path.name()} (${content.len} bytes)') + } + return params +} + +// Run the scan +mut root := pathlib.get_dir(path: '/source/dir')! +mut params := paramsparser.new_params() +root.scan(mut params, [my_filter], [my_executor])! +``` + +### 6.2 Scanner with Multiple Filters and Executors + +```v +import incubaid.herolib.core.pathlib +import incubaid.herolib.data.paramsparser + +// Filter 1: Skip hidden files +fn skip_hidden(mut path pathlib.Path, mut params paramsparser.Params) !bool { + return !path.name().starts_with('.') +} + +// Filter 2: Only process V files +fn only_v_files(mut path pathlib.Path, mut params paramsparser.Params) !bool { + if path.is_file() { + return path.extension() == 'v' + } + return true +} + +// Executor 1: Count lines +fn count_lines(mut path pathlib.Path, mut params paramsparser.Params) !paramsparser.Params { + if path.is_file() { + content := path.read()! 
+ lines := content.split_into_lines().len + params.set('total_lines', (params.get_default('total_lines', '0').int() + lines).str()) + } + return params +} + +// Executor 2: Print file info +fn print_info(mut path pathlib.Path, mut params paramsparser.Params) !paramsparser.Params { + if path.is_file() { + size := path.size()! + println('${path.name()}: ${int(size)} bytes') + } + return params +} + +// Run scan with all filters and executors +mut root := pathlib.get_dir(path: '/source/code')! +mut params := paramsparser.new_params() +root.scan(mut params, [skip_hidden, only_v_files], [count_lines, print_info])! + +total := params.get('total_lines')! +println('Total lines: ${total}') +``` + +## 7. Sub-path Getters and Checkers + +```v +// Get a sub-path with name fixing and case-insensitive matching +path.sub_get(name: 'mysub_file.md', name_fix_find: true, name_fix: true)! + +// Check if a sub-path exists +path.sub_exists(name: 'my_sub_dir')! + +// File operations +path.file_exists('file.txt') // bool +path.file_exists_ignorecase('File.Txt') // bool +path.file_get('file.txt')! // Path +path.file_get_ignorecase('File.Txt')! // Path +path.file_get_new('new.txt')! // Get or create + +// Directory operations +path.dir_exists('mydir') // bool +path.dir_get('mydir')! // Path +path.dir_get_new('newdir')! // Get or create + +// Symlink operations +path.link_exists('mylink') // bool +path.link_get('mylink')! // Path +``` + +## 8. Path Object Structure Each Path object contains: - `path`: The actual path string -- `cat`: Category (file/dir/link) -- `exist`: Existence status +- `cat`: Category (file/dir/linkfile/linkdir) +- `exist`: Existence status (yes/no/unknown) -This provides a safe and convenient API for all file system operations in V. - -## 5. Sub-path Getters and Checkers - -The `pathlib` module provides methods to get and check for the existence of sub-paths (files, directories, and links) within a given path. 
- -```v -// Get a sub-path (file or directory) with various options -path.sub_get(name:"mysub_file.md", name_fix_find:true, name_fix:true)! - -// Check if a sub-path exists -path.sub_exists(name:"my_sub_dir")! - -// Check if a file exists -path.file_exists("my_file.txt") - -// Check if a file exists (case-insensitive) -path.file_exists_ignorecase("My_File.txt") - -// Get a file as a Path object -path.file_get("another_file.txt")! - -// Get a file as a Path object (case-insensitive) -path.file_get_ignorecase("Another_File.txt")! - -// Get a file, create if it doesn't exist -path.file_get_new("new_file.txt")! - -// Check if a link exists -path.link_exists("my_link") - -// Check if a link exists (case-insensitive) -path.link_exists_ignorecase("My_Link") - -// Get a link as a Path object -path.link_get("some_link")! - -// Check if a directory exists -path.dir_exists("my_directory") - -// Get a directory as a Path object -path.dir_get("another_directory")! - -// Get a directory, create if it doesn't exist -path.dir_get_new("new_directory")! -``` +This provides a safe and convenient API for all file system operations in V. \ No newline at end of file diff --git a/lib/core/texttools/regext/readme.md b/lib/core/texttools/regext/readme.md index 2ff6c9e3..12cc397b 100644 --- a/lib/core/texttools/regext/readme.md +++ b/lib/core/texttools/regext/readme.md @@ -2,14 +2,60 @@ ## basic regex utilities -- . +### escape_regex_chars + +Escapes special regex metacharacters in a string to make it safe for use in regex patterns. + +```v +import incubaid.herolib.core.texttools.regext + +escaped := regext.escape_regex_chars("file.txt") +// Result: "file\.txt" + +// Use in regex patterns: +safe_search := regext.escape_regex_chars("[test]") +// Result: "\[test\]" +``` + +**Special characters escaped**: `. ^ $ * + ? { } [ ] \ | ( )` + +### wildcard_to_regex + +Converts simple wildcard patterns to regex patterns for flexible file matching. 
+ +**Conversion rules:** +- `*` becomes `.*` (matches any sequence of characters) +- Literal text is escaped (special regex characters are escaped) +- Patterns without `*` match as substrings anywhere + +```v +import incubaid.herolib.core.texttools.regext + +// Match files ending with .txt +pattern1 := regext.wildcard_to_regex("*.txt") +// Result: ".*\.txt" + +// Match anything starting with test +pattern2 := regext.wildcard_to_regex("test*") +// Result: "test.*" + +// Match anything containing 'config' (no wildcard) +pattern3 := regext.wildcard_to_regex("config") +// Result: ".*config.*" + +// Complex pattern with special chars +pattern4 := regext.wildcard_to_regex("src/*.v") +// Result: "src/.*\.v" + +// Multiple wildcards +pattern5 := regext.wildcard_to_regex("*test*file*") +// Result: ".*test.*file.*" +``` ## regex replacer Tool to flexibly replace elements in file(s) or text. -next example does it for - ```golang import incubaid.herolib.core.texttools.regext text := ' @@ -52,6 +98,10 @@ ri.replace_in_dir(path:"/tmp/mypath",extensions:["md"])! ``` +## Testing + +Run regex conversion tests: + +```bash +vtest ~/code/github/incubaid/herolib/lib/core/texttools/regext/regex_convert_test.v ``` - - diff --git a/lib/core/texttools/regext/regex_convert.v b/lib/core/texttools/regext/regex_convert.v new file mode 100644 index 00000000..7ccead2f --- /dev/null +++ b/lib/core/texttools/regext/regex_convert.v @@ -0,0 +1,58 @@ +module regext + +// escape_regex_chars escapes special regex metacharacters in a string +// This makes a literal string safe to use in regex patterns. 
// escape_regex_chars returns `s` with a backslash inserted in front of every
// regex metacharacter ( . ^ $ * + ? { } [ ] \ | ( ) ), so the text can be
// embedded in a regex pattern as a literal.
// Examples:
//   "file.txt" -> "file\.txt"
//   "a[123]"   -> "a\[123\]"
pub fn escape_regex_chars(s string) string {
	// raw string: no escapes and no interpolation, every character stands for itself
	metachars := r'.^$*+?{}[]\|()'
	mut escaped := ''
	for b in s {
		if metachars.contains_u8(b) {
			escaped += '\\'
		}
		escaped += b.ascii_str()
	}
	return escaped
}

// wildcard_to_regex turns a simple wildcard pattern into a regex pattern.
// Rules:
//   - every `*` becomes `.*`
//   - the text between wildcards is escaped with escape_regex_chars
//   - a pattern without any `*` is wrapped as `.*<escaped>.*` (substring match)
//
// Examples:
//   "*.txt"    -> ".*\.txt"
//   "test*"    -> "test.*"
//   "config"   -> ".*config.*"
//   "file.log" -> ".*file\.log.*"
pub fn wildcard_to_regex(pattern string) string {
	if !pattern.contains('*') {
		// No wildcards: match substring anywhere
		return '.*' + escape_regex_chars(pattern) + '.*'
	}

	mut out := ''
	mut literal_start := 0
	for idx, ch in pattern {
		if ch != `*` {
			continue
		}
		// flush the (possibly empty) literal that precedes this wildcard
		out += escape_regex_chars(pattern[literal_start..idx])
		out += '.*'
		literal_start = idx + 1
	}
	// trailing literal after the last wildcard (empty when the pattern ends with `*`)
	out += escape_regex_chars(pattern[literal_start..])
	return out
}
// Every regex metacharacter, passed on its own, must come back prefixed with a backslash.
fn test_escape_regex_chars_special_chars() {
	for meta in ['.', '^', '$', '*', '+', '?', '{', '}', '[', ']', '\\', '|', '(', ')'] {
		assert escape_regex_chars(meta) == '\\' + meta
	}
}

// Text without metacharacters is returned unchanged.
fn test_escape_regex_chars_normal_chars() {
	for plain in ['a', '1', 'hello', 'test_123'] {
		assert escape_regex_chars(plain) == plain
	}
}

// Only the metacharacters inside mixed text are escaped.
fn test_escape_regex_chars_mixed() {
	for pair in [['file.txt', 'file\\.txt'], ['test[1]', 'test\\[1\\]'],
		['a.b*c', 'a\\.b\\*c']] {
		assert escape_regex_chars(pair[0]) == pair[1]
	}
}

fn test_escape_regex_chars_empty() {
	assert escape_regex_chars('') == ''
}

// A pattern without `*` becomes a substring matcher.
fn test_wildcard_to_regex_no_wildcard() {
	for pair in [['config', '.*config.*'], ['test.txt', '.*test\\.txt.*'],
		['hello', '.*hello.*']] {
		assert wildcard_to_regex(pair[0]) == pair[1]
	}
}

// Leading `*`.
fn test_wildcard_to_regex_start_wildcard() {
	for pair in [['*.txt', '.*\\.txt'], ['*.v', '.*\\.v'], ['*.log', '.*\\.log']] {
		assert wildcard_to_regex(pair[0]) == pair[1]
	}
}

// Trailing `*`.
fn test_wildcard_to_regex_end_wildcard() {
	for pair in [['test*', 'test.*'], ['log*', 'log.*'], ['file_*', 'file_.*']] {
		assert wildcard_to_regex(pair[0]) == pair[1]
	}
}

// `*` between two literals.
fn test_wildcard_to_regex_middle_wildcard() {
	for pair in [['test*file', 'test.*file'], ['src*main.v', 'src.*main\\.v']] {
		assert wildcard_to_regex(pair[0]) == pair[1]
	}
}

// Several `*` in one pattern.
fn test_wildcard_to_regex_multiple_wildcards() {
	for pair in [['*test*', '.*test.*'], ['*src*.v', '.*src.*\\.v'],
		['*a*b*c*', '.*a.*b.*c.*']] {
		assert wildcard_to_regex(pair[0]) == pair[1]
	}
}

// Patterns made of wildcards only.
fn test_wildcard_to_regex_only_wildcard() {
	for pair in [['*', '.*'], ['**', '.*.*']] {
		assert wildcard_to_regex(pair[0]) == pair[1]
	}
}

// Regex metacharacters inside a wildcard-free pattern are escaped.
fn test_wildcard_to_regex_special_chars_in_pattern() {
	for pair in [['[test]', '.*\\[test\\].*'], ['test.file', '.*test\\.file.*'],
		['(test)', '.*\\(test\\).*']] {
		assert wildcard_to_regex(pair[0]) == pair[1]
	}
}

// Empty and single-character patterns.
fn test_wildcard_to_regex_edge_cases() {
	for pair in [['', '.*.*'], ['a', '.*a.*'], ['.', '.*\\..*']] {
		assert wildcard_to_regex(pair[0]) == pair[1]
	}
}