This commit is contained in:
2025-11-24 06:08:05 +01:00
parent d282a5dc95
commit 9343772bc5
6 changed files with 511 additions and 152 deletions

View File

@@ -1,18 +1,16 @@
fix @lib/ai/codewalker
fix @lib/core/pathlib/readme
- we should use enumerators for FILE & CHANGE
- add regex_ignore to lib/core/pathlib/path_list.v, so we can also ignore files/dirs
- make sure we can use regex and non regex filters in lib/core/pathlib/path_list.v
- add example how to use non regex one = std filters (contains string)
- include example for lib/core/pathlib/path_scanner.v and lib/core/pathlib/path_list.v
- we should document methods well but not much text just the basics to understand
- make sure parsing of FILE & CHANGE is super robust and defensive, e.g. tolerate a space after == or ===, accept any number of = characters, and match case-insensitively
- codemap should not have errors, only kept at filemap level, remove those errors everywhere
check rest of code if no issues
fix readme.md
make sure scanner & lister examples are in readme
give the coding instructions with the full code output where changes needed

View File

@@ -3,58 +3,99 @@ module pathlib
import os
import regex
import incubaid.herolib.ui.console
import incubaid.herolib.core.texttools.regext
@[params]
pub struct ListArgs {
pub mut:
regex []string
recursive bool = true
ignore_default bool = true // ignore files starting with . and _
include_links bool // wether to include links in list
dirs_only bool
files_only bool
// Include if matches any regex pattern
regex []string
// Exclude if matches any regex pattern
regex_ignore []string
// Include if matches any wildcard pattern (* = any sequence)
filter []string
// Exclude if matches any wildcard pattern
filter_ignore []string
// Traverse directories recursively
recursive bool = true
// Ignore files starting with . and _
ignore_default bool = true
// Include symlinks
include_links bool
// Return only directories
dirs_only bool
// Return only files
files_only bool
}
// the result of pathlist
// Result of list operation
pub struct PathList {
pub mut:
// is the root under which all paths are, think about it like a changeroot environment
root string
// Root directory where listing started
root string
// Found paths
paths []Path
}
// list all files & dirs, follow symlinks .
// will sort all items .
// return as list of Paths .
// .
// params: .
// ```
// regex []string
// recursive bool = true // default true, means we recursive over dirs by default
// ignore_default bool = true // ignore files starting with . and _
// dirs_only bool
// List files and directories with filtering
//
// example see https://github.com/incubaid/herolib/blob/development/examples/core/pathlib/examples/list/path_list.v
// Parameters:
// - regex: Include if matches regex pattern (e.g., `r'.*\.v$'`)
// - regex_ignore: Exclude if matches regex pattern
// - filter: Include if matches wildcard pattern (e.g., `'*.txt'`, `'test*'`, `'config'`)
// - filter_ignore: Exclude if matches wildcard pattern
// - recursive: Traverse directories (default: true)
// - ignore_default: Ignore files starting with . and _ (default: true)
// - dirs_only: Return only directories
// - files_only: Return only files
// - include_links: Include symlinks in results
//
// e.g. p.list(regex:[r'.*\.v$'])! //notice the r in front of string, this is regex for all files ending with .v
// e.g.
//
// ```
// please note links are ignored for walking over dirstructure (for files and dirs)
// Examples:
// dir.list(regex: [r'.*\.v$'], recursive: true)!
// dir.list(filter: ['*.txt', 'config*'], filter_ignore: ['*.bak'])!
// dir.list(regex: [r'.*test.*'], regex_ignore: [r'.*_test\.v$'])!
pub fn (mut path Path) list(args_ ListArgs) !PathList {
// $if debug {
// console.print_header(' list: ${args_}')
// }
mut r := []regex.RE{}
// Add regex patterns
for regexstr in args_.regex {
mut re := regex.regex_opt(regexstr) or {
return error("cannot create regex for:'${regexstr}'")
}
// console.print_debug(re.get_query())
r << re
}
// Convert wildcard filters to regex and add
for filter_pattern in args_.filter {
regex_pattern := regext.wildcard_to_regex(filter_pattern)
mut re := regex.regex_opt(regex_pattern) or {
return error("cannot create regex from filter:'${filter_pattern}'")
}
r << re
}
mut r_ignore := []regex.RE{}
// Add regex ignore patterns
for regexstr in args_.regex_ignore {
mut re := regex.regex_opt(regexstr) or {
return error("cannot create ignore regex for:'${regexstr}'")
}
r_ignore << re
}
// Convert wildcard ignore filters to regex and add
for filter_pattern in args_.filter_ignore {
regex_pattern := regext.wildcard_to_regex(filter_pattern)
mut re := regex.regex_opt(regex_pattern) or {
return error("cannot create ignore regex from filter:'${filter_pattern}'")
}
r_ignore << re
}
mut args := ListArgsInternal{
regex: r
regex_ignore: r_ignore
recursive: args_.recursive
ignore_default: args_.ignore_default
dirs_only: args_.dirs_only
@@ -72,9 +113,10 @@ pub fn (mut path Path) list(args_ ListArgs) !PathList {
@[params]
pub struct ListArgsInternal {
mut:
regex []regex.RE // only put files in which follow one of the regexes
regex []regex.RE
regex_ignore []regex.RE
recursive bool = true
ignore_default bool = true // ignore files starting with . and _
ignore_default bool = true
dirs_only bool
files_only bool
include_links bool
@@ -85,7 +127,6 @@ fn (mut path Path) list_internal(args ListArgsInternal) ![]Path {
path.check()
if !path.is_dir() && (!path.is_dir_link() || !args.include_links) {
// return error('Path must be directory or link to directory')
return []Path{}
}
if debug {
@@ -94,27 +135,33 @@ fn (mut path Path) list_internal(args ListArgsInternal) ![]Path {
mut ls_result := os.ls(path.path) or { []string{} }
ls_result.sort()
mut all_list := []Path{}
for item in ls_result {
if debug {
console.print_stdout(' - ${item}')
}
p := os.join_path(path.path, item)
mut new_path := get(p)
// Check for dir and linkdir
// Check for broken symlinks
if !new_path.exists() {
// to deal with broken link
continue
}
// Skip symlinks if not included
if new_path.is_link() && !args.include_links {
continue
}
// Skip hidden/underscore files if ignore_default
if args.ignore_default {
if item.starts_with('_') || item.starts_with('.') {
continue
}
}
// Process directories
if new_path.is_dir() || (new_path.is_dir_link() && args.include_links) {
// If recusrive
if args.recursive {
mut rec_list := new_path.list_internal(args)!
all_list << rec_list
@@ -126,20 +173,35 @@ fn (mut path Path) list_internal(args ListArgsInternal) ![]Path {
}
}
mut addthefile := false
// If no regex patterns provided, include all files
// Check exclude patterns
mut ignore_this := false
for r_ignore in args.regex_ignore {
if r_ignore.matches_string(item) {
ignore_this = true
break
}
}
if ignore_this {
continue
}
// Check include patterns
mut include_this := false
if args.regex.len == 0 {
addthefile = true
include_this = true
} else {
// Include file if ANY regex pattern matches (OR operation)
for r in args.regex {
if r.matches_string(item) {
addthefile = true
include_this = true
break
}
}
}
if addthefile && !args.dirs_only {
// Add to results if matches and not dirs_only
if include_this && !args.dirs_only {
if !args.files_only || new_path.is_file() {
all_list << new_path
}
@@ -148,34 +210,16 @@ fn (mut path Path) list_internal(args ListArgsInternal) ![]Path {
return all_list
}
// copy copies every path held in the list to `dest`.
// The error of the first failing copy is propagated to the caller.
pub fn (mut pathlist PathList) copy(dest string) ! {
	for idx in 0 .. pathlist.paths.len {
		pathlist.paths[idx].copy(dest: dest)!
	}
}
// delete deletes every path held in the list.
// The error of the first failing delete is propagated to the caller.
pub fn (mut pathlist PathList) delete() ! {
	for idx in 0 .. pathlist.paths.len {
		pathlist.paths[idx].delete()!
	}
}
// sids_acknowledge .
// pub fn (mut pathlist PathList) sids_acknowledge(cid smartid.CID) ! {
// for mut path in pathlist.paths {
// path.sids_acknowledge(cid)!
// }
// }
// // sids_replace .
// // find parts of text in form sid:*** till sid:****** .
// // replace all occurrences with new sid's which are unique .
// // cid = is the circle id for which we find the id's .
// // sids will be replaced in the files if they are different
// pub fn (mut pathlist PathList) sids_replace(cid smartid.CID) ! {
// for mut path in pathlist.paths {
// path.sids_replace(cid)!
// }
// }

View File

@@ -45,50 +45,121 @@ if path.is_link() { /* is symlink */ }
## 3. File Listing and Filtering
```v
// List all files in a directory (recursive by default)
mut dir := pathlib.get('/some/dir')
mut pathlist := dir.list()!
### 3.1 Regex-Based Filtering
// List only files matching specific extensions using regex
mut pathlist_images := dir.list(
regex: [r'.*\.png$', r'.*\.jpg$', r'.*\.svg$', r'.*\.jpeg$'],
```v
import incubaid.herolib.core.pathlib
mut dir := pathlib.get('/some/code/project')
// Include files matching regex pattern (e.g., all V files)
mut v_files := dir.list(
regex: [r'.*\.v$']
)!
// Multiple regex patterns (OR logic)
mut source_files := dir.list(
regex: [r'.*\.v$', r'.*\.ts$', r'.*\.go$']
)!
// Exclude certain patterns
mut no_tests := dir.list(
regex: [r'.*\.v$'],
regex_ignore: [r'.*_test\.v$']
)!
// Ignore both default patterns and custom ones
mut important_files := dir.list(
regex: [r'.*\.v$'],
regex_ignore: [r'.*_test\.v$', r'.*\.bak$']
)!
```
### 3.2 Simple String-Based Filtering
```v
import incubaid.herolib.core.pathlib
mut dir := pathlib.get('/some/project')
// Include files/dirs containing string in name
mut config_files := dir.list(
filter: ['config']
)!
// Multiple filter patterns (OR logic)
mut important := dir.list(
filter: ['main', 'core', 'config'],
recursive: true
)!
// Exclude files containing certain strings
mut no_backups := dir.list(
filter_ignore: ['.bak', '.tmp', '.backup']
)!
// Combine filter with exclude
mut python_but_no_cache := dir.list(
filter: ['.py'],
filter_ignore: ['__pycache__', '.pyc']
)!
```
### 3.3 Advanced Filtering Options
```v
import incubaid.herolib.core.pathlib
mut dir := pathlib.get('/some/project')
// List only directories
mut pathlist_dirs := dir.list(
mut dirs := dir.list(
dirs_only: true,
recursive: true
)!
// List only files
mut pathlist_files := dir.list(
mut files := dir.list(
files_only: true,
recursive: false // only in current directory
recursive: false
)!
// Include symlinks in the results
mut pathlist_with_links := dir.list(
// Include symlinks
mut with_links := dir.list(
regex: [r'.*\.conf$'],
include_links: true
)!
// Don't ignore hidden files (those starting with . or _)
mut pathlist_all := dir.list(
ignore_default: false
// Don't ignore hidden files (starting with . or _)
mut all_files := dir.list(
ignore_default: false,
recursive: true
)!
// Non-recursive (only in current directory)
mut immediate := dir.list(
recursive: false
)!
// Access the resulting paths
for path in pathlist.paths {
println(path.path)
for path in dirs.paths {
println('${path.name()}')
}
// Perform operations on all paths in the list
pathlist.copy('/destination/dir')!
pathlist.delete()!
```
## 4. Common File Operations
## 4. Path Operations on Lists
```v
mut pathlist := dir.list(regex: [r'.*\.tmp$'])!
// Delete all files matching filter
pathlist.delete()!
// Copy all files to destination
pathlist.copy('/backup/location')!
```
## 5. Common File Operations
```v
// Empty a directory
@@ -107,67 +178,117 @@ mut path := pathlib.get_dir(
mut wd := pathlib.get_wd()
```
## Features
## 6. Path Scanning with Filters and Executors
The module handles common edge cases:
Path scanning processes directory trees with custom filter and executor functions.
- Automatically expands ~ to home directory
- Creates parent directories as needed
- Provides proper error handling with V's result type
- Checks path existence and type
- Handles both absolute and relative paths
### 6.1 Basic Scanner Usage
## Path Object Structure
```v
import incubaid.herolib.core.pathlib
import incubaid.herolib.data.paramsparser
// Define a filter function (return true to continue processing)
fn my_filter(mut path pathlib.Path, mut params paramsparser.Params) !bool {
// Skip files larger than 1MB
size := path.size()!
return size < 1_000_000
}
// Define an executor function (process the file)
fn my_executor(mut path pathlib.Path, mut params paramsparser.Params) !paramsparser.Params {
if path.is_file() {
content := path.read()!
println('Processing: ${path.name()} (${content.len} bytes)')
}
return params
}
// Run the scan
mut root := pathlib.get_dir(path: '/source/dir')!
mut params := paramsparser.new_params()
root.scan(mut params, [my_filter], [my_executor])!
```
### 6.2 Scanner with Multiple Filters and Executors
```v
import incubaid.herolib.core.pathlib
import incubaid.herolib.data.paramsparser
// Filter 1: Skip hidden files
fn skip_hidden(mut path pathlib.Path, mut params paramsparser.Params) !bool {
return !path.name().starts_with('.')
}
// Filter 2: Only process V files
fn only_v_files(mut path pathlib.Path, mut params paramsparser.Params) !bool {
if path.is_file() {
return path.extension() == 'v'
}
return true
}
// Executor 1: Count lines
fn count_lines(mut path pathlib.Path, mut params paramsparser.Params) !paramsparser.Params {
if path.is_file() {
content := path.read()!
lines := content.split_into_lines().len
params.set('total_lines', (params.get_default('total_lines', '0').int() + lines).str())
}
return params
}
// Executor 2: Print file info
fn print_info(mut path pathlib.Path, mut params paramsparser.Params) !paramsparser.Params {
if path.is_file() {
size := path.size()!
println('${path.name()}: ${int(size)} bytes')
}
return params
}
// Run scan with all filters and executors
mut root := pathlib.get_dir(path: '/source/code')!
mut params := paramsparser.new_params()
root.scan(mut params, [skip_hidden, only_v_files], [count_lines, print_info])!
total := params.get('total_lines')!
println('Total lines: ${total}')
```
## 7. Sub-path Getters and Checkers
```v
// Get a sub-path with name fixing and case-insensitive matching
path.sub_get(name: 'mysub_file.md', name_fix_find: true, name_fix: true)!
// Check if a sub-path exists
path.sub_exists(name: 'my_sub_dir')!
// File operations
path.file_exists('file.txt') // bool
path.file_exists_ignorecase('File.Txt') // bool
path.file_get('file.txt')! // Path
path.file_get_ignorecase('File.Txt')! // Path
path.file_get_new('new.txt')! // Get or create
// Directory operations
path.dir_exists('mydir') // bool
path.dir_get('mydir')! // Path
path.dir_get_new('newdir')! // Get or create
// Symlink operations
path.link_exists('mylink') // bool
path.link_get('mylink')! // Path
```
## 8. Path Object Structure
Each Path object contains:
- `path`: The actual path string
- `cat`: Category (file/dir/link)
- `exist`: Existence status
- `cat`: Category (file/dir/linkfile/linkdir)
- `exist`: Existence status (yes/no/unknown)
This provides a safe and convenient API for all file system operations in V.
## 5. Sub-path Getters and Checkers
The `pathlib` module provides methods to get and check for the existence of sub-paths (files, directories, and links) within a given path.
```v
// Get a sub-path (file or directory) with various options
path.sub_get(name:"mysub_file.md", name_fix_find:true, name_fix:true)!
// Check if a sub-path exists
path.sub_exists(name:"my_sub_dir")!
// Check if a file exists
path.file_exists("my_file.txt")
// Check if a file exists (case-insensitive)
path.file_exists_ignorecase("My_File.txt")
// Get a file as a Path object
path.file_get("another_file.txt")!
// Get a file as a Path object (case-insensitive)
path.file_get_ignorecase("Another_File.txt")!
// Get a file, create if it doesn't exist
path.file_get_new("new_file.txt")!
// Check if a link exists
path.link_exists("my_link")
// Check if a link exists (case-insensitive)
path.link_exists_ignorecase("My_Link")
// Get a link as a Path object
path.link_get("some_link")!
// Check if a directory exists
path.dir_exists("my_directory")
// Get a directory as a Path object
path.dir_get("another_directory")!
// Get a directory, create if it doesn't exist
path.dir_get_new("new_directory")!
```
This provides a safe and convenient API for all file system operations in V.

View File

@@ -2,14 +2,60 @@
## basic regex utilities
- .
### escape_regex_chars
Escapes special regex metacharacters in a string to make it safe for use in regex patterns.
```v
import incubaid.herolib.core.texttools.regext
escaped := regext.escape_regex_chars("file.txt")
// Result: "file\.txt"
// Use in regex patterns:
safe_search := regext.escape_regex_chars("[test]")
// Result: "\[test\]"
```
**Special characters escaped**: `. ^ $ * + ? { } [ ] \ | ( )`
### wildcard_to_regex
Converts simple wildcard patterns to regex patterns for flexible file matching.
**Conversion rules:**
- `*` becomes `.*` (matches any sequence of characters)
- Literal text is escaped (special regex characters are escaped)
- Patterns without `*` match as substrings anywhere
```v
import incubaid.herolib.core.texttools.regext
// Match files ending with .txt
pattern1 := regext.wildcard_to_regex("*.txt")
// Result: ".*\.txt"
// Match anything starting with test
pattern2 := regext.wildcard_to_regex("test*")
// Result: "test.*"
// Match anything containing 'config' (no wildcard)
pattern3 := regext.wildcard_to_regex("config")
// Result: ".*config.*"
// Complex pattern with special chars
pattern4 := regext.wildcard_to_regex("src/*.v")
// Result: "src/.*\.v"
// Multiple wildcards
pattern5 := regext.wildcard_to_regex("*test*file*")
// Result: ".*test.*file.*"
```
## regex replacer
Tool to flexibly replace elements in file(s) or text.
next example does it for
```golang
import incubaid.herolib.core.texttools.regext
text := '
@@ -52,6 +98,10 @@ ri.replace_in_dir(path:"/tmp/mypath",extensions:["md"])!
```
## Testing
Run regex conversion tests:
```bash
vtest ~/code/github/incubaid/herolib/lib/core/texttools/regext/regex_convert_test.v
```

View File

@@ -0,0 +1,58 @@
module regext
// escape_regex_chars returns `s` with a backslash inserted in front of every
// regex metacharacter, so the text can be embedded in a pattern as a literal.
// Escaped characters: . ^ $ * + ? { } [ ] \ | ( )
// All other bytes are copied through unchanged.
// Examples:
// "file.txt" -> "file\.txt"
// "a[123]" -> "a\[123\]"
pub fn escape_regex_chars(s string) string {
	mut escaped := ''
	for b in s {
		is_meta := match b {
			`.`, `^`, `$`, `*`, `+`, `?`, `{`, `}`, `[`, `]`, `\\`, `|`, `(`, `)` { true }
			else { false }
		}
		if is_meta {
			escaped += '\\'
		}
		escaped += b.ascii_str()
	}
	return escaped
}
// wildcard_to_regex translates a simple wildcard pattern into a regex string.
// Rules:
// - every `*` is emitted as `.*` (any sequence of characters)
// - the text between stars is passed through escape_regex_chars
// - a pattern containing no `*` is wrapped as `.*<escaped>.*` (substring match)
//
// Examples:
// "*.txt" -> ".*\.txt"
// "test*" -> "test.*"
// "config" -> ".*config.*"
// "file.log" -> ".*file\.log.*"
//
// NOTE(review): the result carries no ^/$ anchors; whether "*.txt" also accepts
// "a.txt.bak" depends on the matcher the caller uses - confirm.
pub fn wildcard_to_regex(pattern string) string {
	if !pattern.contains('*') {
		// No star at all: treat the whole pattern as a substring to look for.
		return '.*' + escape_regex_chars(pattern) + '.*'
	}
	mut out := ''
	mut seg_start := 0 // index where the current literal segment begins
	for idx, ch in pattern {
		if ch != `*` {
			continue
		}
		// Flush the literal text collected since the previous star, if any.
		if idx > seg_start {
			out += escape_regex_chars(pattern[seg_start..idx])
		}
		out += '.*'
		seg_start = idx + 1
	}
	// Trailing literal after the last star.
	if seg_start < pattern.len {
		out += escape_regex_chars(pattern[seg_start..])
	}
	return out
}

View File

@@ -0,0 +1,88 @@
module regext
fn test_escape_regex_chars_special_chars() {
	// Each metacharacter on its own must come back prefixed by one backslash.
	metas := ['.', '^', '$', '*', '+', '?', '{', '}', '[', ']', '\\', '|', '(', ')']
	for meta in metas {
		assert escape_regex_chars(meta) == '\\' + meta
	}
}
fn test_escape_regex_chars_normal_chars() {
	// Text without metacharacters is returned unchanged.
	for plain in ['a', '1', 'hello', 'test_123'] {
		assert escape_regex_chars(plain) == plain
	}
}
fn test_escape_regex_chars_mixed() {
	// Only the metacharacters inside ordinary text get escaped.
	cases := {
		'file.txt': 'file\\.txt'
		'test[1]':  'test\\[1\\]'
		'a.b*c':    'a\\.b\\*c'
	}
	for input, want in cases {
		assert escape_regex_chars(input) == want
	}
}
fn test_escape_regex_chars_empty() {
	// Empty input yields empty output.
	got := escape_regex_chars('')
	assert got == ''
}
fn test_wildcard_to_regex_no_wildcard() {
	// Without any `*` the pattern becomes a substring matcher.
	cases := {
		'config':   '.*config.*'
		'test.txt': '.*test\\.txt.*'
		'hello':    '.*hello.*'
	}
	for input, want in cases {
		assert wildcard_to_regex(input) == want
	}
}
fn test_wildcard_to_regex_start_wildcard() {
	// Leading `*` followed by an extension.
	for ext in ['txt', 'v', 'log'] {
		assert wildcard_to_regex('*.' + ext) == '.*\\.' + ext
	}
}
fn test_wildcard_to_regex_end_wildcard() {
	// Literal prefix followed by a trailing `*`.
	for prefix in ['test', 'log', 'file_'] {
		assert wildcard_to_regex(prefix + '*') == prefix + '.*'
	}
}
fn test_wildcard_to_regex_middle_wildcard() {
	// `*` sitting between two literal parts.
	plain := wildcard_to_regex('test*file')
	with_dot := wildcard_to_regex('src*main.v')
	assert plain == 'test.*file'
	assert with_dot == 'src.*main\\.v'
}
fn test_wildcard_to_regex_multiple_wildcards() {
	// Several `*` in one pattern.
	cases := {
		'*test*':  '.*test.*'
		'*src*.v': '.*src.*\\.v'
		'*a*b*c*': '.*a.*b.*c.*'
	}
	for input, want in cases {
		assert wildcard_to_regex(input) == want
	}
}
fn test_wildcard_to_regex_only_wildcard() {
	// A pattern made solely of stars emits one `.*` per star.
	single := wildcard_to_regex('*')
	double := wildcard_to_regex('**')
	assert single == '.*'
	assert double == '.*.*'
}
fn test_wildcard_to_regex_special_chars_in_pattern() {
	// Regex metacharacters inside a star-free pattern are escaped and wrapped.
	cases := {
		'[test]':    '.*\\[test\\].*'
		'test.file': '.*test\\.file.*'
		'(test)':    '.*\\(test\\).*'
	}
	for input, want in cases {
		assert wildcard_to_regex(input) == want
	}
}
fn test_wildcard_to_regex_edge_cases() {
	// Empty pattern, single plain char, single metacharacter.
	empty := wildcard_to_regex('')
	one_char := wildcard_to_regex('a')
	one_dot := wildcard_to_regex('.')
	assert empty == '.*.*'
	assert one_char == '.*a.*'
	assert one_dot == '.*\\..*'
}