...
This commit is contained in:
@@ -1,18 +1,16 @@
|
||||
|
||||
|
||||
fix @lib/ai/codewalker
|
||||
fix @lib/core/pathlib/readme
|
||||
|
||||
|
||||
- we should use enumerators for FILE & CHANGE
|
||||
- add regex_ignore to lib/core/pathlib/path_list.v, so we can also ignore files/dirs
|
||||
- make sure we can use regex and non regex filters in lib/core/pathlib/path_list.v
|
||||
- add example how to use non regex one = std filters (contains string)
|
||||
- include example for lib/core/pathlib/path_scanner.v and lib/core/pathlib/path_list.v
|
||||
- we should document methods well but not much text just the basics to understand
|
||||
- make sure parsing of FILE & CHANGE is super robust and defensive, e.g. space after == or ===, e.g. == can be any len of ==, e.g. case-insensitive
|
||||
- codemap should not have errors, only kept at filemap level, remove those errors everywhere
|
||||
|
||||
|
||||
check rest of code if no issues
|
||||
|
||||
fix readme.md
|
||||
make sure scanner & lister examples are in the readme
|
||||
|
||||
|
||||
give the coding instructions with the full code output where changes needed
|
||||
|
||||
|
||||
@@ -3,58 +3,99 @@ module pathlib
|
||||
import os
|
||||
import regex
|
||||
import incubaid.herolib.ui.console
|
||||
import incubaid.herolib.core.texttools.regext
|
||||
|
||||
@[params]
|
||||
pub struct ListArgs {
|
||||
pub mut:
|
||||
regex []string
|
||||
recursive bool = true
|
||||
ignore_default bool = true // ignore files starting with . and _
|
||||
include_links bool // wether to include links in list
|
||||
dirs_only bool
|
||||
files_only bool
|
||||
// Include if matches any regex pattern
|
||||
regex []string
|
||||
// Exclude if matches any regex pattern
|
||||
regex_ignore []string
|
||||
// Include if matches any wildcard pattern (* = any sequence)
|
||||
filter []string
|
||||
// Exclude if matches any wildcard pattern
|
||||
filter_ignore []string
|
||||
// Traverse directories recursively
|
||||
recursive bool = true
|
||||
// Ignore files starting with . and _
|
||||
ignore_default bool = true
|
||||
// Include symlinks
|
||||
include_links bool
|
||||
// Return only directories
|
||||
dirs_only bool
|
||||
// Return only files
|
||||
files_only bool
|
||||
}
|
||||
|
||||
// the result of pathlist
|
||||
// Result of list operation
|
||||
pub struct PathList {
|
||||
pub mut:
|
||||
// is the root under which all paths are, think about it like a changeroot environment
|
||||
root string
|
||||
// Root directory where listing started
|
||||
root string
|
||||
// Found paths
|
||||
paths []Path
|
||||
}
|
||||
|
||||
// list all files & dirs, follow symlinks .
|
||||
// will sort all items .
|
||||
// return as list of Paths .
|
||||
// .
|
||||
// params: .
|
||||
// ```
|
||||
// regex []string
|
||||
// recursive bool = true // default true, means we recursive over dirs by default
|
||||
// ignore_default bool = true // ignore files starting with . and _
|
||||
// dirs_only bool
|
||||
// List files and directories with filtering
|
||||
//
|
||||
// example see https://github.com/incubaid/herolib/blob/development/examples/core/pathlib/examples/list/path_list.v
|
||||
// Parameters:
|
||||
// - regex: Include if matches regex pattern (e.g., `r'.*\.v$'`)
|
||||
// - regex_ignore: Exclude if matches regex pattern
|
||||
// - filter: Include if matches wildcard pattern (e.g., `'*.txt'`, `'test*'`, `'config'`)
|
||||
// - filter_ignore: Exclude if matches wildcard pattern
|
||||
// - recursive: Traverse directories (default: true)
|
||||
// - ignore_default: Ignore files starting with . and _ (default: true)
|
||||
// - dirs_only: Return only directories
|
||||
// - files_only: Return only files
|
||||
// - include_links: Include symlinks in results
|
||||
//
|
||||
// e.g. p.list(regex:[r'.*\.v$'])! //notice the r in front of string, this is regex for all files ending with .v
|
||||
// e.g.
|
||||
//
|
||||
// ```
|
||||
// please note links are ignored for walking over dirstructure (for files and dirs)
|
||||
// Examples:
|
||||
// dir.list(regex: [r'.*\.v$'], recursive: true)!
|
||||
// dir.list(filter: ['*.txt', 'config*'], filter_ignore: ['*.bak'])!
|
||||
// dir.list(regex: [r'.*test.*'], regex_ignore: [r'.*_test\.v$'])!
|
||||
pub fn (mut path Path) list(args_ ListArgs) !PathList {
|
||||
// $if debug {
|
||||
// console.print_header(' list: ${args_}')
|
||||
// }
|
||||
mut r := []regex.RE{}
|
||||
|
||||
// Add regex patterns
|
||||
for regexstr in args_.regex {
|
||||
mut re := regex.regex_opt(regexstr) or {
|
||||
return error("cannot create regex for:'${regexstr}'")
|
||||
}
|
||||
// console.print_debug(re.get_query())
|
||||
r << re
|
||||
}
|
||||
|
||||
// Convert wildcard filters to regex and add
|
||||
for filter_pattern in args_.filter {
|
||||
regex_pattern := regext.wildcard_to_regex(filter_pattern)
|
||||
mut re := regex.regex_opt(regex_pattern) or {
|
||||
return error("cannot create regex from filter:'${filter_pattern}'")
|
||||
}
|
||||
r << re
|
||||
}
|
||||
|
||||
mut r_ignore := []regex.RE{}
|
||||
|
||||
// Add regex ignore patterns
|
||||
for regexstr in args_.regex_ignore {
|
||||
mut re := regex.regex_opt(regexstr) or {
|
||||
return error("cannot create ignore regex for:'${regexstr}'")
|
||||
}
|
||||
r_ignore << re
|
||||
}
|
||||
|
||||
// Convert wildcard ignore filters to regex and add
|
||||
for filter_pattern in args_.filter_ignore {
|
||||
regex_pattern := regext.wildcard_to_regex(filter_pattern)
|
||||
mut re := regex.regex_opt(regex_pattern) or {
|
||||
return error("cannot create ignore regex from filter:'${filter_pattern}'")
|
||||
}
|
||||
r_ignore << re
|
||||
}
|
||||
|
||||
mut args := ListArgsInternal{
|
||||
regex: r
|
||||
regex_ignore: r_ignore
|
||||
recursive: args_.recursive
|
||||
ignore_default: args_.ignore_default
|
||||
dirs_only: args_.dirs_only
|
||||
@@ -72,9 +113,10 @@ pub fn (mut path Path) list(args_ ListArgs) !PathList {
|
||||
@[params]
|
||||
pub struct ListArgsInternal {
|
||||
mut:
|
||||
regex []regex.RE // only put files in which follow one of the regexes
|
||||
regex []regex.RE
|
||||
regex_ignore []regex.RE
|
||||
recursive bool = true
|
||||
ignore_default bool = true // ignore files starting with . and _
|
||||
ignore_default bool = true
|
||||
dirs_only bool
|
||||
files_only bool
|
||||
include_links bool
|
||||
@@ -85,7 +127,6 @@ fn (mut path Path) list_internal(args ListArgsInternal) ![]Path {
|
||||
path.check()
|
||||
|
||||
if !path.is_dir() && (!path.is_dir_link() || !args.include_links) {
|
||||
// return error('Path must be directory or link to directory')
|
||||
return []Path{}
|
||||
}
|
||||
if debug {
|
||||
@@ -94,27 +135,33 @@ fn (mut path Path) list_internal(args ListArgsInternal) ![]Path {
|
||||
mut ls_result := os.ls(path.path) or { []string{} }
|
||||
ls_result.sort()
|
||||
mut all_list := []Path{}
|
||||
|
||||
for item in ls_result {
|
||||
if debug {
|
||||
console.print_stdout(' - ${item}')
|
||||
}
|
||||
p := os.join_path(path.path, item)
|
||||
mut new_path := get(p)
|
||||
// Check for dir and linkdir
|
||||
|
||||
// Check for broken symlinks
|
||||
if !new_path.exists() {
|
||||
// to deal with broken link
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip symlinks if not included
|
||||
if new_path.is_link() && !args.include_links {
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip hidden/underscore files if ignore_default
|
||||
if args.ignore_default {
|
||||
if item.starts_with('_') || item.starts_with('.') {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Process directories
|
||||
if new_path.is_dir() || (new_path.is_dir_link() && args.include_links) {
|
||||
// If recursive
|
||||
if args.recursive {
|
||||
mut rec_list := new_path.list_internal(args)!
|
||||
all_list << rec_list
|
||||
@@ -126,20 +173,35 @@ fn (mut path Path) list_internal(args ListArgsInternal) ![]Path {
|
||||
}
|
||||
}
|
||||
|
||||
mut addthefile := false
|
||||
// If no regex patterns provided, include all files
|
||||
// Check exclude patterns
|
||||
mut ignore_this := false
|
||||
for r_ignore in args.regex_ignore {
|
||||
if r_ignore.matches_string(item) {
|
||||
ignore_this = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if ignore_this {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check include patterns
|
||||
mut include_this := false
|
||||
|
||||
if args.regex.len == 0 {
|
||||
addthefile = true
|
||||
include_this = true
|
||||
} else {
|
||||
// Include file if ANY regex pattern matches (OR operation)
|
||||
for r in args.regex {
|
||||
if r.matches_string(item) {
|
||||
addthefile = true
|
||||
include_this = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if addthefile && !args.dirs_only {
|
||||
|
||||
// Add to results if matches and not dirs_only
|
||||
if include_this && !args.dirs_only {
|
||||
if !args.files_only || new_path.is_file() {
|
||||
all_list << new_path
|
||||
}
|
||||
@@ -148,34 +210,16 @@ fn (mut path Path) list_internal(args ListArgsInternal) ![]Path {
|
||||
return all_list
|
||||
}
|
||||
|
||||
// copy all
|
||||
// copy copies every path held in the list to `dest`.
// The first failing copy aborts the loop and its error is propagated.
pub fn (mut pathlist PathList) copy(dest string) ! {
	for mut entry in pathlist.paths {
		entry.copy(dest: dest)!
	}
}
|
||||
|
||||
// delete all
|
||||
// delete removes every path held in the list.
// The first failing delete aborts the loop and its error is propagated.
pub fn (mut pathlist PathList) delete() ! {
	for mut entry in pathlist.paths {
		entry.delete()!
	}
}
|
||||
|
||||
// sids_acknowledge .
|
||||
// pub fn (mut pathlist PathList) sids_acknowledge(cid smartid.CID) ! {
|
||||
// for mut path in pathlist.paths {
|
||||
// path.sids_acknowledge(cid)!
|
||||
// }
|
||||
// }
|
||||
|
||||
// // sids_replace .
|
||||
// // find parts of text in form sid:*** till sid:****** .
|
||||
// // replace all occurrences with new sid's which are unique .
|
||||
// // cid = is the circle id for which we find the id's .
|
||||
// // sids will be replaced in the files if they are different
|
||||
// pub fn (mut pathlist PathList) sids_replace(cid smartid.CID) ! {
|
||||
// for mut path in pathlist.paths {
|
||||
// path.sids_replace(cid)!
|
||||
// }
|
||||
// }
|
||||
|
||||
@@ -45,50 +45,121 @@ if path.is_link() { /* is symlink */ }
|
||||
|
||||
## 3. File Listing and Filtering
|
||||
|
||||
```v
|
||||
// List all files in a directory (recursive by default)
|
||||
mut dir := pathlib.get('/some/dir')
|
||||
mut pathlist := dir.list()!
|
||||
### 3.1 Regex-Based Filtering
|
||||
|
||||
// List only files matching specific extensions using regex
|
||||
mut pathlist_images := dir.list(
|
||||
regex: [r'.*\.png$', r'.*\.jpg$', r'.*\.svg$', r'.*\.jpeg$'],
|
||||
```v
|
||||
import incubaid.herolib.core.pathlib
|
||||
|
||||
mut dir := pathlib.get('/some/code/project')
|
||||
|
||||
// Include files matching regex pattern (e.g., all V files)
|
||||
mut v_files := dir.list(
|
||||
regex: [r'.*\.v$']
|
||||
)!
|
||||
|
||||
// Multiple regex patterns (OR logic)
|
||||
mut source_files := dir.list(
|
||||
regex: [r'.*\.v$', r'.*\.ts$', r'.*\.go$']
|
||||
)!
|
||||
|
||||
// Exclude certain patterns
|
||||
mut no_tests := dir.list(
|
||||
regex: [r'.*\.v$'],
|
||||
regex_ignore: [r'.*_test\.v$']
|
||||
)!
|
||||
|
||||
// Ignore both default patterns and custom ones
|
||||
mut important_files := dir.list(
|
||||
regex: [r'.*\.v$'],
|
||||
regex_ignore: [r'.*_test\.v$', r'.*\.bak$']
|
||||
)!
|
||||
```
|
||||
|
||||
### 3.2 Simple String-Based Filtering
|
||||
|
||||
```v
|
||||
import incubaid.herolib.core.pathlib
|
||||
|
||||
mut dir := pathlib.get('/some/project')
|
||||
|
||||
// Include files/dirs containing string in name
|
||||
mut config_files := dir.list(
|
||||
filter: ['config']
|
||||
)!
|
||||
|
||||
// Multiple contains patterns (OR logic)
|
||||
mut important := dir.list(
|
||||
filter: ['main', 'core', 'config'],
|
||||
recursive: true
|
||||
)!
|
||||
|
||||
// Exclude files containing certain strings
|
||||
mut no_backups := dir.list(
|
||||
filter_ignore: ['.bak', '.tmp', '.backup']
|
||||
)!
|
||||
|
||||
// Combine contains with exclude
|
||||
mut python_but_no_cache := dir.list(
|
||||
filter: ['.py'],
|
||||
filter_ignore: ['__pycache__', '.pyc']
|
||||
)!
|
||||
```
|
||||
|
||||
### 3.3 Advanced Filtering Options
|
||||
|
||||
```v
|
||||
import incubaid.herolib.core.pathlib
|
||||
|
||||
mut dir := pathlib.get('/some/project')
|
||||
|
||||
// List only directories
|
||||
mut pathlist_dirs := dir.list(
|
||||
mut dirs := dir.list(
|
||||
dirs_only: true,
|
||||
recursive: true
|
||||
)!
|
||||
|
||||
// List only files
|
||||
mut pathlist_files := dir.list(
|
||||
mut files := dir.list(
|
||||
files_only: true,
|
||||
recursive: false // only in current directory
|
||||
recursive: false
|
||||
)!
|
||||
|
||||
// Include symlinks in the results
|
||||
mut pathlist_with_links := dir.list(
|
||||
// Include symlinks
|
||||
mut with_links := dir.list(
|
||||
regex: [r'.*\.conf$'],
|
||||
include_links: true
|
||||
)!
|
||||
|
||||
// Don't ignore hidden files (those starting with . or _)
|
||||
mut pathlist_all := dir.list(
|
||||
ignore_default: false
|
||||
// Don't ignore hidden files (starting with . or _)
|
||||
mut all_files := dir.list(
|
||||
ignore_default: false,
|
||||
recursive: true
|
||||
)!
|
||||
|
||||
// Non-recursive (only in current directory)
|
||||
mut immediate := dir.list(
|
||||
recursive: false
|
||||
)!
|
||||
|
||||
// Access the resulting paths
|
||||
for path in pathlist.paths {
|
||||
println(path.path)
|
||||
for path in dirs.paths {
|
||||
println('${path.name()}')
|
||||
}
|
||||
|
||||
// Perform operations on all paths in the list
|
||||
pathlist.copy('/destination/dir')!
|
||||
pathlist.delete()!
|
||||
```
|
||||
|
||||
## 4. Common File Operations
|
||||
## 4. Path Operations on Lists
|
||||
|
||||
```v
|
||||
mut pathlist := dir.list(regex: [r'.*\.tmp$'])!
|
||||
|
||||
// Delete all files matching filter
|
||||
pathlist.delete()!
|
||||
|
||||
// Copy all files to destination
|
||||
pathlist.copy('/backup/location')!
|
||||
```
|
||||
|
||||
## 5. Common File Operations
|
||||
|
||||
```v
|
||||
// Empty a directory
|
||||
@@ -107,67 +178,117 @@ mut path := pathlib.get_dir(
|
||||
mut wd := pathlib.get_wd()
|
||||
```
|
||||
|
||||
## Features
|
||||
## 6. Path Scanning with Filters and Executors
|
||||
|
||||
The module handles common edge cases:
|
||||
Path scanning processes directory trees with custom filter and executor functions.
|
||||
|
||||
- Automatically expands ~ to home directory
|
||||
- Creates parent directories as needed
|
||||
- Provides proper error handling with V's result type
|
||||
- Checks path existence and type
|
||||
- Handles both absolute and relative paths
|
||||
### 6.1 Basic Scanner Usage
|
||||
|
||||
## Path Object Structure
|
||||
```v
|
||||
import incubaid.herolib.core.pathlib
|
||||
import incubaid.herolib.data.paramsparser
|
||||
|
||||
// Define a filter function (return true to continue processing)
|
||||
fn my_filter(mut path pathlib.Path, mut params paramsparser.Params) !bool {
|
||||
// Skip files larger than 1MB
|
||||
size := path.size()!
|
||||
return size < 1_000_000
|
||||
}
|
||||
|
||||
// Define an executor function (process the file)
|
||||
fn my_executor(mut path pathlib.Path, mut params paramsparser.Params) !paramsparser.Params {
|
||||
if path.is_file() {
|
||||
content := path.read()!
|
||||
println('Processing: ${path.name()} (${content.len} bytes)')
|
||||
}
|
||||
return params
|
||||
}
|
||||
|
||||
// Run the scan
|
||||
mut root := pathlib.get_dir(path: '/source/dir')!
|
||||
mut params := paramsparser.new_params()
|
||||
root.scan(mut params, [my_filter], [my_executor])!
|
||||
```
|
||||
|
||||
### 6.2 Scanner with Multiple Filters and Executors
|
||||
|
||||
```v
|
||||
import incubaid.herolib.core.pathlib
|
||||
import incubaid.herolib.data.paramsparser
|
||||
|
||||
// Filter 1: Skip hidden files
|
||||
fn skip_hidden(mut path pathlib.Path, mut params paramsparser.Params) !bool {
|
||||
return !path.name().starts_with('.')
|
||||
}
|
||||
|
||||
// Filter 2: Only process V files
|
||||
fn only_v_files(mut path pathlib.Path, mut params paramsparser.Params) !bool {
|
||||
if path.is_file() {
|
||||
return path.extension() == 'v'
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Executor 1: Count lines
|
||||
fn count_lines(mut path pathlib.Path, mut params paramsparser.Params) !paramsparser.Params {
|
||||
if path.is_file() {
|
||||
content := path.read()!
|
||||
lines := content.split_into_lines().len
|
||||
params.set('total_lines', (params.get_default('total_lines', '0').int() + lines).str())
|
||||
}
|
||||
return params
|
||||
}
|
||||
|
||||
// Executor 2: Print file info
|
||||
fn print_info(mut path pathlib.Path, mut params paramsparser.Params) !paramsparser.Params {
|
||||
if path.is_file() {
|
||||
size := path.size()!
|
||||
println('${path.name()}: ${int(size)} bytes')
|
||||
}
|
||||
return params
|
||||
}
|
||||
|
||||
// Run scan with all filters and executors
|
||||
mut root := pathlib.get_dir(path: '/source/code')!
|
||||
mut params := paramsparser.new_params()
|
||||
root.scan(mut params, [skip_hidden, only_v_files], [count_lines, print_info])!
|
||||
|
||||
total := params.get('total_lines')!
|
||||
println('Total lines: ${total}')
|
||||
```
|
||||
|
||||
## 7. Sub-path Getters and Checkers
|
||||
|
||||
```v
|
||||
// Get a sub-path with name fixing and case-insensitive matching
|
||||
path.sub_get(name: 'mysub_file.md', name_fix_find: true, name_fix: true)!
|
||||
|
||||
// Check if a sub-path exists
|
||||
path.sub_exists(name: 'my_sub_dir')!
|
||||
|
||||
// File operations
|
||||
path.file_exists('file.txt') // bool
|
||||
path.file_exists_ignorecase('File.Txt') // bool
|
||||
path.file_get('file.txt')! // Path
|
||||
path.file_get_ignorecase('File.Txt')! // Path
|
||||
path.file_get_new('new.txt')! // Get or create
|
||||
|
||||
// Directory operations
|
||||
path.dir_exists('mydir') // bool
|
||||
path.dir_get('mydir')! // Path
|
||||
path.dir_get_new('newdir')! // Get or create
|
||||
|
||||
// Symlink operations
|
||||
path.link_exists('mylink') // bool
|
||||
path.link_get('mylink')! // Path
|
||||
```
|
||||
|
||||
## 8. Path Object Structure
|
||||
|
||||
Each Path object contains:
|
||||
|
||||
- `path`: The actual path string
|
||||
- `cat`: Category (file/dir/link)
|
||||
- `exist`: Existence status
|
||||
- `cat`: Category (file/dir/linkfile/linkdir)
|
||||
- `exist`: Existence status (yes/no/unknown)
|
||||
|
||||
This provides a safe and convenient API for all file system operations in V.
|
||||
|
||||
## 5. Sub-path Getters and Checkers
|
||||
|
||||
The `pathlib` module provides methods to get and check for the existence of sub-paths (files, directories, and links) within a given path.
|
||||
|
||||
```v
|
||||
// Get a sub-path (file or directory) with various options
|
||||
path.sub_get(name:"mysub_file.md", name_fix_find:true, name_fix:true)!
|
||||
|
||||
// Check if a sub-path exists
|
||||
path.sub_exists(name:"my_sub_dir")!
|
||||
|
||||
// Check if a file exists
|
||||
path.file_exists("my_file.txt")
|
||||
|
||||
// Check if a file exists (case-insensitive)
|
||||
path.file_exists_ignorecase("My_File.txt")
|
||||
|
||||
// Get a file as a Path object
|
||||
path.file_get("another_file.txt")!
|
||||
|
||||
// Get a file as a Path object (case-insensitive)
|
||||
path.file_get_ignorecase("Another_File.txt")!
|
||||
|
||||
// Get a file, create if it doesn't exist
|
||||
path.file_get_new("new_file.txt")!
|
||||
|
||||
// Check if a link exists
|
||||
path.link_exists("my_link")
|
||||
|
||||
// Check if a link exists (case-insensitive)
|
||||
path.link_exists_ignorecase("My_Link")
|
||||
|
||||
// Get a link as a Path object
|
||||
path.link_get("some_link")!
|
||||
|
||||
// Check if a directory exists
|
||||
path.dir_exists("my_directory")
|
||||
|
||||
// Get a directory as a Path object
|
||||
path.dir_get("another_directory")!
|
||||
|
||||
// Get a directory, create if it doesn't exist
|
||||
path.dir_get_new("new_directory")!
|
||||
```
|
||||
This provides a safe and convenient API for all file system operations in V.
|
||||
@@ -2,14 +2,60 @@
|
||||
|
||||
## basic regex utilities
|
||||
|
||||
- .
|
||||
### escape_regex_chars
|
||||
|
||||
Escapes special regex metacharacters in a string to make it safe for use in regex patterns.
|
||||
|
||||
```v
|
||||
import incubaid.herolib.core.texttools.regext
|
||||
|
||||
escaped := regext.escape_regex_chars("file.txt")
|
||||
// Result: "file\.txt"
|
||||
|
||||
// Use in regex patterns:
|
||||
safe_search := regext.escape_regex_chars("[test]")
|
||||
// Result: "\[test\]"
|
||||
```
|
||||
|
||||
**Special characters escaped**: `. ^ $ * + ? { } [ ] \ | ( )`
|
||||
|
||||
### wildcard_to_regex
|
||||
|
||||
Converts simple wildcard patterns to regex patterns for flexible file matching.
|
||||
|
||||
**Conversion rules:**
|
||||
- `*` becomes `.*` (matches any sequence of characters)
|
||||
- Literal text is escaped (special regex characters are escaped)
|
||||
- Patterns without `*` match as substrings anywhere
|
||||
|
||||
```v
|
||||
import incubaid.herolib.core.texttools.regext
|
||||
|
||||
// Match files ending with .txt
|
||||
pattern1 := regext.wildcard_to_regex("*.txt")
|
||||
// Result: ".*\.txt"
|
||||
|
||||
// Match anything starting with test
|
||||
pattern2 := regext.wildcard_to_regex("test*")
|
||||
// Result: "test.*"
|
||||
|
||||
// Match anything containing 'config' (no wildcard)
|
||||
pattern3 := regext.wildcard_to_regex("config")
|
||||
// Result: ".*config.*"
|
||||
|
||||
// Complex pattern with special chars
|
||||
pattern4 := regext.wildcard_to_regex("src/*.v")
|
||||
// Result: "src/.*\.v"
|
||||
|
||||
// Multiple wildcards
|
||||
pattern5 := regext.wildcard_to_regex("*test*file*")
|
||||
// Result: ".*test.*file.*"
|
||||
```
|
||||
|
||||
## regex replacer
|
||||
|
||||
Tool to flexibly replace elements in file(s) or text.
|
||||
|
||||
next example does it for
|
||||
|
||||
```golang
|
||||
import incubaid.herolib.core.texttools.regext
|
||||
text := '
|
||||
@@ -52,6 +98,10 @@ ri.replace_in_dir(path:"/tmp/mypath",extensions:["md"])!
|
||||
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
Run regex conversion tests:
|
||||
|
||||
```bash
|
||||
vtest ~/code/github/incubaid/herolib/lib/core/texttools/regext/regex_convert_test.v
|
||||
```
|
||||
|
||||
|
||||
|
||||
58
lib/core/texttools/regext/regex_convert.v
Normal file
58
lib/core/texttools/regext/regex_convert.v
Normal file
@@ -0,0 +1,58 @@
|
||||
module regext
|
||||
|
||||
// escape_regex_chars returns `s` with every regex metacharacter prefixed by a
// backslash, so the result can be embedded in a regex pattern as literal text.
// Escaped characters: . ^ $ * + ? { } [ ] \ | ( )
// Examples:
//   "file.txt" -> "file\.txt"
//   "a[123]"   -> "a\[123\]"
pub fn escape_regex_chars(s string) string {
	// Raw string: no escapes or interpolation, each byte is one metacharacter.
	metachars := r'.^$*+?{}[]\|()'.bytes()
	mut escaped := ''
	// Walk the input byte by byte; all metacharacters are single-byte ASCII.
	for b in s {
		if b in metachars {
			escaped += '\\'
		}
		escaped += b.ascii_str()
	}
	return escaped
}
|
||||
|
||||
// wildcard_to_regex translates a simple wildcard pattern into a regex pattern string.
// Rules:
// - every `*` is replaced by `.*`
// - text between wildcards is passed through escape_regex_chars
// - a pattern containing no `*` is wrapped as `.*<escaped>.*`
//
// Examples:
//   "*.txt"    -> ".*\.txt"
//   "test*"    -> "test.*"
//   "config"   -> ".*config.*"
//   "file.log" -> ".*file\.log.*"
pub fn wildcard_to_regex(pattern string) string {
	if !pattern.contains('*') {
		// No wildcard at all: surround the escaped literal with `.*` on both sides.
		return '.*' + escape_regex_chars(pattern) + '.*'
	}

	mut out := ''
	mut literal := ''
	for b in pattern {
		if b == `*` {
			// Flush the literal run collected so far, then emit the wildcard.
			out += escape_regex_chars(literal) + '.*'
			literal = ''
		} else {
			literal += b.ascii_str()
		}
	}
	// Trailing literal after the last `*` (empty when the pattern ends with `*`).
	out += escape_regex_chars(literal)
	return out
}
|
||||
88
lib/core/texttools/regext/regex_convert_test.v
Normal file
88
lib/core/texttools/regext/regex_convert_test.v
Normal file
@@ -0,0 +1,88 @@
|
||||
module regext
|
||||
|
||||
// Each regex metacharacter must come back prefixed with exactly one backslash.
fn test_escape_regex_chars_special_chars() {
	for ch in ['.', '^', '$', '*', '+', '?', '{', '}', '[', ']', '\\', '|', '(', ')'] {
		assert escape_regex_chars(ch) == '\\' + ch
	}
}
|
||||
|
||||
// Strings without metacharacters must be returned unchanged.
fn test_escape_regex_chars_normal_chars() {
	for plain in ['a', '1', 'hello', 'test_123'] {
		assert escape_regex_chars(plain) == plain
	}
}
|
||||
|
||||
// Only the metacharacters inside a mixed string are escaped.
fn test_escape_regex_chars_mixed() {
	cases := {
		'file.txt': 'file\\.txt'
		'test[1]':  'test\\[1\\]'
		'a.b*c':    'a\\.b\\*c'
	}
	for input, expected in cases {
		assert escape_regex_chars(input) == expected
	}
}
|
||||
|
||||
// The empty string escapes to the empty string.
fn test_escape_regex_chars_empty() {
	got := escape_regex_chars('')
	assert got == ''
}
|
||||
|
||||
// A pattern without `*` is wrapped in `.*` on both sides.
fn test_wildcard_to_regex_no_wildcard() {
	cases := {
		'config':   '.*config.*'
		'test.txt': '.*test\\.txt.*'
		'hello':    '.*hello.*'
	}
	for pattern, expected in cases {
		assert wildcard_to_regex(pattern) == expected
	}
}
|
||||
|
||||
// A leading `*` becomes a leading `.*`; the extension dot is escaped.
fn test_wildcard_to_regex_start_wildcard() {
	cases := {
		'*.txt': '.*\\.txt'
		'*.v':   '.*\\.v'
		'*.log': '.*\\.log'
	}
	for pattern, expected in cases {
		assert wildcard_to_regex(pattern) == expected
	}
}
|
||||
|
||||
// A trailing `*` becomes a trailing `.*`.
fn test_wildcard_to_regex_end_wildcard() {
	cases := {
		'test*':  'test.*'
		'log*':   'log.*'
		'file_*': 'file_.*'
	}
	for pattern, expected in cases {
		assert wildcard_to_regex(pattern) == expected
	}
}
|
||||
|
||||
// A `*` between two literals becomes `.*` between the escaped literals.
fn test_wildcard_to_regex_middle_wildcard() {
	cases := {
		'test*file':  'test.*file'
		'src*main.v': 'src.*main\\.v'
	}
	for pattern, expected in cases {
		assert wildcard_to_regex(pattern) == expected
	}
}
|
||||
|
||||
// Every `*` in a pattern is translated independently.
fn test_wildcard_to_regex_multiple_wildcards() {
	cases := {
		'*test*':  '.*test.*'
		'*src*.v': '.*src.*\\.v'
		'*a*b*c*': '.*a.*b.*c.*'
	}
	for pattern, expected in cases {
		assert wildcard_to_regex(pattern) == expected
	}
}
|
||||
|
||||
// Patterns made solely of `*` produce one `.*` per wildcard.
fn test_wildcard_to_regex_only_wildcard() {
	cases := {
		'*':  '.*'
		'**': '.*.*'
	}
	for pattern, expected in cases {
		assert wildcard_to_regex(pattern) == expected
	}
}
|
||||
|
||||
// Regex metacharacters inside a wildcard-free pattern are escaped before wrapping.
fn test_wildcard_to_regex_special_chars_in_pattern() {
	cases := {
		'[test]':    '.*\\[test\\].*'
		'test.file': '.*test\\.file.*'
		'(test)':    '.*\\(test\\).*'
	}
	for pattern, expected in cases {
		assert wildcard_to_regex(pattern) == expected
	}
}
|
||||
|
||||
// Empty, single-character and single-metacharacter patterns.
fn test_wildcard_to_regex_edge_cases() {
	cases := {
		'':  '.*.*'
		'a': '.*a.*'
		'.': '.*\\..*'
	}
	for pattern, expected in cases {
		assert wildcard_to_regex(pattern) == expected
	}
}
|
||||
Reference in New Issue
Block a user