the base
This commit is contained in:
77
lib/core/texttools/array.v
Normal file
77
lib/core/texttools/array.v
Normal file
@@ -0,0 +1,77 @@
|
||||
module texttools
|
||||
|
||||
// a comma or \n separated list gets converted to a list of strings .
|
||||
//'..' also gets converted to without ''
|
||||
// check also splitsmart which is more intelligent
|
||||
// Convert a comma- or newline-separated list into an array of strings.
// Entries quoted with '..' lose the surrounding quotes; entries that are
// empty after stripping quotes are dropped entirely.
// See also split_smart, which is more intelligent.
pub fn to_array(r string) []string {
	normalized := dedent(r).replace(',', '\n')
	mut items := []string{}
	for raw in normalized.split_into_lines() {
		trimmed := raw.trim_space()
		// skip entries that contain nothing but quotes/whitespace
		if trimmed.trim('\'"') == '' {
			continue
		}
		items << trimmed.trim("'")
	}
	return items
}
|
||||
|
||||
// Same as to_array, but every entry is parsed as an int.
pub fn to_array_int(r string) []int {
	return to_array(r).map(it.int())
}
|
||||
|
||||
// intelligent way how to map a line to a map
|
||||
//```
|
||||
// r:=texttools.to_map("name,-,-,-,-,pid,-,-,-,-,path",
|
||||
// "root 304 0.0 0.0 408185328 1360 ?? S 16Dec23 0:34.06 /usr/sbin/distnoted\n \n")
|
||||
// assert {'name': 'root', 'pid': '1360', 'path': '/usr/sbin/distnoted'} == r
|
||||
|
||||
// r2:=texttools.to_map("name,-,-,-,-,pid,-,-,-,-,path",
|
||||
// "root 304 0.0 0.0 408185328 1360 ?? S 16Dec23 0:34.06 /usr/sbin/distnoted anotherone anotherone\n \n")
|
||||
// assert {'name': 'root', 'pid': '1360', 'path': '/usr/sbin/distnoted'} == r2
|
||||
|
||||
// r3:=texttools.to_map("name,-,-,-,-,pid,-,-,-,-,path",
|
||||
// "root 304 0.0 0.0 408185328 1360 ?? S 16Dec23 0:34.06 \n \n")
|
||||
// assert {'name': 'root', 'pid': '1360', 'path': ''} == r3
|
||||
//```
|
||||
// Map the columns of `line` onto the names given in `mapstring`.
// `mapstring` is a smart-split list of column names where '-' means
// "ignore this column"; the value of the matching column in `line` is
// stored under each non-'-' name. Missing columns yield ''.
// NOTE(review): `delimiter_` is accepted but never used — both split_smart
// calls pass '' (default splitting). Confirm whether it should be forwarded.
pub fn to_map(mapstring string, line string, delimiter_ string) map[string]string {
	mapstring_array := split_smart(mapstring, '')
	mut line_array := split_smart(line, '')
	mut result := map[string]string{}
	for x in 0 .. mapstring_array.len {
		mapstring_item := mapstring_array[x] or { '' }
		if mapstring_item != '-' {
			// out-of-range columns fall back to '' (line shorter than mapstring)
			result[mapstring_item] = line_array[x] or { '' }
		}
	}
	return result
}
|
||||
|
||||
// smart way how to get useful info out of text block
|
||||
// ```
|
||||
// t:='
|
||||
// _cmiodalassistants 304 0.0 0.0 408185328 1360 ?? S 16Dec23 0:34.06 /usr/sbin/distnoted agent
|
||||
// _locationd 281 0.0 0.0 408185328 1344 ?? S 16Dec23 0:35.80 /usr/sbin/distnoted agent
|
||||
|
||||
// root 275 0.0 0.0 408311904 7296 ?? Ss 16Dec23 2:00.56 /usr/libexec/storagekitd
|
||||
// _coreaudiod 268 0.0 0.0 408185328 1344 ?? S 16Dec23 0:35.49 /usr/sbin/distnoted agent
|
||||
// '
|
||||
|
||||
// r4:=texttools.to_list_map("name,-,-,-,-,pid,-,-,-,-,path",t)
|
||||
// assert [{'name': '_cmiodalassistants', 'pid': '1360', 'path': '/usr/sbin/distnoted'},
|
||||
// {'name': '_locationd', 'pid': '1344', 'path': '/usr/sbin/distnoted'},
|
||||
// {'name': 'root', 'pid': '7296', 'path': '/usr/libexec/storagekitd'},
|
||||
// {'name': '_coreaudiod', 'pid': '1344', 'path': '/usr/sbin/distnoted'}] == r4
|
||||
// ```
|
||||
// Apply to_map to every non-empty line of a text block, returning one map
// per line. Empty lines are removed and the block is dedented first.
pub fn to_list_map(mapstring string, txt_ string, delimiter_ string) []map[string]string {
	cleaned := dedent(remove_empty_lines(txt_))
	return cleaned.split_into_lines().map(to_map(mapstring, it, delimiter_))
}
|
||||
103
lib/core/texttools/clean.v
Normal file
103
lib/core/texttools/clean.v
Normal file
@@ -0,0 +1,103 @@
|
||||
// make sure that the names are always normalized so its easy to find them back
|
||||
module texttools
|
||||
|
||||
// Characters stripped by name_clean: anything in this set is dropped from names.
const ignore_for_name = '\\/[]()?!@#$%^&*<>:;{}|~'
|
||||
|
||||
// Whitelist used by ascii_clean: only characters present here survive.
const keep_ascii = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()_-+={}[]"\':;?/>.<,|\\~` '
|
||||
|
||||
// Remove every character listed in ignore_for_name from r,
// keeping all other characters in their original order.
pub fn name_clean(r string) string {
	mut kept := ''
	for ch in r {
		c := ch.ascii_str()
		if ignore_for_name.contains(c) {
			continue
		}
		kept += c
	}
	return kept
}
|
||||
|
||||
// remove all chars which are not ascii
|
||||
// Keep only the characters that appear in the keep_ascii whitelist;
// everything else (non-printable, non-ASCII) is dropped.
pub fn ascii_clean(r string) string {
	mut kept := ''
	for ch in r {
		c := ch.ascii_str()
		if !keep_ascii.contains(c) {
			continue
		}
		kept += c
	}
	return kept
}
|
||||
|
||||
// https://en.wikipedia.org/wiki/Unicode#Standardized_subsets
|
||||
|
||||
// Drop every line that is empty or whitespace-only and rejoin the rest.
pub fn remove_empty_lines(text string) string {
	kept := text.split_into_lines().filter(it.trim_space() != '')
	return kept.join('\n')
}
|
||||
|
||||
// Collapse runs of empty (whitespace-only) lines into a single empty line.
// Leading empty lines are removed entirely, and one trailing empty line
// (if present) is stripped from the result.
pub fn remove_double_lines(text string) string {
	mut lines := []string{}
	mut last_was_empty := true // true initially so leading blanks are dropped
	for line in text.split_into_lines() {
		if line.trim_space() == '' {
			if !last_was_empty {
				lines << ''
				last_was_empty = true
			}
			continue
		}
		last_was_empty = false
		lines << line
	}
	if lines.len > 0 && lines.last() == '' {
		lines.pop()
	}
	return lines.join('\n')
}
|
||||
|
||||
// remove ```?? ``` , can be over multiple lines .
|
||||
// also removes double lines
|
||||
// Remove fenced blocks (``` / ''' / """) whose interior is empty or
// whitespace-only; non-empty blocks are kept verbatim.
// Can span multiple lines; the result also has double empty lines collapsed.
pub fn remove_empty_js_blocks(text string) string {
	mut out := []string{}
	// first fence line of the block currently being captured
	mut block_capture_pre := ''
	// lines seen between the opening and (eventual) closing fence
	mut block_capture_inside := []string{}
	// true while we are inside an open fence
	mut foundblock := false
	for l in text.split_into_lines() {
		lt := l.trim_space()
		if lt.starts_with('```') || lt.starts_with("'''") || lt.starts_with('"""') {
			if foundblock {
				// closing fence: only emit the block if its interior has content
				if block_capture_inside.filter(it.trim_space() != '').len > 0 {
					// now we know the block inside is not empty
					out << block_capture_pre
					out << block_capture_inside
					out << l // the last line
				}
				foundblock = false
				block_capture_pre = ''
				block_capture_inside = []string{}
				continue
			} else {
				// opening fence: start capturing, emit nothing yet
				foundblock = true
				block_capture_pre = l
				continue
			}
		}
		if foundblock {
			block_capture_inside << l
		} else {
			out << l
		}
	}
	// NOTE(review): a fence left open at EOF silently discards its captured
	// lines — confirm that is intended.
	if out.len > 0 && out.last() == '' {
		out.pop()
	}
	return remove_double_lines(out.join('\n'))
}
|
||||
49
lib/core/texttools/clean_test.v
Normal file
49
lib/core/texttools/clean_test.v
Normal file
@@ -0,0 +1,49 @@
|
||||
module texttools
|
||||
|
||||
// Verifies that remove_empty_js_blocks drops fenced blocks with empty
// interiors (both ''' and ``` fences) while keeping a non-empty ''' block.
// NOTE(review): leading whitespace inside the multiline literals was lost in
// this rendering; dedent() is applied before comparison, which masks that.
fn test_clean1() {
	mut text := "
'''js

'''
something
yes

else

```js

```

'''js

inside
'''


"

	mut result := "
something
yes

else

'''js

inside
'''
"

	text = dedent(text)
	result = dedent(result)

	text2 := remove_double_lines(remove_empty_js_blocks(text))

	print('---')
	print(text2)
	print('---')
	print(result)
	print('---')

	assert text2.trim_space() == result.trim_space()
}
|
||||
106
lib/core/texttools/cmdline_parser.v
Normal file
106
lib/core/texttools/cmdline_parser.v
Normal file
@@ -0,0 +1,106 @@
|
||||
module texttools
|
||||
|
||||
// Parser states for command-line argument scanning.
// NOTE(review): this enum is not referenced anywhere in this file — the
// parser below tracks its quote state in a plain string; confirm whether
// it is still needed.
enum TextArgsStatus {
	start
	quote // quote found means value in between ''
}
|
||||
|
||||
// remove all '..' and "..." from a text, so everything in between the quotes
|
||||
// Remove all '..' and "..." spans from a text, i.e. everything in between
// the quotes including the quote characters themselves.
// A quote preceded by a backslash does not toggle the quote state.
// NOTE(review): opening/closing quote characters are not required to match
// ("a'b" would close a span opened by '"') — confirm that is acceptable.
pub fn text_remove_quotes(text string) string {
	mut out := ''
	mut inquote := false
	mut ch := ''
	mut char_previous := ''
	for i in 0 .. text.len {
		ch = text[i..i + 1]
		if ch in ['"', "'"] {
			if char_previous != '\\' {
				// unescaped quote: toggle state, drop the quote char itself
				inquote = !inquote
				char_previous = ch
				continue
			}
		}
		if !inquote {
			// outside quotes: keep the character unmodified
			out += ch
		}
		char_previous = ch
	}
	return out
}
|
||||
|
||||
// test if an element off the array exists in the text but ignore quotes
|
||||
// Report whether any element of items occurs in text once all quoted
// spans ('..' / "..") have been stripped out.
pub fn check_exists_outside_quotes(text string, items []string) bool {
	stripped := text_remove_quotes(text)
	return items.any(stripped.contains(it))
}
|
||||
|
||||
// convert text string to arguments
|
||||
// \n supported but will be \\n and only supported within '' or ""
|
||||
// \' not modified, same for \"
|
||||
// Convert a command-line text string to an argument list.
// Quoted arguments ('..' or "..") are kept as one argument without their
// quotes; escaped quotes (\' and \") are left unmodified inside arguments.
// When a shell redirection/pipe character (<, >, |) appears outside quotes,
// the text is split only once on the first space: [command, rest-of-line].
// Returns an error if such text contains no space to split on.
pub fn cmd_line_args_parser(text string) ![]string {
	mut res := []string{}
	// current open quote char ('' when not inside a quote)
	mut quote := ''
	mut char_previous := ''
	mut arg := ''
	mut ch := ''

	if check_exists_outside_quotes(text, ['<', '>', '|']) {
		if !(text.contains(' ')) {
			return error("cannot convert text '${text}' to args because no space to split")
		}
		splitted := text.split_nth(' ', 2)
		return [splitted[0], splitted[1]]
	}
	for i in 0 .. text.len {
		ch = text[i..i + 1]
		// skip spaces which are not escaped
		if ch == ' ' && arg == '' {
			continue
		}

		if ch in ['"', "'"] {
			if char_previous != '\\' {
				if quote == '' {
					// beginning of quote need to close off previous arg
					if arg != '' {
						res << arg.trim(' ')
						arg = ''
					}
					quote = ch
					char_previous = ch
					continue
				} else {
					// end of quote
					// NOTE(review): any unescaped quote char ends the span,
					// even if it differs from the opening one — confirm.
					quote = ''
					res << arg.trim(' ')
					arg = ''
					char_previous = ch
					continue
				}
			}
		}

		if quote != '' {
			// unmodified add, because we are in quote
			arg += ch
		} else {
			// unquoted space terminates the current argument
			if ch == ' ' && arg != '' {
				res << arg.trim(' ')
				arg = ''
			} else {
				arg += ch
			}
		}
		char_previous = ch
	}
	// flush the trailing argument, if any
	if arg != '' {
		res << arg.trim(' ')
	}
	return res
}
|
||||
38
lib/core/texttools/cmdline_parser_test.v
Normal file
38
lib/core/texttools/cmdline_parser_test.v
Normal file
@@ -0,0 +1,38 @@
|
||||
module texttools
|
||||
|
||||
// how to process command lines
|
||||
// how to process command lines
// Exercises cmd_line_args_parser (quoting, both quote styles, leading
// whitespace, redirection fallback), text_remove_quotes and
// check_exists_outside_quotes.
fn test_cmdline_args() {
	mut r := []string{}
	r = cmd_line_args_parser("'aa bb' ' cc dd' one -two") or { panic(err) }
	assert r == ['aa bb', 'cc dd', 'one', '-two']
	r = cmd_line_args_parser("'\taa bb' ' cc dd' one -two") or { panic(err) }
	assert r == ['\taa bb', 'cc dd', 'one', '-two']
	// now spaces
	r = cmd_line_args_parser(" '\taa bb' ' cc dd' one -two ") or { panic(err) }
	assert r == ['\taa bb', 'cc dd', 'one', '-two']
	// now other quote
	r = cmd_line_args_parser('"aa bb" " cc dd" one -two') or { panic(err) }
	assert r == ['aa bb', 'cc dd', 'one', '-two']
	r = cmd_line_args_parser('"aa bb" \' cc dd\' one -two') or { panic(err) }
	assert r == ['aa bb', 'cc dd', 'one', '-two']

	r = cmd_line_args_parser('find . /tmp') or { panic(err) }
	assert r == ['find', '.', '/tmp']

	r = cmd_line_args_parser("bash -c 'find /'") or { panic(err) }
	assert r == ['bash', '-c', 'find /']

	mut r2 := string('')
	r2 = text_remove_quotes('echo "hi >" > /tmp/a.txt')
	assert r2 == 'echo > /tmp/a.txt'
	r2 = text_remove_quotes("echo 'hi >' > /tmp/a.txt")
	assert r2 == 'echo > /tmp/a.txt'
	r2 = text_remove_quotes("echo 'hi >' /tmp/a.txt")
	assert r2 == 'echo /tmp/a.txt'
	assert check_exists_outside_quotes("echo 'hi >' > /tmp/a.txt", ['<', '>', '|'])
	assert check_exists_outside_quotes("echo 'hi ' /tmp/a.txt |", ['<', '>', '|'])
	assert !check_exists_outside_quotes("echo 'hi >' /tmp/a.txt", ['<', '>', '|'])

	// redirection outside quotes: split once on first space
	r = cmd_line_args_parser('echo "hi" > /tmp/a.txt') or { panic(err) }
	assert r == ['echo', '"hi" > /tmp/a.txt']
}
|
||||
13
lib/core/texttools/expand.v
Normal file
13
lib/core/texttools/expand.v
Normal file
@@ -0,0 +1,13 @@
|
||||
module texttools
|
||||
|
||||
// texttools.expand('|', 20, ' ')
|
||||
// Pad txt_ with expand_with until the result is exactly l characters long;
// input longer than l is truncated to l. Example: expand('|', 20, ' ').
//
// The original appended expand_with l times unconditionally before
// truncating; we only append until the target length is reached, which is
// equivalent (the truncated prefix is identical) but avoids needless
// string reallocations. An empty expand_with is guarded so the loop cannot
// spin forever; in that case only the truncation applies, as before.
pub fn expand(txt_ string, l int, expand_with string) string {
	mut txt := txt_
	if expand_with.len > 0 {
		for txt.len < l {
			txt += expand_with
		}
	}
	if txt.len > l {
		txt = txt[0..l]
	}
	return txt
}
|
||||
46
lib/core/texttools/indent.v
Normal file
46
lib/core/texttools/indent.v
Normal file
@@ -0,0 +1,46 @@
|
||||
module texttools
|
||||
|
||||
// Prefix every line of text with prefix; the result always ends with
// a trailing newline.
pub fn indent(text string, prefix string) string {
	prefixed := text.split_into_lines().map(prefix + it)
	mut out := prefixed.join_lines()
	if !out.ends_with('\n') {
		out += '\n'
	}
	return out
}
|
||||
|
||||
// remove all leading spaces at same level
|
||||
// Remove the common leading whitespace shared by all non-empty lines
// (tabs are expanded to 4 spaces first). Whitespace-only lines become ''.
pub fn dedent(text string) string {
	// smallest indent seen so far; 999 acts as "not yet found" sentinel
	mut pre := 999
	mut pre_current := 0
	mut res := []string{}
	text_lines := text.split_into_lines()

	// pass 1: find the minimal indentation over all non-empty lines
	for line2 in text_lines {
		if line2.trim_space() == '' {
			continue
		}
		line2_expanded_tab := line2.replace('\t', '    ')
		line2_expanded_tab_trimmed := line2_expanded_tab.trim_left(' ')
		pre_current = line2_expanded_tab.len - line2_expanded_tab_trimmed.len
		if pre > pre_current {
			pre = pre_current
		}
	}
	// now remove the prefix length
	// pass 2: strip exactly `pre` columns from every non-empty line
	for line2 in text_lines {
		line2_expanded_tab := line2.replace('\t', '    ') // important to deal with tabs
		line2_expanded_tab_trimmed := line2.trim_space()

		if line2_expanded_tab_trimmed == '' {
			res << ''
		} else {
			// safe: pre is <= this line's own indentation (pass 1 minimum)
			res << line2_expanded_tab[pre..]
		}
	}
	final_result := res.join_lines()
	return final_result
}
|
||||
15
lib/core/texttools/indent_test.v
Normal file
15
lib/core/texttools/indent_test.v
Normal file
@@ -0,0 +1,15 @@
|
||||
module texttools
|
||||
|
||||
// Checks dedent on a mixed-indentation block by asserting the exact
// resulting length.
// NOTE(review): the leading whitespace inside this multiline literal was
// lost in this rendering of the file — the `text.len == 20` expectation
// depends on that lost indentation; restore from the original source.
fn test_dedent() {
	mut text := '
a
b

c
d


'
	text = dedent(text)
	assert text.len == 20
}
|
||||
31
lib/core/texttools/is_tools.v
Normal file
31
lib/core/texttools/is_tools.v
Normal file
@@ -0,0 +1,31 @@
|
||||
module texttools
|
||||
|
||||
// Report whether text consists solely of ASCII digits 0-9.
// An empty string yields true (no offending byte found), matching the
// original behaviour.
pub fn is_int(text string) bool {
	for cha in text {
		if !cha.is_digit() {
			return false
		}
	}
	return true
}
|
||||
|
||||
// Report whether text consists solely of uppercase ASCII letters A-Z.
// An empty string yields true, matching the original behaviour.
pub fn is_upper_text(text string) bool {
	for cha in text {
		if !cha.is_capital() {
			return false
		}
	}
	return true
}
|
||||
|
||||
// fn sid_check(sid string) bool {
|
||||
// if sid.len > 6 || sid.len < 2 {
|
||||
// return false
|
||||
// }
|
||||
// for cha in sid {
|
||||
// if (cha < 48 || cha > 57) && (cha < 97 || cha > 122) {
|
||||
// return false
|
||||
// }
|
||||
// }
|
||||
// return true
|
||||
// }
|
||||
18
lib/core/texttools/is_tools_test.v
Normal file
18
lib/core/texttools/is_tools_test.v
Normal file
@@ -0,0 +1,18 @@
|
||||
module texttools
|
||||
|
||||
// Sanity checks for is_int (digits only) and is_upper_text (A-Z only).
fn test_istest1() {
	assert is_int('0000')
	assert is_int('999')
	assert is_int('0')
	assert is_int('9')
	assert is_int('00 00') == false
	assert is_int('00a00') == false

	assert is_upper_text('A')
	assert is_upper_text('Z')
	assert is_upper_text('AAZZZZAAA')
	assert is_upper_text('z') == false
	assert is_upper_text('AAZZZZaAA') == false
	assert is_upper_text('AAZZZZ?AA') == false
	assert is_upper_text("AAZZZZ'AA") == false
}
|
||||
163
lib/core/texttools/multiline.v
Normal file
163
lib/core/texttools/multiline.v
Normal file
@@ -0,0 +1,163 @@
|
||||
module texttools
|
||||
|
||||
// States of the line scanner used by multiline_to_single.
pub enum MultiLineStatus {
	start // outside any multiline value or comment block
	multiline // inside a quoted multiline value (key:'... across lines)
	comment // inside a multi-line comment block
}
|
||||
|
||||
// converst a multiline to a single line, keeping all relevant information
|
||||
// empty lines removed (unless if in parameter)
|
||||
// commented lines removed as well (starts with // and #)
|
||||
// multiline to 'line1\\nline2\\n'
|
||||
// dedent also done before putting in '...'
|
||||
// tabs also replaced to 4x space
|
||||
// converst a multiline to a single line, keeping all relevant information
// empty lines removed (unless if in parameter)
// commented lines removed as well (starts with // and #)
// multiline to 'line1\\nline2\\n'
// dedent also done before putting in '...'
// tabs also replaced to 4x space
pub fn multiline_to_single(text string) !string {
	// first line of the multiline value currently being collected
	mut multiline_first := ''
	// accumulated body of the multiline value
	mut multiline := ''
	// mut comment_first:=""
	// accumulated lines of the comment block currently being collected
	mut comment := []string{}
	mut line2 := ''
	mut res := []string{}
	mut state := MultiLineStatus.start
	for line in text.split_into_lines() {
		line2 = line
		line2 = line2.replace('\t', '    ')
		mut line2_trimmed := line2.trim_space()
		if state == .multiline {
			if multiline_end_check(line2_trimmed) {
				// means we are out of multiline
				res << multiline_end(multiline_first, multiline)
				multiline_first = ''
				multiline = ''
				state = .start
			} else {
				multiline += '${line2}\n'
			}
			continue
		}
		if state == .comment {
			if comment_end_check(line2_trimmed) {
				// means we are out of multiline
				res << comment_end(comment)
				comment = []string{}
				state = .start
				// no continue here: the line that ended the comment is
				// reprocessed below as a normal .start line
			} else {
				comment << line2_trimmed
				continue
			}
		}
		if state == .start {
			if line2_trimmed == '' {
				continue
			}
			// deal with comments
			mut commentpart := ''
			line2_trimmed, commentpart = comment_start_check(mut res, line2_trimmed)
			if commentpart.len > 0 {
				state = .comment
				comment = []string{}
				comment << commentpart
				continue
			}
			if multiline_start_check(line2_trimmed) {
				// means is multiline
				state = .multiline
				multiline_first = line2_trimmed
				continue
			}
			res << line2_trimmed.trim('\n ')
		}
	}
	// last one: flush whatever block was still open at end of input
	if state == .multiline {
		res << multiline_end(multiline_first, multiline)
	}
	if state == .comment {
		res << comment_end(comment)
	}
	return res.join(' ')
}
|
||||
|
||||
// Flatten a collected multiline value into "name:'body'" form:
// the body is dedented, newlines become literal \\n, and single quotes
// inside the body become double quotes so the '..' wrapper stays valid.
fn multiline_end(multiline_first string, multiline string) string {
	mut multiline2 := multiline

	multiline2 = dedent(multiline2)
	multiline2 = multiline2.replace('\n', '\\\\n')
	multiline2 = multiline2.replace("'", '"')

	// anything that followed the colon on the opening line (after the quote)
	firstline_content := multiline_first.all_after_first(':').trim_left('" \'')
	name := multiline_first.all_before(':').trim_space()

	if firstline_content.trim_space() != '' {
		// opening line carried content of its own — keep it ahead of the body
		// NOTE(review): this embeds the raw opening line (including name and
		// quote char) in the value; confirm that is the intended format.
		multiline2 = "${name}:'${multiline_first}\\n${multiline2}'"
	} else {
		multiline2 = "${name}:'${multiline2}'"
	}
	return multiline2
}
|
||||
|
||||
// check that there is multiline start
|
||||
// Report whether a trimmed line opens a multiline value, i.e. whether it
// ends with a quote opener right after a colon (":'", ':"', ':"""', ":'''").
fn multiline_start_check(text_ string) bool {
	if text_ == '' {
		return false
	}
	// each replace removes one space after every colon, so applying it three
	// times tolerates up to three spaces between ':' and the quote
	normalized := text_.replace(': ', ':').replace(': ', ':').replace(': ', ':')
	return [":'", ':"', ':"""', ":'''"].any(normalized.ends_with(it))
}
|
||||
|
||||
// Report whether a trimmed line is a bare closing quote for a
// multiline value: ', ", """ or '''.
fn multiline_end_check(text string) bool {
	return text in ["'", '"', '"""', "'''"]
}
|
||||
|
||||
// return all before comment and if comment
|
||||
// return trimmedtext,commentpart
|
||||
// return all before comment and if comment
// return trimmedtext,commentpart
// A '<!--' line starts an HTML-style comment block: everything after the
// marker is returned as the comment part. A trailing '//' comment that is
// not part of a quoted value is either appended to res (when code precedes
// it) or returned as a comment-block opener (when the line is comment-only).
fn comment_start_check(mut res []string, text_ string) (string, string) {
	mut text := text_
	if text.starts_with('<!--') {
		text = text.replace('<!--', '').trim_space()
		return '', text
	}
	if !(text.contains('//')) {
		return text, ''
	}
	mightbecomment := text.all_after_last('//')
	if !(mightbecomment.contains("'")) {
		// means we found a comment at end of line, and is not part of string statement (value)
		text = text.all_before_last('//').trim_space()
		if text.len > 0 {
			// inline comment after code: emit it immediately in //...-/ form
			res << '//${mightbecomment}-/'
			return text, ''
		} else {
			// the whole line was a comment: let the caller open a comment block
			return '', mightbecomment
		}
	}
	// '//' sits inside a quoted value (e.g. an URL) — not a comment
	return text, ''
}
|
||||
|
||||
// Report whether a line terminates the current comment block: either an
// explicit HTML closer ('-->') or any line that is no longer a '//' comment.
fn comment_end_check(text string) bool {
	return text.ends_with('-->') || !text.starts_with('//')
}
|
||||
|
||||
// Flatten a collected comment block into single-line '//...-/' form,
// trimming comment markers from each line and joining them with literal \\n.
fn comment_end(comment []string) string {
	joined := comment.map(it.trim(' <->/\n')).join('\\\\n')
	return '//${joined}-/'
}
|
||||
205
lib/core/texttools/mutliline_test.v
Normal file
205
lib/core/texttools/mutliline_test.v
Normal file
@@ -0,0 +1,205 @@
|
||||
module texttools
|
||||
|
||||
// Compare an expected single-line result against the parser output.
// Literal \n sequences in the expectation are doubled to \\n (matching what
// multiline_to_single emits) and both sides are compared whitespace-trimmed.
// Panics with both values when they differ — the original panicked with a
// fixed message, which made failures impossible to diagnose.
fn check_result(tocheck_ string, output string) {
	mut tocheck := tocheck_
	tocheck = tocheck.replace('\\n', '\\\\n')
	tocheck = tocheck.trim_space()
	if tocheck == output.trim_space() {
		return
	}
	panic('required result not correct.\nexpected: ${tocheck}\ngot:      ${output.trim_space()}')
}
|
||||
|
||||
// Full round-trip: a block with nested quotes, lists and an embedded code
// fence inside a '..' multiline value is flattened to one line.
// NOTE(review): leading whitespace inside the multiline literal was lost in
// this rendering; dedent() inside multiline_to_single masks most of that.
fn test_multiline1() {
	mut text := "
id:a1
name:'need to do something 1'
description:'
## markdown works in it

description can be multiline
lets see what happens

'yes, this needs to work too'

- a
- something else
- 'something

### subtitle

```python
#even code block in the other block, crazy parsing for sure
def test():

```
'
"
	text = multiline_to_single(text) or { panic(err) }

	required_result := 'id:a1 name:\'need to do something 1\' description:\'## markdown works in it\\n\\ndescription can be multiline\\nlets see what happens\\n\\n"yes, this needs to work too"\\n\\n- a\\n- something else\\n- "something\\n\\n### subtitle\\n\\n```python\\n#even code block in the other block, crazy parsing for sure\\ndef test():\\n\\n```\''

	check_result(required_result, text)
}
|
||||
|
||||
// Multiline value opened with " and closed with ' — the closer only needs
// to be a bare quote line, not the same quote character.
fn test_multiline2() {
	mut text := '
id:a1
name:\'need to do something 1\'
description:"
## markdown works in it

description can be multiline
lets see what happens
\'
'
	text = multiline_to_single(text) or { panic(err) }

	required_result := "id:a1 name:'need to do something 1' description:'## markdown works in it\\n\\ndescription can be multiline\\nlets see what happens'"

	check_result(required_result, text)
}
|
||||
|
||||
// Multiline value opened with """ (with a space after the colon) and
// closed with a bare ' line.
fn test_multiline3() {
	mut text := '
id:a1
name:\'need to do something 1\'
description: """
## markdown works in it

description can be multiline
lets see what happens
\'
'
	text = multiline_to_single(text) or { panic(err) }

	required_result := "id:a1 name:'need to do something 1' description:'## markdown works in it\\n\\ndescription can be multiline\\nlets see what happens'"

	check_result(required_result, text)
}
|
||||
|
||||
// Multiline value opened and closed with matching """ markers.
fn test_multiline4() {
	mut text := '
id:a1
name:\'need to do something 1\'
description: """
## markdown works in it

description can be multiline
lets see what happens
"""
'
	text = multiline_to_single(text) or { panic(err) }

	required_result := "id:a1 name:'need to do something 1' description:'## markdown works in it\\n\\ndescription can be multiline\\nlets see what happens'"

	check_result(required_result, text)
}
|
||||
|
||||
// Inline trailing comments and stand-alone // comment lines are converted
// to the //...-/ single-line form, in order.
fn test_multiline5() {
	mut text := "
id:a1 //comment1
// a comment
name:'need to do something 1'
description: '
## markdown works in it

description can be multiline
lets see what happens
'
//another comment
"
	text = multiline_to_single(text) or { panic(err) }

	required_result := "//comment1-/ id:a1 //a comment-/ name:'need to do something 1' description:'## markdown works in it\\n\\ndescription can be multiline\\nlets see what happens' //another comment-/"

	check_result(required_result, text)
}
|
||||
|
||||
// Multi-line // comment blocks are merged into one //...-/ token with \\n
// separators; a trailing <!--...--> comment is handled too.
fn test_multiline6() {
	mut text := "
id:a1 //comment1

// comment m 1
// comment m 2
//
// comment m 3
//

name:'need to do something 1'
description: '
## markdown works in it

description can be multiline
lets see what happens
'
<!--another comment-->
"
	text = multiline_to_single(text) or { panic(err) }

	required_result := "//comment1-/ id:a1 //comment m 1\\ncomment m 2\\n\\ncomment m 3\\n-/ name:'need to do something 1' description:'## markdown works in it\\n\\ndescription can be multiline\\nlets see what happens' //another comment-/"

	check_result(required_result, text)
}
|
||||
|
||||
// @[assert_continues]
|
||||
// fn test_comment_start_check() {
|
||||
// // TEST: `hello // world, this is mario'`, `hello //world //this is mario`
|
||||
// mut res := []string{}
|
||||
// mut str := "hello // world, this is mario'"
|
||||
// mut text, mut comment := comment_start_check(mut res, str)
|
||||
|
||||
// assert text == 'hello'
|
||||
// assert res == ["// world, this is mario'-/"]
|
||||
// assert comment == ''
|
||||
|
||||
// res = []string{}
|
||||
// str = 'hello //world //this is mario'
|
||||
// text, comment = comment_start_check(mut res, str)
|
||||
|
||||
// assert text == 'hello'
|
||||
// assert res == ['//world //this is mario-/']
|
||||
// assert comment == ''
|
||||
// }
|
||||
|
||||
// @[assert_continues]
|
||||
// fn test_multiline_start_check() {
|
||||
// // TEST: `hello '''world:'''`, `hello ' world:'`, `hello " world:"`, `hello """ world: """`
|
||||
// mut text := ["hello '''world:'''", "hello ' world:'", 'hello " world:"', 'hello """ world: """',
|
||||
// 'hello world: """\n"""']
|
||||
// expected := [false, false, false, false, true]
|
||||
// for idx, input in text {
|
||||
// got := multiline_start_check(input)
|
||||
// assert got == expected[idx]
|
||||
// }
|
||||
// }
|
||||
|
||||
// TODO: not supported yet, requires a Comment Struct, which knows its <!-- format
|
||||
// fn test_multiline7() {
|
||||
// mut text := "
|
||||
// id:a1 //comment1
|
||||
|
||||
// <!-- comment m 1
|
||||
// comment m 2
|
||||
|
||||
// comment m 3
|
||||
// -->
|
||||
|
||||
// name:'need to do something 1'
|
||||
// description: '
|
||||
// ## markdown works in it
|
||||
|
||||
// description can be multiline
|
||||
// lets see what happens
|
||||
// '
|
||||
// <!--another comment-->
|
||||
// "
|
||||
// text = multiline_to_single(text) or { panic(err) }
|
||||
|
||||
// required_result:="//comment1-/ id:a1 //comment m 1\\ncomment m 2\\n\\ncomment m 3\\n-/ name:'need to do something 1' description:'## markdown works in it\\n\\ndescription can be multiline\\nlets see what happens' //another comment-/"
|
||||
|
||||
// check_result(required_result,text)
|
||||
|
||||
// }
|
||||
178
lib/core/texttools/namefix.v
Normal file
178
lib/core/texttools/namefix.v
Normal file
@@ -0,0 +1,178 @@
|
||||
// make sure that the names are always normalized so its easy to find them back
|
||||
module texttools
|
||||
|
||||
import os
|
||||
|
||||
// Normalize an email address: lowercase and trimmed; when the input has the
// form 'Display Name <user@host>' only the address between the angle
// brackets is kept.
// Returns an error when the result is not pure ASCII or contains spaces.
pub fn email_fix(name string) !string {
	mut name2 := name.to_lower().trim_space()
	if name2.contains('<') {
		// take what sits between '<' and '>'. The original code split on '<'
		// twice ( split('<')[1].split('<')[0] ), so the trailing '>' was
		// never removed and 'a <x@y.z>' yielded 'x@y.z>'.
		name2 = name2.all_after_first('<').all_before('>').trim_space()
	}
	if !name2.is_ascii() {
		return error('email needs to be ascii, was ${name}')
	}
	if name2.contains(' ') {
		return error('email cannot have spaces, was ${name}')
	}
	return name2
}
|
||||
|
||||
// like name_fix but _ becomes space
|
||||
// like name_fix but every underscore in the fixed name becomes a space
pub fn name_fix_keepspace(name string) !string {
	return name_fix(name).replace('_', ' ')
}
|
||||
|
||||
// fix string which represenst a tel nr
|
||||
// Normalize a string representing a telephone number: lowercase, trimmed,
// with decoration characters ( [](){}*-. and spaces ) removed.
// Returns an error when the cleaned number is not pure ASCII.
pub fn tel_fix(name_ string) !string {
	mut name := name_.to_lower().trim_space()
	for x in ['[', ']', '{', '}', '(', ')', '*', '-', '.', ' '] {
		name = name.replace(x, '')
	}
	if !name.is_ascii() {
		// the original message said 'email needs to be ascii' — a copy/paste
		// from email_fix that made failures here misleading
		return error('tel nr needs to be ascii, was ${name}')
	}
	return name
}
|
||||
|
||||
// Tidy wiki/markdown content: collapse runs of blank lines (up to 5 passes
// of triple-newline reduction) and remove the blank line before list items.
pub fn wiki_fix(content_ string) string {
	mut out := content_
	mut pass := 0
	for pass < 5 {
		out = out.replace('\n\n\n', '\n\n')
		pass++
	}
	out = out.replace('\n\n-', '\n-')
	return out
}
|
||||
|
||||
// Normalize an action value: single-line content is returned trimmed;
// multiline content is re-emitted with every line indented, preceded by a
// leading newline.
pub fn action_multiline_fix(content string) string {
	if !content.trim_space().contains('\n') {
		return content.trim_space()
	}
	mut out := '\n'
	for item in content.split('\n') {
		out += '    ${item}\n'
	}
	return out
}
|
||||
|
||||
// Canonical name normalization; alias for name_fix_keepext.
pub fn name_fix(name string) string {
	return name_fix_keepext(name)
}
|
||||
|
||||
// Split a comma-separated list of names and normalize each entry with
// name_fix, returning the list of fixed names.
//
// The original ran name_fix_keepext over the whole string BEFORE splitting
// on ','. name_fix_keepext drops comma characters entirely (they are in
// neither its keep ranges nor its replace set), so the subsequent split(',')
// could never split and the function always returned a single-element list.
// Splitting the raw input first restores the intended behaviour; single-name
// inputs behave exactly as before.
pub fn name_fix_list(name string) []string {
	return name.split(',').map(name_fix(it.trim_space()))
}
|
||||
|
||||
// get name back keep extensions and underscores, but when end on .md then remove extension
|
||||
// get name back keep extensions and underscores, but when the fixed name
// ends on .md the extension is removed
pub fn name_fix_no_md(name string) string {
	fixed := name_fix_keepext(name)
	if fixed.ends_with('.md') {
		return fixed[0..fixed.len - 3]
	}
	return fixed
}
|
||||
|
||||
// Normalized name with all underscores removed (extension preserved).
pub fn name_fix_no_underscore(name string) string {
	return name_fix_keepext(name).replace('_', '')
}
|
||||
|
||||
// Convert snake_case to PascalCase: 'my_name' -> 'MyName'.
pub fn name_fix_snake_to_pascal(name string) string {
	return name.replace('_', ' ').title().replace(' ', '')
}
|
||||
|
||||
// Convert dot.notation to PascalCase: 'foo.bar' -> 'FooBar'.
pub fn name_fix_dot_notation_to_pascal(name string) string {
	return name.replace('.', ' ').title().replace(' ', '')
}
|
||||
|
||||
// Convert a name in snake_case and/or dot.notation to PascalCase.
pub fn name_fix_pascal(name string) string {
	return name_fix_dot_notation_to_pascal(name_fix_snake_to_pascal(name))
}
|
||||
|
||||
// Convert PascalCase to snake_case: 'FooBar' -> 'foo_bar'.
// A leading capital does not produce a leading underscore.
pub fn name_fix_pascal_to_snake(name string) string {
	mut parts := []string{}
	for i, c in name {
		if c.is_capital() && i != 0 {
			parts << '_'
		}
		parts << c.ascii_str()
	}
	return parts.join('').to_lower()
}
|
||||
|
||||
// Convert dot.notation to snake_case by turning every '.' into '_'.
pub fn name_fix_dot_notation_to_snake_case(name string) string {
	snake := name.replace('.', '_')
	return snake
}
|
||||
|
||||
// remove underscores and extension
|
||||
// Normalized name with the extension stripped and all underscores removed.
pub fn name_fix_no_underscore_no_ext(name_ string) string {
	fixed := name_fix_keepext(name_)
	return fixed.all_before_last('.').replace('_', '')
}
|
||||
|
||||
// remove underscores and extension
|
||||
// Normalized name with the extension stripped; trailing underscores are
// trimmed but interior underscores are kept.
pub fn name_fix_no_ext(name_ string) string {
	fixed := name_fix_keepext(name_)
	return fixed.all_before_last('.').trim_right('_')
}
|
||||
|
||||
// Core name normalization, keeping the file extension:
// lowercase + trim, strip any '#fragment', then per byte: keep digits and
// lowercase ascii letters, deduplicate underscores, turn '-;:. ' into '_',
// and silently drop every other character. The extension (if any) is
// detached first so its dot survives, and re-attached at the end.
pub fn name_fix_keepext(name_ string) string {
	mut name := name_.to_lower().trim_space()
	if name.contains('#') {
		old_name := name
		name = old_name.split('#')[0]
	}

	// need to replace . to _ but not the last one (because is ext)
	fext := os.file_ext(name)
	extension := fext.trim('.')
	if extension != '' {
		name = name[..(name.len - extension.len - 1)]
	}

	// characters that become a single '_' instead of being dropped
	to_replace_ := '-;:. '
	mut to_replace := []u8{}
	for i in to_replace_ {
		to_replace << i
	}

	mut out := []u8{}
	// previous INPUT byte (not the byte emitted) — used for '_' deduplication
	mut prev := u8(0)
	for u in name {
		if u == 95 { // underscore
			if prev != 95 {
				// only when previous is not _
				out << u
			}
		} else if u > 47 && u < 58 { // see https://www.charset.org/utf-8
			out << u
		} else if u > 96 && u < 123 {
			out << u
		} else if u in to_replace {
			if prev != 95 {
				out << u8(95)
			}
		} else {
			// means previous one should not be used
			continue
		}
		// NOTE(review): prev tracks the input byte, so two consecutive
		// separator chars (e.g. '.-') emit two underscores — confirm whether
		// that is intended or they should collapse to one.
		prev = u
	}
	name = out.bytestr()

	// name = name.trim(' _') //DONT DO final _ is ok to keep
	if extension.len > 0 {
		name += '.${extension}'
	}
	return name
}
|
||||
8
lib/core/texttools/namefix_test.v
Normal file
8
lib/core/texttools/namefix_test.v
Normal file
@@ -0,0 +1,8 @@
|
||||
module texttools
|
||||
|
||||
fn test_main() {
	// exercise name_fix_keepext: illegal chars are dropped, runs of
	// separators/underscores collapse to one '_', result is lowercased,
	// and the extension (when present) is preserved
	assert name_fix_keepext('\$sds__ 4F') == 'sds_4f'
	assert name_fix_keepext('\$sds_?__ 4F') == 'sds_4f'
	assert name_fix_keepext('\$sds_?_!"`{_ 4F') == 'sds_4f'
	assert name_fix_keepext('\$sds_?_!"`{_ 4F.jpg') == 'sds_4f.jpg'
}
|
||||
56
lib/core/texttools/namesplit.v
Normal file
56
lib/core/texttools/namesplit.v
Normal file
@@ -0,0 +1,56 @@
|
||||
module texttools
|
||||
|
||||
import os
|
||||
|
||||
// return (sitename,pagename) .
// sitename will be empty string if not specified with site:... or site__... .
// the name is normalized with name_fix and, when it contains '/', reduced to
// its base name; errors when the result is empty or malformed
pub fn name_split(name string) !(string, string) {
	mut objname := name.trim(' ')
	objname = objname.trim_left('.')

	// NOTE(review): this checks the original name, not the trimmed objname - confirm intended
	if name.contains('__') {
		parts := name.split('__')
		if parts.len != 2 {
			return error('filename not well formatted. Needs to have 2 parts around "__". Now ${name}.')
		}
		// 'site__page' is rewritten to 'site:page'
		objname = '${parts[0].trim(' ')}:${parts[1].trim(' ')}'
	}

	// to deal with things like "img/tf_world.jpg ':size=300x160'"
	// only the first space separated part is kept
	splitted0 := objname.split(' ')
	if splitted0.len > 0 {
		objname = splitted0[0]
	}
	objname = name_fix(objname)
	mut sitename := ''
	splitted := objname.split(':')
	if splitted.len == 1 {
		objname = splitted[0]
	} else if splitted.len == 2 {
		sitename = splitted[0]
		objname = splitted[1]
	} else {
		return error("name needs to be in format 'sitename:filename' or 'filename', now '${objname}'")
	}
	objname = objname.trim_left('.')
	if objname.contains('/') {
		// keep only the base name of a path
		objname = os.base(objname)
		if objname.trim(' ') == '' {
			return error('objname empty for os.base')
		}
	}
	// make sure we don't have the e.g. img/ in
	if objname.trim('/ ') == '' {
		return error('objname empty: ${name}')
	}
	if objname.ends_with('/') {
		return error("objname cannot end with /: now '${name}'")
	}
	// NOTE(review): redundant with the trim('/ ') check above
	if objname.trim(' ') == '' {
		return error('objname empty: ${name}')
	}

	// eprintln(" >> namesplit: '$sitename' '$objname'")

	return sitename, objname
}
|
||||
146
lib/core/texttools/readme.md
Normal file
146
lib/core/texttools/readme.md
Normal file
@@ -0,0 +1,146 @@
|
||||
# TextTools Module
|
||||
|
||||
The TextTools module provides a comprehensive set of utilities for text manipulation and processing in V. It includes functions for cleaning, parsing, formatting, and transforming text in various ways.
|
||||
|
||||
## Features
|
||||
|
||||
### Array Operations
|
||||
- `to_array(r string) []string` - Converts a comma or newline separated list to an array of strings
|
||||
- `to_array_int(r string) []int` - Converts a text list to an array of integers
|
||||
- `to_map(mapstring string, line string, delimiter_ string) map[string]string` - Intelligent mapping of a line to a map based on a template
|
||||
|
||||
### Text Cleaning
|
||||
- `name_clean(r string) string` - Normalizes names by removing special characters
|
||||
- `ascii_clean(r string) string` - Removes all non-ASCII characters
|
||||
- `remove_empty_lines(text string) string` - Removes empty lines from text
|
||||
- `remove_double_lines(text string) string` - Removes consecutive empty lines
|
||||
- `remove_empty_js_blocks(text string) string` - Removes empty code blocks (```...```)
|
||||
|
||||
### Command Line Parsing
|
||||
- `cmd_line_args_parser(text string) ![]string` - Parses command line arguments with support for quotes and escaping
|
||||
- `text_remove_quotes(text string) string` - Removes quoted sections from text
|
||||
- `check_exists_outside_quotes(text string, items []string) bool` - Checks if items exist in text outside of quotes
|
||||
|
||||
### Text Expansion
|
||||
- `expand(txt_ string, l int, expand_with string) string` - Expands text to a specified length with a given character
|
||||
|
||||
### Indentation
|
||||
- `indent(text string, prefix string) string` - Adds indentation prefix to each line
|
||||
- `dedent(text string) string` - Removes common leading whitespace from every line
|
||||
|
||||
### String Validation
|
||||
- `is_int(text string) bool` - Checks if text contains only digits
|
||||
- `is_upper_text(text string) bool` - Checks if text contains only uppercase letters
|
||||
|
||||
### Multiline Processing
|
||||
- `multiline_to_single(text string) !string` - Converts multiline text to a single line with proper escaping
|
||||
- Handles comments, code blocks, and preserves formatting
|
||||
|
||||
### Name/Path Processing
|
||||
- `name_fix(name string) string` - Normalizes filenames and paths
|
||||
- `name_fix_keepspace(name string) !string` - Like name_fix but preserves spaces
|
||||
- `name_fix_no_ext(name_ string) string` - Removes file extension
|
||||
- `name_fix_snake_to_pascal(name string) string` - Converts snake_case to PascalCase
|
||||
- `name_fix_pascal_to_snake(name string) string` - Converts PascalCase to snake_case
|
||||
- `name_split(name string) !(string, string)` - Splits name into site and page components
|
||||
|
||||
### Text Splitting
|
||||
- `split_smart(t string, delimiter_ string) []string` - Intelligent string splitting that respects quotes
|
||||
|
||||
### Tokenization
|
||||
- `tokenize(text_ string) TokenizerResult` - Tokenizes text into meaningful parts
|
||||
- `text_token_replace(text string, tofind string, replacewith string) !string` - Replaces tokens in text
|
||||
|
||||
### Version Parsing
|
||||
- `version(text_ string) int` - Converts version strings to comparable integers
|
||||
- Example: "v0.4.36" becomes 4036
|
||||
- Example: "v1.4.36" becomes 1004036
|
||||
|
||||
## Usage Examples
|
||||
|
||||
### Array Operations
|
||||
```v
|
||||
// Convert comma-separated list to array
|
||||
text := "item1,item2,item3"
|
||||
array := texttools.to_array(text)
|
||||
// Result: ['item1', 'item2', 'item3']
|
||||
|
||||
// Smart mapping
|
||||
r := texttools.to_map("name,-,-,-,-,pid,-,-,-,-,path",
|
||||
"root 304 0.0 0.0 408185328 1360 ?? S 16Dec23 0:34.06 /usr/sbin/distnoted")
|
||||
// Result: {'name': 'root', 'pid': '1360', 'path': '/usr/sbin/distnoted'}
|
||||
```
|
||||
|
||||
### Text Cleaning
|
||||
```v
|
||||
// Clean name
|
||||
name := texttools.name_clean("Hello@World!")
|
||||
// Result: "HelloWorld"
|
||||
|
||||
// Remove empty lines
|
||||
text := texttools.remove_empty_lines("line1\n\nline2\n\n\nline3")
|
||||
// Result: "line1\nline2\nline3"
|
||||
```
|
||||
|
||||
### Command Line Parsing
|
||||
```v
|
||||
// Parse command line with quotes
|
||||
args := texttools.cmd_line_args_parser("'arg with spaces' --flag=value")
|
||||
// Result: ['arg with spaces', '--flag=value']
|
||||
```
|
||||
|
||||
### Indentation
|
||||
```v
|
||||
// Add indentation
|
||||
text := texttools.indent("line1\nline2", " ")
|
||||
// Result: " line1\n line2\n"
|
||||
|
||||
// Remove common indentation
|
||||
text := texttools.dedent(" line1\n line2")
|
||||
// Result: "line1\nline2"
|
||||
```
|
||||
|
||||
### Name Processing
|
||||
```v
|
||||
// Convert to snake case
|
||||
name := texttools.name_fix_pascal_to_snake("HelloWorld")
|
||||
// Result: "hello_world"
|
||||
|
||||
// Convert to pascal case
|
||||
name := texttools.name_fix_snake_to_pascal("hello_world")
|
||||
// Result: "HelloWorld"
|
||||
```
|
||||
|
||||
### Version Parsing
|
||||
```v
|
||||
// Parse version string
|
||||
ver := texttools.version("v0.4.36")
|
||||
// Result: 4036
|
||||
|
||||
ver := texttools.version("v1.4.36")
|
||||
// Result: 1004036
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
Many functions in the module return a Result type (indicated by `!` in the function signature). These functions can return errors that should be handled appropriately:
|
||||
|
||||
```v
|
||||
// Example of error handling
|
||||
name := texttools.name_fix_keepspace("some@name") or {
|
||||
println("Error: ${err}")
|
||||
return
|
||||
}
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. Always use appropriate error handling for functions that return Results
|
||||
2. Consider using `dedent()` before processing multiline text to ensure consistent formatting
|
||||
3. When working with filenames, use the appropriate name_fix variant based on your needs
|
||||
4. For command line parsing, be aware of quote handling and escaping rules
|
||||
5. When using tokenization, consider the context and whether smart splitting is needed
|
||||
|
||||
## Contributing
|
||||
|
||||
The TextTools module is part of the herolib project. Contributions are welcome through pull requests.
|
||||
46
lib/core/texttools/regext/readme.md
Normal file
46
lib/core/texttools/regext/readme.md
Normal file
@@ -0,0 +1,46 @@
|
||||
# regex
|
||||
|
||||
## basic regex utilities
|
||||
|
||||
- .
|
||||
|
||||
## regex replacer
|
||||
|
||||
Tool to flexibly replace elements in file(s) or text.
|
||||
|
||||
next example does it for
|
||||
|
||||
```v
|
||||
import freeflowuniverse.herolib.core.texttools.regext
|
||||
text := '
|
||||
|
||||
this is test_1 SomeTest
|
||||
this is Test 1 SomeTest
|
||||
|
||||
need to replace TF to ThreeFold
|
||||
need to replace ThreeFold0 to ThreeFold
|
||||
need to replace ThreeFold1 to ThreeFold
|
||||
|
||||
'
|
||||
|
||||
text_out := '
|
||||
|
||||
this is TTT SomeTest
|
||||
this is TTT SomeTest
|
||||
|
||||
need to replace ThreeFold to ThreeFold
|
||||
need to replace ThreeFold to ThreeFold
|
||||
need to replace ThreeFold to ThreeFold
|
||||
|
||||
'
|
||||
|
||||
mut ri := regext.regex_instructions_new()
|
||||
ri.add(['TF:ThreeFold0:ThreeFold1:ThreeFold']) or { panic(err) }
|
||||
ri.add_item('test_1', 'TTT') or { panic(err) }
|
||||
ri.add_item('^Stest 1', 'TTT') or { panic(err) } //will be case insensitive search
|
||||
|
||||
mut text_out2 := ri.replace(text: text, dedent: true) or { panic(err) }
|
||||
|
||||
```
|
||||
|
||||
|
||||
41
lib/core/texttools/regext/regexgroups.v
Normal file
41
lib/core/texttools/regext/regexgroups.v
Normal file
@@ -0,0 +1,41 @@
|
||||
module regext
|
||||
|
||||
import regex
|
||||
|
||||
// find parts of text which are in form {NAME} .
// .
// NAME is as follows: .
// Lowercase letters: a-z .
// Digits: 0-9 .
// Underscore: _ .
// .
// will return list of the found NAME's
pub fn find_simple_vars(txt string) []string {
	mut re := regex.regex_opt(r'\{(\w+)\}') or { panic(err) }
	matches := re.find_all_str(txt)
	// strip the surrounding braces (and any stray spaces) from each match
	return matches.map(it.trim('{} '))
}
|
||||
|
||||
// strip a leading 'sid:' prefix (and surrounding whitespace) from a match;
// strings without the prefix are returned unchanged
fn remove_sid(c string) string {
	return if c.starts_with('sid:') { c[4..].trim_space() } else { c }
}
|
||||
|
||||
// find parts of text in form sid:abc till sid:abcde (can be a...z 0...9) .
// return list of the found elements .
// to make all e.g. lowercase do e.g. words = words.map(it.to_lower()) after it
pub fn find_sid(txt string) []string {
	// a sid is 3 to 5 alphanumeric chars; the trailing [\s$] requires whitespace
	// or end of line after it, so longer ids like 'sid:rrrrrr' do not match
	pattern := r'sid:[a-zA-Z0-9]{3,5}[\s$]'
	mut re := regex.regex_opt(pattern) or { panic(err) }

	mut words := re.find_all_str(txt)
	// words = words.map(it.to_lower())
	// strip the 'sid:' prefix and surrounding whitespace from each raw match
	words = words.map(remove_sid(it))
	return words
}
|
||||
47
lib/core/texttools/regext/regexgroups_test.v
Normal file
47
lib/core/texttools/regext/regexgroups_test.v
Normal file
@@ -0,0 +1,47 @@
|
||||
module regext
|
||||
|
||||
fn test_stdtext() {
|
||||
// this is test without much fancyness, just rext replace, no regex, all case sensitive
|
||||
|
||||
text := '
|
||||
|
||||
!!action.something sid:aa733
|
||||
|
||||
sid:aa733
|
||||
|
||||
...sid:aa733 ss
|
||||
|
||||
...sid:rrrrrr ss
|
||||
sid:997
|
||||
|
||||
sid:s d
|
||||
sid:s_d
|
||||
|
||||
'
|
||||
|
||||
r := find_sid(text)
|
||||
|
||||
assert r == ['aa733', 'aa733', 'aa733', '997']
|
||||
}
|
||||
|
||||
fn test_find_simple_vars() {
|
||||
text := '
|
||||
|
||||
!!action.something {sid}
|
||||
|
||||
sid:aa733
|
||||
|
||||
{a}
|
||||
|
||||
...sid:rrrrrr ss {a_sdsdsdsd_e__f_g}
|
||||
sid:997
|
||||
|
||||
sid:s d
|
||||
sid:s_d
|
||||
|
||||
'
|
||||
|
||||
r := find_simple_vars(text)
|
||||
|
||||
assert r == ['sid', 'a', 'a_sdsdsdsd_e__f_g']
|
||||
}
|
||||
272
lib/core/texttools/regext/regexreplacer.v
Normal file
272
lib/core/texttools/regext/regexreplacer.v
Normal file
@@ -0,0 +1,272 @@
|
||||
module regext
|
||||
|
||||
import freeflowuniverse.herolib.core.texttools
|
||||
import regex
|
||||
import freeflowuniverse.herolib.ui.console
|
||||
import os
|
||||
|
||||
// collection of find/replace rules, applied together by replace()/replace_in_dir()
pub struct ReplaceInstructions {
pub mut:
	instructions []ReplaceInstruction
}

// one find/replace rule: either regex based (regex_str/regex set, find_str empty)
// or a literal case sensitive string find (find_str set)
pub struct ReplaceInstruction {
pub:
	regex_str    string // the regex query as text (empty for literal finds)
	find_str     string // literal string to find (empty for regex finds)
	replace_with string // replacement text
pub mut:
	regex regex.RE // compiled regex; only meaningful when regex_str is set
}
|
||||
|
||||
// collect the regex query string of every instruction (debug/introspection helper)
fn (mut self ReplaceInstructions) get_regex_queries() []string {
	mut queries := []string{cap: self.instructions.len}
	for instruction in self.instructions {
		queries << instruction.regex.get_query()
	}
	return queries
}
|
||||
|
||||
// rewrite a filter string to a regex .
// each char will be checked for in lower case as well as upper case (will match both) .
// will only look at ascii .
//'_- ' will be replaced to match one or more spaces .
// the returned result is a regex string
pub fn regex_rewrite(r string) !string {
	r2 := r.to_lower()
	mut res := []string{}
	for ch in r2 {
		mut c := ch.ascii_str()
		if 'abcdefghijklmnopqrstuvwxyz'.contains(c) {
			// letters match both cases, e.g. 'a' -> '[aA]'
			char_upper := c.to_upper()
			res << '[' + c + char_upper + ']'
		} else if '0123456789'.contains(c) {
			// digits are kept as-is
			res << c
		} else if '_- '.contains(c) {
			// separators match any run of spaces (possibly none)
			// res << r"\[\\s _\\-\]*"
			res << r' *'
		} else if '\'"'.contains(c) {
			// quotes are dropped from the pattern
			continue
		} else if '^&![]'.contains(c) {
			return error('cannot rewrite regex: ${r}, found illegal char ^&![]')
		}
		// NOTE(review): any other char (e.g. '/') is silently dropped - confirm intended
	}
	return res.join('')
	//+r"[\\n \:\!\.\?;,\\(\\)\\[\\]]"
}
|
||||
|
||||
// regex string see https://github.com/vlang/v/blob/master/vlib/regex/README.md .
// find_str is a normal search (text) .
// replace is the string we want to replace the match with .
// prefixes on regex_find_str: '^R' = raw regex, '^S' = case insensitive string
// (rewritten to a regex via regex_rewrite), no prefix = literal case sensitive find
fn (mut self ReplaceInstructions) add_item(regex_find_str string, replace_with string) ! {
	mut item := regex_find_str
	if item.starts_with('^R') {
		item = item[2..] // remove the ^R prefix; remainder is used as regex directly
		r := regex.regex_opt(item) or { panic('regex_opt failed') }
		self.instructions << ReplaceInstruction{
			regex_str: item
			regex: r
			replace_with: replace_with
		}
	} else if item.starts_with('^S') {
		item = item[2..] // remove the ^S prefix
		// rewrite to a case insensitive regex (see regex_rewrite)
		item2 := regex_rewrite(item)!
		r := regex.regex_opt(item2) or { panic('regex_opt failed') }
		self.instructions << ReplaceInstruction{
			regex_str: item
			regex: r
			replace_with: replace_with
		}
	} else {
		// plain literal find, case sensitive
		self.instructions << ReplaceInstruction{
			replace_with: replace_with
			find_str: item
		}
	}
}
|
||||
|
||||
// each element of the list can have more search statements .
// a search statement can have 3 forms.
// - regex start with ^R see https://github.com/vlang/v/blob/master/vlib/regex/README.md .
// - case insensitive string find start with ^S (will internally convert to regex).
// - just a string, this is a literal find (case sensitive) .
// input is ["^Rregex:replacewith",...] .
// input is ["^Rregex:^Rregex2:replacewith"] .
// input is ["findstr:findstr:replacewith"] .
// input is ["findstr:^Rregex2:replacewith"] .
pub fn (mut ri ReplaceInstructions) add(replacelist []string) ! {
	for entry in replacelist {
		parts := entry.split(':')
		if parts.len < 2 {
			return error("Cannot add ${entry} because needs to have 2 parts, wrong syntax, to regex instructions:\n\"${replacelist}\"")
		}
		// the last part is the replacement; every earlier part is a find statement
		replace_with := parts.last()
		for find_part in parts[..parts.len - 1] {
			ri.add_item(find_part, replace_with)!
		}
	}
}
|
||||
|
||||
// a text input file where each line has one of the following
// - regex start with ^R see https://github.com/vlang/v/blob/master/vlib/regex/README.md .
// - case insensitive string find start with ^S (will internally convert to regex).
// - just a string, this is a literal find (case sensitive) .
// example input
// '''
// ^Rregex:replacewith
// ^Rregex:^Rregex2:replacewith
// ^Sfindstr:replacewith
// findstr:findstr:replacewith
// findstr:^Rregex2:replacewith
// ^Sfindstr:^Sfindstr2::^Rregex2:replacewith
// '''
pub fn (mut ri ReplaceInstructions) add_from_text(txt string) ! {
	// only lines containing ':' are instructions; empty/whitespace lines
	// never contain ':' so they are filtered out implicitly
	replacelist := txt.split_into_lines().filter(it.contains(':'))
	ri.add(replacelist)!
}
|
||||
|
||||
@[params]
pub struct ReplaceArgs {
pub mut:
	text   string // the text to run the replace instructions on
	dedent bool   // when true, texttools.dedent() is applied to text first
}
|
||||
|
||||
// this is the actual function which will take text as input and return the replaced result
// does the matching line per line .
// will use dedent function on text when args.dedent is set .
// a trailing newline (lost by split_into_lines) is preserved via the endline flag
pub fn (mut self ReplaceInstructions) replace(args ReplaceArgs) !string {
	mut text2 := args.text
	if args.dedent {
		text2 = texttools.dedent(text2)
	}
	mut res := []string{}

	if text2.len == 0 {
		return ''
	}
	// check if there is \n at end of text, because with splitlines it would be lost
	endline := text2.ends_with('\n')
	for line in text2.split_into_lines() {
		mut line2 := line
		for mut i in self.instructions {
			if i.find_str == '' {
				// regex based instruction
				// (a dead counter loop over i.regex.find_all(line) was removed here;
				// its result was never used)
				line2 = i.regex.replace(line2, i.replace_with)
			} else {
				// literal, case sensitive find/replace
				line2 = line2.replace(i.find_str, i.replace_with)
			}
		}
		res << line2
	}

	mut x := res.join('\n')
	if !endline {
		x = x.trim_right('\n')
	}
	return x
}
|
||||
|
||||
@[params]
pub struct ReplaceDirArgs {
pub mut:
	path       string   // root directory to walk recursively
	extensions []string // file extensions to process (with or without '.'); empty = all files
	dryrun     bool     // when true, report what would change but do not write files
}
|
||||
|
||||
// walk args.path recursively and apply the replace instructions to every file
// whose extension matches args.extensions (empty list means: all files) .
// if dryrun is true then will not replace but just show .
// returns the number of files that changed
pub fn (mut self ReplaceInstructions) replace_in_dir(args ReplaceDirArgs) !int {
	mut count := 0
	// create list of unique extensions, all lowercase, without leading '.'
	mut extensions := []string{}
	for ext in args.extensions {
		mut ext2 := ext.to_lower()
		if ext2.starts_with('.') {
			ext2 = ext2[1..]
		}
		// BUGFIX: dedupe on the normalized form; checking the raw ext meant
		// e.g. ['v', '.v'] produced a duplicate entry
		if ext2 !in extensions {
			extensions << ext2
		}
	}

	mut done := []string{}
	count += self.replace_in_dir_recursive(args.path, extensions, args.dryrun, mut done)!
	return count
}
|
||||
|
||||
// returns how many files changed in this folder and all its subfolders
fn (mut self ReplaceInstructions) replace_in_dir_recursive(path1 string, extensions []string, dryrun bool, mut done []string) !int {
	items := os.ls(path1) or {
		return error('cannot load folder for replace because cannot find ${path1}')
	}
	mut count := 0

	for item in items {
		pathnew := os.join_path(path1, item)
		if os.is_dir(pathnew) {
			// skip hidden and underscore-prefixed folders
			if item.starts_with('.') || item.starts_with('_') {
				continue
			}
			// BUGFIX: the count returned by the recursive call was discarded,
			// so changes in subdirectories were never counted
			count += self.replace_in_dir_recursive(pathnew, extensions, dryrun, mut done)!
		} else {
			// BUGFIX: os.file_ext() returns '' for files without extension;
			// the previous [1..] slice on that empty string panicked
			ext := os.file_ext(pathnew).trim_left('.').to_lower()
			if extensions == [] || ext in extensions {
				// means we match a file
				txtold := os.read_file(pathnew)!
				txtnew := self.replace(text: txtold, dedent: false)!
				if txtnew.trim(' \n') == txtold.trim(' \n') {
					console.print_header(' nothing to do : ${pathnew}')
				} else {
					console.print_header(' replace done : ${pathnew}')
					count++
					if !dryrun {
						// now write the file back
						os.write_file(pathnew, txtnew)!
					}
				}
			}
		}
	}
	return count
}
|
||||
|
||||
// create an empty ReplaceInstructions; fill it with add()/add_item()/add_from_text()
pub fn regex_instructions_new() ReplaceInstructions {
	return ReplaceInstructions{}
}
|
||||
115
lib/core/texttools/regext/regexreplacer_test.v
Normal file
115
lib/core/texttools/regext/regexreplacer_test.v
Normal file
@@ -0,0 +1,115 @@
|
||||
module regext
|
||||
|
||||
import os
|
||||
import freeflowuniverse.herolib.core.texttools { dedent }
|
||||
|
||||
fn test_stdtext() {
|
||||
// this is test without much fancyness, just rext replace, no regex, all case sensitive
|
||||
|
||||
text := '
|
||||
|
||||
this is test_1 SomeTest
|
||||
this is test 1 SomeTest
|
||||
|
||||
need to replace TF to ThreeFold
|
||||
need to replace ThreeFold0 to ThreeFold
|
||||
need to replace ThreeFold1 to ThreeFold
|
||||
|
||||
'
|
||||
|
||||
text_out := '
|
||||
|
||||
this is TTT SomeTest
|
||||
this is TTT SomeTest
|
||||
|
||||
need to replace ThreeFold to ThreeFold
|
||||
need to replace ThreeFold to ThreeFold
|
||||
need to replace ThreeFold to ThreeFold
|
||||
|
||||
'
|
||||
|
||||
mut ri := regex_instructions_new()
|
||||
ri.add(['TF:ThreeFold0:ThreeFold1:ThreeFold']) or { panic(err) }
|
||||
ri.add_item('test_1', 'TTT') or { panic(err) }
|
||||
ri.add_item('test 1', 'TTT') or { panic(err) }
|
||||
|
||||
mut text_out2 := ri.replace(text: text, dedent: true) or { panic(err) }
|
||||
|
||||
assert dedent(text_out2).trim('\n') == dedent(text_out).trim('\n')
|
||||
}
|
||||
|
||||
fn test_dirreplace() {
|
||||
// this is test without much fancyness, just rext replace, no regex, all case sensitive
|
||||
|
||||
// get path where to look for text
|
||||
mut p := @FILE.split('/')
|
||||
p = p[0..p.len - 1].clone()
|
||||
mut path := os.real_path(os.join_path(p.join('/'), 'testdata'))
|
||||
|
||||
mut ri := regex_instructions_new()
|
||||
|
||||
ri.add(['key_bob:KEY_BOB', 'key_alice:KEY_ALICE']) or { panic(err) }
|
||||
|
||||
count := ri.replace_in_dir(path: path, extensions: ['v'], dryrun: true) or { panic(err) }
|
||||
|
||||
assert count == 2
|
||||
}
|
||||
|
||||
// fn test_regex1() {
|
||||
// text := '
|
||||
|
||||
// this is test_1 SomeTest
|
||||
// this is test 1 SomeTest
|
||||
|
||||
// need to replace TF to ThreeFold
|
||||
// need to replace ThreeFold0 to ThreeFold
|
||||
// need to replace ThreeFold1 to ThreeFold
|
||||
|
||||
// '
|
||||
|
||||
// text_out := '
|
||||
|
||||
// this is TTT SomeTest
|
||||
// this is TTT SomeTest
|
||||
|
||||
// need to replace ThreeFold to ThreeFold
|
||||
// need to replace ThreeFold to ThreeFold
|
||||
// need to replace ThreeFold to ThreeFold
|
||||
|
||||
// '
|
||||
|
||||
// mut ri := regex_instructions_new(['tf:threefold0:^R ThreeFold1:ThreeFold']) or {
|
||||
// panic(err)
|
||||
// }
|
||||
// ri.add('^Rtest[ _]1', 'TTT') or { panic(err) }
|
||||
// mut text_out2 := ri.replace(text) or { panic(err) }
|
||||
|
||||
// assert dedent(text_out2).trim('\n') == dedent(text_out).trim('\n')
|
||||
// // panic('s')
|
||||
// }
|
||||
|
||||
// fn test_regex2() {
|
||||
// text := '
|
||||
|
||||
// this is test_1 SomeTest
|
||||
// this is test 1 SomeTest
|
||||
|
||||
// need to replace ThreeFold 0 to ThreeFold
|
||||
// need to replace ThreeFold0 to ThreeFold
|
||||
// no need to replace ThreeFold1; to ThreeFold
|
||||
|
||||
// '
|
||||
|
||||
// text_out := '
|
||||
|
||||
// '
|
||||
|
||||
// mut ri := regex_instructions_new(['^Sthreefold 0:bluelagoon']) or {
|
||||
// panic(err)
|
||||
// }
|
||||
|
||||
// mut text_out2 := ri.replace(text) or { panic(err) }
|
||||
|
||||
// assert dedent(text_out2).trim('\n') == dedent(text_out).trim('\n')
|
||||
// // panic('s')
|
||||
// }
|
||||
3
lib/core/texttools/regext/testdata/testfile1.v
vendored
Normal file
3
lib/core/texttools/regext/testdata/testfile1.v
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
fn testfunction1() {
|
||||
key_bob = 'bobs key'
|
||||
}
|
||||
3
lib/core/texttools/regext/testdata/testfile2.v
vendored
Normal file
3
lib/core/texttools/regext/testdata/testfile2.v
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
fn testfunction2() {
|
||||
key_alice := 'mock key for regex_test'
|
||||
}
|
||||
51
lib/core/texttools/split.v
Normal file
51
lib/core/texttools/split.v
Normal file
@@ -0,0 +1,51 @@
|
||||
module texttools
|
||||
|
||||
enum SplitState {
|
||||
start
|
||||
string
|
||||
}
|
||||
|
||||
// split strings in intelligent ways, taking into consideration '"` quotes .
// quoted sections are kept together as one element (quotes stripped) .
// delimiter_ is a set of single split characters; empty means default ',| '
// ```
// r0:=texttools.split_smart("'root' 304 0.0 0.0 408185328 1360 ?? S 16Dec23 0:34.06 /usr/sbin/distnoted\n \n")
// assert ['root', '304', '0.0', '0.0', '408185328', '1360', '??', 'S', '16Dec23', '0:34.06', '/usr/sbin/distnoted']==r0
// ```
pub fn split_smart(t string, delimiter_ string) []string {
	mut st := SplitState.start
	mut last := []string{} // chars of the element currently being built
	mut result := []string{}
	mut delimiter := delimiter_
	if delimiter.len == 0 {
		delimiter = ',| '
	}
	for c in t.trim_space().split('') {
		if st == .start && '`\'"'.contains(c) {
			// means we are at start of quoted string
			st = .string
			continue
		}
		if st == .string && '`\'"'.contains(c) {
			// means we are at end of quoted string
			// NOTE(review): an empty quoted string still emits an empty element here
			st = .start
			result << last.join('').trim_space()
			last = []string{}
			continue
		}
		if st == .string {
			// inside quotes: delimiters are part of the element
			last << c
			continue
		}
		if delimiter.contains(c) {
			// outside quotes a delimiter ends the current element (if non-empty)
			if last.len > 0 {
				result << last.join('').trim_space()
			}
			last = []string{}
			continue
		}
		last << c
	}
	// flush the trailing element
	if last.len > 0 {
		result << last.join('').trim_space()
	}
	return result
}
|
||||
13
lib/core/texttools/template.v
Normal file
13
lib/core/texttools/template.v
Normal file
@@ -0,0 +1,13 @@
|
||||
module texttools
|
||||
|
||||
// rewrite placeholder markers in a template string: .
// replace '^^' -> '@' .
// replace '???' -> '$(' (must run before the '??' rule below) .
// replace '??' -> '$' .
// replace '\t' -> spaces
pub fn template_replace(template_ string) string {
	mut template := template_
	template = template.replace('^^', '@')
	template = template.replace('???', '$(')
	template = template.replace('??', '$')
	template = template.replace('\t', ' ')
	return template
}
|
||||
182
lib/core/texttools/tokens.v
Normal file
182
lib/core/texttools/tokens.v
Normal file
@@ -0,0 +1,182 @@
|
||||
module texttools
|
||||
|
||||
// import regex
|
||||
|
||||
// result of tokenize(): the distinct tokens found in a text
pub struct TokenizerResult {
pub mut:
	items []TokenizerItem
}
|
||||
|
||||
// one token found by tokenize()
pub struct TokenizerItem {
pub mut:
	toreplace string // the token exactly as it appeared in the source text
	// is the most fixed string: lowercased, separators/underscores removed;
	// used as the match key in replace()
	matchstring string
}
|
||||
|
||||
// tokenize text and replace every token matching tofind
// (case/underscore-insensitively) with replacewith
pub fn text_token_replace(text string, tofind string, replacewith string) !string {
	mut tr := tokenize(text)
	return tr.replace(text, tofind, replacewith)!
}
|
||||
|
||||
// replace all tokens whose normalized matchstring equals the normalized tofind .
// matching is case- and underscore-insensitive (see name_fix_no_underscore_token)
pub fn (mut tr TokenizerResult) replace(text string, tofind string, replacewith string) !string {
	tofind2 := name_fix_no_underscore_token(tofind)
	mut text2 := text
	for item in tr.items {
		if item.matchstring == tofind2 {
			// plain substring replace of the original token text .
			// NOTE(review): a whole-word-only variant existed here but was
			// removed as non-working; this also replaces substrings - confirm acceptable
			new_text := text2.replace(item.toreplace, replacewith)
			text2 = new_text
		}
	}
	return text2
}
|
||||
|
||||
// normalize a token (see name_fix_token) and additionally remove all underscores
pub fn name_fix_no_underscore_token(name string) string {
	return name_fix_token(name).replace('_', '')
}
|
||||
|
||||
// replace_each pairs: separators (space, '-', '::', ';', ':', '.') all map to '_' .
// '__' -> '_' appears twice because collapsing can cascade 3 -> 2 -> 1
// (needs to be 2x because can be 3 to 2 to 1)
const name_fix_replaces = [
	' ',
	'_',
	'-',
	'_',
	'__',
	'_',
	'__',
	'_',
	'::',
	'_',
	';',
	'_',
	':',
	'_',
	'.',
	'_',
]
|
||||
|
||||
// normalize a token: lowercase, map separators to '_' (see name_fix_replaces)
// and trim surrounding spaces, dots and underscores
pub fn name_fix_token(name string) string {
	return name.to_lower().replace_each(name_fix_replaces).trim(' ._')
}
|
||||
|
||||
// true for common stop words the tokenizer should ignore (case insensitive)
fn word_skip(text string) bool {
	return text.to_lower() in ['the', 'some', 'and', 'plus', 'will', 'do', 'are', 'these']
}
|
||||
|
||||
// tokenize text into distinct word-like tokens (see tokens_test.v for examples) .
// skipped: lines starting with '!' or 'http', fenced code blocks (''' ``` """),
// anything inside ()/[]/{}, links, stop words (word_skip), 1-char words,
// and duplicates. Each kept word is stored with its normalized matchstring.
pub fn tokenize(text_ string) TokenizerResult {
	text := dedent(text_)

	mut skip := false // true while inside ()/[]/{}
	mut skipline := false // true while inside a fenced code block
	mut prev := ''
	mut word := ''
	mut islink := false // true while consuming an http... link
	mut tr := TokenizerResult{}
	mut done := []string{} // words already emitted (dedupe)
	lines := text.split('\n')
	//
	for original_line in lines {
		line := original_line.trim(' ')

		if line.starts_with('!') {
			continue
		}

		if line.starts_with('http') {
			continue
		}
		if line.contains("'''") || line.contains('```') || line.contains('"""') {
			// fence marker toggles code-block skipping
			skipline = !skipline
		}
		if skipline {
			continue
		}
		prev = ''
		word = ''
		skip = false
		splitted_line := line.split('')
		for ch in splitted_line {
			if '[({'.contains(ch) {
				skip = true
				continue
			}
			if skip {
				if ')]}'.contains(ch) {
					skip = false
					prev = ''
					continue
				}
			} else {
				if islink {
					// consume link chars until the next space
					if ch == ' ' {
						islink = false
					} else {
						continue
					}
				}
				if 'abcdefghijklmnopqrstuvwxyz0123456789_-'.contains(ch.to_lower()) {
					// a word may only start at line start or after a separator
					if word.len > 0 || prev == '' || '\t\n ,:;.?!#|'.contains(prev) {
						word += ch
					}
					if word.starts_with('http') {
						islink = true
					}
				} else if '\t\n ,:;.?!#|'.contains(ch) {
					// only when end is newline tab or whitespace or ...
					if word.len > 1 && !word_skip(word) && word !in done {
						word_with_no_underscores := name_fix_no_underscore_token(word)
						tr.items << TokenizerItem{
							toreplace: word
							matchstring: word_with_no_underscores.clone()
						}
						done << word
					}
					word = ''
					prev = ''
					continue
				} else {
					// any other char invalidates the current word
					word = ''
				}
				prev = ch
			}
		}
		// flush a word left pending at end of line
		if word.len > 1 && !word_skip(word) && word !in done {
			word_with_no_underscores := name_fix_no_underscore_token(word)
			tr.items << TokenizerItem{
				toreplace: word
				matchstring: word_with_no_underscores.clone()
			}
			done << word
		}
	}
	return tr
}
|
||||
111
lib/core/texttools/tokens_test.v
Normal file
111
lib/core/texttools/tokens_test.v
Normal file
@@ -0,0 +1,111 @@
|
||||
module texttools
|
||||
|
||||
// Tokenizing a mixed-case word list yields one item per unique word:
// filler words ('these', 'are', 'some') are skipped, '&redlagoon' never
// starts a word, and matchstring is the lowercased, underscore-free form.
fn test_tokens() {
	text := '
	these; Are Some ramdom words!
	blue lagoon
	Blue lagoon
	blue_lagoon
	blue_Lagoon
	lagoon
	blueLagoon
	&redlagoon

	'
	got := tokenize(text)

	// expected (toreplace, matchstring) pairs, in discovery order
	mut expected := TokenizerResult{}
	for pair in [
		['ramdom', 'ramdom'],
		['words', 'words'],
		['blue', 'blue'],
		['lagoon', 'lagoon'],
		['Blue', 'blue'],
		['blue_lagoon', 'bluelagoon'],
		['blue_Lagoon', 'bluelagoon'],
		['blueLagoon', 'bluelagoon'],
	] {
		expected.items << TokenizerItem{
			toreplace: pair[0]
			matchstring: pair[1]
		}
	}

	assert got == expected
}
|
||||
|
||||
// fn test_tokens2() {
|
||||
// mut text := '
|
||||
// these; Are Some ramdom words!
|
||||
// blue lagoon
|
||||
// Blue lagoon
|
||||
// red_dragon
|
||||
// reddragon
|
||||
// blue_lagoon
|
||||
// blue_Lagoon
|
||||
// lagoon
|
||||
// ;bluelagoon
|
||||
|
||||
// '
|
||||
|
||||
// mut ri := regex_instructions_new()
|
||||
// ri.add(['bluelagoon:red_dragon:ThreeFold']) or { panic(err) }
|
||||
|
||||
// mut text_out2 := ri.replace(text:text) or { panic(err) }
|
||||
|
||||
// compare := '
|
||||
// these; Are Some ramdom words!
|
||||
// blue lagoon
|
||||
// Blue lagoon
|
||||
// ThreeFold
|
||||
// ThreeFold
|
||||
// ThreeFold
|
||||
// ThreeFold
|
||||
// lagoon
|
||||
// ;ThreeFold
|
||||
|
||||
// '
|
||||
|
||||
// a := dedent(text_out2).trim(' \n')
|
||||
// b := dedent(compare).trim(' \n')
|
||||
|
||||
// assert a == b
|
||||
// }
|
||||
|
||||
// Markdown-like input: bracketed spans, '!'-prefixed and URL lines, and
// inline links are all ignored — only the free-standing word 'test'
// survives tokenization.
fn test_tokens3() {
	text := r'
	- [Definitions](tftech:definitions)
	(koekoe)
	(great )
	{great }
	- [Disclaimer](disclaimer)
	- [farmer_terms_conditions](terms_conditions_farmer)
	- [terms_conditions_websites](terms_conditions_websites) test
	- [terms_conditions_griduser](terms_conditions_griduser)
	- [privacypolicy](privacypolicy)

	http://localhost:9998/threefold/#/farming_certification
	https://greencloud

	'

	expected := TokenizerResult{
		items: [
			TokenizerItem{
				toreplace: 'test'
				matchstring: 'test'
			},
		]
	}
	assert tokenize(text) == expected
}
|
||||
21
lib/core/texttools/version.v
Normal file
21
lib/core/texttools/version.v
Normal file
@@ -0,0 +1,21 @@
|
||||
module texttools
|
||||
|
||||
import math
|
||||
|
||||
// Converts a version string into a single comparable int:
// v0.4.36 becomes 4036, v1.4.36 becomes 1004036.
||||
|
||||
// version converts a version string such as 'v0.4.36' into a single
// comparable int, giving each dot-separated component three decimal
// digits: v0.4.36 -> 4036, v1.4.36 -> 1004036.
//
// All 'v' characters are stripped and components are trimmed before
// parsing; non-numeric components parse as 0.
pub fn version(text_ string) int {
	cleaned := text_.to_lower().replace('v', '')
	parts := cleaned.split('.').filter(it.trim_space() != '')
	numbers := parts.reverse().map(it.trim_space().int())
	mut nr := 0
	for level, item in numbers {
		// each level is worth a factor 1000 more, least-significant first
		nr += item * int(math.powi(1000, level))
	}
	return nr
}
||||
15
lib/core/texttools/version_test.v
Normal file
15
lib/core/texttools/version_test.v
Normal file
@@ -0,0 +1,15 @@
|
||||
module texttools
|
||||
|
||||
// version() ignores padding, the 'v' prefix and inner spaces, and
// weights each dot-separated component by a factor of 1000.
fn test_version() {
	cases := {
		' v0. 0.36 ': 36
		' v0.36 ':    36
		' 36 ':       36
		' v0. 4.36 ': 4036
		' v2. 4.36 ': 2004036
		' 0.18.0 ':   18000
	}
	for input, expected in cases {
		assert version(input) == expected
	}

	// surrounding newlines are ignored as well
	assert version('

	v2. 4.36
	') == 2004036
}
|
||||
Reference in New Issue
Block a user