This commit is contained in:
2025-08-05 15:15:36 +02:00
parent 4bd960ed05
commit 7fabb4163a
192 changed files with 14901 additions and 0 deletions

View File

@@ -0,0 +1,88 @@
import re
import os
# Removes `pub`/`mut` markers and other code the parser does not need (e.g. function bodies).
def cleaner(code: str) -> str:
    """Reduce V source code to the declarations the OpenRPC parser needs.

    The following transformations are applied line by line:

    * tabs are replaced by a space;
    * visibility section markers (``pub:``, ``pub mut:``) are dropped;
    * a leading ``pub `` is removed from declarations;
    * struct and enum bodies are kept verbatim;
    * function bodies are discarded: only the declaration line and a single
      closing ``}`` are kept;
    * every other line is passed through unchanged.

    Args:
        code: V source code.

    Returns:
        The cleaned source code.

    Raises:
        ValueError: if a function declaration does not have its opening
            brace on the same line.
    """
    visibility_marker = re.compile(r"^\s*pub\s*(\s+mut\s*)?:")
    declaration_start = re.compile(r"(struct|enum)\s+\w+\s*{")

    def brace_balance(text: str) -> int:
        # Braces that only appear in a trailing // comment must not count.
        code_part = text.split("//", 1)[0]
        return code_part.count("{") - code_part.count("}")

    processed_lines = []
    function_depth = 0  # > 0 while inside a function body
    in_struct_or_enum = False
    for line in code.split("\n"):
        line = line.replace("\t", " ")
        stripped_line = line.strip()

        # Skip visibility section markers such as 'pub mut:'.
        if visibility_marker.match(stripped_line):
            continue

        # Remove 'pub ' and refresh the stripped copy, otherwise the checks
        # below would still see the 'pub ' prefix.
        if stripped_line.startswith("pub "):
            line = line.lstrip()[4:]
            stripped_line = line.strip()

        if function_depth > 0:
            # Track nesting so that the '}' of an inner block does not end
            # the function early; only the outermost '}' is emitted.
            function_depth += brace_balance(stripped_line)
            if function_depth <= 0:
                function_depth = 0
                processed_lines.append("}")
            continue

        if declaration_start.match(stripped_line):
            # A one-line declaration such as 'struct Foo {}' is already closed.
            in_struct_or_enum = brace_balance(stripped_line) > 0
            processed_lines.append(line)
        elif in_struct_or_enum:
            if "}" in stripped_line:
                in_struct_or_enum = False
            processed_lines.append(line)
        elif "fn " in stripped_line and not stripped_line.startswith("//"):
            if "{" not in stripped_line:
                raise ValueError(f"accolade needs to be in fn line.\n{line}")
            function_depth = max(brace_balance(stripped_line), 0)
            processed_lines.append(line)
        else:
            processed_lines.append(line)
    return "\n".join(processed_lines)
def load(path: str) -> str:
    """Concatenate the contents of every ``.v`` file found under ``path``.

    The directory is walked recursively in sorted order so the result is
    deterministic. Lines whose first word is ``import`` or ``module`` are
    removed; every other line is kept verbatim.

    Args:
        path: directory to scan; ``~`` is expanded to the user's home.

    Returns:
        The filtered file contents, separated by a blank line.

    Raises:
        FileNotFoundError: if ``path`` does not exist.
    """
    path = os.path.expanduser(path)
    if not os.path.exists(path):
        raise FileNotFoundError(f"The path '{path}' does not exist.")

    def is_directive(line: str) -> bool:
        # Compare the whole first word: a plain prefix test would also drop
        # lines such as 'important string' or 'module_name string'.
        words = line.split(None, 1)
        return bool(words) and words[0] in ("import", "module")

    all_code = []
    for root, dirs, files in os.walk(path):
        dirs.sort()  # in place, so os.walk descends in a stable order
        for file in sorted(files):
            if not file.endswith(".v"):
                continue
            file_path = os.path.join(root, file)
            with open(file_path, "r", encoding="utf-8") as f:
                kept_lines = [line for line in f if not is_directive(line)]
            all_code.append("".join(kept_lines))
    return "\n\n".join(all_code)
if __name__ == "__main__":
    # Manual smoke test: load the example specs, clean them and show the result.
    examples_path = "~/code/git.threefold.info/projectmycelium/hero_server/lib/openrpclib/parser/examples"
    print(cleaner(load(examples_path)))

View File

@@ -0,0 +1,92 @@
module main
import os
import regex as re
// cleaner removes `pub`, `pub mut:` markers and function bodies from V source,
// keeping struct/enum bodies and function declaration lines.
// It returns an error when a function declaration does not have its opening
// brace on the same line.
fn cleaner(code string) !string {
	// Compile both patterns once instead of once per line.
	// In V's regex module `|` ORs single tokens, so each alternative needs its
	// own group; `{` is a quantifier character, so the opening brace is
	// checked with `contains` instead of being part of the pattern.
	// NOTE(review): this relies on matches_string testing from the start of
	// the (trimmed) line, as the original code did - confirm.
	mut declaration_re := re.regex_opt(r'(struct)|(enum)\s+\w+')!
	mut function_re := re.regex_opt(r'fn\s+\w+')!
	mut processed_lines := []string{}
	mut in_function := false
	mut in_struct_or_enum := false
	for raw_line in code.split_into_lines() {
		// Loop variables are immutable in V, so work on a mutable copy.
		mut line := raw_line.replace('\t', ' ')
		mut stripped_line := line.trim_space()
		// Skip lines starting with 'pub mut:'
		if stripped_line.starts_with('pub mut:') {
			continue
		}
		// Remove 'pub ' at the start of struct and function lines and refresh
		// the trimmed copy so the checks below no longer see the prefix.
		if stripped_line.starts_with('pub ') {
			line = stripped_line[4..]
			stripped_line = line.trim_space()
		}
		// Check if we're entering or exiting a struct or enum
		if declaration_re.matches_string(stripped_line) && stripped_line.contains('{') {
			in_struct_or_enum = true
			processed_lines << line
		} else if in_struct_or_enum && stripped_line.contains('}') {
			in_struct_or_enum = false
			processed_lines << line
		} else if in_struct_or_enum {
			// Struct and enum bodies are kept as they are
			processed_lines << line
		} else if function_re.matches_string(stripped_line) {
			if !stripped_line.contains('{') {
				return error('accolade needs to be in fn line.\n${line}')
			}
			// Function declaration and opening brace on the same line
			in_function = true
			processed_lines << line
		} else if in_function {
			if stripped_line == '}' {
				// Closing brace of the function
				in_function = false
				processed_lines << '}'
			}
			// All other lines inside the function are skipped
		} else {
			processed_lines << line
		}
	}
	return processed_lines.join('\n')
}
// load walks `path` recursively, collects every .v file and returns their
// contents (with import/module lines removed by process_file) separated by a
// blank line. A leading `~` in `path` is expanded to the home directory.
// It returns an error when the path does not exist or a file cannot be read.
fn load(path string) !string {
	// Function parameters are immutable in V, so use a new variable.
	root := os.expand_tilde_to_home(path)
	if !os.exists(root) {
		return error('The path "${root}" does not exist.')
	}
	// Walk over directory recursively; sort for a deterministic result.
	mut files := os.walk_ext(root, '.v')
	files.sort()
	mut all_code := []string{}
	for file_path in files {
		all_code << process_file(file_path)!
	}
	return all_code.join('\n\n')
}
// process_file reads `file_path` and returns its content without the lines
// whose first word is `import` or `module`.
fn process_file(file_path string) !string {
	lines := os.read_lines(file_path)!
	mut kept_lines := []string{}
	for line in lines {
		// Compare the whole first word so that lines such as
		// 'important string' are not dropped by accident.
		words := line.fields()
		if words.len > 0 && words[0] in ['import', 'module'] {
			continue
		}
		kept_lines << line
	}
	return kept_lines.join('\n')
}
// main loads the example specs, cleans them and prints the result.
// Errors from load/cleaner are propagated and abort the program.
fn main() {
	raw_code := load('~/code/git.threefold.info/hero/hero_server/lib/openrpclib/parser/examples')!
	// Clean the code
	cleaned_code := cleaner(raw_code)!
	println(cleaned_code)
}

View File

@@ -0,0 +1,27 @@
import os
import sys
def load_example() -> str:
    """Return the V sources found in the ``examples`` directory next to this module.

    An empty string is returned when that directory does not exist.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    candidate = os.path.join(module_dir, "examples")
    if not os.path.isdir(candidate):
        return ""
    return load_v_files(candidate)
def load_v_files(path: str) -> str:
    """Recursively concatenate the contents of all ``.v`` files below ``path``.

    Entries are visited in sorted order. Each file contributes its content
    followed by a newline; each sub-directory contributes its own result
    followed by two newlines.

    Args:
        path: directory to scan.

    Returns:
        The concatenated file contents (empty string if nothing was found).
    """
    parts = []
    for entry in sorted(os.listdir(path)):
        # os.listdir() returns bare names; they must be joined with `path`,
        # otherwise they would be resolved against the working directory.
        full_path = os.path.join(path, entry)
        if os.path.isdir(full_path):
            parts.append(load_v_files(full_path) + "\n\n")
        elif entry.endswith(".v"):
            with open(full_path, "r", encoding="utf-8") as file:
                parts.append(file.read() + "\n")
    return "".join(parts)

View File

@@ -0,0 +1,107 @@
import os
def includes_process_text(text):
    """Extract named include blocks from ``text``.

    A block starts with a line of the form ``<NAME>`` and ends with a line
    ``<END>``; everything in between is the block content. Lines outside of
    a block are ignored.

    Block names are stored upper-cased because ``include_process_text``
    upper-cases the name before looking it up, which makes block names
    case-insensitive.

    Args:
        text: text containing zero or more include blocks.

    Returns:
        dict mapping the upper-cased block name to its content, with trailing
        whitespace removed.

    Raises:
        Exception: if a block is opened while another one is still open, or
            if the text ends inside a block.
    """
    result = {}
    current_block = None
    current_content = []
    line = ""
    for line in text.split('\n'):
        stripped_line = line.strip()
        is_tag = stripped_line.startswith('<') and stripped_line.endswith('>')
        if is_tag and not stripped_line.startswith('<END'):
            if current_block:
                raise Exception(f"should not come here, there needs to be <END> after a block.\n{line}")
            current_block = stripped_line[1:-1].upper()  # Remove '<' and '>'
            current_content = []
        elif stripped_line == '<END>':
            if current_block:
                result[current_block] = '\n'.join(current_content).rstrip()
                current_block = None
                current_content = []
        elif current_block is not None:
            current_content.append(line)
    if current_block:
        # The text ended while a block was still open.
        raise Exception(f"should not come here, there needs to be <END> after a block.\n{line}")
    return result
def include_process_directory(path):
    """Collect the include blocks of every ``include_*`` file below ``path``.

    The directory is walked recursively; each file whose name starts with
    ``include_`` is reported on stdout, parsed with ``includes_process_text``
    and merged into one dictionary (blocks read later replace earlier blocks
    with the same name).

    Raises:
        FileNotFoundError: if ``path`` (after ``~`` expansion) does not exist.
    """
    path = os.path.expanduser(path)
    if not os.path.exists(path):
        raise FileNotFoundError(f"The path '{path}' does not exist.")

    merged = {}
    for root, _dirs, names in os.walk(path):
        for name in names:
            if not name.startswith('include_'):
                continue
            include_path = os.path.join(root, name)
            print(f" -- include {include_path}")
            with open(include_path, 'r') as handle:
                raw = handle.read()
            merged.update(includes_process_text(raw))
    return merged
def include_process_text(input_text, block_dict):
    """Replace ``//include<NAME>`` lines in ``input_text`` by their block.

    The name is upper-cased before it is looked up in ``block_dict``. A known
    block replaces the whole line with the block text as stored; an unknown
    one is replaced by a ``// ERROR: ...`` comment line. All other lines are
    returned unchanged.
    """
    marker = '//include<'

    def expand(raw_line):
        candidate = raw_line.strip()
        if not (candidate.startswith(marker) and candidate.endswith('>')):
            return raw_line
        # Text between the marker and the closing '>' is the block name.
        key = candidate[len(marker):-1].upper()
        if key in block_dict:
            return block_dict[key]
        return f"// ERROR: Block '{key}' not found in dictionary"

    return '\n'.join(expand(raw_line) for raw_line in input_text.split('\n'))
if __name__ == "__main__":
    # Example usage: manual demo, only executed when this file is run as a script.
    # Sample of the include-block format (<NAME> ... <END>).
    # NOTE(review): this first value of input_text is never used; it is
    # overwritten further below before anything reads it.
    input_text = """
<BASE>
oid string //is unique id for user in a circle, example=a7c *
name string //short name for swimlane'
time_creation int //time when signature was created, in epoch example=1711442827 *
comments []string //list of oid's of comments linked to this story
<END>
<MYNAME>
this is my name, one line only
<END>
"""
    #parsed_blocks = include_parse_blocks(input_text)
    # Load the include blocks from the include_* files of the examples directory.
    includes_dict = include_process_directory("~/code/git.threefold.info/projectmycelium/hero_server/lib/openrpclib/parser/examples")
    for key, value in includes_dict.items():
        print(f"{key}:")
        print(value)
        print() # Add a blank line between blocks for readability
    # Struct definition containing an //include<BASE> line to be expanded.
    input_text = '''
//we didn't do anything for comments yet
//
//this needs to go to description in openrpc spec
//
@[rootobject]
struct Story {
//include<BASE>
content string //description of the milestone example="this is example content which gives more color" *
owners []string //list of users (oid) who are the owners of this project example="10a,g6,aa1" *
notifications []string //list of users (oid) who want to be informed of changes of this milestone example="ad3"
deadline int //epoch deadline for the milestone example="1711442827" *
projects []string //link to a projects this story belongs too
milestones []string //link to the mulestones this story belongs too
}
'''
    # Expand the include line with the loaded blocks and show the result.
    result = include_process_text(input_text, includes_dict)
    print(result)

View File

@@ -0,0 +1,245 @@
import json
import re
from typing import List, Tuple
import yaml # type: ignore
from heroserver.openrpc.parser.cleaner import cleaner, load
from heroserver.openrpc.parser.includes import include_process_directory, include_process_text, includes_process_text
from heroserver.openrpc.parser.splitter import CodeType, splitter
# use https://regex101.com/
def parse_field_description(field_description):
    """Split a struct field comment into description, index flag and example.

    The comment has the form ``<description> [example=<value>] [*]``:

    * a trailing ``*`` marks the field as indexed;
    * ``example=`` introduces an example value, interpreted as
      - a JSON list when it is wrapped in ``[...]`` (kept as raw text if it
        is not valid JSON),
      - an ``int`` when it consists of decimal digits only,
      - the text between matching quotes when it is quoted,
      - the raw text otherwise (e.g. ``example=a7c``).

    Args:
        field_description: text following ``//`` on a field line; may be empty.

    Returns:
        dict with the keys ``description`` (str), ``index`` (bool) and
        ``example`` (``None`` when no example is given).
    """
    result = {"description": "", "index": False, "example": None}

    text = field_description.strip()
    # Check if the field is indexed
    if text.endswith("*"):
        result["index"] = True
        text = text[:-1].strip()

    # Split the description and example
    description, separator, example_value = text.partition("example=")
    result["description"] = description.strip()
    if not separator:
        return result

    example_value = example_value.strip()
    if example_value.startswith("[") and example_value.endswith("]"):
        try:
            result["example"] = json.loads(example_value)
        except json.JSONDecodeError:
            # Not valid JSON (e.g. [a, b]): keep the text rather than failing.
            result["example"] = example_value
    elif example_value.isdecimal():
        result["example"] = int(example_value)
    else:
        # The closing quote must be the same character as the opening one,
        # so that an apostrophe inside "..." does not end the value.
        quoted = re.search(r"([\"'])(.+?)\1", example_value)
        if quoted:
            result["example"] = quoted.group(2)
        elif example_value:
            # Unquoted example such as example=a7c
            result["example"] = example_value
    return result
def parse_struct(struct_def):
    """Parse a V struct definition into its name and fields.

    Fields are read line by line from the struct body, so comment-only lines
    are ignored and a match can never span two lines.

    Args:
        struct_def: text of the struct, starting with ``struct Name {``.

    Returns:
        Tuple ``(struct_name, fields)`` where ``fields`` is a list of
        ``(name, type, comment)`` tuples; ``comment`` is the text after
        ``//`` on the field line, or ``""`` when there is none.

    Raises:
        ValueError: if no ``struct <Name>`` header is found.
    """
    name_match = re.search(r"struct (\w+)", struct_def)
    if name_match is None:
        raise ValueError(f"no struct declaration found in:\n{struct_def}")

    # Only look at what follows the opening brace, so that the header line
    # itself cannot be mistaken for a field.
    _, brace, body = struct_def.partition("{")
    if not brace:
        body = struct_def.partition("\n")[2]

    field_pattern = re.compile(r"\s*(\w+)\s+([\w\[\]]+)(?:\s*//(.+))?")
    fields = []
    for line in body.split("\n"):
        field_match = field_pattern.match(line)
        if field_match:
            field_name, field_type, comment = field_match.groups()
            fields.append((field_name, field_type, comment or ""))
    return name_match.group(1), fields
def parse_enum(enum_def):
    """Return ``(enum_name, values)`` for a V enum definition.

    The name is the word following ``enum``; the values are the first word of
    every indented line that follows a newline.
    """
    header = re.search(r"enum (\w+)", enum_def)
    enum_name = header.group(1)
    member_pattern = re.compile(r"\n\s+(\w+)")
    return enum_name, member_pattern.findall(enum_def)
def parse_function(func_def):
    """Parse a V function declaration.

    Args:
        func_def: declaration text, e.g. ``fn story_get(id string) !RO_Story``.

    Returns:
        Tuple ``(name, params, return_type)``, or ``(None, None, None)`` when
        ``func_def`` is not a function declaration.

        * ``params`` is a list of ``(name, type)`` tuples; ``type`` is
          ``None`` when the parameter has no type of its own (V shorthand
          ``a, b int``). A leading ``mut`` is ignored.
        * A ``RO_`` prefix is removed from parameter and return types, also
          behind array markers (``[]RO_Story`` -> ``[]Story``).
        * An optional marker ``?`` is removed from the return type. A result
          marker ``!`` is removed together with ``RO_`` but otherwise kept
          (``!Story`` stays ``!Story``), as before.
    """
    match = re.search(r"fn (\w+)\((.*?)\)\s*([!?]?(?:\[\])*\w*)", func_def)
    if match is None:
        return None, None, None

    def strip_ro(type_name):
        # Remove the root-object prefix, looking behind any '[]' markers.
        array_prefix = ""
        while type_name.startswith("[]"):
            array_prefix += "[]"
            type_name = type_name[2:]
        if type_name.startswith("RO_"):
            type_name = type_name[3:]
        return array_prefix + type_name

    func_name = match.group(1)
    params_str = match.group(2).strip()
    return_type = match.group(3).strip()

    marker = return_type[:1] if return_type[:1] in ("!", "?") else ""
    bare_type = return_type[len(marker):]
    cleaned_type = strip_ro(bare_type)
    if marker == "!" and cleaned_type == bare_type:
        # No RO_ prefix was removed: keep the '!' as this function always did.
        cleaned_type = "!" + cleaned_type
    return_type = cleaned_type

    # Parse parameters
    params = []
    for raw_param in params_str.split(","):
        tokens = raw_param.split()
        if tokens and tokens[0] == "mut":
            tokens = tokens[1:]
        if not tokens:
            continue
        param_type = strip_ro(tokens[1]) if len(tokens) > 1 else None
        params.append((tokens[0], param_type))
    return func_name, params, return_type
# V primitive type name -> JSON schema type
_PRIMITIVE_JSON_TYPES = {
    "string": "string",
    "f64": "number",
    "float": "number",
    "f32": "number",
    "f16": "number",
    "int": "integer",
    "i8": "integer",
    "i16": "integer",
    "i32": "integer",
    "i64": "integer",
    "u8": "integer",
    "u16": "integer",
    "u32": "integer",
    "u64": "integer",
    "isize": "integer",
    "usize": "integer",
    "byte": "integer",
    "bool": "boolean",
}


def get_type_schema(type_name):
    """Return the JSON schema fragment for a V type name.

    Args:
        type_name: V type such as ``string``, ``[]int`` or ``Story``.
            ``None`` and ``""`` both mean "no type".

    Returns:
        A new dict on every call:

        * ``{"type": "null"}`` when there is no type,
        * an ``array`` schema for ``[]T`` (resolved recursively),
        * a primitive schema for V's string, float, integer and bool types,
        * a ``$ref`` into ``#/components/schemas`` for any other name.
    """
    if not type_name:
        return {"type": "null"}
    if type_name.startswith("[]"):
        return {"type": "array", "items": get_type_schema(type_name[2:])}
    json_type = _PRIMITIVE_JSON_TYPES.get(type_name)
    if json_type is not None:
        return {"type": json_type}
    return {"$ref": f"#/components/schemas/{type_name}"}
def parser(code: str = "", path: str = "") -> dict:
    """Build an OpenRPC specification from V declarations.

    Exactly one source is used: either ``code`` (V source text) or ``path``
    (a directory that is scanned for ``.v`` files and ``include_*`` files).

    Processing steps:

    1. collect the include blocks (from ``path``, or from ``code`` itself);
    2. clean the code and expand ``//include<NAME>`` lines;
    3. split the code into struct, enum and function blocks;
    4. turn structs and enums into ``components.schemas`` entries. A struct
       whose name starts with ``RO_`` is a root object: the prefix is removed,
       the schema is tagged ``rootobject`` and ``<name>_get``, ``<name>_set``
       and ``<name>_delete`` methods are generated for it;
    5. turn every function into an entry of ``methods``.

    Args:
        code: V source code.
        path: directory containing the V specification files.

    Returns:
        The OpenRPC specification as a dict.

    Raises:
        Exception: if both ``code`` and ``path`` are given.
    """
    if len(code) > 0 and len(path) > 0:
        raise Exception("cannot have code and path filled in at same time")
    if len(path) > 0:
        code = load(path)
        includes_dict = include_process_directory(path)
    else:
        # Inline code: include blocks can only come from the code itself
        # (`path` is empty in this branch).
        includes_dict = includes_process_text(code)

    openrpc_spec = {
        "openrpc": "1.2.6",
        "info": {"title": "V Code API", "version": "1.0.0"},
        "methods": [],
        "components": {"schemas": {}},
    }
    schemas = openrpc_spec["components"]["schemas"]

    # this function just cleans the code so we have a proper input for the parser
    code = cleaner(code)
    # this function is a pre-processor, it finds include blocks and adds them in
    code = include_process_text(code, includes_dict)
    codeblocks = splitter(code)

    # Each entry is (block text, comments preceding the block)
    structs: List[Tuple[str, List[str]]] = []
    enums: List[Tuple[str, List[str]]] = []
    functions: List[Tuple[str, List[str]]] = []
    for item in codeblocks:
        entry = (item["block"], item["comments"])
        if item["type"] == CodeType.STRUCT:
            structs.append(entry)
        elif item["type"] == CodeType.ENUM:
            enums.append(entry)
        elif item["type"] == CodeType.FUNCTION:
            functions.append(entry)

    # Process structs
    for item in structs:
        struct_name, fields = parse_struct(item[0])
        rootobject = struct_name.startswith("RO_")
        if rootobject:
            struct_name = struct_name[3:]

        properties = {}
        for field_name, field_type, field_description in fields:
            parsed_description = parse_field_description(field_description)
            field_schema = {
                **get_type_schema(field_type),
                "description": parsed_description["description"],
            }
            # Compare with None so that falsy examples such as 0 are kept.
            if parsed_description["example"] is not None:
                field_schema["example"] = parsed_description["example"]
            if parsed_description["index"]:
                field_schema["x-tags"] = field_schema.get("x-tags", []) + ["indexed"]
            properties[field_name] = field_schema

        schemas[struct_name] = {"type": "object", "properties": properties}

        if rootobject:
            schemas[struct_name]["x-tags"] = ["rootobject"]
            # Root objects get generated accessor methods.
            functions.append((f"fn {struct_name.lower()}_get(id string) {struct_name}", []))
            functions.append((f"fn {struct_name.lower()}_set(obj {struct_name})", []))
            functions.append((f"fn {struct_name.lower()}_delete(id string)", []))

    # Process enums
    for item in enums:
        enum_name, values = parse_enum(item[0])
        schemas[enum_name] = {
            "type": "string",
            "enum": values,
        }

    # Process functions
    for item in functions:
        func_name, params, return_type = parse_function(item[0])
        if not func_name:
            # Not a parsable function declaration
            continue
        return_type = return_type.lstrip("!") if return_type else ""

        descr_return = f"Result of the {func_name} function is {return_type}"
        descr_function = f"Executes the {func_name} function"
        if len(item[1]) > 0:
            # The comments preceding the function become its description.
            if isinstance(item[1], list):
                descr_function = "\n".join(item[1])
            else:
                descr_function = "\n".join(str(element) for element in item[1:])

        method = {
            "name": func_name,
            "description": descr_function,
            "params": [],
            "result": {
                "name": "result",
                "description": descr_return,
                "schema": get_type_schema(return_type),
            },
        }
        for param in params:
            if len(param) == 2:
                param_name, param_type = param
                method["params"].append(
                    {
                        "name": param_name,
                        "description": f"Parameter {param_name} of type {param_type}",
                        "schema": get_type_schema(param_type),
                    }
                )
        openrpc_spec["methods"].append(method)
    return openrpc_spec
if __name__ == "__main__":
    # Manual run: generate the spec for the example project and store it as
    # JSON and as YAML in /tmp.
    spec = parser(path="~/code/git.threefold.info/projectmycelium/hero_server/generatorexamples/example1/specs")

    json_text = json.dumps(spec, indent=2)
    json_path = "/tmp/openrpc_spec.json"
    with open(json_path, "w") as json_file:
        json_file.write(json_text)
    print(f"OpenRPC specification (JSON) has been written to: {json_path}")

    yaml_path = "/tmp/openrpc_spec.yaml"
    with open(yaml_path, "w") as yaml_file:
        yaml.dump(spec, yaml_file, sort_keys=False)
    print(f"OpenRPC specification (YAML) has been written to: {yaml_path}")

View File

@@ -0,0 +1,80 @@
from enum import Enum
from heroserver.openrpc.parser.cleaner import cleaner
class CodeType(Enum):
    """Kinds of top-level declarations recognised by ``splitter``."""

    STRUCT = "struct"
    ENUM = "enum"
    FUNCTION = "function"


# Line prefix (after stripping) -> kind of block it starts
_DECLARATION_PREFIXES = (
    ("struct ", CodeType.STRUCT),
    ("enum ", CodeType.ENUM),
    ("fn ", CodeType.FUNCTION),
)


def splitter(code: str):
    """Split cleaned V code into struct, enum and function blocks.

    Args:
        code: V source code, normally the output of ``cleaner``.

    Returns:
        List of dicts, in source order, with the keys

        * ``type``: a ``CodeType``;
        * ``comments``: the ``//`` comment lines (marker removed, stripped)
          seen since the previous declaration;
        * ``block``: for structs and enums the full text up to and including
          the closing ``}``; for functions the declaration line up to, but
          not including, the opening ``{``.

        Comment-only lines inside a struct or enum body are dropped: they
        belong to neither the body nor the next declaration.
    """
    result = []
    current_block = None
    current_comments = []
    for line in code.split("\n"):
        line = line.replace("\t", " ")
        stripped_line = line.strip()
        inside_body = current_block is not None and current_block["type"] in (
            CodeType.STRUCT,
            CodeType.ENUM,
        )

        if stripped_line.startswith("//"):
            # A comment inside an open struct/enum must not be attributed to
            # the declaration that follows it.
            if not inside_body:
                current_comments.append(stripped_line[2:].strip())
            continue

        block_type = next(
            (kind for prefix, kind in _DECLARATION_PREFIXES if stripped_line.startswith(prefix)),
            None,
        )
        if block_type is not None:
            if current_block:
                result.append(current_block)
            if block_type == CodeType.FUNCTION:
                # Only the signature is of interest for functions.
                block_text = line.split("{")[0].strip()
            else:
                block_text = line
            current_block = {
                "type": block_type,
                "comments": current_comments,
                "block": block_text,
            }
            current_comments = []
        elif inside_body:
            current_block["block"] += "\n" + line
            if stripped_line == "}":
                # Closing brace ends the struct/enum.
                result.append(current_block)
                current_block = None
    if current_block:
        result.append(current_block)
    return result
if __name__ == "__main__":
    from heroserver.openrpc.parser.cleaner import load

    # Manual test: split the cleaned example specs and print every block.
    source = cleaner(load("/root/code/git.threefold.info/projectmycelium/hero_server/lib/openrpclib/parser/examples"))
    for block in splitter(source):
        print(f"Type: {block['type']}")
        print(f"Comments: {block['comments']}")
        print(f"Block:\n{block['block']}")
        print("-" * 50)