Fixed a few bugs related to vector embeddings; added end-to-end documentation that showcases local and external embedders step by step; added an example mock embedder Python script

This commit is contained in:
Maxime Van Hees
2025-10-16 15:30:45 +02:00
parent a8720c06db
commit df780e20a2
5 changed files with 1188 additions and 198 deletions

34
mock_embedder.py Normal file
View File

@@ -0,0 +1,34 @@
from flask import Flask, request, jsonify
import numpy as np
# Flask application instance; the /v1/embeddings route below is registered on
# it and it is started by app.run() when this file is executed as a script.
app = Flask(__name__)
@app.route('/v1/embeddings', methods=['POST'])
def embeddings():
    """Serve deterministic mock embeddings for the posted text(s).

    Expects a JSON object body with an ``input`` field (a single string or a
    list of strings) and an optional ``model`` field. Each text is mapped to
    a 768-dimensional vector built from its character codes and scaled to
    unit length, so the same text always yields the same vector.

    Returns:
        A JSON response containing ``data`` (one ``{"embedding": [...]}``
        entry per input, in input order), ``model`` (echoed back, default
        ``'mock'``) and ``usage.total_tokens`` (total character count of the
        inputs). If the body is not a JSON object, or ``input`` is not a
        string / list of strings, a JSON error with HTTP status 400.
    """
    dim = 768

    # silent=True yields None for a missing/malformed JSON body instead of
    # raising, so every bad-request case is handled in one place below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "request body must be a JSON object"}), 400

    inputs = data.get('input', [])
    if isinstance(inputs, str):
        inputs = [inputs]
    if not isinstance(inputs, list) or not all(
        isinstance(text, str) for text in inputs
    ):
        return jsonify(
            {"error": "'input' must be a string or a list of strings"}
        ), 400

    # Generate deterministic 768-dim embeddings (hash-based)
    vectors = []
    for text in inputs:
        vec = np.zeros(dim)
        # Fold every character into the vector, wrapping around with the
        # modulo. The text is deliberately not truncated to `dim` characters:
        # that would give identical embeddings to all long texts that share
        # the same prefix.
        for i, char in enumerate(text):
            vec[i % dim] += ord(char) / 255.0
        # Normalize to unit length; an empty text stays the all-zero vector.
        norm = np.linalg.norm(vec)
        if norm > 0:
            vec = vec / norm
        vectors.append(vec.tolist())

    return jsonify({
        "data": [{"embedding": vector} for vector in vectors],
        "model": data.get('model', 'mock'),
        "usage": {"total_tokens": sum(len(text) for text in inputs)},
    })
if __name__ == '__main__':
    # Script entry point: start Flask's built-in server on the loopback
    # address, port 8081.
    app.run(port=8081, host='127.0.0.1')