fixed a few bugs related to vector embedding; added end-to-end documentation showcasing local and external embedders step by step; added an example mock embedder Python script
mock_embedder.py (Normal file, 34 lines)
@@ -0,0 +1,34 @@
from flask import Flask, request, jsonify
import numpy as np

app = Flask(__name__)

@app.route('/v1/embeddings', methods=['POST'])
def embeddings():
    data = request.json
    inputs = data.get('input', [])
    if isinstance(inputs, str):
        inputs = [inputs]

    # Generate deterministic 768-dim embeddings (hash-based)
    embeddings = []
    for text in inputs:
        # Simple hash to vector
        vec = np.zeros(768)
        for i, char in enumerate(text[:768]):
            vec[i % 768] += ord(char) / 255.0
        # Normalize
        norm = np.linalg.norm(vec)
        if norm > 0:
            vec = vec / norm
        embeddings.append(vec.tolist())

    return jsonify({
        "data": [{"embedding": emb} for emb in embeddings],
        "model": data.get('model', 'mock'),
        "usage": {"total_tokens": sum(len(t) for t in inputs)}
    })

if __name__ == '__main__':
    app.run(host='127.0.0.1', port=8081)
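Once the script above is running, the endpoint can be exercised with a small client. This is a minimal sketch, not part of the commit, assuming the mock server is listening on 127.0.0.1:8081 as configured above and that the requests package is installed:

# query_mock_embedder.py - quick check against the mock /v1/embeddings endpoint
# Assumes the mock server above is running on 127.0.0.1:8081 and `requests` is installed.
import requests

resp = requests.post(
    "http://127.0.0.1:8081/v1/embeddings",
    json={"input": ["hello world", "vector embeddings"], "model": "mock"},
)
resp.raise_for_status()
body = resp.json()

# Each entry in "data" carries one 768-dimensional embedding
for item in body["data"]:
    print(len(item["embedding"]), item["embedding"][:5])
print("total_tokens:", body["usage"]["total_tokens"])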