Skip to content

Commit e622b4f

Browse files
committed
feat: drop memory index
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent 933f686 commit e622b4f

8 files changed

Lines changed: 703 additions & 157 deletions

File tree

rag/engine.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,5 @@ type Engine interface {
1414
Count() int
1515
Delete(where map[string]string, whereDocuments map[string]string, ids ...string) error
1616
GetByID(id string) (types.Result, error)
17+
GetBySource(source string) ([]types.Result, error)
1718
}

rag/engine/chromem.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,32 @@ func (c *ChromemDB) GetByID(id string) (types.Result, error) {
167167
return types.Result{ID: res.ID, Metadata: res.Metadata, Content: res.Content}, nil
168168
}
169169

170+
func (c *ChromemDB) GetBySource(source string) ([]types.Result, error) {
171+
ctx := context.Background()
172+
count := c.collection.Count()
173+
if count == 0 {
174+
return nil, nil
175+
}
176+
177+
// Use Query with a where filter to find documents by source metadata.
178+
// We use a dummy query and request all documents, relying on the where
179+
// filter to narrow results.
180+
res, err := c.collection.Query(ctx, ".", count, map[string]string{"source": source}, nil)
181+
if err != nil {
182+
return nil, fmt.Errorf("error querying by source: %v", err)
183+
}
184+
185+
var results []types.Result
186+
for _, r := range res {
187+
results = append(results, types.Result{
188+
ID: r.ID,
189+
Metadata: r.Metadata,
190+
Content: r.Content,
191+
})
192+
}
193+
return results, nil
194+
}
195+
170196
func (c *ChromemDB) Search(s string, similarEntries int) ([]types.Result, error) {
171197
res, err := c.collection.Query(context.Background(), s, similarEntries, nil, nil)
172198
if err != nil {

rag/engine/localai.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ func (db *LocalAIRAGDB) GetByID(id string) (types.Result, error) {
8888
return types.Result{}, fmt.Errorf("not implemented")
8989
}
9090

91+
func (db *LocalAIRAGDB) GetBySource(source string) ([]types.Result, error) {
92+
return nil, fmt.Errorf("not implemented")
93+
}
94+
9195
func (db *LocalAIRAGDB) Search(s string, similarEntries int) ([]types.Result, error) {
9296
resp, err := db.openaiClient.CreateEmbeddings(context.TODO(),
9397
openai.EmbeddingRequestStrings{

rag/engine/mock.go

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
package engine
2+
3+
import (
4+
"fmt"
5+
"strings"
6+
"sync"
7+
8+
"github.com/mudler/localrecall/rag/types"
9+
)
10+
11+
// MockEngine is a simple in-memory engine for testing. It requires no
12+
// external dependencies (no LocalAI, no embeddings).
13+
type MockEngine struct {
14+
mu sync.Mutex
15+
docs map[string]types.Result
16+
index int
17+
}
18+
19+
func NewMockEngine() *MockEngine {
20+
return &MockEngine{
21+
docs: make(map[string]types.Result),
22+
index: 1,
23+
}
24+
}
25+
26+
func (m *MockEngine) Store(s string, metadata map[string]string) (Result, error) {
27+
results, err := m.StoreDocuments([]string{s}, metadata)
28+
if err != nil {
29+
return Result{}, err
30+
}
31+
return results[0], nil
32+
}
33+
34+
func (m *MockEngine) StoreDocuments(s []string, metadata map[string]string) ([]Result, error) {
35+
m.mu.Lock()
36+
defer m.mu.Unlock()
37+
38+
if len(s) == 0 {
39+
return nil, fmt.Errorf("empty input")
40+
}
41+
42+
results := make([]Result, len(s))
43+
for i, content := range s {
44+
id := fmt.Sprintf("%d", m.index)
45+
// Copy metadata so each doc has its own map
46+
meta := make(map[string]string, len(metadata))
47+
for k, v := range metadata {
48+
meta[k] = v
49+
}
50+
m.docs[id] = types.Result{
51+
ID: id,
52+
Content: content,
53+
Metadata: meta,
54+
}
55+
results[i] = Result{ID: id}
56+
m.index++
57+
}
58+
return results, nil
59+
}
60+
61+
func (m *MockEngine) Search(s string, similarEntries int) ([]types.Result, error) {
62+
m.mu.Lock()
63+
defer m.mu.Unlock()
64+
65+
var results []types.Result
66+
for _, doc := range m.docs {
67+
if strings.Contains(strings.ToLower(doc.Content), strings.ToLower(s)) {
68+
results = append(results, doc)
69+
}
70+
}
71+
// If no substring match, return all (useful for generic searches)
72+
if len(results) == 0 {
73+
for _, doc := range m.docs {
74+
results = append(results, doc)
75+
}
76+
}
77+
if len(results) > similarEntries {
78+
results = results[:similarEntries]
79+
}
80+
return results, nil
81+
}
82+
83+
func (m *MockEngine) Delete(where map[string]string, whereDocuments map[string]string, ids ...string) error {
84+
m.mu.Lock()
85+
defer m.mu.Unlock()
86+
87+
// Delete by IDs
88+
if len(ids) > 0 {
89+
for _, id := range ids {
90+
delete(m.docs, id)
91+
}
92+
return nil
93+
}
94+
95+
// Delete by metadata where filter
96+
if len(where) > 0 {
97+
for id, doc := range m.docs {
98+
match := true
99+
for k, v := range where {
100+
if doc.Metadata[k] != v {
101+
match = false
102+
break
103+
}
104+
}
105+
if match {
106+
delete(m.docs, id)
107+
}
108+
}
109+
}
110+
111+
return nil
112+
}
113+
114+
func (m *MockEngine) GetByID(id string) (types.Result, error) {
115+
m.mu.Lock()
116+
defer m.mu.Unlock()
117+
118+
doc, ok := m.docs[id]
119+
if !ok {
120+
return types.Result{}, fmt.Errorf("document not found: %s", id)
121+
}
122+
return doc, nil
123+
}
124+
125+
func (m *MockEngine) GetBySource(source string) ([]types.Result, error) {
126+
m.mu.Lock()
127+
defer m.mu.Unlock()
128+
129+
var results []types.Result
130+
for _, doc := range m.docs {
131+
if doc.Metadata["source"] == source {
132+
results = append(results, doc)
133+
}
134+
}
135+
return results, nil
136+
}
137+
138+
func (m *MockEngine) Count() int {
139+
m.mu.Lock()
140+
defer m.mu.Unlock()
141+
142+
return len(m.docs)
143+
}
144+
145+
func (m *MockEngine) Reset() error {
146+
m.mu.Lock()
147+
defer m.mu.Unlock()
148+
149+
m.docs = make(map[string]types.Result)
150+
m.index = 1
151+
return nil
152+
}
153+
154+
func (m *MockEngine) GetEmbeddingDimensions() (int, error) {
155+
return 384, nil
156+
}

rag/engine/postgres.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,40 @@ func (p *PostgresDB) GetByID(id string) (types.Result, error) {
586586
return result, nil
587587
}
588588

589+
func (p *PostgresDB) GetBySource(source string) ([]types.Result, error) {
590+
ctx := context.Background()
591+
592+
rows, err := p.pool.Query(ctx, fmt.Sprintf(`
593+
SELECT id::text, COALESCE(title, '') as title, content, metadata
594+
FROM %s WHERE metadata->>'source' = $1
595+
`, p.tableName), source)
596+
if err != nil {
597+
return nil, fmt.Errorf("failed to query by source: %w", err)
598+
}
599+
defer rows.Close()
600+
601+
var results []types.Result
602+
for rows.Next() {
603+
var r types.Result
604+
var title string
605+
var metadataJSON []byte
606+
607+
if err := rows.Scan(&r.ID, &title, &r.Content, &metadataJSON); err != nil {
608+
continue
609+
}
610+
611+
r.Metadata = make(map[string]string)
612+
if len(metadataJSON) > 0 {
613+
json.Unmarshal(metadataJSON, &r.Metadata)
614+
}
615+
if title != "" {
616+
r.Metadata["title"] = title
617+
}
618+
results = append(results, r)
619+
}
620+
return results, nil
621+
}
622+
589623
func (p *PostgresDB) Search(s string, similarEntries int) ([]types.Result, error) {
590624
ctx := context.Background()
591625

0 commit comments

Comments
 (0)