Skip to content

Commit e72f7c2

Browse files
MB-62230 - Pre-filtering Optimisation (blevesearch#2098)
This PR: 1. Avoids creating document matches during the pre-filter phase, where document IDs suffice. 2. Reuses document matches by returning each one to the document match pool after its hit has been handled. --------- Co-authored-by: Abhinav Dangeti <abhinav@couchbase.com>
1 parent 902051d commit e72f7c2

8 files changed

Lines changed: 23 additions & 53 deletions

File tree

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ require (
2424
github.com/blevesearch/zapx/v13 v13.3.10
2525
github.com/blevesearch/zapx/v14 v14.3.10
2626
github.com/blevesearch/zapx/v15 v15.3.16
27-
github.com/blevesearch/zapx/v16 v16.1.8-0.20241104164502-f19d5f0cdbcb
27+
github.com/blevesearch/zapx/v16 v16.1.8
2828
github.com/couchbase/moss v0.2.0
2929
github.com/golang/protobuf v1.3.2
3030
github.com/spf13/cobra v1.7.0

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz7
4343
github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns=
4444
github.com/blevesearch/zapx/v15 v15.3.16 h1:Ct3rv7FUJPfPk99TI/OofdC+Kpb4IdyfdMH48sb+FmE=
4545
github.com/blevesearch/zapx/v15 v15.3.16/go.mod h1:Turk/TNRKj9es7ZpKK95PS7f6D44Y7fAFy8F4LXQtGg=
46-
github.com/blevesearch/zapx/v16 v16.1.8-0.20241104164502-f19d5f0cdbcb h1:+LkKIOe8vnyxmHLI8iOa8vpv9h46qYait5znwcl7Utg=
47-
github.com/blevesearch/zapx/v16 v16.1.8-0.20241104164502-f19d5f0cdbcb/go.mod h1:JqQlOqlRVaYDkpLIl3JnKql8u4zKTNlVEa3nLsi0Gn8=
46+
github.com/blevesearch/zapx/v16 v16.1.8 h1:Bxzpw6YQpFs7UjoCV1+RvDw6fmAT2GZxldwX8b3wVBM=
47+
github.com/blevesearch/zapx/v16 v16.1.8/go.mod h1:JqQlOqlRVaYDkpLIl3JnKql8u4zKTNlVEa3nLsi0Gn8=
4848
github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps=
4949
github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k=
5050
github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o=

search/collector/eligible.go

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ type EligibleCollector struct {
2929
took time.Duration
3030
results search.DocumentMatchCollection
3131

32-
store collectorStore
32+
ids []index.IndexInternalID
3333
}
3434

3535
func NewEligibleCollector(size int) *EligibleCollector {
@@ -38,13 +38,9 @@ func NewEligibleCollector(size int) *EligibleCollector {
3838

3939
func newEligibleCollector(size int) *EligibleCollector {
4040
// No sort order & skip always 0 since this is only to filter eligible docs.
41-
ec := &EligibleCollector{size: size}
42-
43-
// comparator is a dummy here
44-
ec.store = getOptimalCollectorStore(size, 0, func(i, j *search.DocumentMatch) int {
45-
return 0
46-
})
47-
41+
ec := &EligibleCollector{size: size,
42+
ids: make([]index.IndexInternalID, 0, size),
43+
}
4844
return ec
4945
}
5046

@@ -55,8 +51,13 @@ func makeEligibleDocumentMatchHandler(ctx *search.SearchContext) (search.Documen
5551
return nil
5652
}
5753

58-
// No elements removed from the store here.
59-
_ = ec.store.Add(d)
54+
copyOfID := make([]byte, len(d.IndexInternalID))
55+
copy(copyOfID, d.IndexInternalID)
56+
ec.ids = append(ec.ids, copyOfID)
57+
58+
// recycle the DocumentMatch
59+
ctx.DocumentMatchPool.Put(d)
60+
6061
return nil
6162
}, nil
6263
}
@@ -122,26 +123,15 @@ func (ec *EligibleCollector) Collect(ctx context.Context, searcher search.Search
122123
// compute search duration
123124
ec.took = time.Since(startTime)
124125

125-
// finalize actual results
126-
err = ec.finalizeResults(reader)
127-
if err != nil {
128-
return err
129-
}
130126
return nil
131127
}
132128

133-
func (ec *EligibleCollector) finalizeResults(r index.IndexReader) error {
134-
var err error
135-
ec.results, err = ec.store.Final(0, func(doc *search.DocumentMatch) error {
136-
// Adding the results to the store without any modifications since we don't
137-
// require the external ID of the filtered hits.
138-
return nil
139-
})
140-
return err
129+
func (ec *EligibleCollector) Results() search.DocumentMatchCollection {
130+
return nil
141131
}
142132

143-
func (ec *EligibleCollector) Results() search.DocumentMatchCollection {
144-
return ec.results
133+
func (ec *EligibleCollector) IDs() []index.IndexInternalID {
134+
return ec.ids
145135
}
146136

147137
func (ec *EligibleCollector) Total() uint64 {

search/collector/heap.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,6 @@ func newStoreHeap(capacity int, compare collectorCompare) *collectStoreHeap {
3434
return rv
3535
}
3636

37-
func (c *collectStoreHeap) Add(doc *search.DocumentMatch) *search.DocumentMatch {
38-
c.add(doc)
39-
return nil
40-
}
41-
4237
func (c *collectStoreHeap) AddNotExceedingSize(doc *search.DocumentMatch,
4338
size int) *search.DocumentMatch {
4439
c.add(doc)

search/collector/list.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,6 @@ func newStoreList(capacity int, compare collectorCompare) *collectStoreList {
3434
return rv
3535
}
3636

37-
func (c *collectStoreList) Add(doc *search.DocumentMatch, size int) *search.DocumentMatch {
38-
c.results.PushBack(doc)
39-
return nil
40-
}
41-
4237
func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch {
4338
c.add(doc)
4439
if c.len() > size {

search/collector/slice.go

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414

1515
package collector
1616

17-
import "github.com/blevesearch/bleve/v2/search"
17+
import (
18+
"github.com/blevesearch/bleve/v2/search"
19+
)
1820

1921
type collectStoreSlice struct {
2022
slice search.DocumentMatchCollection
@@ -29,11 +31,6 @@ func newStoreSlice(capacity int, compare collectorCompare) *collectStoreSlice {
2931
return rv
3032
}
3133

32-
func (c *collectStoreSlice) Add(doc *search.DocumentMatch) *search.DocumentMatch {
33-
c.slice = append(c.slice, doc)
34-
return nil
35-
}
36-
3734
func (c *collectStoreSlice) AddNotExceedingSize(doc *search.DocumentMatch,
3835
size int) *search.DocumentMatch {
3936
c.add(doc)

search/collector/topn.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,6 @@ func init() {
3333
}
3434

3535
type collectorStore interface {
36-
// Adds a doc to the store without considering size.
37-
// Returns nil if the doc was added successfully.
38-
Add(doc *search.DocumentMatch) *search.DocumentMatch
39-
4036
// Add the document, and if the new store size exceeds the provided size
4137
// the last element is removed and returned. If the size has not been
4238
// exceeded, nil is returned.

search_knn.go

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -404,12 +404,9 @@ func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, rea
404404
if err != nil {
405405
return nil, err
406406
}
407-
filterHits := filterColl.Results()
407+
filterHits := filterColl.IDs()
408408
if len(filterHits) > 0 {
409-
filterHitsMap[idx] = make([]index.IndexInternalID, len(filterHits))
410-
for i, docMatch := range filterHits {
411-
filterHitsMap[idx][i] = docMatch.IndexInternalID
412-
}
409+
filterHitsMap[idx] = filterHits
413410
}
414411
// set requiresFiltering regardless of whether there're filtered hits or
415412
// not to later decide whether to consider the knnQuery or not

0 commit comments

Comments
 (0)