Skip to content
This repository was archived by the owner on Sep 30, 2024. It is now read-only.

Commit fae95fd

Browse files
authored
Search: boost phrases in hybrid search (#64367)
This PR passes the 'boost' annotation down to searcher so that it can apply phrase boosting. For now, we only pass the boost to the Zoekt query in hybrid search, which already gives a nice benefit because the Zoekt results are streamed back first. Note: this does not fully implement boosting in searcher, but the change was simple enough to be worth making. We're purposefully not investing in big searcher ranking improvements, since we think a better investment is to unify the logic across Zoekt and searcher.
1 parent 02ac021 commit fae95fd

File tree

10 files changed

+210
-166
lines changed

10 files changed

+210
-166
lines changed

cmd/searcher/internal/search/matchtree.go

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ import (
88
"strings"
99

1010
"github.com/grafana/regexp"
11-
"github.com/sourcegraph/zoekt/query"
1211
zoektquery "github.com/sourcegraph/zoekt/query"
1312

1413
"github.com/sourcegraph/sourcegraph/internal/search/casetransform"
14+
"github.com/sourcegraph/sourcegraph/internal/search/query"
1515
"github.com/sourcegraph/sourcegraph/internal/searcher/protocol"
1616
"github.com/sourcegraph/sourcegraph/lib/errors"
1717
)
@@ -98,7 +98,7 @@ func toRegexpTree(node *protocol.PatternNode, isCaseSensitive bool) (matchTree,
9898

9999
// OptimizeRegexp currently only converts capture groups into
100100
// non-capture groups (faster for stdlib regexp to execute).
101-
re = query.OptimizeRegexp(re, syntax.Perl)
101+
re = zoektquery.OptimizeRegexp(re, syntax.Perl)
102102

103103
pattern = re.String()
104104
}
@@ -124,6 +124,7 @@ func toRegexpTree(node *protocol.PatternNode, isCaseSensitive bool) (matchTree,
124124
re: re,
125125
ignoreCase: !isCaseSensitive,
126126
isNegated: node.IsNegated,
127+
boost: node.Boost,
127128
literalSubstring: literalSubstring,
128129
}, nil
129130
}
@@ -185,6 +186,9 @@ type regexMatchTree struct {
185186
// isNegated indicates whether matches on the pattern should be negated (representing a 'NOT' in the query)
186187
isNegated bool
187188

189+
// boost indicates whether the regexp should have its score boosted in Zoekt ranking
190+
boost bool
191+
188192
// literalSubstring is used to test if a file is worth considering for
189193
// matches. literalSubstring is guaranteed to appear in any match found by
190194
// re. It is the output of the longestLiteral function. It is only set if
@@ -236,8 +240,9 @@ func (rm *regexMatchTree) ToZoektQuery(matchContent bool, matchPath bool) (zoekt
236240
}
237241
re = zoektquery.OptimizeRegexp(re, syntax.Perl)
238242

243+
var result zoektquery.Q
239244
if matchContent && matchPath {
240-
return zoektquery.NewOr(
245+
result = zoektquery.NewOr(
241246
rm.negateIfNeeded(
242247
&zoektquery.Regexp{
243248
Regexp: re,
@@ -250,16 +255,21 @@ func (rm *regexMatchTree) ToZoektQuery(matchContent bool, matchPath bool) (zoekt
250255
FileName: true,
251256
CaseSensitive: !rm.ignoreCase,
252257
}),
253-
), nil
258+
)
259+
} else {
260+
result = rm.negateIfNeeded(
261+
&zoektquery.Regexp{
262+
Regexp: re,
263+
Content: matchContent,
264+
FileName: matchPath,
265+
CaseSensitive: !rm.ignoreCase,
266+
})
254267
}
255268

256-
return rm.negateIfNeeded(
257-
&zoektquery.Regexp{
258-
Regexp: re,
259-
Content: matchContent,
260-
FileName: matchPath,
261-
CaseSensitive: !rm.ignoreCase,
262-
}), nil
269+
if rm.boost {
270+
result = &zoektquery.Boost{Child: result, Boost: query.ZoektScoreBoost}
271+
}
272+
return result, nil
263273
}
264274

265275
func (rm *regexMatchTree) negateIfNeeded(q zoektquery.Q) zoektquery.Q {

cmd/searcher/internal/search/matchtree_test.go

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"testing"
66

77
"github.com/grafana/regexp"
8+
"github.com/hexops/autogold/v2"
89
"github.com/sourcegraph/zoekt/query"
910
"github.com/stretchr/testify/require"
1011
)
@@ -59,7 +60,8 @@ func TestToZoektQuery(t *testing.T) {
5960
isNegated: true,
6061
},
6162
&regexMatchTree{
62-
re: regexp.MustCompile("bbbb*"),
63+
re: regexp.MustCompile("bbbb*"),
64+
boost: true,
6365
},
6466
},
6567
},
@@ -74,24 +76,23 @@ func TestToZoektQuery(t *testing.T) {
7476
name string
7577
matchContent bool
7678
matchPath bool
77-
want string
79+
want autogold.Value
7880
}{{
7981
name: "matches content only",
8082
matchContent: true,
8183
matchPath: false,
82-
want: `(and (or (not case_regex:"aaaaa") case_regex:"bbbb*") regex:"cccc?")`,
84+
want: autogold.Expect(`(and (or (not case_regex:"aaaaa") (boost 20.00 case_regex:"bbbb*")) regex:"cccc?")`),
8385
}, {
8486
name: "matches path only",
8587
matchContent: false,
8688
matchPath: true,
87-
want: `(and (or (not case_file_regex:"aaaaa") case_file_regex:"bbbb*") file_regex:"cccc?")`,
89+
want: autogold.Expect(`(and (or (not case_file_regex:"aaaaa") (boost 20.00 case_file_regex:"bbbb*")) file_regex:"cccc?")`),
90+
}, {
91+
name: "matches content and path",
92+
matchContent: true,
93+
matchPath: true,
94+
want: autogold.Expect(`(and (or (not case_regex:"aaaaa") (not case_file_regex:"aaaaa") (boost 20.00 (or case_regex:"bbbb*" case_file_regex:"bbbb*"))) (or regex:"cccc?" file_regex:"cccc?"))`),
8895
},
89-
{
90-
name: "matches content and path",
91-
matchContent: true,
92-
matchPath: true,
93-
want: `(and (or (not case_regex:"aaaaa") (not case_file_regex:"aaaaa") case_regex:"bbbb*" case_file_regex:"bbbb*") (or regex:"cccc?" file_regex:"cccc?"))`,
94-
},
9596
}
9697

9798
for _, c := range cases {
@@ -100,7 +101,7 @@ func TestToZoektQuery(t *testing.T) {
100101
if err != nil {
101102
t.Fatal(err)
102103
}
103-
require.Equal(t, c.want, query.Simplify(got).String())
104+
c.want.Equal(t, query.Simplify(got).String())
104105
})
105106
}
106107
}

internal/gitserver/v1/gitserver_grpc.pb.go

Lines changed: 22 additions & 40 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)