Determine fuzziness of bleve indexer by keyword length ()

also bleve did match on fuzzy search and the other way around. this also fix that bug.

(cherry picked from commit b9c57fb78e8e0d80d786d8e1da433b6c7ebf2f1c)

Conflicts:
	tests/integration/repo_search_test.go
	simple conflict resolution in the tests
This commit is contained in:
6543 2024-03-23 16:45:13 +01:00 committed by Earl Warren
parent b73117127c
commit ab5f0b7558
No known key found for this signature in database
GPG key ID: 0579CB2928A78A00
4 changed files with 29 additions and 37 deletions
modules/indexer
code/bleve
internal/bleve
issues/bleve
tests/integration

View file

@ -39,6 +39,8 @@ import (
const (
unicodeNormalizeName = "unicodeNormalize"
maxBatchSize = 16
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
fuzzyDenominator = 4
)
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
@ -239,15 +241,12 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
keywordQuery query.Query
)
phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
phraseQuery.FieldVal = "Content"
phraseQuery.Analyzer = repoIndexerAnalyzer
keywordQuery = phraseQuery
if opts.IsKeywordFuzzy {
phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
phraseQuery.FieldVal = "Content"
phraseQuery.Analyzer = repoIndexerAnalyzer
keywordQuery = phraseQuery
} else {
prefixQuery := bleve.NewPrefixQuery(opts.Keyword)
prefixQuery.FieldVal = "Content"
keywordQuery = prefixQuery
phraseQuery.Fuzziness = len(opts.Keyword) / fuzzyDenominator
}
if len(opts.RepoIDs) > 0 {

View file

@ -20,17 +20,11 @@ func NumericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
}
// MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer
func MatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery {
func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchPhraseQuery {
q := bleve.NewMatchPhraseQuery(matchPhrase)
q.FieldVal = field
q.Analyzer = analyzer
return q
}
// PrefixQuery generates a match prefix query for the given prefix and field
func PrefixQuery(matchPrefix, field string) *query.PrefixQuery {
q := bleve.NewPrefixQuery(matchPrefix)
q.FieldVal = field
q.Fuzziness = fuzziness
return q
}

View file

@ -35,7 +35,11 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
})
}
const maxBatchSize = 16
const (
maxBatchSize = 16
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
fuzzyDenominator = 4
)
// IndexerData an update to the issue indexer
type IndexerData internal.IndexerData
@ -156,19 +160,16 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
var queries []query.Query
if options.Keyword != "" {
fuzziness := 0
if options.IsFuzzyKeyword {
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer),
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer),
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer),
}...))
} else {
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
inner_bleve.PrefixQuery(options.Keyword, "title"),
inner_bleve.PrefixQuery(options.Keyword, "content"),
inner_bleve.PrefixQuery(options.Keyword, "comments"),
}...))
fuzziness = len(options.Keyword) / fuzzyDenominator
}
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
}...))
}
if len(options.RepoIDs) > 0 || options.AllPublic {

View file

@ -46,7 +46,7 @@ func testSearchRepo(t *testing.T, useExternalIndexer bool) {
if useExternalIndexer {
gitReference = "/commit/"
executeIndexer(t, repo, code_indexer.UpdateRepoIndexer)
code_indexer.UpdateRepoIndexer(repo)
}
testSearch(t, "/user2/repo1/search?q=Description&page=1", gitReference, []string{"README.md"})
@ -58,12 +58,14 @@ func testSearchRepo(t *testing.T, useExternalIndexer bool) {
repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob")
assert.NoError(t, err)
executeIndexer(t, repo, code_indexer.UpdateRepoIndexer)
code_indexer.UpdateRepoIndexer(repo)
testSearch(t, "/user2/glob/search?q=loren&page=1", gitReference, []string{"a.txt"})
testSearch(t, "/user2/glob/search?q=file3&page=1", gitReference, []string{"x/b.txt"})
testSearch(t, "/user2/glob/search?q=file4&page=1", gitReference, []string{})
testSearch(t, "/user2/glob/search?q=file5&page=1", gitReference, []string{})
testSearch(t, "/user2/glob/search?q=loren&page=1&t=match", gitReference, []string{"a.txt"})
testSearch(t, "/user2/glob/search?q=file3&page=1", gitReference, []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=file3&page=1&t=match", gitReference, []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=file4&page=1&t=match", gitReference, []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=file5&page=1&t=match", gitReference, []string{"x/b.txt", "a.txt"})
}
}
@ -88,7 +90,3 @@ func testSearch(t *testing.T, url, gitRef string, expected []string) {
checkResultLinks(t, gitRef, doc)
}
func executeIndexer(t *testing.T, repo *repo_model.Repository, op func(*repo_model.Repository)) {
op(repo)
}