feat: improve search experience

This commit is contained in:
Thea Schöbl
2025-09-09 18:37:32 +00:00
20 changed files with 230 additions and 158 deletions

View File

@@ -8,8 +8,7 @@ const config = {
database: {
name: 'elasticsearch',
query: {
minMatch: '60%',
queryType: 'query_string',
fields: ["name"]
},
},
},

View File

@@ -39,7 +39,7 @@ const boostings = {
type: SCThingType.AcademicEvent,
},
{
factor: 1.6,
factor: 2,
type: SCThingType.Building,
},
{
@@ -85,7 +85,7 @@ const boostings = {
],
place: [
{
factor: 2,
factor: 3,
type: SCThingType.Building,
},
{

View File

@@ -17,12 +17,21 @@ const config = {
name: 'elasticsearch',
version: '8.4.2',
query: {
minMatch: '75%',
queryType: 'dis_max',
matchBoosting: 1.3,
fuzziness: 'AUTO',
cutoffFrequency: 0,
tieBreaker: 0,
type: 'best_fields',
fields: [
'identifiers^20',
'name^10',
'translations.*.name^10',
'alternateNames^10',
'translations.*.alternateNames^10',
'description^2',
'translations.*.description^2',
'categories^5',
],
},
searchAsYouTypeQuery: {
type: 'phrase_prefix',
fields: ['name.completion', 'name.completion._2gram', 'name.completion._3gram'],
},
},
},

View File

@@ -20,6 +20,8 @@ import {
IndicesGetAliasResponse,
SearchHit,
SearchResponse,
SearchTermSuggest,
SearchTermSuggestOption,
} from '@elastic/elasticsearch/lib/api/types.js';
import {SCConfigFile, SCSearchQuery, SCSearchResponse, SCThings, SCUuid} from '@openstapps/core';
import {Logger} from '@openstapps/logger';
@@ -47,6 +49,9 @@ import {
import {noUndefined} from './util/no-undefined.js';
import {retryCatch, RetryOptions} from './util/retry.js';
import {Feature, Point, Polygon} from 'geojson';
import {parseSuggestions} from './util/parse-suggestions.js';
import {buildScoringFunctions} from './query/boost/scoring-functions.js';
import {buildFilter} from './query/filter.js';
/**
* A database interface for elasticsearch
@@ -355,6 +360,39 @@ export class Elasticsearch implements Database {
throw new Error('You tried to PUT an non-existing object. PUT is only supported on existing objects.');
}
/**
 * Look up name suggestions for a partially typed query
 *
 * Sends a `bool_prefix` multi-match over the `name.completion` fields to the active indices,
 * scored with the functions built from the configured boostings and the query context and
 * narrowed by the optional filter. Only the `name` of at most five hits is requested.
 *
 * NOTE(review): the collected names and the `took` value are only written to the console and
 * nothing is returned, although the signature promises an SCSearchResponse - confirm the
 * intended result shape.
 * @param parameters search query holding the typed text, an optional context and an optional filter
 */
public async searchAsYouType(parameters: SCSearchQuery): Promise<SCSearchResponse> {
  const scoringFunctions = buildScoringFunctions(this.config.internal.boostings, parameters.context);
  const filter = parameters.filter !== undefined ? buildFilter(parameters.filter) : undefined;

  const {hits, took} = await this.client.search({
    _source: 'name',
    query: {
      function_score: {
        functions: scoringFunctions,
        query: {
          bool: {
            must: {
              multi_match: {
                query: parameters.query,
                type: 'bool_prefix',
                fields: ['name.completion', 'name.completion._2gram', 'name.completion._3gram'],
              },
            },
            should: [],
            filter,
          },
        },
        score_mode: 'max',
        boost_mode: 'multiply',
      },
    },
    index: ACTIVE_INDICES_ALIAS,
    allow_no_indices: true,
    size: 5,
  });

  // _source is restricted to `name` above, so each hit is expected to carry exactly that field
  const names = hits.hits.map(hit => (hit._source as any).name);
  console.log(names);
  console.log(took);
}
/**
* Search all indexed data
* @param parameters search query
@@ -364,18 +402,23 @@ export class Elasticsearch implements Database {
throw new TypeError('Database is undefined. You have to configure the query build');
}
const esConfig: ElasticsearchConfig = {
name: this.config.internal.database.name as 'elasticsearch',
version: this.config.internal.database.version as string,
query: this.config.internal.database.query as
| ElasticsearchQueryDisMaxConfig
| ElasticsearchQueryQueryStringConfig
| undefined,
};
const esConfig = this.config.internal.database as object as ElasticsearchConfig;
const response: SearchResponse<SCThings> = await this.client.search({
aggs: aggregations,
query: buildQuery(parameters, this.config, esConfig),
suggest:
parameters.query === undefined
? undefined
: {
text: parameters.query,
terms: {
term: {
field: 'name',
suggest_mode: 'missing',
},
},
},
from: parameters.from,
index: ACTIVE_INDICES_ALIAS,
allow_no_indices: true,
@@ -395,6 +438,7 @@ export class Elasticsearch implements Database {
response.aggregations === undefined
? []
: parseAggregations(response.aggregations as Record<AggregateName, AggregationsMultiTermsBucket>),
suggestions: response.suggest === undefined ? undefined : parseSuggestions(response.suggest),
pagination: {
count: response.hits.hits.length,
offset: typeof parameters.from === 'number' ? parameters.from : 0,

View File

@@ -30,84 +30,21 @@ export const buildQuery = function buildQuery(
defaultConfig: SCConfigFile,
elasticsearchConfig: ElasticsearchConfig,
): QueryDslQueryContainer {
// if config provides a minMatch parameter, we use query_string instead of a match query
let query;
if (elasticsearchConfig.query === undefined) {
query = {
query_string: {
analyzer: 'search_german',
default_field: 'name',
minimum_should_match: '90%',
query: typeof parameters.query === 'string' ? parameters.query : '*',
},
};
} else if (elasticsearchConfig.query.queryType === 'query_string') {
query = {
query_string: {
analyzer: 'search_german',
default_field: 'name',
minimum_should_match: elasticsearchConfig.query.minMatch,
query: typeof parameters.query === 'string' ? parameters.query : '*',
},
};
} else if (elasticsearchConfig.query.queryType === 'dis_max') {
if (typeof parameters.query === 'string' && parameters.query !== '*') {
query = {
dis_max: {
boost: 1.2,
queries: [
{
match: {
name: {
boost: elasticsearchConfig.query.matchBoosting,
fuzziness: elasticsearchConfig.query.fuzziness,
query: parameters.query,
},
},
},
{
query_string: {
default_field: 'name',
minimum_should_match: elasticsearchConfig.query.minMatch,
query: parameters.query,
},
},
],
tie_breaker: elasticsearchConfig.query.tieBreaker,
},
};
}
} else {
throw new Error(
'Unsupported query type. Check your config file and reconfigure your elasticsearch query',
);
}
const functionScoreQuery: QueryDslQueryContainer = {
return {
function_score: {
functions: buildScoringFunctions(defaultConfig.internal.boostings, parameters.context),
query: {
bool: {
minimum_should_match: 0, // if we have no should, nothing can match
must: [],
must:
parameters.query === undefined || parameters.query === '' || parameters.query === '*'
? {match_all: {}}
: {multi_match: {...elasticsearchConfig.query, query: parameters.query}},
should: [],
filter: parameters.filter === undefined ? undefined : buildFilter(parameters.filter),
},
},
score_mode: 'multiply',
score_mode: 'max',
boost_mode: 'multiply',
},
};
const mustMatch = functionScoreQuery.function_score?.query?.bool?.must;
if (Array.isArray(mustMatch)) {
if (query !== undefined) {
mustMatch.push(query);
}
if (parameters.filter !== undefined) {
mustMatch.push(buildFilter(parameters.filter));
}
}
return functionScoreQuery;
};

View File

@@ -13,68 +13,7 @@
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
* A configuration for using the Dis Max Query
*
* Distinguished from the other query configuration by `queryType: 'dis_max'`.
*
* See https://www.elastic.co/guide/en/elasticsearch/reference/5.5/query-dsl-dis-max-query.html for further
* explanation of what the parameters mean
*/
export interface ElasticsearchQueryDisMaxConfig {
/**
* Relative (to a total number of documents) or absolute number to exclude meaningless matches that frequently appear
*
* NOTE(review): no reader of this value is visible here - confirm it is still used
*/
cutoffFrequency: number;
/**
* The maximum allowed Levenshtein Edit Distance (or number of edits)
*
* Either a number or a string such as 'AUTO'
* @see https://www.elastic.co/guide/en/elasticsearch/reference/5.6/common-options.html#fuzziness
*/
fuzziness: number | string;
/**
* Increase the importance (relevance score) of a field
* @see https://www.elastic.co/guide/en/elasticsearch/reference/5.6/mapping-boost.html
*/
matchBoosting: number;
/**
* Minimal number (or percentage) of words that should match in a query
*
* Given as a string, for example '75%'
* @see https://www.elastic.co/guide/en/elasticsearch/reference/5.6/query-dsl-minimum-should-match.html
*/
minMatch: string;
/**
* Type of the query - in this case 'dis_max' which is a union of its subqueries
* @see https://www.elastic.co/guide/en/elasticsearch/reference/5.6/query-dsl-dis-max-query.html
*/
queryType: 'dis_max';
/**
* Changes behavior of default calculation of the score when multiple results match
* @see https://www.elastic.co/guide/en/elasticsearch/reference/5.6/query-dsl-multi-match-query.html#tie-breaker
*/
tieBreaker: number;
}
/**
* A configuration for using Query String Query
*
* Distinguished from the other query configuration by `queryType: 'query_string'`.
*
* See https://www.elastic.co/guide/en/elasticsearch/reference/5.5/query-dsl-query-string-query.html for further
* explanation of what the parameters mean
*/
export interface ElasticsearchQueryQueryStringConfig {
/**
* Minimal number (or percentage) of words that should match in a query
*
* Given as a string, for example '60%'
* @see https://www.elastic.co/guide/en/elasticsearch/reference/5.6/query-dsl-minimum-should-match.html
*/
minMatch: string;
/**
* Type of the query - in this case 'query_string' which uses a query parser in order to parse content
* @see https://www.elastic.co/guide/en/elasticsearch/reference/5.6/query-dsl-query-string-query.html
*/
queryType: 'query_string';
}
import {QueryDslMultiMatchQuery} from '@elastic/elasticsearch/lib/api/types.js';
/**
* A config file for the elasticsearch database interface
@@ -105,7 +44,12 @@ export interface ElasticsearchConfig {
/**
* Configuration for using queries
*/
query?: ElasticsearchQueryDisMaxConfig | ElasticsearchQueryQueryStringConfig;
query: Omit<QueryDslMultiMatchQuery, 'query'>;
/**
* Multi-match options (everything except the query text) intended for search-as-you-type requests
*/
searchAsYouTypeQuery: Omit<QueryDslMultiMatchQuery, 'query'>;
/**
* Version of the used elasticsearch

View File

@@ -0,0 +1,28 @@
import {
SearchSuggest,
SearchTermSuggest,
SearchTermSuggestOption,
SuggestionName,
} from '@elastic/elasticsearch/lib/api/types.js';
import {SCSearchSuggestions} from '@openstapps/core';
/**
 * Convert the term suggestions of an Elasticsearch response into SC search suggestions
 *
 * Only the suggester registered under the name `terms` is read. Every suggested token that has
 * at least one option is mapped to the texts of its options; tokens without options are dropped.
 * @param suggest the `suggest` section of an Elasticsearch search response
 * @returns an object whose `terms` maps token text to option texts, or has `terms: undefined`
 * when no token produced any option
 */
export function parseSuggestions(suggest: Record<SuggestionName, SearchSuggest[]>): SCSearchSuggestions {
  const termEntries = (suggest.terms ?? []) as SearchTermSuggest[];
  const pairs: Array<readonly [string, string[]]> = [];

  for (const entry of termEntries) {
    const optionList = (entry.options as SearchTermSuggestOption[] | undefined) ?? [];
    const candidates = optionList.map(option => option.text);
    if (candidates.length > 0) {
      pairs.push([entry.text, candidates]);
    }
  }

  // Object.fromEntries (not plain assignment) so that every token text becomes an own property
  return {
    terms: pairs.length > 0 ? Object.fromEntries(pairs) : undefined,
  };
}