mirror of
https://gitlab.com/openstapps/openstapps.git
synced 2026-01-19 16:13:06 +00:00
feat: improve search experience
This commit is contained in:
@@ -8,8 +8,7 @@ const config = {
|
||||
database: {
|
||||
name: 'elasticsearch',
|
||||
query: {
|
||||
minMatch: '60%',
|
||||
queryType: 'query_string',
|
||||
fields: ["name"]
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
@@ -39,7 +39,7 @@ const boostings = {
|
||||
type: SCThingType.AcademicEvent,
|
||||
},
|
||||
{
|
||||
factor: 1.6,
|
||||
factor: 2,
|
||||
type: SCThingType.Building,
|
||||
},
|
||||
{
|
||||
@@ -85,7 +85,7 @@ const boostings = {
|
||||
],
|
||||
place: [
|
||||
{
|
||||
factor: 2,
|
||||
factor: 3,
|
||||
type: SCThingType.Building,
|
||||
},
|
||||
{
|
||||
|
||||
@@ -17,12 +17,21 @@ const config = {
|
||||
name: 'elasticsearch',
|
||||
version: '8.4.2',
|
||||
query: {
|
||||
minMatch: '75%',
|
||||
queryType: 'dis_max',
|
||||
matchBoosting: 1.3,
|
||||
fuzziness: 'AUTO',
|
||||
cutoffFrequency: 0,
|
||||
tieBreaker: 0,
|
||||
type: 'best_fields',
|
||||
fields: [
|
||||
'identifiers^20',
|
||||
'name^10',
|
||||
'translations.*.name^10',
|
||||
'alternateNames^10',
|
||||
'translations.*.alternateNames^10',
|
||||
'description^2',
|
||||
'translations.*.description^2',
|
||||
'categories^5',
|
||||
],
|
||||
},
|
||||
searchAsYouTypeQuery: {
|
||||
type: 'phrase_prefix',
|
||||
fields: ['name.completion', 'name.completion._2gram', 'name.completion._3gram'],
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
@@ -20,6 +20,8 @@ import {
|
||||
IndicesGetAliasResponse,
|
||||
SearchHit,
|
||||
SearchResponse,
|
||||
SearchTermSuggest,
|
||||
SearchTermSuggestOption,
|
||||
} from '@elastic/elasticsearch/lib/api/types.js';
|
||||
import {SCConfigFile, SCSearchQuery, SCSearchResponse, SCThings, SCUuid} from '@openstapps/core';
|
||||
import {Logger} from '@openstapps/logger';
|
||||
@@ -47,6 +49,9 @@ import {
|
||||
import {noUndefined} from './util/no-undefined.js';
|
||||
import {retryCatch, RetryOptions} from './util/retry.js';
|
||||
import {Feature, Point, Polygon} from 'geojson';
|
||||
import {parseSuggestions} from './util/parse-suggestions.js';
|
||||
import {buildScoringFunctions} from './query/boost/scoring-functions.js';
|
||||
import {buildFilter} from './query/filter.js';
|
||||
|
||||
/**
|
||||
* A database interface for elasticsearch
|
||||
@@ -355,6 +360,39 @@ export class Elasticsearch implements Database {
|
||||
throw new Error('You tried to PUT an non-existing object. PUT is only supported on existing objects.');
|
||||
}
|
||||
|
||||
/**
 * Look up a handful of documents whose name starts like the text typed so far
 *
 * Sends a `bool_prefix` multi-match against `name.completion` and its 2-/3-gram subfields,
 * wrapped in a function score built from the configured boostings, and limits the result
 * to five hits from the active indices.
 *
 * NOTE(review): the field list and match type are hard-coded here; confirm whether the
 * `searchAsYouTypeQuery` section of the database configuration is meant to supply them.
 *
 * @param parameters search query; `query` holds the typed text, `filter` and `context` are optional
 * @returns the matching documents wrapped in a search response (never more than five)
 */
public async searchAsYouType(parameters: SCSearchQuery): Promise<SCSearchResponse> {
  // Without any typed text there is nothing to complete, and the multi_match below
  // would be sent without query text, so answer with an empty result right away.
  if (parameters.query === undefined || parameters.query === '') {
    return {
      data: [],
      facets: [],
      pagination: {count: 0, offset: 0, total: 0},
      stats: {time: 0},
    };
  }

  // The whole source is requested (no `_source: 'name'` restriction) because the declared
  // response type carries complete things, not bare names.
  const result: SearchResponse<SCThings> = await this.client.search({
    query: {
      function_score: {
        functions: buildScoringFunctions(this.config.internal.boostings, parameters.context),
        query: {
          bool: {
            must: {
              multi_match: {
                query: parameters.query,
                type: 'bool_prefix',
                fields: ['name.completion', 'name.completion._2gram', 'name.completion._3gram'],
              },
            },
            should: [],
            filter: parameters.filter === undefined ? undefined : buildFilter(parameters.filter),
          },
        },
        score_mode: 'max',
        boost_mode: 'multiply',
      },
    },
    index: ACTIVE_INDICES_ALIAS,
    allow_no_indices: true,
    size: 5,
  });

  // Hits without a stored source cannot be turned into things, so they are skipped
  const data = result.hits.hits.flatMap(hit => (hit._source === undefined ? [] : [hit._source]));
  // `total` is either a plain number or an object carrying the count in `value`
  const total = result.hits.total;

  return {
    data,
    facets: [],
    pagination: {
      count: data.length,
      offset: 0,
      total: typeof total === 'number' ? total : total?.value ?? data.length,
    },
    stats: {
      time: result.took,
    },
  };
}
|
||||
|
||||
/**
|
||||
* Search all indexed data
|
||||
* @param parameters search query
|
||||
@@ -364,18 +402,23 @@ export class Elasticsearch implements Database {
|
||||
throw new TypeError('Database is undefined. You have to configure the query build');
|
||||
}
|
||||
|
||||
const esConfig: ElasticsearchConfig = {
|
||||
name: this.config.internal.database.name as 'elasticsearch',
|
||||
version: this.config.internal.database.version as string,
|
||||
query: this.config.internal.database.query as
|
||||
| ElasticsearchQueryDisMaxConfig
|
||||
| ElasticsearchQueryQueryStringConfig
|
||||
| undefined,
|
||||
};
|
||||
const esConfig = this.config.internal.database as object as ElasticsearchConfig;
|
||||
|
||||
const response: SearchResponse<SCThings> = await this.client.search({
|
||||
aggs: aggregations,
|
||||
query: buildQuery(parameters, this.config, esConfig),
|
||||
suggest:
|
||||
parameters.query === undefined
|
||||
? undefined
|
||||
: {
|
||||
text: parameters.query,
|
||||
terms: {
|
||||
term: {
|
||||
field: 'name',
|
||||
suggest_mode: 'missing',
|
||||
},
|
||||
},
|
||||
},
|
||||
from: parameters.from,
|
||||
index: ACTIVE_INDICES_ALIAS,
|
||||
allow_no_indices: true,
|
||||
@@ -395,6 +438,7 @@ export class Elasticsearch implements Database {
|
||||
response.aggregations === undefined
|
||||
? []
|
||||
: parseAggregations(response.aggregations as Record<AggregateName, AggregationsMultiTermsBucket>),
|
||||
suggestions: response.suggest === undefined ? undefined : parseSuggestions(response.suggest),
|
||||
pagination: {
|
||||
count: response.hits.hits.length,
|
||||
offset: typeof parameters.from === 'number' ? parameters.from : 0,
|
||||
|
||||
@@ -30,84 +30,21 @@ export const buildQuery = function buildQuery(
|
||||
defaultConfig: SCConfigFile,
|
||||
elasticsearchConfig: ElasticsearchConfig,
|
||||
): QueryDslQueryContainer {
|
||||
// if config provides a minMatch parameter, we use query_string instead of a match query
|
||||
let query;
|
||||
if (elasticsearchConfig.query === undefined) {
|
||||
query = {
|
||||
query_string: {
|
||||
analyzer: 'search_german',
|
||||
default_field: 'name',
|
||||
minimum_should_match: '90%',
|
||||
query: typeof parameters.query === 'string' ? parameters.query : '*',
|
||||
},
|
||||
};
|
||||
} else if (elasticsearchConfig.query.queryType === 'query_string') {
|
||||
query = {
|
||||
query_string: {
|
||||
analyzer: 'search_german',
|
||||
default_field: 'name',
|
||||
minimum_should_match: elasticsearchConfig.query.minMatch,
|
||||
query: typeof parameters.query === 'string' ? parameters.query : '*',
|
||||
},
|
||||
};
|
||||
} else if (elasticsearchConfig.query.queryType === 'dis_max') {
|
||||
if (typeof parameters.query === 'string' && parameters.query !== '*') {
|
||||
query = {
|
||||
dis_max: {
|
||||
boost: 1.2,
|
||||
queries: [
|
||||
{
|
||||
match: {
|
||||
name: {
|
||||
boost: elasticsearchConfig.query.matchBoosting,
|
||||
fuzziness: elasticsearchConfig.query.fuzziness,
|
||||
query: parameters.query,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
query_string: {
|
||||
default_field: 'name',
|
||||
minimum_should_match: elasticsearchConfig.query.minMatch,
|
||||
query: parameters.query,
|
||||
},
|
||||
},
|
||||
],
|
||||
tie_breaker: elasticsearchConfig.query.tieBreaker,
|
||||
},
|
||||
};
|
||||
}
|
||||
} else {
|
||||
throw new Error(
|
||||
'Unsupported query type. Check your config file and reconfigure your elasticsearch query',
|
||||
);
|
||||
}
|
||||
|
||||
const functionScoreQuery: QueryDslQueryContainer = {
|
||||
return {
|
||||
function_score: {
|
||||
functions: buildScoringFunctions(defaultConfig.internal.boostings, parameters.context),
|
||||
query: {
|
||||
bool: {
|
||||
minimum_should_match: 0, // if we have no should, nothing can match
|
||||
must: [],
|
||||
must:
|
||||
parameters.query === undefined || parameters.query === '' || parameters.query === '*'
|
||||
? {match_all: {}}
|
||||
: {multi_match: {...elasticsearchConfig.query, query: parameters.query}},
|
||||
should: [],
|
||||
filter: parameters.filter === undefined ? undefined : buildFilter(parameters.filter),
|
||||
},
|
||||
},
|
||||
score_mode: 'multiply',
|
||||
score_mode: 'max',
|
||||
boost_mode: 'multiply',
|
||||
},
|
||||
};
|
||||
|
||||
const mustMatch = functionScoreQuery.function_score?.query?.bool?.must;
|
||||
|
||||
if (Array.isArray(mustMatch)) {
|
||||
if (query !== undefined) {
|
||||
mustMatch.push(query);
|
||||
}
|
||||
|
||||
if (parameters.filter !== undefined) {
|
||||
mustMatch.push(buildFilter(parameters.filter));
|
||||
}
|
||||
}
|
||||
|
||||
return functionScoreQuery;
|
||||
};
|
||||
|
||||
@@ -13,68 +13,7 @@
|
||||
* this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/**
 * Settings for searching with a Dis Max Query
 *
 * The meaning of the individual parameters is explained in
 * https://www.elastic.co/guide/en/elasticsearch/reference/5.5/query-dsl-dis-max-query.html
 */
export interface ElasticsearchQueryDisMaxConfig {
  /**
   * Threshold, relative to the total number of documents or absolute, used to exclude
   * meaningless matches that appear frequently
   */
  cutoffFrequency: number;

  /**
   * Maximum allowed Levenshtein edit distance (number of edits)
   * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.6/common-options.html#fuzziness
   */
  fuzziness: string | number;

  /**
   * Raises the importance (relevance score) of a field
   * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.6/mapping-boost.html
   */
  matchBoosting: number;

  /**
   * Minimal number (or percentage) of words of a query that should match
   * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.6/query-dsl-minimum-should-match.html
   */
  minMatch: string;

  /**
   * Query type, here always 'dis_max', which is a union of its subqueries
   * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.6/query-dsl-dis-max-query.html
   */
  queryType: 'dis_max';

  /**
   * Alters the default score calculation when multiple results match
   * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.6/query-dsl-multi-match-query.html#tie-breaker
   */
  tieBreaker: number;
}
|
||||
|
||||
/**
 * Settings for searching with a Query String Query
 *
 * The meaning of the individual parameters is explained in
 * https://www.elastic.co/guide/en/elasticsearch/reference/5.5/query-dsl-query-string-query.html
 */
export interface ElasticsearchQueryQueryStringConfig {
  /**
   * Minimal number (or percentage) of words of a query that should match
   * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.6/query-dsl-minimum-should-match.html
   */
  minMatch: string;

  /**
   * Query type, here always 'query_string', which uses a query parser to parse the content
   * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.6/query-dsl-query-string-query.html
   */
  queryType: 'query_string';
}
|
||||
import {QueryDslMultiMatchQuery} from '@elastic/elasticsearch/lib/api/types.js';
|
||||
|
||||
/**
|
||||
* A config file for the elasticsearch database interface
|
||||
@@ -105,7 +44,12 @@ export interface ElasticsearchConfig {
|
||||
/**
|
||||
* Configuration for using queries
|
||||
*/
|
||||
query?: ElasticsearchQueryDisMaxConfig | ElasticsearchQueryQueryStringConfig;
|
||||
query: Omit<QueryDslMultiMatchQuery, 'query'>;
|
||||
|
||||
/**
|
||||
* Multi-match options (everything except the query text) for search-as-you-type requests
|
||||
*/
|
||||
searchAsYouTypeQuery: Omit<QueryDslMultiMatchQuery, 'query'>;
|
||||
|
||||
/**
|
||||
* Version of the used elasticsearch
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
import {
|
||||
SearchSuggest,
|
||||
SearchTermSuggest,
|
||||
SearchTermSuggestOption,
|
||||
SuggestionName,
|
||||
} from '@elastic/elasticsearch/lib/api/types.js';
|
||||
import {SCSearchSuggestions} from '@openstapps/core';
|
||||
|
||||
/**
 * Parse ES Suggestions to SC Search Suggestions
 *
 * Only the suggester registered under the name `terms` is read. Each of its entries pairs
 * the original token with the texts of the proposed options; tokens that received no
 * option are dropped.
 *
 * @param suggest the `suggest` section of an elasticsearch search response
 * @returns suggestions keyed by the original token; `terms` is `undefined` when no token has a suggestion
 */
export function parseSuggestions(suggest: Record<SuggestionName, SearchSuggest[]>): SCSearchSuggestions {
  // A missing (or null) `terms` suggester simply yields no entries
  const entries = (suggest.terms as SearchTermSuggest[] | null | undefined) ?? [];

  const pairs = entries
    .map(({text, options}) => {
      // The client types allow `options` to be a single object as well as an array
      const optionList: SearchTermSuggestOption[] = Array.isArray(options)
        ? options
        : options === undefined || options === null
        ? []
        : [options];

      return [text, optionList.map(option => option.text)] as const;
    })
    .filter(([, alternatives]) => alternatives.length > 0);

  return {
    terms: pairs.length === 0 ? undefined : Object.fromEntries(pairs),
  };
}
|
||||
Reference in New Issue
Block a user