feat: add support for generated elasticsearch mappings

Fixes #38
This commit is contained in:
Wieland Schöbl
2019-05-29 13:42:41 +02:00
committed by Rainer Killinger
parent 7e04fad28b
commit 8eab6b8531
30 changed files with 303 additions and 867 deletions

View File

@@ -39,7 +39,7 @@ import {
} from './common';
import * as Monitoring from './monitoring';
import {buildQuery, buildSort} from './query';
import {putTemplate} from './templating';
import {checkESTemplate, putTemplate} from './templating';
/**
* Matches index names such as stapps_<type>_<source>_<random suffix>
@@ -107,8 +107,12 @@ export class Elasticsearch implements Database {
* @param bulk bulk process which created this index
*/
static getIndex(type: SCThingType, source: string, bulk: SCBulkResponse) {
return `stapps_${type.toLowerCase()
.replace(' ', '_')}_${source}_${Elasticsearch.getIndexUID(bulk.uid)}`;
let out = type.toLowerCase();
while (out.includes(' ')) {
out = out.replace(' ', '_');
}
return `stapps_${out}_${source}_${Elasticsearch.getIndexUID(bulk.uid)}`;
}
/**
@@ -134,7 +138,10 @@ export class Elasticsearch implements Database {
*/
static removeAliasChars(alias: string, uid: string | undefined): string {
// spaces are included in some types, so throwing an error in this case would clutter up the log unnecessarily
let formattedAlias = alias.replace(' ', '');
let formattedAlias = alias;
while (formattedAlias.includes(' ')) {
formattedAlias = formattedAlias.replace(' ', '');
}
// List of invalid characters: https://www.elastic.co/guide/en/elasticsearch/reference/6.6/indices-create-index.html
['\\', '/', '*', '?', '"', '<', '>', '|', ',', '#'].forEach((value) => {
if (formattedAlias.includes(value)) {
@@ -468,6 +475,9 @@ export class Elasticsearch implements Database {
Monitoring.setUp(monitoringConfiguration, this.client, this.mailQueue);
}
checkESTemplate(typeof process.env.ES_FORCE_MAPPING_UPDATE !== 'undefined' ?
process.env.ES_FORCE_MAPPING_UPDATE === 'true' : false);
return this.getAliasMap();
}

View File

@@ -1,19 +0,0 @@
{
"properties": {
"addressCountry": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"addressLocality": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"addressRegion": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"streetAddress": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"postalCode": {
"_fieldRef": "filterableKeyword.field.template.json"
}
}
}

View File

@@ -1,25 +0,0 @@
{
"properties": {
"authors": {
"_typeRef": "person.sc-type.template.json"
},
"publishers": {
"_typeRef": [
"person.sc-type.template.json",
"organization.sc-type.template.json"
]
},
"datePublished": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"inLanguages": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"keywords": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"articleBody": {
"_fieldRef": "text.field.template.json"
}
}
}

View File

@@ -1,91 +0,0 @@
{
"template": "stapps_*",
"settings": {
"max_result_window": 30000,
"mapping.total_fields.limit": 2000,
"number_of_shards": 1,
"number_of_replicas": 0,
"analysis": {
"filter": {
"german_stemmer": {
"type": "stemmer",
"language": "german"
},
"german_stop": {
"type": "stop",
"stopwords": "_german_"
},
"german_phonebook": {
"type": "icu_collation",
"language": "de",
"country": "DE",
"variant": "@collation=phonebook"
}
},
"tokenizer": {
"stapps_ngram": {
"type": "ngram",
"min_gram": 4,
"max_gram": 7
}
},
"analyzer": {
"search_german": {
"tokenizer": "stapps_ngram",
"filter": [
"lowercase",
"german_stop",
"german_stemmer"
]
},
"ducet_sort": {
"tokenizer": "keyword",
"filter": [
"german_phonebook"
]
}
}
}
},
"mappings": {
"_default_": {
"properties": {
"creation_date": {
"type": "date",
"store": true
},
"uid": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"type": {
"_fieldRef": "sortableKeyword.field.template.json"
},
"name": {
"_fieldRef": "text.field.template.json"
},
"alternateNames": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"categories": {
"_fieldRef": "sortableKeyword.field.template.json"
},
"url": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"description": {
"_fieldRef": "text.field.template.json"
},
"image": {
"_fieldRef": "filterableKeyword.field.template.json"
}
},
"_source": {
"excludes": [
"creation_date"
]
},
"date_detection": false,
"dynamic_templates": []
}
}
}

View File

@@ -1,31 +0,0 @@
{
"properties": {
"authors": {
"_typeRef": "person.sc-type.template.json"
},
"bookEdition": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"isbn": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"numberOfPages": {
"type": "integer"
},
"publishers": {
"_typeRef": "organization.sc-type.template.json"
},
"datePublished": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"inLanguages": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"keywords": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"articleBody": {
"_fieldRef": "text.field.template.json"
}
}
}

View File

@@ -1,22 +0,0 @@
{
"properties": {
"level": {
"type": "integer",
"fields": {
"raw": {
"type": "integer"
}
}
},
"semester": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"superCatalog": {
"_typeRef": "catalog.sc-type.template.json"
},
"superCatalogs": {
"type": "nested",
"_typeRef": "catalog.sc-type.template.json"
}
}
}

View File

@@ -1,47 +0,0 @@
{
"properties": {
"startDate": {
"_fieldRef": "filterableDate.field.template.json"
},
"endDate": {
"_fieldRef": "filterableDate.field.template.json"
},
"startTime": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"endTime": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"dayOfWeek": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"place": {
"_typeRef": "place.sc-type.template.json"
},
"performers": {
"type": "nested",
"_typeRef": "person.sc-type.template.json"
},
"duration": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"frequency": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"frequencyMultiplier": {
"type": "float"
},
"repeatFrequency": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"superEvent": {
"_typeRef": "event.sc-type.template.json"
},
"exceptions": {
"_fieldRef": "filterableDate.field.template.json"
},
"dates": {
"_fieldRef": "filterableDate.field.template.json"
}
}
}

View File

@@ -1,10 +0,0 @@
{
"properties": {
"dateCreated": {
"_fieldRef": "filterableDate.field.template.json"
},
"action": {
"_fieldRef": "filterableKeyword.field.template.json"
}
}
}

View File

@@ -1,22 +0,0 @@
{
"properties": {
"availabilityStarts": {
"_fieldRef": "filterableDate.field.template.json"
},
"availabilityEnds": {
"_fieldRef": "filterableDate.field.template.json"
},
"offers": {
"_typeRef": "offers.sc-type.template.json"
},
"characteristics": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"additives": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"place": {
"_typeRef": "place.sc-type.template.json"
}
}
}

View File

@@ -1,47 +0,0 @@
{
"properties": {
"subType": {
"_fieldRef": "sortableKeyword.field.template.json"
},
"categories": {
"_fieldRef": "sortableKeyword.field.template.json"
},
"previousStartDate": {
"_fieldRef": "filterableDate.field.template.json"
},
"place": {
"_typeRef": "place.sc-type.template.json"
},
"organizers": {
"type": "nested",
"_typeRef": "person.sc-type.template.json"
},
"performers": {
"type": "nested",
"_typeRef": "person.sc-type.template.json"
},
"attendees": {
"type": "nested",
"_typeRef": "person.sc-type.template.json"
},
"catalogs": {
"type": "nested",
"_typeRef": "catalog.sc-type.template.json"
},
"maximumParticipants": {
"type": "integer"
},
"superEvent": {
"_typeRef": "event.sc-type.template.json"
},
"subProperties": {
"_fieldRef": "eventSubProperties.field.template.json"
},
"startDate": {
"_fieldRef": "filterableDate.field.template.json"
},
"endDate": {
"_fieldRef": "filterableDate.field.template.json"
}
}
}

View File

@@ -1,16 +0,0 @@
{
"properties": {
"id": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"semester": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"majors": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"originalCategory": {
"_fieldRef": "filterableKeyword.field.template.json"
}
}
}

View File

@@ -1,8 +0,0 @@
{
"type": "date",
"fields": {
"raw": {
"type": "keyword"
}
}
}

View File

@@ -1,8 +0,0 @@
{
"type": "keyword",
"fields": {
"raw": {
"type": "keyword"
}
}
}

View File

@@ -1,10 +0,0 @@
{
"properties": {
"place": {
"_typeRef": "place.sc-type.template.json"
},
"floor": {
"_fieldRef": "filterableKeyword.field.template.json"
}
}
}

View File

@@ -1,26 +0,0 @@
{
"properties": {
"jobTitle": {
"type": "text"
},
"worksFor": {
"_typeRef": "organization.sc-type.template.json"
},
"workLocation": {
"properties": {
"email": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"faxNumber": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"telephone": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"openingHours": {
"_fieldRef": "filterableKeyword.field.template.json"
}
}
}
}
}

View File

@@ -1,24 +0,0 @@
{
"properties": {
"price": {
"type": "double"
},
"prices": {
"type": "nested",
"properties": {
"alumni": {
"type": "double"
},
"student": {
"type": "double"
},
"employee": {
"type": "double"
},
"guest": {
"type": "double"
}
}
}
}
}

View File

@@ -1,7 +0,0 @@
{
"properties": {
"address": {
"_fieldRef": "address.field.template.json"
}
}
}

View File

@@ -1,47 +0,0 @@
{
"properties": {
"honorificPrefix": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"honorificSuffix": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"givenName": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"additionalName": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"familyName": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"birthDate": {
"_fieldRef": "filterableDate.field.template.json"
},
"gender": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"email": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"faxNumber": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"telephone": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"adress": {
"type": "text"
},
"affiliations": {
"type": "nested",
"_typeRef": "organization.sc-type.template.json"
},
"homeLocation": {
"_typeRef": "place.sc-type.template.json"
},
"nationality": {
"_fieldRef": "filterableKeyword.field.template.json"
}
}
}

View File

@@ -1,33 +0,0 @@
{
"properties": {
"subType": {
"_fieldRef": "sortableKeyword.field.template.json"
},
"address": {
"_fieldRef": "address.field.template.json"
},
"openingHours": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"geo": {
"properties": {
"point": {
"properties": {
"coordinates": {
"type": "geo_point"
}
}
},
"polygon": {
"type": "geo_shape"
}
}
},
"superPlace": {
"_typeRef": "place.sc-type.template.json"
},
"subProperties": {
"_fieldRef": "placeSubProperties.field.template.json"
}
}
}

View File

@@ -1,38 +0,0 @@
{
"properties": {
"floors": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"floor": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"paymentAccepted": {
"_fieldRef": "filterableKeyword.field.template.json"
},
"roomCharacterization": {
"type": "nested",
"properties": {
"inventory": {
"properties": {
"key": {
"type": "keyword",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"value": {
"type": "integer",
"fields": {
"raw": {
"type": "integer"
}
}
}
}
}
}
}
}
}

View File

@@ -1,14 +0,0 @@
{
"type": "nested",
"properties": {
"student": {
"type": "float"
},
"employee": {
"type": "float"
},
"guest": {
"type": "float"
}
}
}

View File

@@ -1,15 +0,0 @@
{
"type": "text",
"analyzer": "search_german",
"fields": {
"raw": {
"type": "keyword",
"ignore_above": 10000
},
"sort": {
"fielddata": true,
"type": "text",
"analyzer": "ducet_sort"
}
}
}

View File

@@ -1,10 +0,0 @@
{
"type": "text",
"fielddata": true,
"analyzer": "search_german",
"fields": {
"raw": {
"type": "keyword"
}
}
}

View File

@@ -13,133 +13,54 @@
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {getProjectReflection} from '@openstapps/core-tools/lib/common';
import {generateTemplate} from '@openstapps/core-tools/lib/mapping';
import {Logger} from '@openstapps/logger';
import {Client} from 'elasticsearch';
import {readdir, readFile} from 'fs-extra';
import {existsSync, writeFileSync} from 'fs';
import {readFile} from 'fs-extra';
import {resolve} from 'path';
import {coreVersion} from '../../common';
const dirPath = resolve('src', 'storage', 'elasticsearch', 'templates');
const templatePath = resolve(dirPath, `template_${coreVersion}.json`);
const errorPath = resolve(dirPath, `failed_template_${coreVersion}.json`);
const errorReportPath = resolve(dirPath, `error_report_${coreVersion}.txt`);
const ignoredTags = ['minlength', 'pattern', 'see'];
/**
* Assembles an elasticsearch template with all resolved subType-references
* @param templateType Type used in the elasticsearch mapping
* @param templates Templates (elasticsearch mappings)
* @param inline Level of hierarchy
* @deprecated
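* Ensures that a mapping template file exists for the current core version
*
* The template is generated when the file is missing or when forceUpdate is true. If the generation reports errors,
* the failed template and an error report are written to disk and the process exits.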
*/
function assembleElasticsearchTemplate(
templateType: string,
// tslint:disable-next-line: no-any
templates: {[key: string]: any; },
inline: number): object {
const templateBase = JSON.parse(JSON.stringify(templates[templateType]));
if (typeof inline !== 'undefined') {
delete templateBase.dynamic_templates;
export function checkESTemplate(forceUpdate: boolean) {
if (forceUpdate) {
Logger.warn('CAUTION: Force update of the mapping files is enabled. This causes the backend to ignore' +
' existing mapping files on start.');
}
if (!existsSync(templatePath) || forceUpdate) {
Logger.info(`No mapping for Core version ${coreVersion} found, starting automatic mapping generation. ` +
`This may take a while.`);
const map = generateTemplate(getProjectReflection(resolve('node_modules', '@openstapps', 'core', 'src')),
ignoredTags, false);
// these have no properties to replace
const excludeBaseFields = [
'filterableKeyword.field.template.json',
'sortableKeyword.field.template.json',
'text.field.template.json',
'filterableDate.field.template.json',
];
if (map.errors.length > 0) {
// tslint:disable-next-line:no-magic-numbers
writeFileSync(errorPath, JSON.stringify(map.template, null, 2));
if (excludeBaseFields.indexOf(templateType) === -1) {
writeFileSync(errorReportPath, `ERROR REPORT FOR CORE VERSION ${coreVersion}\n${map.errors.join('\n')}`);
try {
// extend the template by the properties of the basetemplate
templateBase.properties = {...templateBase.properties,
...templates['base.template.json'].mappings._default_.properties,
};
} catch (e) {
// tslint:disable-next-line: no-floating-promises
Logger.error(`Failed to merge properties on: ${templateType}`);
throw e;
// tslint:disable-next-line:no-floating-promises
Logger.error(`There were errors while generating the template, and the backend cannot continue. A list of all ` +
`errors can be found at ${errorReportPath}. To resolve this` +
` issue by hand you can go to "${errorPath}" and correct the issues manually, then move it to ${templatePath}.`);
process.exit(1);
} else {
Logger.ok('Mapping files were generated successfully.');
writeFileSync(templatePath, JSON.stringify(map.template));
}
const fieldKeys = Object.keys(templateBase.properties);
fieldKeys.forEach((fieldKey) => {
const field = templateBase.properties[fieldKey];
const keys = Object.keys(field);
// we have subtype-references to replace
if (keys.indexOf('_typeRef') > -1) {
// if we are already inline of a superObject, we don't resolve types
if (inline > 1) {
delete templateBase.properties[fieldKey];
} else {
// we have more than one reference
if (Array.isArray(field._typeRef)) {
let obj = {};
field._typeRef.forEach((subType: string) => {
obj = {...obj, ...assembleElasticsearchTemplate(subType, templates, inline + 1)};
});
templateBase.properties[fieldKey] = obj;
} else {
templateBase.properties[fieldKey] = assembleElasticsearchTemplate(field._typeRef, templates, inline + 1);
}
}
} else if (keys.indexOf('_fieldRef') > -1) {
templateBase.properties[fieldKey] = assembleElasticsearchTemplate(field._fieldRef, templates, inline + 1);
}
});
} else {
Logger.info(`Using existing mapping at "${templatePath}"`);
}
return templateBase;
}
/**
* Reads all template files and returns the assembled template
*/
// TODO: check if redundant
export async function getElasticsearchTemplate(): Promise<object> {
// read all templates
const elasticsearchFolder = resolve('.', 'src', 'storage', 'elasticsearch', 'templates');
// tslint:disable-next-line: no-any
const templates: {[key: string]: any; } = {};
const fileNames = await readdir(elasticsearchFolder);
const availableTypes = fileNames.filter((fileName) => {
return Array.isArray(fileName.match(/\w*\.sc-type\.template\.json/i));
})
.map((fileName) => {
return fileName.substring(0, fileName.indexOf('.sc-type.template.json'));
});
const promises = fileNames.map(async (fileName) => {
const file = await readFile(resolve(elasticsearchFolder, fileName), 'utf8');
try {
templates[fileName] = JSON.parse(file.toString());
} catch (jsonParsingError) {
await Logger.error(`Failed parsing file: ${fileName}`);
throw jsonParsingError;
}
});
await Promise.all(promises);
const template = templates['base.template.json'];
availableTypes.forEach((configType) => {
template.mappings[configType.toLowerCase()] =
assembleElasticsearchTemplate(`${configType}.sc-type.template.json`, templates, 0);
});
// this is like the base type (StappsCoreThing)
const baseProperties = template.mappings._default_.properties;
Object.keys(baseProperties)
.forEach((basePropertyName) => {
let field = baseProperties[basePropertyName];
field = templates[field._fieldRef];
template.mappings._default_.properties[basePropertyName] = field;
});
return template;
}
/**
@@ -148,7 +69,7 @@ export async function getElasticsearchTemplate(): Promise<object> {
*/
export async function putTemplate(client: Client): Promise<void> {
return client.indices.putTemplate({
body: await getElasticsearchTemplate(),
body: JSON.parse((await readFile(templatePath, 'utf8')).toString()),
name: 'global',
});
}