diff --git a/README.md b/README.md index b4e1eaf7..d259efde 100644 --- a/README.md +++ b/README.md @@ -23,27 +23,28 @@ you with everything you need to run this backend. ## Generating Elasticsearch Mapping The mappings will be generated automatically on the first start. If there are any errors, the backend will inform you and stop the execution, however it will do its best to complete the mappings. You can then either resolve these errors in the `core-tools` or the `core`, depending on where it originated. -If you need a quick solution, you can also take the generated output file and manually correct the errors, then rename it to `template_[coreVersion].json` -and restart the backend. This time it will take your file. The filenames and the path will be displayed in the log of the backend. +If you need a quick solution, you can also take the generated output file and manually correct the errors, then rename it to `[coreVersion]-template-[type].json` (replace any spaces with a `_`) +and restart the backend (make sure that you don't have `ES_FORCE_MAPPING_UPDATE` set to `true`). This time it will take your file. *The filenames and the path will also be displayed in the log of the backend.* ### Manually Resolving Errors There are multiple types of errors the backend can run into. Manual error resolving requires you to be familiar with Elasticsearch mappings. An error will be represented in the output through an Elasticsearch type written in CAPS. Refer to either the console output -or the `error_report_[coreVersion].txt` for more info. If you feel lucky you can try to replace every error (`"type": "MISSING_PREMAP"`, +or the `[coreVersion]-error-report.txt` for more info. If you feel lucky you can try to replace every error (`"type": "MISSING_PREMAP"`, `"type": "PARSE_ERROR"`, `"type": "TYPE_CONFLICT"`) with ```json "dynamic": true, "properties": {} ``` -This should ONLY be used as a temporary workaround. 
+This should ONLY be used as a temporary workaround and might compromise other features. ### Startup Behaviour *This might be important if you work on the Core* +The backend uses the `core-tools` to automatically generate Elasticsearch Mappings and Aggregations from the current `core` version. -By default, the backend creates a local copy of the generated mappings in `src/storage/elasticsearch/templates/template_[coreVersion].json`. -On each start, it first checks if this file exists, if it does, it will just use that file and *not* generate a new mapping to cut down the time +By default, the backend creates a local copy of the generated mappings and aggregations in `src/storage/elasticsearch/templates/[coreVersion]-template-[type].json` and `src/storage/elasticsearch/templates/[coreVersion]-aggregations.json`. +On each start, it first checks whether the aggregations file exists, because it does not know which of the types actually exist for the current core version. If the file does exist, it will just use the existing files and *not* generate a new mapping to cut down the time it takes to start the backend. When you are working on the Core, you might not want to have this behaviour, you can then either delete the generated file at each start or run the backend with the environment variable `ES_FORCE_MAPPING_UPDATE=true`. This will cause it to generate the mapping each time starts regardless of whether there are already files there. 
diff --git a/package-lock.json b/package-lock.json index 29b98207..4829b1b5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -219,9 +219,9 @@ } }, "@openstapps/core": { - "version": "0.29.0", - "resolved": "https://registry.npmjs.org/@openstapps/core/-/core-0.29.0.tgz", - "integrity": "sha512-xXfBIYWQlnYiRSURxNBs1nmd38EcgAZGXoHRBaT4Cv/E5bt2ciMOo+MqPVLtKaAf1YPHB419FbxxDjxTkccL2g==", + "version": "0.31.0", + "resolved": "https://registry.npmjs.org/@openstapps/core/-/core-0.31.0.tgz", + "integrity": "sha512-E0/VS4YvXHZEc1VF1PFnPJR/5HyTurLjBRunN1Zn4ji9AtT3LY81BybuZ1dxwm07ZN8aJhJL/dGegN3h3gM15Q==", "requires": { "@types/geojson": "1.0.6", "@types/json-patch": "0.0.30", @@ -241,9 +241,9 @@ } }, "@openstapps/core-tools": { - "version": "0.9.0", - "resolved": "https://registry.npmjs.org/@openstapps/core-tools/-/core-tools-0.9.0.tgz", - "integrity": "sha512-ltkQVc3ykGsqnPUop+lwp1ctlAlvJWt9L7FZ+3q+6Eepvjiqu/nZJM5N11qDIptOfjB0yXY0ovdTqJFQ+fc0uQ==", + "version": "0.11.0", + "resolved": "https://registry.npmjs.org/@openstapps/core-tools/-/core-tools-0.11.0.tgz", + "integrity": "sha512-e3eGbOyBBDGc6/yRkSAAXjuIGpOpmUyXl5QIaAfYE2Od4W0JXd/Gm89loP4fjHy1ullIE6ESwQ0asKVxV3ihMQ==", "requires": { "@krlwlfrt/async-pool": "0.1.0", "@openstapps/logger": "0.3.1", @@ -4716,9 +4716,9 @@ }, "dependencies": { "glob": { - "version": "7.1.4", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.4.tgz", - "integrity": "sha512-hkLPepehmnKk41pUGm3sYxoFs/umurYfYJCerbXEyFIWcAzvpipAgVkBqqT9RBKMGjnq6kMuyYwha6csxbiM1A==", + "version": "7.1.6", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.6.tgz", + "integrity": "sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==", "requires": { "fs.realpath": "^1.0.0", "inflight": "^1.0.4", diff --git a/package.json b/package.json index 5679fb8e..8060fca3 100644 --- a/package.json +++ b/package.json @@ -29,8 +29,8 @@ "tslint": "tslint -p tsconfig.json -c tslint.json 'src/**/*.ts'" }, "dependencies": { 
- "@openstapps/core": "0.29.0", - "@openstapps/core-tools": "0.9.0", + "@openstapps/core": "0.31.0", + "@openstapps/core-tools": "0.11.0", "@openstapps/logger": "0.4.0", "@types/node": "10.14.12", "commander": "2.20.0", @@ -72,16 +72,16 @@ "@types/sinon-express-mock": "1.3.7", "@types/supertest": "2.0.7", "@types/uuid": "3.4.5", - "chai": "4.2.0", "chai-as-promised": "7.1.1", + "chai": "4.2.0", "conventional-changelog-cli": "2.0.21", - "mocha": "6.1.4", "mocha-typescript": "1.1.17", + "mocha": "6.1.4", "nyc": "14.1.1", "prepend-file-cli": "1.0.6", "rimraf": "2.6.3", - "sinon": "7.3.2", "sinon-express-mock": "2.2.0", + "sinon": "7.3.2", "supertest": "4.0.2", "tslint": "5.18.0", "typedoc": "0.14.2", diff --git a/src/storage/elasticsearch/aggregations.ts b/src/storage/elasticsearch/aggregations.ts index 8ec5b6d1..d0c4d845 100644 --- a/src/storage/elasticsearch/aggregations.ts +++ b/src/storage/elasticsearch/aggregations.ts @@ -13,56 +13,25 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . 
*/ -import {SCBackendAggregationConfiguration, SCFacet} from '@openstapps/core'; +import {SCFacet, SCThingType} from '@openstapps/core'; +import {AggregationSchema} from '@openstapps/core-tools/lib/mappings/aggregation-definitions'; +import {readFileSync} from 'fs'; import { AggregationResponse, - AggregationSchema, - ESNestedAggregation, isBucketAggregation, + isESAggMatchAllFilter, isESNestedAggregation, - isESTermsFilter, isNestedAggregation, + isESTermsFilter, + isNestedAggregation, } from './common'; +import {aggregationsPath} from './templating'; /** * Builds the aggregation * @returns a schema to tell elasticsearch which aggregations to collect */ -export function buildAggregations(aggsConfig: SCBackendAggregationConfiguration[]): AggregationSchema { - - const result: AggregationSchema = {}; - - for (const aggregation of aggsConfig) { - if (typeof aggregation.onlyOnTypes !== 'undefined') { - for (const type of aggregation.onlyOnTypes) { - if (typeof result[type] === 'undefined') { - result[type] = { - aggs: {}, - filter: { - type: { - value: type, - }, - }, - }; - } - - (result[type] as ESNestedAggregation).aggs[aggregation.fieldName] = { - terms: { - field: `${aggregation.fieldName}.keyword`, - size: 1000, - }, - }; - } - } else { - result[aggregation.fieldName] = { - terms: { - field: `${aggregation.fieldName}.keyword`, - size: 1000, - }, - }; - } - } - - return result; +export function buildAggregations(): AggregationSchema { + return JSON.parse((readFileSync(aggregationsPath, 'utf8')).toString()); } /** @@ -103,7 +72,8 @@ export function parseAggregations( }; }), field: fieldName, - onlyOnType: type.filter.type.value, + onlyOnType: isESAggMatchAllFilter(type.filter) + ? 
undefined : type.filter.type.value as SCThingType, }); } } diff --git a/src/storage/elasticsearch/common.ts b/src/storage/elasticsearch/common.ts index 2c015a9f..1d57d55b 100644 --- a/src/storage/elasticsearch/common.ts +++ b/src/storage/elasticsearch/common.ts @@ -15,6 +15,12 @@ */ import {SCThingType} from '@openstapps/core'; import {SCThing} from '@openstapps/core'; +import { + ESAggMatchAllFilter, + ESAggTypeFilter, + ESNestedAggregation, + ESTermsFilter, +} from '@openstapps/core-tools/lib/mappings/aggregation-definitions'; import {NameList} from 'elasticsearch'; /** @@ -88,14 +94,6 @@ export function isNestedAggregation(agg: BucketAggregation | NestedAggregation): return typeof (agg as BucketAggregation).buckets === 'undefined'; } -/** - * An elasticsearch bucket aggregation - * @see https://www.elastic.co/guide/en/elasticsearch/reference/5.6/search-aggregations-bucket.html - */ -export interface AggregationSchema { - [aggregationName: string]: ESTermsFilter | ESNestedAggregation; -} - /** * A configuration for using the Dis Max Query * @@ -275,26 +273,6 @@ export interface ESTermFilter { }; } -/** - * An elasticsearch terms filter - */ -export interface ESTermsFilter { - /** - * Terms filter definition - */ - terms: { - /** - * Field to apply filter to - */ - field: string; - - /** - * Number of results - */ - size?: number; - }; -} - /** * Checks if the parameter is of type ESTermsFilter * @param agg the value to check @@ -303,30 +281,6 @@ export function isESTermsFilter(agg: ESTermsFilter | ESNestedAggregation): agg i return typeof (agg as ESTermsFilter).terms !== 'undefined'; } -/** - * For nested aggregations - */ -export interface ESNestedAggregation { - /** - * Possible nested Aggregations - */ - aggs: AggregationSchema; - /** - * Possible filter for types - */ - filter: { - /** - * The type of the object to find - */ - type: { - /** - * The name of the type - */ - value: SCThingType; - }; - }; -} - /** * Checks if the parameter is of type 
ESTermsFilter * @param agg the value to check @@ -335,6 +289,15 @@ export function isESNestedAggregation(agg: ESTermsFilter | ESNestedAggregation): return typeof (agg as ESNestedAggregation).aggs !== 'undefined'; } +/** + * Checks if the parameter is of type + * + * @param filter the filter to narrow the type of + */ +export function isESAggMatchAllFilter(filter: ESAggTypeFilter | ESAggMatchAllFilter): filter is ESAggMatchAllFilter { + return filter.hasOwnProperty('match_all'); +} + /** * An elasticsearch type filter */ @@ -358,6 +321,7 @@ export interface ESGeoDistanceFilterArguments { * The radius of the circle centred on the specified location */ distance: string; + [fieldName: string]: { /** * Latitute @@ -412,9 +376,9 @@ export interface ESBooleanFilterArguments { * An elasticsearch boolean filter */ export interface ESBooleanFilter { - /** - * @see ESBooleanFilterArguments - */ + /** + * @see ESBooleanFilterArguments + */ bool: ESBooleanFilterArguments; } @@ -485,6 +449,7 @@ export interface ESGeoDistanceSortArguments { * Value unit */ unit: 'm'; + [field: string]: { /** * Latitute @@ -512,9 +477,9 @@ export interface ESGeoDistanceSort { * An elasticsearch script sort */ export interface ScriptSort { - /** - * A script - */ + /** + * A script + */ _script: { /** * Order diff --git a/src/storage/elasticsearch/elasticsearch.ts b/src/storage/elasticsearch/elasticsearch.ts index cd151fe8..e2f22002 100644 --- a/src/storage/elasticsearch/elasticsearch.ts +++ b/src/storage/elasticsearch/elasticsearch.ts @@ -23,6 +23,7 @@ import { SCThingType, SCUuid, } from '@openstapps/core'; +import {AggregationSchema} from '@openstapps/core-tools/lib/mappings/aggregation-definitions'; import {Logger} from '@openstapps/logger'; import * as ES from 'elasticsearch'; import * as moment from 'moment'; @@ -31,7 +32,6 @@ import {Bulk} from '../bulk-storage'; import {Database} from '../database'; import {buildAggregations, parseAggregations} from './aggregations'; import { - 
AggregationSchema, ElasticsearchConfig, ElasticsearchObject, ElasticsearchQueryDisMaxConfig, @@ -196,9 +196,17 @@ export class Elasticsearch implements Database { this.aliasMap = {}; this.ready = false; - this.aggregationsSchema = buildAggregations(this.config.internal.aggregations); + checkESTemplate(typeof process.env.ES_FORCE_MAPPING_UPDATE !== 'undefined' ? + process.env.ES_FORCE_MAPPING_UPDATE === 'true' : false); + + this.aggregationsSchema = buildAggregations(); this.mailQueue = mailQueue; + + /*refreshAllTemplates(this.client) + .then(() => { + // noop + });*/ } /** @@ -333,7 +341,7 @@ export class Elasticsearch implements Database { } // re-apply the index template before each new bulk operation - await putTemplate(this.client); + await putTemplate(this.client, bulk.type); await this.client.indices.create({ index, }); @@ -390,7 +398,7 @@ export class Elasticsearch implements Database { // create the new index if it does not exists if (!(await this.client.indices.exists({index}))) { // re-apply the index template before each new bulk operation - await putTemplate(this.client); + await putTemplate(this.client, bulk.type); await this.client.indices.create({ index, }); @@ -475,9 +483,6 @@ export class Elasticsearch implements Database { Monitoring.setUp(monitoringConfiguration, this.client, this.mailQueue); } - checkESTemplate(typeof process.env.ES_FORCE_MAPPING_UPDATE !== 'undefined' ? 
- process.env.ES_FORCE_MAPPING_UPDATE === 'true' : false); - return this.getAliasMap(); } diff --git a/src/storage/elasticsearch/query.ts b/src/storage/elasticsearch/query.ts index 293106a4..b008acbe 100644 --- a/src/storage/elasticsearch/query.ts +++ b/src/storage/elasticsearch/query.ts @@ -246,6 +246,7 @@ function buildFunctionsForBoostingTypes( return functions; } + /** * Builds body for Elasticsearch requests * @param params Parameters for querying the backend diff --git a/src/storage/elasticsearch/templating.ts b/src/storage/elasticsearch/templating.ts index dac8cc7f..d6963262 100644 --- a/src/storage/elasticsearch/templating.ts +++ b/src/storage/elasticsearch/templating.ts @@ -13,6 +13,7 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ +import {SCThingType} from '@openstapps/core'; import {getProjectReflection} from '@openstapps/core-tools/lib/common'; import {generateTemplate} from '@openstapps/core-tools/lib/mapping'; import {Logger} from '@openstapps/logger'; @@ -24,51 +25,95 @@ import sanitize = require('sanitize-filename'); import {configFile, coreVersion} from '../../common'; const dirPath = resolve('src', 'storage', 'elasticsearch', 'templates'); -const templatePath = resolve(dirPath, sanitize(`template_${coreVersion}.json`, {replacement: '-'})); -const errorPath = resolve(dirPath, sanitize(`failed_template_${coreVersion}.json`, {replacement: '-'})); -const errorReportPath = resolve(dirPath, sanitize(`error_report_${coreVersion}.txt`, {replacement: '-'})); +export const aggregationsPath = resolve(dirPath, sanitize(`${coreVersion}-aggregations.json`, {replacement: '-'})); +const templateErrorPath = resolve(dirPath, sanitize(`${coreVersion}-template-[type].error.json`, {replacement: '-'})); +const aggregationsErrorPath = resolve(dirPath, sanitize(`${coreVersion}-aggregations.error.json`, {replacement: '-'})); +const errorReportPath = resolve(dirPath, 
sanitize(`${coreVersion}-error-report.txt`, {replacement: '-'})); /** * Check if the correct template exists */ export function checkESTemplate(forceUpdate: boolean) { + // as the forced mapping update is only meant for development, print a warning if it is enabled if (forceUpdate) { Logger.warn('CAUTION: Force update of the mapping files is enabled. This causes the backend to ignore' + ' existing mapping files on start.'); } - if (!existsSync(templatePath) || forceUpdate) { + // we don't exactly know which files are there, so we just check if the aggregations exist + // for the current core version + if (forceUpdate || !existsSync(aggregationsPath)) { Logger.info(`No mapping for Core version ${coreVersion} found, starting automatic mapping generation. ` + `This may take a while.`); const map = generateTemplate(getProjectReflection(resolve('node_modules', '@openstapps', 'core', 'src')), configFile.backend.mappingIgnoredTags, false); if (map.errors.length > 0) { + for (const type of Object.keys(map.mappings)) { + writeFileSync(getTemplatePath(Object.keys(map.mappings[type].mappings)[0] as SCThingType, true), + // tslint:disable-next-line:no-magic-numbers + JSON.stringify(map.mappings[type], null, 2)); + } // tslint:disable-next-line:no-magic-numbers - writeFileSync(errorPath, JSON.stringify(map.template, null, 2)); + writeFileSync(aggregationsErrorPath, JSON.stringify(map.aggregations, null, 2)); writeFileSync(errorReportPath, `ERROR REPORT FOR CORE VERSION ${coreVersion}\n${map.errors.join('\n')}`); - // tslint:disable-next-line:no-floating-promises - Logger.error(`There were errors while generating the template, and the backend cannot continue. A list of all ` + - `errors can be found at ${errorReportPath}. To resolve this` + - ` issue by hand you can go to "${errorPath}" and correct the issues manually, then move it to ${templatePath}.`); + void Logger.error(`There were errors while generating the template, and the backend cannot continue. 
A list of ` + + `all errors can be found at ${errorReportPath}. To resolve this` + + ` issue by hand you can go to "${templateErrorPath}" and "${aggregationsErrorPath}", then correct the issues` + + ` manually and move the files to the template paths and "${aggregationsPath}" respectively.`); process.exit(1); } else { Logger.ok('Mapping files were generated successfully.'); - writeFileSync(templatePath, JSON.stringify(map.template)); + for (const type of Object.keys(map.mappings)) { + writeFileSync(getTemplatePath(Object.keys(map.mappings[type].mappings)[0] as SCThingType, false), + // tslint:disable-next-line:no-magic-numbers + JSON.stringify(map.mappings[type], null, 2)); + } + writeFileSync(aggregationsPath, JSON.stringify(map.aggregations)); } } else { - Logger.info(`Using existing mapping at "${templatePath}"`); + Logger.info(`Using existing mappings for core version ${coreVersion}`); } } /** - * Puts a new global template + * Generates the path to the template of an SCThingType + * + * @param type the type for the path + * @param error whether an error occurred in the file + */ +function getTemplatePath(type: SCThingType, error = false): string { + return resolve(dirPath, sanitize(`${coreVersion}-template-${type}${error ? 
'.error' : ''}.json`, {replacement: '-'})); +} + +/** + * Re-applies all interfaces for every type + * * @param client An elasticsearch client to use */ -export async function putTemplate(client: Client): Promise { +export async function refreshAllTemplates(client: Client) { + for (const type of Object.values(SCThingType)) { + await putTemplate(client, type as SCThingType); + } +} + +/** + * Prepares all indices + * + * This includes applying the mapping, settings + * + * @param type the SCThingType of which the template should be set + * @param client An elasticsearch client to use + */ +export async function putTemplate(client: Client, type: SCThingType) { + let out = type.toLowerCase(); + while (out.includes(' ')) { + out = out.replace(' ', '_'); + } + return client.indices.putTemplate({ - body: JSON.parse((await readFile(templatePath, 'utf8')).toString()), - name: 'global', + body: JSON.parse((await readFile(getTemplatePath(type), 'utf8')).toString()), + name: `template_${out}`, }); }