From f34e98625d8759c652535f98aafc467e032543a8 Mon Sep 17 00:00:00 2001 From: imcaizheng Date: Thu, 16 Jan 2020 04:31:53 +0800 Subject: [PATCH 1/4] apply patch from original submission --- .gitignore | 1 + docs/es-db-compare.md | 21 ++ package.json | 7 +- scripts/es-db-compare/compareProjects.js | 279 +++++++++++++++++++++++ scripts/es-db-compare/constants.js | 13 ++ scripts/es-db-compare/index.js | 191 ++++++++++++++++ scripts/es-db-compare/report.mustache | 89 ++++++++ scripts/es-db-compare/util.js | 180 +++++++++++++++ 8 files changed, 779 insertions(+), 2 deletions(-) create mode 100644 docs/es-db-compare.md create mode 100644 scripts/es-db-compare/compareProjects.js create mode 100644 scripts/es-db-compare/constants.js create mode 100644 scripts/es-db-compare/index.js create mode 100644 scripts/es-db-compare/report.mustache create mode 100644 scripts/es-db-compare/util.js diff --git a/.gitignore b/.gitignore index edd85b28..e59603ad 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,4 @@ jspm_packages !.elasticbeanstalk/*.global.yml .DS_Store .idea +report.html diff --git a/docs/es-db-compare.md b/docs/es-db-compare.md new file mode 100644 index 00000000..a27a7d98 --- /dev/null +++ b/docs/es-db-compare.md @@ -0,0 +1,21 @@ +# es-db-compare + +## Configuration +The following properties can be set from env variables: + +- PROJECT_START_ID: if set, only projects with id that large than or equal to the value are compared. +- PROJECT_END_ID: if set, only projects with id that less than or equal to the value are compared. +- PROJECT_LAST_ACTIVITY_AT: if set, only projects with property lastActivityAt that large than or equal to the value are compared. + +There could be some fields that always mismatch in ES and DB. +The variable named `ignoredProperties` at `scripts/es-db-compare/constants.js` maintains a list of fields which will be ignored +during the comparation. You may need to modify/add/delete items in the list. 
+ +### Note +- `PROJECT_START_ID` and `PROJECT_END_ID` must exist together. +- At least one of `PROJECT_START_ID(also PROJECT_END_ID)` and `PROJECT_LAST_ACTIVITY_AT` needs be set before running the script. + +## Usage + +Set up configuration and execute command `npm run es-db-compare` on the command line. +It will then generate a HTML report with name `report.html` under the current directory. diff --git a/package.json b/package.json index a7e72a9c..68962e0b 100644 --- a/package.json +++ b/package.json @@ -22,7 +22,8 @@ "test": "NODE_ENV=test npm run lint && NODE_ENV=test npm run sync:es && NODE_ENV=test npm run sync:db && NODE_ENV=test ./node_modules/.bin/istanbul cover ./node_modules/mocha/bin/_mocha -- --timeout 10000 --require babel-core/register $(find src -path '*spec.js*') --exit", "test:watch": "NODE_ENV=test ./node_modules/.bin/mocha -w --require babel-core/register $(find src -path '*spec.js*')", "seed": "babel-node src/tests/seed.js --presets es2015", - "demo-data": "babel-node local/seed" + "demo-data": "babel-node local/seed", + "es-db-compare": "babel-node scripts/es-db-compare" }, "repository": { "type": "git", @@ -53,8 +54,11 @@ "express-request-id": "^1.1.0", "express-sanitizer": "^1.0.2", "express-validation": "^0.6.0", + "handlebars": "^4.5.3", "http-aws-es": "^4.0.0", "joi": "^8.0.5", + "jsondiffpatch": "^0.4.1", + "jsonpath": "^1.0.2", "jsonwebtoken": "^8.3.0", "lodash": "^4.17.11", "memwatch-next": "^0.3.0", @@ -64,7 +68,6 @@ "pg": "^7.11.0", "pg-native": "^3.0.0", "sequelize": "^5.8.7", - "jsonpath": "^1.0.2", "swagger-ui-express": "^4.0.6", "tc-core-library-js": "appirio-tech/tc-core-library-js.git#v2.6.3", "traverse": "^0.6.6", diff --git a/scripts/es-db-compare/compareProjects.js b/scripts/es-db-compare/compareProjects.js new file mode 100644 index 00000000..787bccdd --- /dev/null +++ b/scripts/es-db-compare/compareProjects.js @@ -0,0 +1,279 @@ +/* eslint-disable no-console */ +/* eslint-disable consistent-return */ +/* eslint-disable 
no-restricted-syntax */ +/* + * Compare the data from database and data from ES. + * Specific to project-related data. + * + * Please consider decouple some reusable logics from this module before create + * modules to compare other models. + */ + +const Diff = require('jsondiffpatch'); +const lodash = require('lodash'); +const scriptUtil = require('./util'); +const scriptConstants = require('./constants'); + +const associations = { + phases: 'Phase', + members: 'Member', + invites: 'Invite', + attachment: 'Attachment', +}; + +const differ = Diff.create({ + objectHash: obj => obj.id, + propertyFilter: (name) => { + if (scriptConstants.ignoredProperties.includes(name)) { + return false; + } + return true; + }, +}); + +/** + * Process diff delta to extract project-related data. + * + * @param {Object} delta the diff delta. See `util.flatten()` + * @param {Object} esData the data from ES + * @param {Object} dbData the data from DB + * @param {Object} finalData the data patched + * @returns {Object} Object project diff delta in a specific data structure + */ +function processDelta(delta, esData, dbData, finalData) { + const processMissingObject = (item, option) => { + if (item.type === 'delete') { + const projectId = lodash.get(dbData, lodash.slice(item.path, 0, 1)).id; + console.log(`one dbOnly found for ${option.modelName} with id ${item.originalValue.id}`); + return { + type: 'dbOnly', + projectId, + modelName: option.modelName, + id: item.originalValue.id, + dbCopy: item.originalValue, + }; + } + if (item.type === 'add') { + const projectId = lodash.get(esData, lodash.slice(item.path, 0, 1)).id; + console.log(`one esOnly found for ${option.modelName} with id ${item.value.id}`); + return { + type: 'esOnly', + projectId, + modelName: option.modelName, + id: item.value.id, + esCopy: item.value, + }; + } + }; + + const processProduct = (item) => { + const itemNew = Object.assign({}, lodash.omit(item, ['path']), { path: lodash.slice(item.path, 5) }); + if 
(itemNew.dataType === 'array') { + return processMissingObject(item, { modelName: 'Product' }); + } + if (['add', 'delete', 'modify'].includes(itemNew.type)) { + const path = scriptUtil.generateJSONPath(itemNew.path); + const id = lodash.get(finalData, lodash.slice(item.path, 0, 5)).id; + const projectId = lodash.get(finalData, lodash.slice(item.path, 0, 1)).id; + const phaseId = lodash.get(finalData, lodash.slice(item.path, 0, 3)).id; + const dbCopy = lodash.find( + lodash.find( + lodash.find(dbData, { id: projectId }).phases, + { id: phaseId }, + ).products, + { id }, + ); + const esCopy = lodash.find( + lodash.find( + lodash.find(esData, { id: projectId }).phases, + { id: phaseId }, + ).products, + { id }, + ); + console.log(`one mismatch found for Product with id ${id}`); + return { + type: 'mismatch', + projectId, + id, + modelName: 'Product', + path, + dbCopy, + esCopy, + }; + } + }; + + const processAssociation = (item, option) => { + if (item.path[1] === 'phases' && item.path[3] === 'products') { + return processProduct(item); + } + const itemNew = Object.assign({}, lodash.omit(item, ['path']), { path: lodash.slice(item.path, 2) }); + if (itemNew.dataType === 'array') { + return processMissingObject(item, option); + } + if (['add', 'delete', 'modify'].includes(itemNew.type)) { + const path = scriptUtil.generateJSONPath(lodash.slice(itemNew.path, 1)); + const id = lodash.get(finalData, lodash.slice(item.path, 0, 3)).id; + const projectId = lodash.get(finalData, lodash.slice(item.path, 0, 1)).id; + const dbCopy = lodash.find( + lodash.find(dbData, { id: projectId })[option.refPath], + { id }, + ); + const esCopy = lodash.find( + lodash.find(esData, { id: projectId })[option.refPath], + { id }, + ); + console.log(`one mismatch found for ${option.modelName} with id ${id}`); + return { + type: 'mismatch', + projectId, + modelName: option.modelName, + id, + path, + dbCopy, + esCopy, + }; + } + }; + + if (delta.path.length > 2 && associations[delta.path[1]]) { + 
return processAssociation(delta, { modelName: associations[delta.path[1]], refPath: delta.path[1] }); + } + if (delta.dataType === 'array') { + return processMissingObject(delta, { modelName: 'Project' }); + } + if (['add', 'delete', 'modify'].includes(delta.type)) { + const path = scriptUtil.generateJSONPath(lodash.slice(delta.path, 1)); + const id = lodash.get(finalData, lodash.slice(delta.path, 0, 1)).id; + const dbCopy = lodash.find(dbData, { id }); + const esCopy = lodash.find(esData, { id }); + console.log(`one mismatch found for Project with id ${id}`); + return { + type: 'mismatch', + projectId: id, + modelName: 'Project', + id, + path, + dbCopy, + esCopy, + }; + } +} + +/** + * Compare Project data from ES and DB. + * + * @param {Object} esData the data from ES + * @param {Object} dbData the data from DB + * @returns {Object} the data to feed handlebars template + */ +function compareProjects(esData, dbData) { + const data = { + project: { + rootMismatch: {}, + esOnly: [], + dbOnly: [], + }, + meta: { + esCopies: [], + dbCopies: [], + counts: { + Project: 0, + }, + uniqueDeltas: [], + }, + }; + + const storeDelta = (root, delta) => { + if (delta.modelName === 'Project') { + if (delta.type === 'esOnly') { + data[root].esOnly.push(delta); + return; + } + if (delta.type === 'dbOnly') { + data[root].dbOnly.push(delta); + return; + } + } + if (!data[root].rootMismatch[delta.projectId]) { + data[root].rootMismatch[delta.projectId] = { project: [], associations: {} }; + } + if (delta.modelName === 'Project') { + data[root].rootMismatch[delta.projectId].project.push(delta); + return; + } + const currentAssociations = data[root].rootMismatch[delta.projectId].associations; + if (!Object.keys(currentAssociations).includes(delta.modelName)) { + currentAssociations[delta.modelName] = { + mismatches: {}, + esOnly: [], + dbOnly: [], + }; + } + if (delta.type === 'mismatch') { + const mismatches = currentAssociations[delta.modelName].mismatches; + if 
(!mismatches[delta.id]) { + mismatches[delta.id] = []; + } + mismatches[delta.id].push(delta); + return; + } + currentAssociations[delta.modelName][delta.type].push(delta); + }; + + const collectDataCopies = (delta) => { + if (delta.dbCopy) { + if (!lodash.find(data.meta.dbCopies, lodash.pick(delta, ['modelName', 'id']))) { + data.meta.dbCopies.push(delta); + } + } + if (delta.esCopy) { + if (!lodash.find(data.meta.esCopies, lodash.pick(delta, ['modelName', 'id']))) { + data.meta.esCopies.push(delta); + } + } + }; + + const countInconsistencies = () => { + lodash.set( + data.project, + 'meta.totalObjects', + data.project.dbOnly.length + data.project.esOnly.length, + ); + lodash.set( + data.project, + 'meta.totalProjects', + Object.keys(data.project.rootMismatch).length + data.project.dbOnly.length + data.project.esOnly.length, + ); + lodash.map(data.project.rootMismatch, (value) => { + const currentValue = value; + lodash.set(currentValue, 'meta.counts', currentValue.project.length ? 1 : 0); + lodash.map(currentValue.associations, (subObject) => { + lodash.set( + subObject, + 'meta.counts', + Object.keys(subObject.mismatches).length + subObject.dbOnly.length + subObject.esOnly.length, + ); + currentValue.meta.counts += subObject.meta.counts; + }); + data.project.meta.totalObjects += currentValue.meta.counts; + }); + }; + + const result = differ.diff(dbData, esData); + const finalData = differ.patch(Diff.clone(dbData), result); + const flattenedResult = scriptUtil.flatten(result); + for (const item of flattenedResult) { + const delta = processDelta(item, esData, dbData, finalData); + if (delta) { + collectDataCopies(delta); + storeDelta('project', delta); + } + } + countInconsistencies(); + return data; +} + +module.exports = { + compareProjects, +}; diff --git a/scripts/es-db-compare/constants.js b/scripts/es-db-compare/constants.js new file mode 100644 index 00000000..9529931f --- /dev/null +++ b/scripts/es-db-compare/constants.js @@ -0,0 +1,13 @@ +/* + * 
Constants used in the script + */ + +module.exports = { + ignoredProperties: [ + 'createdAt', + 'updatedAt', + 'deletedAt', + 'deletedBy', + 'projectUrl', + ], +}; diff --git a/scripts/es-db-compare/index.js b/scripts/es-db-compare/index.js new file mode 100644 index 00000000..4a3314a4 --- /dev/null +++ b/scripts/es-db-compare/index.js @@ -0,0 +1,191 @@ +/* eslint-disable no-console */ +/* + * Compare data between DB and ES and generate a report to be uploaded + * to AWS S3. + */ + +import Joi from 'joi'; +import lodash from 'lodash'; +import config from 'config'; + +import models from '../../src/models'; +import util from '../../src/util'; +import { INVITE_STATUS } from '../../src/constants'; + +const handlebars = require('handlebars'); +const path = require('path'); +const fs = require('fs'); +const { compareProjects } = require('./compareProjects'); + +const scriptConfig = { + PROJECT_START_ID: process.env.PROJECT_START_ID, + PROJECT_END_ID: process.env.PROJECT_END_ID, + PROJECT_LAST_ACTIVITY_AT: process.env.PROJECT_LAST_ACTIVITY_AT, +}; + +const reportPathname = './report.html'; + +const configSchema = Joi.object().keys({ + PROJECT_START_ID: Joi.number().integer().positive().optional(), + PROJECT_END_ID: Joi.number().integer().positive().optional(), + PROJECT_LAST_ACTIVITY_AT: Joi.date().optional(), +}) + .with('PROJECT_START_ID', 'PROJECT_END_ID') + .or('PROJECT_START_ID', 'PROJECT_LAST_ACTIVITY_AT'); + +try { + Joi.attempt(scriptConfig, configSchema); +} catch (err) { + console.error(err.message); + process.exit(); +} + +const es = util.getElasticSearchClient(); + +const ES_PROJECT_INDEX = config.get('elasticsearchConfig.indexName'); +const ES_PROJECT_TYPE = config.get('elasticsearchConfig.docType'); + +/** + * Get es search criteria. 
+ * + * @returns {Object} the search criteria + */ +function getESSearchCriteria() { + const filters = []; + if (!lodash.isNil(scriptConfig.PROJECT_START_ID)) { + filters.push({ + filtered: { + filter: { + range: { + id: { + gte: scriptConfig.PROJECT_START_ID, + lte: scriptConfig.PROJECT_END_ID, + }, + }, + }, + }, + }); + } + if (!lodash.isNil(scriptConfig.PROJECT_LAST_ACTIVITY_AT)) { + filters.push({ + filtered: { + filter: { + range: { + lastActivityAt: { + gte: scriptConfig.PROJECT_LAST_ACTIVITY_AT, + }, + }, + }, + }, + }); + } + const searchCriteria = { + index: ES_PROJECT_INDEX, + type: ES_PROJECT_TYPE, + body: { + query: { + bool: { + must: filters, + }, + }, + }, + }; + return searchCriteria; +} + +/** + * Get handlebars template. + * + * @returns {Object} the template + */ +function getTemplate() { + handlebars.registerHelper('getValue', (data, key) => data[key]); + handlebars.registerHelper('toJSON', obj => JSON.stringify(obj, null, 2)); + const template = handlebars.compile(fs.readFileSync(path.join(__dirname, 'report.mustache')).toString()); + return template; +} + +/** + * Get ES data. + * + * @returns {Promise} the ES data + */ +async function getESData() { + const searchCriteria = getESSearchCriteria(); + return es.search(searchCriteria) + .then((docs) => { + const rows = lodash.map(docs.hits.hits, single => single._source); // eslint-disable-line no-underscore-dangle + return rows; + }); +} + +/** + * Get DB data. 
+ * + * @returns {Promise} the DB data + */ +async function getDBData() { + const filter = {}; + if (!lodash.isNil(scriptConfig.PROJECT_START_ID)) { + filter.id = { $between: [scriptConfig.PROJECT_START_ID, scriptConfig.PROJECT_END_ID] }; + } + if (!lodash.isNil(scriptConfig.PROJECT_LAST_ACTIVITY_AT)) { + filter.lastActivityAt = { $gte: scriptConfig.PROJECT_LAST_ACTIVITY_AT }; + } + return models.Project.findAll({ + where: filter, + raw: false, + include: [{ + model: models.ProjectPhase, + as: 'phases', + include: [{ + model: models.PhaseProduct, + as: 'products', + }], + }, { + model: models.ProjectMemberInvite, + as: 'invites', + where: { status: { $in: [INVITE_STATUS.PENDING, INVITE_STATUS.REQUESTED] } }, + required: false, + }, { + model: models.ProjectAttachment, + as: 'attachments', + }], + }).then((_projects) => { + const projects = _projects.map((_project) => { + if (!_project) { + return Promise.resolve(null); + } + const project = _project.toJSON(); + return models.ProjectMember.getActiveProjectMembers(project.id) + .then((currentProjectMembers) => { + project.members = currentProjectMembers; + return project; + }); + }); + return Promise.all(projects); + }); +} + +/** + * Main function. + * + * @returns {Promise} void + */ +async function main() { + const esData = await getESData(); + const dbData = await getDBData(); + const template = getTemplate(); + const data = compareProjects(esData, dbData); + const report = template(data); + fs.writeFileSync(reportPathname, report); + console.log(`report is written to ${reportPathname}`); +} + +main().then(() => { + console.log('done!'); + process.exit(); +}).catch((err) => { + console.log(err.message); + process.exit(); +}); diff --git a/scripts/es-db-compare/report.mustache b/scripts/es-db-compare/report.mustache new file mode 100644 index 00000000..f4f186e8 --- /dev/null +++ b/scripts/es-db-compare/report.mustache @@ -0,0 +1,89 @@ + + Topcoder Project Service - ES/DB Comparison Report + + + + +

Summary

+There are {{ project.meta.totalObjects }} objects with inconsistencies found in {{ project.meta.totalProjects }} projects. +

List

+

Project ({{ project.meta.totalObjects }})

+{{#each project.rootMismatch }} +

Project - id: {{ @key }} ({{ this.meta.counts }})

+ {{#if this.project.length}} + + +

{{ this.project.length }} mismatches:

+ {{/if}} + + {{#each this.project }} +
  • {{ this.path }}
  • + {{/each}} +
    + + {{#each this.associations }} +

    {{ @key }} ({{ this.meta.counts }})

    + {{#each this.mismatches }} +
    {{ @../key }} - id: {{ @key }}
    + + +

    {{ this.length }} mismatches:

    + + {{#each this }} +
  • {{ this.path }}
  • + {{/each}} +
    + {{/each}} + {{#each this.dbOnly }} +
    {{ @../key }} - id: {{ this.id }} (1)
    + +

    Found in DB but not in ES.

    + {{/each}} + + {{#each this.esOnly }} +
    {{ @../key }} - id: {{ this.id }} (1)
    + +

    Found in ES but not in DB.

    + {{/each}} + {{/each}} +{{/each}} + +{{#each project.dbOnly }} +

    Project - id: {{ this.id }} (1)

    + +

    Found in DB but not in ES.

    +{{/each}} + +{{#each project.esOnly }} +

    Project - id: {{ this.id }} (1)

    + +

    Found in ES but not in DB.

    +{{/each}} + +

    Data

    +{{#each meta.dbCopies }} + +{{/each}} +{{#each meta.esCopies }} + +{{/each}} + diff --git a/scripts/es-db-compare/util.js b/scripts/es-db-compare/util.js new file mode 100644 index 00000000..704e88b8 --- /dev/null +++ b/scripts/es-db-compare/util.js @@ -0,0 +1,180 @@ +/* eslint-disable no-underscore-dangle */ +/* eslint-disable no-use-before-define */ +/* eslint-disable no-restricted-syntax */ +/* + * Util functions used in the script. + */ + +const _ = require('lodash'); +const moment = require('moment'); + +/** + * Sub-function for the flatten function that process object assets in the delta. + * + * @param {Object} delta the diff delta + * @param {Array} path the JSON path + * @returns {Array} flattened delta + */ +function flattenObject(delta, path) { + let result = []; + _.map(delta, (value, key) => { + const currentPath = _.concat(path, key); + if (value instanceof Array) { + if (value.length === 2) { + result.push({ + path: currentPath, + type: 'modify', + dataType: 'object', + originalValue: value[0], + currentValue: value[1], + }); + return; + } + if (value.length === 1) { + result.push({ + path: currentPath, + type: 'add', + dataType: 'object', + value: value[0], + }); + return; + } + if (value.length === 3) { + if (value[2] === 3) { + result.push({ + path: currentPath, + type: 'move', + dataType: 'object', + value: value[0], + }); + return; + } + result.push({ + path: currentPath, + type: 'delete', + dataType: 'object', + value: value[0], + }); + return; + } + } + result = _.concat(result, flatten(value, _.clone(currentPath))); + }); + return result; +} + +/** + * Sub-function for the flatten function that process array assets in the delta. 
+ * + * @param {Object} delta the diff delta + * @param {Array} path the JSON path + * @returns {Array} flattened delta + */ +function flattenArray(delta, path) { + let result = []; + _.map(_.omit(delta, ['_t']), (value, key) => { + if (value instanceof Array) { + if (key.startsWith('_')) { + const index = key.substring(1); + const currentPath = [...path, index]; + if (value[2] === 3) { + result.push({ + path: currentPath, + type: 'move', + dataType: 'array', + index, + originalValue: value[0], + }); + return; + } + result.push({ + path: currentPath, + type: 'delete', + dataType: 'array', + index, + originalValue: value[0], + }); + return; + } + const currentPath = _.concat(path, key); + if (value[2] === 3) { + result.push({ + path: currentPath, + type: 'move', + dataType: 'array', + index: key, + value: value[0], + }); + return; + } + result.push({ + path: currentPath, + type: 'add', + dataType: 'array', + index: key, + value: value[0], + }); + return; + } + const currentPath = _.concat(path, key); + if (key >= 0) { + result = _.concat(result, flattenObject(value, _.clone(currentPath))); + return; + } + throw new Error(`Unhandled case at ${currentPath}`); + }); + return result; +} + +/** + * Flatten delta from json diff patch so that it can be easily manipulated. + * + * @param {Object} delta the diff delta + * @param {Array} path the JSON path + * @returns {Array} flattened delta + */ +function flatten(delta, path = []) { + if (delta._t === 'a') { + return flattenArray(delta, path); + } + return flattenObject(delta, path); +} + +/** + * Generate a JSON path from array format. 
+ * Example: `generateJSONPath([ 'members', '0', 'key' ])` will output `members[0].key` + * + * @param {Array} path path in array format + * @returns {String} the JSON path + */ +function generateJSONPath(path) { + let result = ''; + for (const item of path) { + if (!isNaN(item)) { + result += `[${item}]`; + continue; // eslint-disable-line no-continue + } + if (result) { + result += '.'; + } + result += item; + } + return result; +} + +/** + * Generate a sensible filename for the report. + * + * @returns {String} the result filename + */ +function generateFilename() { + const nodeEnv = process.env.NODE_ENV || 'default'; + const date = moment().format('DD-MM-YYYY-HH-MM-SS'); + return `es-db-report-${nodeEnv}-${date}.html`; +} + +module.exports = { + flatten, + generateJSONPath, + generateFilename, +}; From 5073ad608486f1edd5a0d809b93b03fd851db54f Mon Sep 17 00:00:00 2001 From: imcaizheng Date: Thu, 16 Jan 2020 04:32:37 +0800 Subject: [PATCH 2/4] fix 4 issues --- docs/es-db-compare.md | 2 +- scripts/es-db-compare/compareProjects.js | 36 ++++++------ scripts/es-db-compare/constants.js | 34 ++++++++--- scripts/es-db-compare/index.js | 2 + scripts/es-db-compare/report.mustache | 72 +++++++++++++++--------- scripts/es-db-compare/util.js | 23 ++++++++ 6 files changed, 116 insertions(+), 53 deletions(-) diff --git a/docs/es-db-compare.md b/docs/es-db-compare.md index a27a7d98..ff66909a 100644 --- a/docs/es-db-compare.md +++ b/docs/es-db-compare.md @@ -8,7 +8,7 @@ The following properties can be set from env variables: - PROJECT_LAST_ACTIVITY_AT: if set, only projects with property lastActivityAt that large than or equal to the value are compared. There could be some fields that always mismatch in ES and DB. 
-The variable named `ignoredProperties` at `scripts/es-db-compare/constants.js` maintains a list of fields which will be ignored +The variable named `ignoredPaths` at `scripts/es-db-compare/constants.js` maintains a list of json paths which will be ignored during the comparation. You may need to modify/add/delete items in the list. ### Note diff --git a/scripts/es-db-compare/compareProjects.js b/scripts/es-db-compare/compareProjects.js index 787bccdd..5669014b 100644 --- a/scripts/es-db-compare/compareProjects.js +++ b/scripts/es-db-compare/compareProjects.js @@ -12,23 +12,16 @@ const Diff = require('jsondiffpatch'); const lodash = require('lodash'); const scriptUtil = require('./util'); -const scriptConstants = require('./constants'); const associations = { phases: 'Phase', members: 'Member', invites: 'Invite', - attachment: 'Attachment', + attachments: 'Attachment', }; const differ = Diff.create({ objectHash: obj => obj.id, - propertyFilter: (name) => { - if (scriptConstants.ignoredProperties.includes(name)) { - return false; - } - return true; - }, }); /** @@ -67,12 +60,12 @@ function processDelta(delta, esData, dbData, finalData) { }; const processProduct = (item) => { - const itemNew = Object.assign({}, lodash.omit(item, ['path']), { path: lodash.slice(item.path, 5) }); - if (itemNew.dataType === 'array') { + const subPath = lodash.slice(item.path, 4); + if (item.dataType === 'array' && subPath.length === 1) { return processMissingObject(item, { modelName: 'Product' }); } - if (['add', 'delete', 'modify'].includes(itemNew.type)) { - const path = scriptUtil.generateJSONPath(itemNew.path); + if (['add', 'delete', 'modify'].includes(item.type)) { + const path = scriptUtil.generateJSONPath(lodash.slice(subPath, 1)); const id = lodash.get(finalData, lodash.slice(item.path, 0, 5)).id; const projectId = lodash.get(finalData, lodash.slice(item.path, 0, 1)).id; const phaseId = lodash.get(finalData, lodash.slice(item.path, 0, 3)).id; @@ -93,6 +86,8 @@ function 
processDelta(delta, esData, dbData, finalData) { console.log(`one mismatch found for Product with id ${id}`); return { type: 'mismatch', + kind: item.type, + dataType: item.dataType, projectId, id, modelName: 'Product', @@ -107,12 +102,12 @@ function processDelta(delta, esData, dbData, finalData) { if (item.path[1] === 'phases' && item.path[3] === 'products') { return processProduct(item); } - const itemNew = Object.assign({}, lodash.omit(item, ['path']), { path: lodash.slice(item.path, 2) }); - if (itemNew.dataType === 'array') { + const subPath = lodash.slice(item.path, 2); + if (item.dataType === 'array' && subPath.length === 1) { return processMissingObject(item, option); } - if (['add', 'delete', 'modify'].includes(itemNew.type)) { - const path = scriptUtil.generateJSONPath(lodash.slice(itemNew.path, 1)); + if (['add', 'delete', 'modify'].includes(item.type)) { + const path = scriptUtil.generateJSONPath(lodash.slice(subPath, 1)); const id = lodash.get(finalData, lodash.slice(item.path, 0, 3)).id; const projectId = lodash.get(finalData, lodash.slice(item.path, 0, 1)).id; const dbCopy = lodash.find( @@ -126,6 +121,8 @@ function processDelta(delta, esData, dbData, finalData) { console.log(`one mismatch found for ${option.modelName} with id ${id}`); return { type: 'mismatch', + kind: item.type, + dataType: item.dataType, projectId, modelName: option.modelName, id, @@ -139,7 +136,7 @@ function processDelta(delta, esData, dbData, finalData) { if (delta.path.length > 2 && associations[delta.path[1]]) { return processAssociation(delta, { modelName: associations[delta.path[1]], refPath: delta.path[1] }); } - if (delta.dataType === 'array') { + if (delta.dataType === 'array' && delta.path.length === 1) { return processMissingObject(delta, { modelName: 'Project' }); } if (['add', 'delete', 'modify'].includes(delta.type)) { @@ -150,6 +147,8 @@ function processDelta(delta, esData, dbData, finalData) { console.log(`one mismatch found for Project with id ${id}`); return { 
type: 'mismatch', + kind: delta.type, + dataType: delta.dataType, projectId: id, modelName: 'Project', id, @@ -264,6 +263,9 @@ function compareProjects(esData, dbData) { const finalData = differ.patch(Diff.clone(dbData), result); const flattenedResult = scriptUtil.flatten(result); for (const item of flattenedResult) { + if (scriptUtil.isIgnoredPath('project', item.path)) { + continue; // eslint-disable-line no-continue + } const delta = processDelta(item, esData, dbData, finalData); if (delta) { collectDataCopies(delta); diff --git a/scripts/es-db-compare/constants.js b/scripts/es-db-compare/constants.js index 9529931f..130f1c1b 100644 --- a/scripts/es-db-compare/constants.js +++ b/scripts/es-db-compare/constants.js @@ -3,11 +3,31 @@ */ module.exports = { - ignoredProperties: [ - 'createdAt', - 'updatedAt', - 'deletedAt', - 'deletedBy', - 'projectUrl', - ], + // currently support only a subset of jsonpath notations + // "*" means any index number + ignoredPaths: [ + 'project.projectUrl', + 'project.utm', + + 'project.deletedAt', + 'project.phases[*].deletedAt', + 'project.phases[*].products[*].deletedAt', + 'project.invites[*].deletedAt', + 'project.members[*].deletedAt', + 'project.attachments[*].deletedAt', + + 'project.updatedAt', + 'project.phases[*].updatedAt', + 'project.phases[*].products[*].updatedAt', + 'project.invites[*].updatedAt', + 'project.members[*].updatedAt', + 'project.attachments[*].updatedAt', + + 'project.deletedBy', + 'project.phases[*].deletedBy', + 'project.phases[*].products[*].deletedBy', + 'project.invites[*].deletedBy', + 'project.members[*].deletedBy', + 'project.attachments[*].deletedBy', + ] }; diff --git a/scripts/es-db-compare/index.js b/scripts/es-db-compare/index.js index 4a3314a4..adc1a2a6 100644 --- a/scripts/es-db-compare/index.js +++ b/scripts/es-db-compare/index.js @@ -164,6 +164,8 @@ async function getDBData() { }); }); return Promise.all(projects); + }).then(projects => { + return JSON.parse(JSON.stringify(projects)); }); 
} diff --git a/scripts/es-db-compare/report.mustache b/scripts/es-db-compare/report.mustache index f4f186e8..3d539bbc 100644 --- a/scripts/es-db-compare/report.mustache +++ b/scripts/es-db-compare/report.mustache @@ -1,21 +1,37 @@ Topcoder Project Service - ES/DB Comparison Report + @@ -26,13 +42,13 @@ There are {{ project.meta.totalObjects }} objects with inconsistencies found in {{#each project.rootMismatch }}

    Project - id: {{ @key }} ({{ this.meta.counts }})

    {{#if this.project.length}} - - + +

    {{ this.project.length }} mismatches:

    {{/if}} {{#each this.project }} -
  • {{ this.path }}
  • +
  • {{ this.path }} (kind: {{ this.kind }})
  • {{/each}}
    @@ -40,24 +56,24 @@ There are {{ project.meta.totalObjects }} objects with inconsistencies found in

    {{ @key }} ({{ this.meta.counts }})

    {{#each this.mismatches }}
    {{ @../key }} - id: {{ @key }}
    - - + +

    {{ this.length }} mismatches:

    {{#each this }} -
  • {{ this.path }}
  • +
  • {{ this.path }} (kind: {{ this.kind }})
  • {{/each}}
    {{/each}} {{#each this.dbOnly }}
    {{ @../key }} - id: {{ this.id }} (1)
    - +

    Found in DB but not in ES.

    {{/each}} {{#each this.esOnly }}
    {{ @../key }} - id: {{ this.id }} (1)
    - +

    Found in ES but not in DB.

    {{/each}} {{/each}} @@ -65,25 +81,25 @@ There are {{ project.meta.totalObjects }} objects with inconsistencies found in {{#each project.dbOnly }}

    Project - id: {{ this.id }} (1)

    - +

    Found in DB but not in ES.

    {{/each}} {{#each project.esOnly }}

    Project - id: {{ this.id }} (1)

    - +

    Found in ES but not in DB.

    {{/each}} - -

    Data

    -{{#each meta.dbCopies }} - -{{/each}} -{{#each meta.esCopies }} - -{{/each}} +
    + {{#each meta.dbCopies }} + + {{/each}} + {{#each meta.esCopies }} + + {{/each}} +
    diff --git a/scripts/es-db-compare/util.js b/scripts/es-db-compare/util.js index 704e88b8..4b378588 100644 --- a/scripts/es-db-compare/util.js +++ b/scripts/es-db-compare/util.js @@ -8,6 +8,8 @@ const _ = require('lodash'); const moment = require('moment'); +const constants = require('./constants'); + /** * Sub-function for the flatten function that process object assets in the delta. * @@ -162,6 +164,26 @@ function generateJSONPath(path) { return result; } +/** + * Check if the json path of a delta should be ignored. + * Low-budget version. + * + * @param {String} root the model name, one of "project" and "metadata" + * @param {Array} path the path to be verified + * @returns {Boolean} the result + */ +function isIgnoredPath(root, path) { + const jsonPath = generateJSONPath(_.slice(path, 1)); + if (jsonPath === '') { + return false; + } + const expr = jsonPath.replace(/\[\d+\]/g, '[*]').replace(/^/, `${root}.`) + if (constants.ignoredPaths.includes(expr)) { + return true; + } + return false; +} + /** * Generate a sensible filename for the report. 
* @@ -177,4 +199,5 @@ module.exports = { flatten, generateJSONPath, generateFilename, + isIgnoredPath, }; From 93723a5f5deaf56ad8fb8b56708ba0dcd171e0c0 Mon Sep 17 00:00:00 2001 From: imcaizheng Date: Sun, 19 Jan 2020 14:12:41 +0800 Subject: [PATCH 3/4] fix issues on array comparison --- scripts/es-db-compare/compareProjects.js | 39 +++++++++++++++++++++++- scripts/es-db-compare/constants.js | 2 +- scripts/es-db-compare/index.js | 4 +-- scripts/es-db-compare/util.js | 2 +- 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/scripts/es-db-compare/compareProjects.js b/scripts/es-db-compare/compareProjects.js index 5669014b..35377747 100644 --- a/scripts/es-db-compare/compareProjects.js +++ b/scripts/es-db-compare/compareProjects.js @@ -24,6 +24,43 @@ const differ = Diff.create({ objectHash: obj => obj.id, }); +/** + * The json diff patch may contain deltas with the same path, + * one is "added to array", the other is "deleted from array". + * In such a case they can be combined and treated as "modified at an index in the array". + * + * @param {Array} deltas the data to be filtered + * @returns {Array} filtered data + */ +function processSamePath(deltas) { + const result = []; + const groups = lodash.groupBy(deltas, 'path'); + for (const value of Object.values(groups)) { + if (value.length === 1) { + result.push(value[0]); + continue; // eslint-disable-line no-continue + } + if (value.length === 2) { + result.push(Object.assign({ type: 'modify' }, lodash.omit(value[0], 'type'))); + continue; // eslint-disable-line no-continue + } + throw new Error('Internal Error'); + } + return result; +} + +/** + * Transform or filter deltas before any further processing. + * + * @param {Array} deltas the data to be processed + * @returns {Array} the result + */ +function preProcessDeltas(deltas) { + return processSamePath( + scriptUtil.flatten(deltas), + ); +} + /** * Process diff delta to extract project-related data. 
* @@ -261,7 +298,7 @@ function compareProjects(esData, dbData) { const result = differ.diff(dbData, esData); const finalData = differ.patch(Diff.clone(dbData), result); - const flattenedResult = scriptUtil.flatten(result); + const flattenedResult = preProcessDeltas(result); for (const item of flattenedResult) { if (scriptUtil.isIgnoredPath('project', item.path)) { continue; // eslint-disable-line no-continue diff --git a/scripts/es-db-compare/constants.js b/scripts/es-db-compare/constants.js index 130f1c1b..36b51fbb 100644 --- a/scripts/es-db-compare/constants.js +++ b/scripts/es-db-compare/constants.js @@ -29,5 +29,5 @@ module.exports = { 'project.invites[*].deletedBy', 'project.members[*].deletedBy', 'project.attachments[*].deletedBy', - ] + ], }; diff --git a/scripts/es-db-compare/index.js b/scripts/es-db-compare/index.js index adc1a2a6..51aab132 100644 --- a/scripts/es-db-compare/index.js +++ b/scripts/es-db-compare/index.js @@ -164,9 +164,7 @@ async function getDBData() { }); }); return Promise.all(projects); - }).then(projects => { - return JSON.parse(JSON.stringify(projects)); - }); + }).then(projects => JSON.parse(JSON.stringify(projects))); } /** diff --git a/scripts/es-db-compare/util.js b/scripts/es-db-compare/util.js index 4b378588..6d02040b 100644 --- a/scripts/es-db-compare/util.js +++ b/scripts/es-db-compare/util.js @@ -177,7 +177,7 @@ function isIgnoredPath(root, path) { if (jsonPath === '') { return false; } - const expr = jsonPath.replace(/\[\d+\]/g, '[*]').replace(/^/, `${root}.`) + const expr = jsonPath.replace(/\[\d+\]/g, '[*]').replace(/^/, `${root}.`); if (constants.ignoredPaths.includes(expr)) { return true; } From dc8339f3875455940b6d8464292623fa9f4e5847 Mon Sep 17 00:00:00 2001 From: imcaizheng Date: Sun, 19 Jan 2020 14:16:01 +0800 Subject: [PATCH 4/4] add descriptions to each kind of mismatches --- scripts/es-db-compare/index.js | 12 ++++++++++++ scripts/es-db-compare/report.mustache | 4 ++-- 2 files changed, 14 insertions(+), 2 
deletions(-) diff --git a/scripts/es-db-compare/index.js b/scripts/es-db-compare/index.js index 51aab132..f74e152d 100644 --- a/scripts/es-db-compare/index.js +++ b/scripts/es-db-compare/index.js @@ -101,6 +101,18 @@ function getESSearchCriteria() { function getTemplate() { handlebars.registerHelper('getValue', (data, key) => data[key]); handlebars.registerHelper('toJSON', obj => JSON.stringify(obj, null, 2)); + handlebars.registerHelper('describeKind', (kind) => { + if (kind === 'modify') { + return 'values differ'; + } + if (kind === 'add') { + return 'missed in DB'; + } + if (kind === 'delete') { + return 'missed in ES'; + } + return 'unknown'; + }); const template = handlebars.compile(fs.readFileSync(path.join(__dirname, 'report.mustache')).toString()); return template; } diff --git a/scripts/es-db-compare/report.mustache b/scripts/es-db-compare/report.mustache index 3d539bbc..a69fa3cf 100644 --- a/scripts/es-db-compare/report.mustache +++ b/scripts/es-db-compare/report.mustache @@ -48,7 +48,7 @@ There are {{ project.meta.totalObjects }} objects with inconsistencies found in {{/if}} {{#each this.project }} -
  • {{ this.path }} (kind: {{ this.kind }})
  • +
  • {{ this.path }} ({{describeKind this.kind }})
  • {{/each}}
    @@ -61,7 +61,7 @@ There are {{ project.meta.totalObjects }} objects with inconsistencies found in

    {{ this.length }} mismatches:

    {{#each this }} -
  • {{ this.path }} (kind: {{ this.kind }})
  • +
  • {{ this.path }} ({{ this.kind }})
  • {{/each}}
    {{/each}}