Skip to content

ES/DB compare script (for projects without timelines) #441

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,4 @@ jspm_packages
!.elasticbeanstalk/*.global.yml
.DS_Store
.idea
report.html
21 changes: 21 additions & 0 deletions docs/es-db-compare.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# es-db-compare

## Configuration
The following properties can be set from env variables:

- PROJECT_START_ID: if set, only projects whose id is greater than or equal to this value are compared.
- PROJECT_END_ID: if set, only projects whose id is less than or equal to this value are compared.
- PROJECT_LAST_ACTIVITY_AT: if set, only projects whose `lastActivityAt` property is greater than or equal to this value are compared.

Some fields may always mismatch between ES and DB.
The variable named `ignoredPaths` in `scripts/es-db-compare/constants.js` maintains a list of JSON paths that are ignored
during the comparison. You may need to modify, add, or delete items in this list.

### Note
- `PROJECT_START_ID` and `PROJECT_END_ID` must exist together.
- At least one of the `PROJECT_START_ID`/`PROJECT_END_ID` pair and `PROJECT_LAST_ACTIVITY_AT` must be set before running the script.

## Usage

Set up the configuration and run `npm run es-db-compare` on the command line.
It will then generate an HTML report named `report.html` in the current directory.
7 changes: 5 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
"test": "NODE_ENV=test npm run lint && NODE_ENV=test npm run sync:es && NODE_ENV=test npm run sync:db && NODE_ENV=test ./node_modules/.bin/istanbul cover ./node_modules/mocha/bin/_mocha -- --timeout 10000 --require babel-core/register $(find src -path '*spec.js*') --exit",
"test:watch": "NODE_ENV=test ./node_modules/.bin/mocha -w --require babel-core/register $(find src -path '*spec.js*')",
"seed": "babel-node src/tests/seed.js --presets es2015",
"demo-data": "babel-node local/seed"
"demo-data": "babel-node local/seed",
"es-db-compare": "babel-node scripts/es-db-compare"
},
"repository": {
"type": "git",
Expand Down Expand Up @@ -53,8 +54,11 @@
"express-request-id": "^1.1.0",
"express-sanitizer": "^1.0.2",
"express-validation": "^0.6.0",
"handlebars": "^4.5.3",
"http-aws-es": "^4.0.0",
"joi": "^8.0.5",
"jsondiffpatch": "^0.4.1",
"jsonpath": "^1.0.2",
"jsonwebtoken": "^8.3.0",
"lodash": "^4.17.11",
"memwatch-next": "^0.3.0",
Expand All @@ -64,7 +68,6 @@
"pg": "^7.11.0",
"pg-native": "^3.0.0",
"sequelize": "^5.8.7",
"jsonpath": "^1.0.2",
"swagger-ui-express": "^4.0.6",
"tc-core-library-js": "appirio-tech/tc-core-library-js.git#v2.6.3",
"traverse": "^0.6.6",
Expand Down
318 changes: 318 additions & 0 deletions scripts/es-db-compare/compareProjects.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,318 @@
/* eslint-disable no-console */
/* eslint-disable consistent-return */
/* eslint-disable no-restricted-syntax */
/*
* Compare the data from the database with the data from ES.
* Specific to project-related data.
*
* Please consider decoupling the reusable logic from this module before creating
* modules to compare other models.
*/

const Diff = require('jsondiffpatch');
const lodash = require('lodash');
const scriptUtil = require('./util');

// Maps the property name of a project association (the second segment of a
// delta path, e.g. `[projectIndex, 'phases', ...]`) to the model name that is
// reported for it.
const associations = {
phases: 'Phase',
members: 'Member',
invites: 'Invite',
attachments: 'Attachment',
};

// Shared jsondiffpatch instance. `objectHash` returns each object's `id`;
// per the jsondiffpatch docs this is what the library uses to match objects
// inside arrays (presumably instead of comparing them by position — confirm
// against the jsondiffpatch version in use).
const differ = Diff.create({
objectHash: obj => obj.id,
});

/**
 * Collapse flattened deltas that share the same `path`.
 *
 * Deltas are grouped by their `path` property. A path that occurs once is
 * passed through untouched. A path that occurs twice (the "added to array" /
 * "deleted from array" pair) is reduced to a single delta: a copy of the first
 * one of the pair whose `type` is replaced by `'modify'`.
 *
 * @param {Array} deltas the flattened deltas to merge
 * @returns {Array} one delta per distinct path
 * @throws {Error} when more than two deltas share the same path
 */
function processSamePath(deltas) {
  // Turns all deltas recorded for one path into the single delta that represents it.
  const mergeGroup = (samePathDeltas) => {
    switch (samePathDeltas.length) {
      case 1:
        return samePathDeltas[0];
      case 2:
        // Only the first delta of the pair is kept; its type becomes "modify".
        return Object.assign({ type: 'modify' }, lodash.omit(samePathDeltas[0], 'type'));
      default:
        throw new Error('Internal Error');
    }
  };

  const deltasByPath = lodash.groupBy(deltas, 'path');
  return Object.values(deltasByPath).map(group => mergeGroup(group));
}

/**
 * Prepare a raw diff for further processing.
 *
 * The diff is first flattened with `scriptUtil.flatten()`, then deltas that
 * share a path are merged by `processSamePath()`.
 *
 * @param {Array} deltas the data to be processed
 * @returns {Array} the result
 */
function preProcessDeltas(deltas) {
  const flattenedDeltas = scriptUtil.flatten(deltas);
  return processSamePath(flattenedDeltas);
}

/**
 * Convert one flattened diff delta into a project-oriented report entry.
 *
 * The delta's `path` starts with the index of a project, optionally followed
 * by an association name listed in `associations` (and, for phases, by
 * `products`). Depending on the path and on `type`/`dataType`, the result is:
 *   - a `dbOnly` entry (object deleted, i.e. only present in DB),
 *   - an `esOnly` entry (object added, i.e. only present in ES), or
 *   - a `mismatch` entry carrying both copies and the JSON path of the field.
 *
 * @param {Object} delta the diff delta. See `util.flatten()`
 * @param {Object} esData the data from ES
 * @param {Object} dbData the data from DB
 * @param {Object} finalData the data patched
 * @returns {Object} the report entry, or undefined when the delta is of no interest
 */
function processDelta(delta, esData, dbData, finalData) {
  const MISMATCH_KINDS = ['add', 'delete', 'modify'];

  // Reads the `id` of the object found under the first `depth` segments of `fullPath`.
  const idAt = (source, fullPath, depth) => lodash.get(source, lodash.slice(fullPath, 0, depth)).id;

  // Builds the entry for an object that exists on one side only.
  const describeMissing = (item, modelName) => {
    switch (item.type) {
      case 'delete': {
        const projectId = idAt(dbData, item.path, 1);
        console.log(`one dbOnly found for ${modelName} with id ${item.originalValue.id}`);
        return {
          type: 'dbOnly',
          projectId,
          modelName,
          id: item.originalValue.id,
          dbCopy: item.originalValue,
        };
      }
      case 'add': {
        const projectId = idAt(esData, item.path, 1);
        console.log(`one esOnly found for ${modelName} with id ${item.value.id}`);
        return {
          type: 'esOnly',
          projectId,
          modelName,
          id: item.value.id,
          esCopy: item.value,
        };
      }
      default:
        return undefined;
    }
  };

  // Handles deltas located under `<project>.phases.<phase>.products`.
  const describeProduct = (item) => {
    const productPath = lodash.slice(item.path, 4);
    if (item.dataType === 'array' && productPath.length === 1) {
      return describeMissing(item, 'Product');
    }
    if (!MISMATCH_KINDS.includes(item.type)) {
      return undefined;
    }
    const path = scriptUtil.generateJSONPath(lodash.slice(productPath, 1));
    const id = idAt(finalData, item.path, 5);
    const projectId = idAt(finalData, item.path, 1);
    const phaseId = idAt(finalData, item.path, 3);
    // Walks project -> phase -> product by id inside one of the data sets.
    const locateProduct = (projects) => {
      const project = lodash.find(projects, { id: projectId });
      const phase = lodash.find(project.phases, { id: phaseId });
      return lodash.find(phase.products, { id });
    };
    const dbCopy = locateProduct(dbData);
    const esCopy = locateProduct(esData);
    console.log(`one mismatch found for Product with id ${id}`);
    return {
      type: 'mismatch',
      kind: item.type,
      dataType: item.dataType,
      projectId,
      id,
      modelName: 'Product',
      path,
      dbCopy,
      esCopy,
    };
  };

  // Handles deltas located under one of the project associations.
  const describeAssociation = (item, modelName, refPath) => {
    if (item.path[1] === 'phases' && item.path[3] === 'products') {
      return describeProduct(item);
    }
    const associationPath = lodash.slice(item.path, 2);
    if (item.dataType === 'array' && associationPath.length === 1) {
      return describeMissing(item, modelName);
    }
    if (!MISMATCH_KINDS.includes(item.type)) {
      return undefined;
    }
    const path = scriptUtil.generateJSONPath(lodash.slice(associationPath, 1));
    const id = idAt(finalData, item.path, 3);
    const projectId = idAt(finalData, item.path, 1);
    const locateAssociated = projects => lodash.find(
      lodash.find(projects, { id: projectId })[refPath],
      { id },
    );
    const dbCopy = locateAssociated(dbData);
    const esCopy = locateAssociated(esData);
    console.log(`one mismatch found for ${modelName} with id ${id}`);
    return {
      type: 'mismatch',
      kind: item.type,
      dataType: item.dataType,
      projectId,
      modelName,
      id,
      path,
      dbCopy,
      esCopy,
    };
  };

  const segments = delta.path;
  if (segments.length > 2 && associations[segments[1]]) {
    return describeAssociation(delta, associations[segments[1]], segments[1]);
  }
  if (delta.dataType === 'array' && segments.length === 1) {
    return describeMissing(delta, 'Project');
  }
  if (!MISMATCH_KINDS.includes(delta.type)) {
    return undefined;
  }
  const path = scriptUtil.generateJSONPath(lodash.slice(segments, 1));
  const id = idAt(finalData, segments, 1);
  const dbCopy = lodash.find(dbData, { id });
  const esCopy = lodash.find(esData, { id });
  console.log(`one mismatch found for Project with id ${id}`);
  return {
    type: 'mismatch',
    kind: delta.type,
    dataType: delta.dataType,
    projectId: id,
    modelName: 'Project',
    id,
    path,
    dbCopy,
    esCopy,
  };
}

/**
 * Compare Project data from ES and DB.
 *
 * Diffs `dbData` against `esData`, converts every non-ignored delta with
 * `processDelta()` and sorts the outcome into:
 *   - `project.esOnly` / `project.dbOnly`: projects found on one side only,
 *   - `project.rootMismatch[projectId]`: project-level mismatches plus, per
 *     associated model, its mismatches (grouped by object id) and its
 *     esOnly/dbOnly objects,
 *   - `project.meta` and the nested `meta.counts`: inconsistency counters,
 *   - `meta.dbCopies` / `meta.esCopies`: the first delta seen for each
 *     (modelName, id) pair that carries a DB / ES copy.
 *
 * @param {Object} esData the data from ES
 * @param {Object} dbData the data from DB
 * @returns {Object} the data to feed handlebars template
 */
function compareProjects(esData, dbData) {
  const data = {
    project: {
      rootMismatch: {},
      esOnly: [],
      dbOnly: [],
    },
    meta: {
      esCopies: [],
      dbCopies: [],
      counts: {
        Project: 0,
      },
      uniqueDeltas: [],
    },
  };

  // Files a processed delta under the matching section of `data[root]`.
  const storeDelta = (root, delta) => {
    const section = data[root];
    const isProjectDelta = delta.modelName === 'Project';
    if (isProjectDelta && (delta.type === 'esOnly' || delta.type === 'dbOnly')) {
      section[delta.type].push(delta);
      return;
    }
    if (!section.rootMismatch[delta.projectId]) {
      section.rootMismatch[delta.projectId] = { project: [], associations: {} };
    }
    const projectEntry = section.rootMismatch[delta.projectId];
    if (isProjectDelta) {
      projectEntry.project.push(delta);
      return;
    }
    const perModel = projectEntry.associations;
    if (!Object.keys(perModel).includes(delta.modelName)) {
      perModel[delta.modelName] = {
        mismatches: {},
        esOnly: [],
        dbOnly: [],
      };
    }
    const modelEntry = perModel[delta.modelName];
    if (delta.type !== 'mismatch') {
      modelEntry[delta.type].push(delta);
      return;
    }
    if (!modelEntry.mismatches[delta.id]) {
      modelEntry.mismatches[delta.id] = [];
    }
    modelEntry.mismatches[delta.id].push(delta);
  };

  // Remembers the first delta per (modelName, id) that carries a DB and/or ES copy.
  const collectDataCopies = (delta) => {
    const targets = [
      ['dbCopy', data.meta.dbCopies],
      ['esCopy', data.meta.esCopies],
    ];
    targets.forEach(([copyField, collected]) => {
      if (!delta[copyField]) {
        return;
      }
      const alreadyCollected = lodash.find(collected, lodash.pick(delta, ['modelName', 'id']));
      if (!alreadyCollected) {
        collected.push(delta);
      }
    });
  };

  // Fills in the `meta` counters once every delta has been stored.
  const countInconsistencies = () => {
    const projectSection = data.project;
    const oneSidedProjects = projectSection.dbOnly.length + projectSection.esOnly.length;
    projectSection.meta = {
      totalObjects: oneSidedProjects,
      totalProjects: Object.keys(projectSection.rootMismatch).length + oneSidedProjects,
    };
    for (const projectEntry of Object.values(projectSection.rootMismatch)) {
      // The project itself counts once, no matter how many of its fields differ.
      projectEntry.meta = { counts: projectEntry.project.length ? 1 : 0 };
      for (const modelEntry of Object.values(projectEntry.associations)) {
        modelEntry.meta = {
          counts: Object.keys(modelEntry.mismatches).length + modelEntry.dbOnly.length + modelEntry.esOnly.length,
        };
        projectEntry.meta.counts += modelEntry.meta.counts;
      }
      projectSection.meta.totalObjects += projectEntry.meta.counts;
    }
  };

  const rawDiff = differ.diff(dbData, esData);
  const finalData = differ.patch(Diff.clone(dbData), rawDiff);
  preProcessDeltas(rawDiff).forEach((item) => {
    if (scriptUtil.isIgnoredPath('project', item.path)) {
      return;
    }
    const delta = processDelta(item, esData, dbData, finalData);
    if (!delta) {
      return;
    }
    collectDataCopies(delta);
    storeDelta('project', delta);
  });
  countInconsistencies();
  return data;
}

// Public API of this module: only `compareProjects` is exported; the other
// functions defined in this file stay module-private.
module.exports = {
compareProjects,
};
Loading