Skip to content

Commit 129a11f

Browse files
authored
Merge pull request #441 from imcaizheng/new-fixes
ES/DB compare script (for projects without timelines)
2 parents 6a0dfb4 + dc8339f commit 129a11f

File tree

8 files changed

+889
-2
lines changed

8 files changed

+889
-2
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,4 @@ jspm_packages
4747
!.elasticbeanstalk/*.global.yml
4848
.DS_Store
4949
.idea
50+
report.html

docs/es-db-compare.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# es-db-compare
2+
3+
## Configuration
4+
The following properties can be set from env variables:
5+
6+
- PROJECT_START_ID: if set, only projects with an id greater than or equal to the value are compared.
7+
- PROJECT_END_ID: if set, only projects with an id less than or equal to the value are compared.
8+
- PROJECT_LAST_ACTIVITY_AT: if set, only projects whose `lastActivityAt` property is greater than or equal to the value are compared.
9+
10+
There could be some fields that always mismatch in ES and DB.
11+
The variable named `ignoredPaths` at `scripts/es-db-compare/constants.js` maintains a list of json paths which will be ignored
12+
during the comparison. You may need to modify/add/delete items in the list.
13+
14+
### Note
15+
- `PROJECT_START_ID` and `PROJECT_END_ID` must exist together.
16+
- At least one of `PROJECT_START_ID` (together with `PROJECT_END_ID`) and `PROJECT_LAST_ACTIVITY_AT` needs to be set before running the script.
17+
18+
## Usage
19+
20+
Set up configuration and execute command `npm run es-db-compare` on the command line.
21+
It will then generate an HTML report named `report.html` in the current directory.

package.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222
"test": "NODE_ENV=test npm run lint && NODE_ENV=test npm run sync:es && NODE_ENV=test npm run sync:db && NODE_ENV=test ./node_modules/.bin/istanbul cover ./node_modules/mocha/bin/_mocha -- --timeout 10000 --require babel-core/register $(find src -path '*spec.js*') --exit",
2323
"test:watch": "NODE_ENV=test ./node_modules/.bin/mocha -w --require babel-core/register $(find src -path '*spec.js*')",
2424
"seed": "babel-node src/tests/seed.js --presets es2015",
25-
"demo-data": "babel-node local/seed"
25+
"demo-data": "babel-node local/seed",
26+
"es-db-compare": "babel-node scripts/es-db-compare"
2627
},
2728
"repository": {
2829
"type": "git",
@@ -53,8 +54,11 @@
5354
"express-request-id": "^1.1.0",
5455
"express-sanitizer": "^1.0.2",
5556
"express-validation": "^0.6.0",
57+
"handlebars": "^4.5.3",
5658
"http-aws-es": "^4.0.0",
5759
"joi": "^8.0.5",
60+
"jsondiffpatch": "^0.4.1",
61+
"jsonpath": "^1.0.2",
5862
"jsonwebtoken": "^8.3.0",
5963
"lodash": "^4.17.11",
6064
"memwatch-next": "^0.3.0",
@@ -64,7 +68,6 @@
6468
"pg": "^7.11.0",
6569
"pg-native": "^3.0.0",
6670
"sequelize": "^5.8.7",
67-
"jsonpath": "^1.0.2",
6871
"swagger-ui-express": "^4.0.6",
6972
"tc-core-library-js": "appirio-tech/tc-core-library-js.git#v2.6.3",
7073
"traverse": "^0.6.6",
Lines changed: 318 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,318 @@
1+
/* eslint-disable no-console */
2+
/* eslint-disable consistent-return */
3+
/* eslint-disable no-restricted-syntax */
4+
/*
5+
* Compare the data from database and data from ES.
6+
* Specific to project-related data.
7+
*
8+
* Please consider decoupling the reusable logic from this module before creating
9+
* modules to compare other models.
10+
*/
11+
12+
const Diff = require('jsondiffpatch');
13+
const lodash = require('lodash');
14+
const scriptUtil = require('./util');
15+
16+
// Maps the association key found at the second segment of a delta path
// to the model name reported for that association.
const associations = {
  phases: 'Phase',
  members: 'Member',
  invites: 'Invite',
  attachments: 'Attachment',
};
22+
23+
// Shared jsondiffpatch instance; objects inside arrays are hashed by their `id`.
const differ = Diff.create({
  objectHash: item => item.id,
});
26+
27+
/**
28+
* The json diff patch may contains deltas with same path,
29+
* one is "added to array", the other is "deleted from array".
30+
* In such case they can be combined and treated as "modified at an index in the array".
31+
*
32+
* @param {Array} deltas the data to be filtered
33+
* @returns {Array} filtered data
34+
*/
35+
function processSamePath(deltas) {
36+
const result = [];
37+
const groups = lodash.groupBy(deltas, 'path');
38+
for (const value of Object.values(groups)) {
39+
if (value.length === 1) {
40+
result.push(value[0]);
41+
continue; // eslint-disable-line no-continue
42+
}
43+
if (value.length === 2) {
44+
result.push(Object.assign({ type: 'modify' }, lodash.omit(value[0], 'type')));
45+
continue; // eslint-disable-line no-continue
46+
}
47+
throw new Error('Internal Error');
48+
}
49+
return result;
50+
}
51+
52+
/**
53+
* Transform or filter deltas before any further proccess.
54+
*
55+
* @param {Array} deltas the data to be processed
56+
* @returns {Array} the result
57+
*/
58+
function preProcessDeltas(deltas) {
59+
return processSamePath(
60+
scriptUtil.flatten(deltas),
61+
);
62+
}
63+
64+
/**
65+
* Process diff delta to extract project-related data.
66+
*
67+
* @param {Object} delta the diff delta. See `util.flatten()`
68+
* @param {Object} esData the data from ES
69+
* @param {Object} dbData the data from DB
70+
* @param {Object} finalData the data patched
71+
* @returns {Object} Object project diff delta in a specific data structure
72+
*/
73+
function processDelta(delta, esData, dbData, finalData) {
74+
const processMissingObject = (item, option) => {
75+
if (item.type === 'delete') {
76+
const projectId = lodash.get(dbData, lodash.slice(item.path, 0, 1)).id;
77+
console.log(`one dbOnly found for ${option.modelName} with id ${item.originalValue.id}`);
78+
return {
79+
type: 'dbOnly',
80+
projectId,
81+
modelName: option.modelName,
82+
id: item.originalValue.id,
83+
dbCopy: item.originalValue,
84+
};
85+
}
86+
if (item.type === 'add') {
87+
const projectId = lodash.get(esData, lodash.slice(item.path, 0, 1)).id;
88+
console.log(`one esOnly found for ${option.modelName} with id ${item.value.id}`);
89+
return {
90+
type: 'esOnly',
91+
projectId,
92+
modelName: option.modelName,
93+
id: item.value.id,
94+
esCopy: item.value,
95+
};
96+
}
97+
};
98+
99+
const processProduct = (item) => {
100+
const subPath = lodash.slice(item.path, 4);
101+
if (item.dataType === 'array' && subPath.length === 1) {
102+
return processMissingObject(item, { modelName: 'Product' });
103+
}
104+
if (['add', 'delete', 'modify'].includes(item.type)) {
105+
const path = scriptUtil.generateJSONPath(lodash.slice(subPath, 1));
106+
const id = lodash.get(finalData, lodash.slice(item.path, 0, 5)).id;
107+
const projectId = lodash.get(finalData, lodash.slice(item.path, 0, 1)).id;
108+
const phaseId = lodash.get(finalData, lodash.slice(item.path, 0, 3)).id;
109+
const dbCopy = lodash.find(
110+
lodash.find(
111+
lodash.find(dbData, { id: projectId }).phases,
112+
{ id: phaseId },
113+
).products,
114+
{ id },
115+
);
116+
const esCopy = lodash.find(
117+
lodash.find(
118+
lodash.find(esData, { id: projectId }).phases,
119+
{ id: phaseId },
120+
).products,
121+
{ id },
122+
);
123+
console.log(`one mismatch found for Product with id ${id}`);
124+
return {
125+
type: 'mismatch',
126+
kind: item.type,
127+
dataType: item.dataType,
128+
projectId,
129+
id,
130+
modelName: 'Product',
131+
path,
132+
dbCopy,
133+
esCopy,
134+
};
135+
}
136+
};
137+
138+
const processAssociation = (item, option) => {
139+
if (item.path[1] === 'phases' && item.path[3] === 'products') {
140+
return processProduct(item);
141+
}
142+
const subPath = lodash.slice(item.path, 2);
143+
if (item.dataType === 'array' && subPath.length === 1) {
144+
return processMissingObject(item, option);
145+
}
146+
if (['add', 'delete', 'modify'].includes(item.type)) {
147+
const path = scriptUtil.generateJSONPath(lodash.slice(subPath, 1));
148+
const id = lodash.get(finalData, lodash.slice(item.path, 0, 3)).id;
149+
const projectId = lodash.get(finalData, lodash.slice(item.path, 0, 1)).id;
150+
const dbCopy = lodash.find(
151+
lodash.find(dbData, { id: projectId })[option.refPath],
152+
{ id },
153+
);
154+
const esCopy = lodash.find(
155+
lodash.find(esData, { id: projectId })[option.refPath],
156+
{ id },
157+
);
158+
console.log(`one mismatch found for ${option.modelName} with id ${id}`);
159+
return {
160+
type: 'mismatch',
161+
kind: item.type,
162+
dataType: item.dataType,
163+
projectId,
164+
modelName: option.modelName,
165+
id,
166+
path,
167+
dbCopy,
168+
esCopy,
169+
};
170+
}
171+
};
172+
173+
if (delta.path.length > 2 && associations[delta.path[1]]) {
174+
return processAssociation(delta, { modelName: associations[delta.path[1]], refPath: delta.path[1] });
175+
}
176+
if (delta.dataType === 'array' && delta.path.length === 1) {
177+
return processMissingObject(delta, { modelName: 'Project' });
178+
}
179+
if (['add', 'delete', 'modify'].includes(delta.type)) {
180+
const path = scriptUtil.generateJSONPath(lodash.slice(delta.path, 1));
181+
const id = lodash.get(finalData, lodash.slice(delta.path, 0, 1)).id;
182+
const dbCopy = lodash.find(dbData, { id });
183+
const esCopy = lodash.find(esData, { id });
184+
console.log(`one mismatch found for Project with id ${id}`);
185+
return {
186+
type: 'mismatch',
187+
kind: delta.type,
188+
dataType: delta.dataType,
189+
projectId: id,
190+
modelName: 'Project',
191+
id,
192+
path,
193+
dbCopy,
194+
esCopy,
195+
};
196+
}
197+
}
198+
199+
/**
200+
* Compare Project data from ES and DB.
201+
*
202+
* @param {Object} esData the data from ES
203+
* @param {Object} dbData the data from DB
204+
* @returns {Object} the data to feed handlebars template
205+
*/
206+
function compareProjects(esData, dbData) {
207+
const data = {
208+
project: {
209+
rootMismatch: {},
210+
esOnly: [],
211+
dbOnly: [],
212+
},
213+
meta: {
214+
esCopies: [],
215+
dbCopies: [],
216+
counts: {
217+
Project: 0,
218+
},
219+
uniqueDeltas: [],
220+
},
221+
};
222+
223+
const storeDelta = (root, delta) => {
224+
if (delta.modelName === 'Project') {
225+
if (delta.type === 'esOnly') {
226+
data[root].esOnly.push(delta);
227+
return;
228+
}
229+
if (delta.type === 'dbOnly') {
230+
data[root].dbOnly.push(delta);
231+
return;
232+
}
233+
}
234+
if (!data[root].rootMismatch[delta.projectId]) {
235+
data[root].rootMismatch[delta.projectId] = { project: [], associations: {} };
236+
}
237+
if (delta.modelName === 'Project') {
238+
data[root].rootMismatch[delta.projectId].project.push(delta);
239+
return;
240+
}
241+
const currentAssociations = data[root].rootMismatch[delta.projectId].associations;
242+
if (!Object.keys(currentAssociations).includes(delta.modelName)) {
243+
currentAssociations[delta.modelName] = {
244+
mismatches: {},
245+
esOnly: [],
246+
dbOnly: [],
247+
};
248+
}
249+
if (delta.type === 'mismatch') {
250+
const mismatches = currentAssociations[delta.modelName].mismatches;
251+
if (!mismatches[delta.id]) {
252+
mismatches[delta.id] = [];
253+
}
254+
mismatches[delta.id].push(delta);
255+
return;
256+
}
257+
currentAssociations[delta.modelName][delta.type].push(delta);
258+
};
259+
260+
const collectDataCopies = (delta) => {
261+
if (delta.dbCopy) {
262+
if (!lodash.find(data.meta.dbCopies, lodash.pick(delta, ['modelName', 'id']))) {
263+
data.meta.dbCopies.push(delta);
264+
}
265+
}
266+
if (delta.esCopy) {
267+
if (!lodash.find(data.meta.esCopies, lodash.pick(delta, ['modelName', 'id']))) {
268+
data.meta.esCopies.push(delta);
269+
}
270+
}
271+
};
272+
273+
const countInconsistencies = () => {
274+
lodash.set(
275+
data.project,
276+
'meta.totalObjects',
277+
data.project.dbOnly.length + data.project.esOnly.length,
278+
);
279+
lodash.set(
280+
data.project,
281+
'meta.totalProjects',
282+
Object.keys(data.project.rootMismatch).length + data.project.dbOnly.length + data.project.esOnly.length,
283+
);
284+
lodash.map(data.project.rootMismatch, (value) => {
285+
const currentValue = value;
286+
lodash.set(currentValue, 'meta.counts', currentValue.project.length ? 1 : 0);
287+
lodash.map(currentValue.associations, (subObject) => {
288+
lodash.set(
289+
subObject,
290+
'meta.counts',
291+
Object.keys(subObject.mismatches).length + subObject.dbOnly.length + subObject.esOnly.length,
292+
);
293+
currentValue.meta.counts += subObject.meta.counts;
294+
});
295+
data.project.meta.totalObjects += currentValue.meta.counts;
296+
});
297+
};
298+
299+
const result = differ.diff(dbData, esData);
300+
const finalData = differ.patch(Diff.clone(dbData), result);
301+
const flattenedResult = preProcessDeltas(result);
302+
for (const item of flattenedResult) {
303+
if (scriptUtil.isIgnoredPath('project', item.path)) {
304+
continue; // eslint-disable-line no-continue
305+
}
306+
const delta = processDelta(item, esData, dbData, finalData);
307+
if (delta) {
308+
collectDataCopies(delta);
309+
storeDelta('project', delta);
310+
}
311+
}
312+
countInconsistencies();
313+
return data;
314+
}
315+
316+
// Public API of this module.
module.exports = { compareProjects };

0 commit comments

Comments
 (0)