Skip to content

Commit 31a4ba3

Browse files
committed
feat: check duplicate files
1 parent d12da41 commit 31a4ba3

File tree

3 files changed

+42
-18
lines changed

3 files changed

+42
-18
lines changed

src/consoleColor.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
export const greenLog = (s: string): string => `\u001B[32m ${s} \u001B[0m`;
1+
/**
 * Wraps a string in ANSI escape sequences so a terminal renders it in green.
 *
 * @param s Text to colourise; it is emitted unchanged between the colour and reset codes.
 * @returns `s` prefixed with the SGR code 32 sequence and suffixed with the SGR reset sequence.
 */
export const greenLog = (s: string): string => {
  const colorOn = '\u001B[32m';
  const colorOff = '\u001B[0m';
  return colorOn + s + colorOff;
};
22

3-
export const yellowLog = (s: string): string => `\u001B[33m ${s} \u001B[0m`;
3+
/**
 * Wraps a string in ANSI escape sequences so a terminal renders it in yellow.
 *
 * @param s Text to colourise; it is emitted unchanged between the colour and reset codes.
 * @returns `s` prefixed with the SGR code 33 sequence and suffixed with the SGR reset sequence.
 */
export const yellowLog = (s: string): string => {
  const colorOn = '\u001B[33m';
  const colorOff = '\u001B[0m';
  return colorOn + s + colorOff;
};
44

5-
export const redLog = (s: string): string => `\u001B[31m ${s} \u001B[0m`;
5+
/**
 * Wraps a string in ANSI escape sequences so a terminal renders it in red.
 *
 * @param s Text to colourise; it is emitted unchanged between the colour and reset codes.
 * @returns `s` prefixed with the SGR code 31 sequence and suffixed with the SGR reset sequence.
 */
export const redLog = (s: string): string => {
  const colorOn = '\u001B[31m';
  const colorOff = '\u001B[0m';
  return colorOn + s + colorOff;
};

src/file.ts

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,43 @@
1+
import { createHash } from 'node:crypto';
2+
import { readFileSync } from 'node:fs';
13
import path from 'node:path';
24

35
import { globSync } from 'glob';
46

57
import { cmdLine } from './commandLine';
6-
import { redLog } from './consoleColor';
8+
import { redLog, yellowLog } from './consoleColor';
79

8-
export const getFiles = (): string[] => {
9-
let files = globSync('**/*', { cwd: cmdLine.sourcepath, nodir: true });
10-
files = files.filter((filename) => {
10+
/**
 * Groups files whose contents are byte-for-byte identical.
 *
 * @param files Map from filename to that file's raw content.
 * @returns One array of filenames per set of identical files. Files with unique content are
 *   omitted, so the result is empty when there are no duplicates. Filenames inside a group keep
 *   the iteration order of `files`.
 */
const findSimilarFiles = (files: Map<string, Buffer>): string[][] => {
  const filenamesByHash = new Map<string, string[]>();
  for (const [filename, content] of files) {
    // Hash the raw bytes. Decoding to a string first would replace every invalid UTF-8 sequence
    // with U+FFFD, so different binary files could produce the same digest and be reported as
    // duplicates.
    const hash = createHash('md5').update(content).digest('hex');
    const group = filenamesByHash.get(hash);
    if (group) group.push(filename);
    else filenamesByHash.set(hash, [filename]);
  }

  return [...filenamesByHash.values()].filter((filenames) => filenames.length > 1);
};
22+
23+
/**
 * Reads every file found under `cmdLine.sourcepath` into memory.
 *
 * A file with a compression extension is skipped (with a red console message) when the same
 * path without that extension is also present, since it is presumably a pre-compressed copy.
 * Groups of files with identical content are reported with a yellow console message but are
 * still all returned.
 *
 * @returns Map from filename (relative to `cmdLine.sourcepath`, as returned by glob) to file
 *   content. Keys are inserted in sorted order so iteration is deterministic.
 * @throws Whatever `readFileSync` throws if a matched file cannot be read.
 */
export const getFiles = (): Map<string, Buffer> => {
  // '.brottli' is kept for backward compatibility; '.brotli' is the correctly spelled variant.
  const compressedExtensions = ['.gz', '.brottli', '.brotli', '.br'];

  const allFilenames = globSync('**/*', { cwd: cmdLine.sourcepath, nodir: true });
  // Set lookup avoids rescanning the whole list for every compressed candidate.
  const knownFilenames = new Set(allFilenames);

  const filenames = allFilenames
    .filter((filename) => {
      const extension = path.extname(filename);
      if (!compressedExtensions.includes(extension)) return true;

      const original = filename.slice(0, -1 * extension.length);
      if (!knownFilenames.has(original)) return true;

      console.log(redLog(`${filename} skipped because is perhaps a compressed version of ${original}`));
      return false;
    })
    // The Map below iterates in insertion order, so sort here rather than rely on the order
    // in which glob happened to return the files.
    .sort();

  const result = new Map<string, Buffer>();
  for (const filename of filenames)
    result.set(filename, readFileSync(path.join(cmdLine.sourcepath, filename), { flag: 'r' }));

  for (const sameFile of findSimilarFiles(result))
    console.log(yellowLog(`${sameFile.join(', ')} files look like identical`));

  return result;
};

src/index.ts

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* eslint-disable unicorn/prefer-string-replace-all */
22
import { createHash } from 'node:crypto';
3-
import { mkdirSync, readFileSync, writeFileSync } from 'node:fs';
3+
import { mkdirSync, writeFileSync } from 'node:fs';
44
import path from 'node:path';
55
import { gzipSync } from 'node:zlib';
66

@@ -19,18 +19,22 @@ const summary = {
1919

2020
const sources: CppCodeSources = [];
2121
const filesByExtension: ExtensionGroups = [];
22+
23+
console.log('Collecting source files');
2224
const files = getFiles();
23-
if (files.length === 0) {
25+
if (files.size === 0) {
2426
console.error(`Directory ${cmdLine.sourcepath} is empty`);
2527
process.exit(1);
2628
}
2729

28-
const rightPad = files.reduce((p, c) => (c.length > p ? c.length : p), 0);
29-
for (const file of files) {
30-
const mime = lookup(file) || 'text/plain';
30+
console.log();
31+
console.log('Translation to header file');
32+
const longestFilename = [...files.keys()].reduce((p, c) => (c.length > p ? c.length : p), 0);
33+
for (const [originalFilename, content] of files) {
34+
const mime = lookup(originalFilename) || 'text/plain';
3135
summary.filecount++;
3236

33-
const filename = file.replace(/\\/g, '/');
37+
const filename = originalFilename.replace(/\\/g, '/');
3438
const dataname = filename.replace(/[./-]/g, '_');
3539
let extension = path.extname(filename).toUpperCase();
3640
if (extension.startsWith('.')) extension = extension.slice(1);
@@ -39,7 +43,6 @@ for (const file of files) {
3943
if (group) group.count += 1;
4044
else filesByExtension.push({ extension, count: 1 });
4145

42-
const content = readFileSync(path.join(cmdLine.sourcepath, file), { flag: 'r' });
4346
const md5 = createHash('md5').update(content).digest('hex');
4447
summary.size += content.length;
4548
const zipContent = gzipSync(content, { level: 9 });
@@ -58,7 +61,7 @@ for (const file of files) {
5861
});
5962
console.log(
6063
greenLog(
61-
`[${file}] ${' '.repeat(rightPad - file.length)} ✓ gzip used (${content.length} -> ${zipContent.length} = ${zipRatio}%)`
64+
`[${originalFilename}] ${' '.repeat(longestFilename - originalFilename.length)} ✓ gzip used (${content.length} -> ${zipContent.length} = ${zipRatio}%)`
6265
)
6366
);
6467
} else {
@@ -74,7 +77,7 @@ for (const file of files) {
7477
});
7578
console.log(
7679
yellowLog(
77-
`[${file}] ${' '.repeat(rightPad - file.length)} x gzip unused ${content.length <= 1024 ? `(too small) ` : ''}(${content.length} -> ${zipContent.length} = ${zipRatio}%)`
80+
`[${originalFilename}] ${' '.repeat(longestFilename - originalFilename.length)} x gzip unused ${content.length <= 1024 ? `(too small) ` : ''}(${content.length} -> ${zipContent.length} = ${zipRatio}%)`
7881
)
7982
);
8083
}

0 commit comments

Comments
 (0)