Skip to content

Commit 55411ce

Browse files
base logic
1 parent badefed commit 55411ce

File tree

5 files changed

+97
-4
lines changed

5 files changed

+97
-4
lines changed

package-lock.json

Lines changed: 17 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@
4040
"marked": "^4.3.0",
4141
"marked-terminal": "^5.1.1",
4242
"minimatch": "^7.4.3",
43-
"ora": "^6.2.0"
43+
"ora": "^6.2.0",
44+
"ts-md5": "^1.3.1"
4445
},
4546
"devDependencies": {
4647
"@types/commander": "^2.12.2",

src/cli/commands/index/processRepository.ts

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import fs from 'node:fs/promises';
22
import path from 'node:path';
3+
import { Md5 } from 'ts-md5';
34
import { OpenAIChat } from 'langchain/llms';
45
import { encoding_for_model } from '@dqbd/tiktoken';
56
import { APIRateLimit } from '../../utils/APIRateLimit.js';
@@ -69,6 +70,19 @@ export const processRepository = async (
6970
linkHosted,
7071
}): Promise<void> => {
7172
const content = await fs.readFile(filePath, 'utf-8');
73+
74+
//calculate the hash of the file
75+
const newChecksum = await calculateChecksum(filePath, [content]);
76+
77+
// If a summary.json file already exists, compare its stored checksum with the new one to decide whether a reindex is needed
78+
const reindex = await reindexCheck(
79+
path.join(outputRoot, filePath),
80+
newChecksum,
81+
);
82+
if (!reindex) {
83+
return;
84+
}
85+
7286
const markdownFilePath = path.join(outputRoot, filePath);
7387
const url = githubFileUrl(repositoryUrl, inputRoot, filePath, linkHosted);
7488
const summaryPrompt = createCodeFileSummary(
@@ -140,6 +154,7 @@ export const processRepository = async (
140154
url,
141155
summary,
142156
questions,
157+
checksum: newChecksum,
143158
};
144159

145160
const outputPath = getFileName(markdownFilePath, '.', '.json');
@@ -195,6 +210,16 @@ export const processRepository = async (
195210
const contents = (await fs.readdir(folderPath)).filter(
196211
(fileName) => !shouldIgnore(fileName),
197212
);
213+
214+
//get the checksum of all the files in the folder
215+
const newChecksum = await calculateChecksum(folderPath, contents);
216+
217+
// If a summary.json file already exists, compare its stored checksum with the new one to decide whether a reindex is needed
218+
const reindex = await reindexCheck(folderPath, newChecksum);
219+
if (!reindex) {
220+
return;
221+
}
222+
198223
// eslint-disable-next-line prettier/prettier
199224
const url = githubFolderUrl(repositoryUrl, inputRoot, folderPath, linkHosted);
200225
const allFiles: (FileSummary | null)[] = await Promise.all(
@@ -259,6 +284,7 @@ export const processRepository = async (
259284
folders: folders.filter(Boolean),
260285
summary,
261286
questions: '',
287+
checksum: newChecksum,
262288
};
263289

264290
const outputPath = path.join(folderPath, 'summary.json');
@@ -366,3 +392,51 @@ export const processRepository = async (
366392
*/
367393
return models;
368394
};
395+
396+
/**
 * Builds a single MD5 checksum for a set of directory entries.
 *
 * Every name in `contents` is resolved against `folderPath`, read as UTF-8
 * text and hashed. The per-entry hashes are concatenated in the order given
 * and hashed once more, so the result depends on the order of `contents`.
 *
 * Two kinds of entries get special treatment:
 * - `summary.json` is skipped. The folder summary (which stores this very
 *   checksum) is written under that name into the same folder, so hashing it
 *   would make the next run's checksum differ from the stored one.
 * - Sub-directories cannot be read as text; instead of failing with `EISDIR`
 *   they contribute the hash of a `dir:<name>` marker. A change inside a
 *   sub-directory therefore does NOT alter the parent's checksum.
 *
 * NOTE(review): the per-file caller passes the file's text content as the
 * single element of `contents`; that is joined onto `folderPath` as if it were
 * a file name and will not resolve to a real path — confirm and fix the caller.
 *
 * @param folderPath - Directory the entry names are relative to.
 * @param contents - Entry names inside `folderPath`.
 * @returns The digest produced by `Md5.hashStr` over the joined entry digests.
 * @throws Re-throws any read error other than `EISDIR`.
 */
async function calculateChecksum(
  folderPath: string,
  contents: string[],
): Promise<string> {
  const checksums: string[] = [];

  for (const fileName of contents) {
    // The generated summary holds the previous checksum; never feed it back in.
    if (fileName === 'summary.json') {
      continue;
    }

    const entryPath = path.join(folderPath, fileName);

    let entryData: string;
    try {
      entryData = await fs.readFile(entryPath, 'utf-8');
    } catch (error: unknown) {
      const code =
        typeof error === 'object' && error !== null
          ? (error as { code?: unknown }).code
          : undefined;
      if (code !== 'EISDIR') {
        throw error;
      }
      // Directories have no text content; fold in their name so that adding
      // or removing a sub-directory still changes the checksum.
      entryData = `dir:${fileName}`;
    }

    checksums.push(Md5.hashStr(entryData));
  }

  return Md5.hashStr(checksums.join(''));
}
412+
413+
/**
 * Decides whether a path has to be (re)indexed by comparing a freshly
 * computed checksum with the one stored in a previously written summary file.
 *
 * The summary is read directly rather than probed with `access` first, so
 * there is no window between the existence check and the read. Any failure to
 * obtain a stored checksum (file missing or unreadable, invalid JSON, JSON
 * that is not an object, no `checksum` field) is treated as "no usable
 * summary" and results in a reindex instead of an exception.
 *
 * NOTE(review): the per-file caller passes `path.join(outputRoot, filePath)`
 * here, while per-file summaries are written to
 * `getFileName(markdownFilePath, '.', '.json')`; with the default file name
 * the stored summary is presumably never found for files — confirm and pass
 * the right directory / `summaryFileName` from that caller.
 *
 * @param fileOrFolderPath - Directory expected to contain the summary file.
 * @param newChecksum - Checksum of the current content.
 * @param summaryFileName - Name of the summary file inside
 *   `fileOrFolderPath`; defaults to `summary.json`.
 * @returns `false` when the stored checksum equals `newChecksum` (nothing to
 *   do), `true` otherwise.
 */
async function reindexCheck(
  fileOrFolderPath: string,
  newChecksum: string,
  summaryFileName = 'summary.json',
): Promise<boolean> {
  const summaryPath = path.join(fileOrFolderPath, summaryFileName);

  let rawSummary: string;
  try {
    rawSummary = await fs.readFile(summaryPath, 'utf8');
  } catch {
    // No readable summary yet: one has to be generated.
    return true;
  }

  let oldChecksum: unknown;
  try {
    const parsed: unknown = JSON.parse(rawSummary);
    if (typeof parsed === 'object' && parsed !== null) {
      oldChecksum = (parsed as { checksum?: unknown }).checksum;
    }
  } catch {
    // A corrupt summary cannot vouch for anything; regenerate it.
    return true;
  }

  if (oldChecksum === newChecksum) {
    console.log(`Skipping ${fileOrFolderPath} because it has not changed`);
    return false;
  }

  return true;
}

src/cli/utils/traverseFileSystem.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ export const traverseFileSystem = async (
3636
await dfs(folderPath);
3737

3838
await processFolder?.({
39+
inputPath,
3940
folderName,
4041
folderPath,
4142
projectName,

src/types.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ export type FileSummary = {
2525
url: string;
2626
summary: string;
2727
questions: string;
28+
checksum: string;
2829
};
2930

3031
export type ProcessFileParams = {
@@ -47,9 +48,11 @@ export type FolderSummary = {
4748
folders: FolderSummary[];
4849
summary: string;
4950
questions: string;
51+
checksum: string;
5052
};
5153

5254
export type ProcessFolderParams = {
55+
inputPath: string;
5356
folderName: string;
5457
folderPath: string;
5558
projectName: string;

0 commit comments

Comments
 (0)