|
1 | 1 | import fs from 'node:fs/promises';
|
2 | 2 | import path from 'node:path';
|
| 3 | +import { Md5 } from 'ts-md5'; |
3 | 4 | import { OpenAIChat } from 'langchain/llms';
|
4 | 5 | import { encoding_for_model } from '@dqbd/tiktoken';
|
5 | 6 | import { APIRateLimit } from '../../utils/APIRateLimit.js';
|
@@ -69,6 +70,19 @@ export const processRepository = async (
|
69 | 70 | linkHosted,
|
70 | 71 | }): Promise<void> => {
|
71 | 72 | const content = await fs.readFile(filePath, 'utf-8');
|
| 73 | + |
| 74 | + //calculate the hash of the file |
| 75 | + const newChecksum = await calculateChecksum(filePath, [content]); |
| 76 | + |
| 77 | + //if an existing summary.json file exists, it will check the checksums and decide if a reindex is needed |
| 78 | + const reindex = await reindexCheck( |
| 79 | + path.join(outputRoot, filePath), |
| 80 | + newChecksum, |
| 81 | + ); |
| 82 | + if (!reindex) { |
| 83 | + return; |
| 84 | + } |
| 85 | + |
72 | 86 | const markdownFilePath = path.join(outputRoot, filePath);
|
73 | 87 | const url = githubFileUrl(repositoryUrl, inputRoot, filePath, linkHosted);
|
74 | 88 | const summaryPrompt = createCodeFileSummary(
|
@@ -140,6 +154,7 @@ export const processRepository = async (
|
140 | 154 | url,
|
141 | 155 | summary,
|
142 | 156 | questions,
|
| 157 | + checksum: newChecksum, |
143 | 158 | };
|
144 | 159 |
|
145 | 160 | const outputPath = getFileName(markdownFilePath, '.', '.json');
|
@@ -195,6 +210,16 @@ export const processRepository = async (
|
195 | 210 | const contents = (await fs.readdir(folderPath)).filter(
|
196 | 211 | (fileName) => !shouldIgnore(fileName),
|
197 | 212 | );
|
| 213 | + |
| 214 | + //get the checksum of all the files in the folder |
| 215 | + const newChecksum = await calculateChecksum(folderPath, contents); |
| 216 | + |
| 217 | + //if an existing summary.json file exists, it will check the checksums and decide if a reindex is needed |
| 218 | + const reindex = await reindexCheck(folderPath, newChecksum); |
| 219 | + if (!reindex) { |
| 220 | + return; |
| 221 | + } |
| 222 | + |
198 | 223 | // eslint-disable-next-line prettier/prettier
|
199 | 224 | const url = githubFolderUrl(repositoryUrl, inputRoot, folderPath, linkHosted);
|
200 | 225 | const allFiles: (FileSummary | null)[] = await Promise.all(
|
@@ -259,6 +284,7 @@ export const processRepository = async (
|
259 | 284 | folders: folders.filter(Boolean),
|
260 | 285 | summary,
|
261 | 286 | questions: '',
|
| 287 | + checksum: newChecksum, |
262 | 288 | };
|
263 | 289 |
|
264 | 290 | const outputPath = path.join(folderPath, 'summary.json');
|
@@ -366,3 +392,51 @@ export const processRepository = async (
|
366 | 392 | */
|
367 | 393 | return models;
|
368 | 394 | };
|
| 395 | + |
/**
 * Computes a combined MD5 checksum over a set of files in one folder.
 *
 * Each entry of `contents` is resolved against `folderPath`, read as UTF-8
 * and hashed. The per-file hashes are concatenated in the order given and
 * the concatenation is hashed once more to produce the result.
 *
 * Two kinds of entries are skipped:
 *  - anything that is not a regular file (e.g. a sub-directory), because
 *    `fs.readFile` would reject with EISDIR on it;
 *  - `summary.json`, because `reindexCheck` reads the previously stored
 *    checksum from that file. Hashing it would make the checksum depend on
 *    its own earlier value, so it could never match.
 *
 * @param folderPath - Directory that the entries in `contents` live in.
 * @param contents - File names relative to `folderPath`.
 * @returns Hex MD5 digest of the concatenated per-file MD5 digests.
 * @throws Rejects with the underlying `fs` error if an entry cannot be
 *   stat'ed or read.
 */
async function calculateChecksum(
  folderPath: string,
  contents: string[],
): Promise<string> {
  const checksums: string[] = [];
  for (const fileName of contents) {
    // The summary file stores the checksum itself; never feed it back in.
    if (fileName === 'summary.json') {
      continue;
    }

    const filePath = path.join(folderPath, fileName);

    // Directory listings may contain sub-folders; only regular files are hashed.
    const stats = await fs.stat(filePath);
    if (!stats.isFile()) {
      continue;
    }

    const fileData = await fs.readFile(filePath, 'utf-8');
    checksums.push(Md5.hashStr(fileData));
  }

  // Hash of hashes: keeps the result fixed-length regardless of file count.
  return Md5.hashStr(checksums.join(''));
}
| 412 | + |
/**
 * Decides whether a file or folder has to be re-indexed.
 *
 * Looks for `summary.json` directly inside `fileOrFolderPath` and compares
 * the `checksum` field stored there with `newChecksum`.
 *
 * @param fileOrFolderPath - Directory expected to contain `summary.json`.
 * @param newChecksum - Checksum computed from the current contents.
 * @returns `false` only when a readable, well-formed summary stores exactly
 *   `newChecksum`. Returns `true` in every other case: no summary, an
 *   unreadable or malformed summary, or a different checksum.
 */
async function reindexCheck(
  fileOrFolderPath: string,
  newChecksum: string,
): Promise<boolean> {
  const summaryPath = path.join(fileOrFolderPath, 'summary.json');

  let oldChecksum: unknown;
  try {
    // Read in one step instead of access() + readFile(): avoids a race
    // between the existence check and the read.
    const fileContents = await fs.readFile(summaryPath, 'utf8');
    const parsed = JSON.parse(fileContents) as { checksum?: unknown } | null;
    oldChecksum = parsed?.checksum;
  } catch {
    // Best effort: a missing, unreadable or corrupt summary simply means
    // there is nothing to compare against, so a fresh one is generated.
    return true;
  }

  if (oldChecksum === newChecksum) {
    console.log(`Skipping ${fileOrFolderPath} because it has not changed`);
    return false;
  }
  return true;
}
0 commit comments