diff --git a/.eslintrc.cjs b/.eslintrc.cjs index ab3b90e..45dba77 100644 --- a/.eslintrc.cjs +++ b/.eslintrc.cjs @@ -17,6 +17,12 @@ module.exports = { 'react/react-in-jsx-scope': 'off', 'react/prop-types': 'off', '@typescript-eslint/no-explicit-any': 'off', + 'prettier/prettier': [ + 'error', + { + endOfLine: 'auto', + }, + ], }, // "overrides": [ // { diff --git a/README.md b/README.md index 7a52df2..819bd33 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,8 @@ You will be prompted to enter the name of your project, GitHub url, and select w **Note:** Do not skip entering these values or indexing may not work. +**Prompt Configuration:** You'll find prompt directions specified in `prompts.ts`, with some snippets customizable in `autodoc.config.json`. The current prompts are developer-focused and assume your repo is code-focused. We will have more reference templates in the future. + Run the `index` command: ```bash doc index diff --git a/autodoc.config.json b/autodoc.config.json index 1c60421..328541c 100644 --- a/autodoc.config.json +++ b/autodoc.config.json @@ -20,5 +20,11 @@ "*.mdx", "*.toml", "*autodoc*" - ] + ], + "filePrompt": "Write a detailed technical explanation of what this code does. \n Focus on the high-level purpose of the code and how it may be used in the larger project.\n Include code examples where appropriate. Keep you response between 100 and 300 words. \n DO NOT RETURN MORE THAN 300 WORDS.\n Output should be in markdown format.\n Do not just list the methods and classes in this file.", + "folderPrompt": "Write a technical explanation of what the code in this file does\n and how it might fit into the larger project or work with other parts of the project.\n Give examples of how this code might be used. Include code examples where appropriate.\n Be concise. Include any information that may be relevant to a developer who is curious about this code.\n Keep you response under 400 words. 
Output should be in markdown format.\n Do not just list the files and folders in this folder.", + "chatPrompt": "", + "contentType": "code", + "targetAudience": "smart developer", + "linkHosted": false } \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 2ff7ee5..ebd487b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,15 +1,16 @@ { "name": "@context-labs/autodoc", - "version": "0.0.1", + "version": "0.0.7", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@context-labs/autodoc", - "version": "0.0.1", + "version": "0.0.7", "license": "MIT", "dependencies": { "@dqbd/tiktoken": "^1.0.2", + "@types/istextorbinary": "^2.3.1", "chalk": "^5.2.0", "cli-progress": "^3.12.0", "commander": "^10.0.0", @@ -696,6 +697,14 @@ "rxjs": "^7.2.0" } }, + "node_modules/@types/istextorbinary": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/@types/istextorbinary/-/istextorbinary-2.3.1.tgz", + "integrity": "sha512-Fu3zxViCkMd2oEkwQ1ITv16MCfybykq1VYjoeqLcYjeq3RhWDnwMb+Ang0MP3xwq17kDkCt8XQ7omTJ/1ukSoQ==", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/json-schema": { "version": "7.0.11", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.11.tgz", @@ -783,8 +792,7 @@ "node_modules/@types/node": { "version": "18.15.6", "resolved": "https://registry.npmjs.org/@types/node/-/node-18.15.6.tgz", - "integrity": "sha512-YErOafCZpK4g+Rp3Q/PBgZNAsWKGunQTm9FA3/Pbcm0VCriTEzcrutQ/SxSc0rytAp0NoFWue669jmKhEtd0sA==", - "dev": true + "integrity": "sha512-YErOafCZpK4g+Rp3Q/PBgZNAsWKGunQTm9FA3/Pbcm0VCriTEzcrutQ/SxSc0rytAp0NoFWue669jmKhEtd0sA==" }, "node_modules/@types/semver": { "version": "7.3.13", @@ -5746,6 +5754,14 @@ "rxjs": "^7.2.0" } }, + "@types/istextorbinary": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/@types/istextorbinary/-/istextorbinary-2.3.1.tgz", + "integrity": 
"sha512-Fu3zxViCkMd2oEkwQ1ITv16MCfybykq1VYjoeqLcYjeq3RhWDnwMb+Ang0MP3xwq17kDkCt8XQ7omTJ/1ukSoQ==", + "requires": { + "@types/node": "*" + } + }, "@types/json-schema": { "version": "7.0.11", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.11.tgz", @@ -5826,8 +5842,7 @@ "@types/node": { "version": "18.15.6", "resolved": "https://registry.npmjs.org/@types/node/-/node-18.15.6.tgz", - "integrity": "sha512-YErOafCZpK4g+Rp3Q/PBgZNAsWKGunQTm9FA3/Pbcm0VCriTEzcrutQ/SxSc0rytAp0NoFWue669jmKhEtd0sA==", - "dev": true + "integrity": "sha512-YErOafCZpK4g+Rp3Q/PBgZNAsWKGunQTm9FA3/Pbcm0VCriTEzcrutQ/SxSc0rytAp0NoFWue669jmKhEtd0sA==" }, "@types/semver": { "version": "7.3.13", diff --git a/package.json b/package.json index a662477..9f1d499 100644 --- a/package.json +++ b/package.json @@ -28,6 +28,7 @@ "license": "MIT", "dependencies": { "@dqbd/tiktoken": "^1.0.2", + "@types/istextorbinary": "^2.3.1", "chalk": "^5.2.0", "cli-progress": "^3.12.0", "commander": "^10.0.0", diff --git a/src/cli/commands/estimate/index.ts b/src/cli/commands/estimate/index.ts index a68d748..f96020f 100644 --- a/src/cli/commands/estimate/index.ts +++ b/src/cli/commands/estimate/index.ts @@ -15,6 +15,12 @@ export const estimate = async ({ output, llms, ignore, + filePrompt, + folderPrompt, + chatPrompt, + contentType, + targetAudience, + linkHosted, }: AutodocRepoConfig) => { const json = path.join(output, 'docs', 'json/'); @@ -32,6 +38,12 @@ export const estimate = async ({ output: json, llms, ignore, + filePrompt, + folderPrompt, + chatPrompt, + contentType, + targetAudience, + linkHosted, }, true, ); diff --git a/src/cli/commands/index/convertJsonToMarkdown.ts b/src/cli/commands/index/convertJsonToMarkdown.ts index 747f5d9..e871399 100644 --- a/src/cli/commands/index/convertJsonToMarkdown.ts +++ b/src/cli/commands/index/convertJsonToMarkdown.ts @@ -14,6 +14,11 @@ export const convertJsonToMarkdown = async ({ name: projectName, root: inputRoot, output: outputRoot, + filePrompt: 
filePrompt, + folderPrompt: folderPrompt, + contentType: contentType, + targetAudience: targetAudience, + linkHosted: linkHosted, }: AutodocRepoConfig) => { /** * Count the number of files in the project @@ -27,6 +32,11 @@ export const convertJsonToMarkdown = async ({ return Promise.resolve(); }, ignore: [], + filePrompt, + folderPrompt, + contentType, + targetAudience, + linkHosted, }); /** @@ -83,6 +93,11 @@ export const convertJsonToMarkdown = async ({ projectName, processFile, ignore: [], + filePrompt, + folderPrompt, + contentType, + targetAudience, + linkHosted, }); spinnerSuccess(`Created ${files} mardown files...`); }; diff --git a/src/cli/commands/index/index.ts b/src/cli/commands/index/index.ts index 0bd3959..4ebb969 100644 --- a/src/cli/commands/index/index.ts +++ b/src/cli/commands/index/index.ts @@ -12,6 +12,12 @@ export const index = async ({ output, llms, ignore, + filePrompt, + folderPrompt, + chatPrompt, + contentType, + targetAudience, + linkHosted, }: AutodocRepoConfig) => { const json = path.join(output, 'docs', 'json/'); const markdown = path.join(output, 'docs', 'markdown/'); @@ -30,6 +36,12 @@ export const index = async ({ output: json, llms, ignore, + filePrompt, + folderPrompt, + chatPrompt, + contentType, + targetAudience, + linkHosted, }); updateSpinnerText('Processing repository...'); spinnerSuccess(); @@ -45,6 +57,12 @@ export const index = async ({ output: markdown, llms, ignore, + filePrompt, + folderPrompt, + chatPrompt, + contentType, + targetAudience, + linkHosted, }); spinnerSuccess(); @@ -56,6 +74,12 @@ export const index = async ({ output: data, llms, ignore, + filePrompt, + folderPrompt, + chatPrompt, + contentType, + targetAudience, + linkHosted, }); spinnerSuccess(); }; diff --git a/src/cli/commands/index/processRepository.ts b/src/cli/commands/index/processRepository.ts index faa0791..290a054 100644 --- a/src/cli/commands/index/processRepository.ts +++ b/src/cli/commands/index/processRepository.ts @@ -38,6 +38,11 @@ export 
const processRepository = async ( output: outputRoot, llms, ignore, + filePrompt, + folderPrompt, + contentType, + targetAudience, + linkHosted, }: AutodocRepoConfig, dryRun?: boolean, ) => { @@ -58,19 +63,27 @@ export const processRepository = async ( fileName, filePath, projectName, + contentType, + filePrompt, + targetAudience, + linkHosted, }): Promise => { const content = await fs.readFile(filePath, 'utf-8'); const markdownFilePath = path.join(outputRoot, filePath); - const url = githubFileUrl(repositoryUrl, inputRoot, filePath); + const url = githubFileUrl(repositoryUrl, inputRoot, filePath, linkHosted); const summaryPrompt = createCodeFileSummary( projectName, projectName, content, + contentType, + filePrompt, ); const questionsPrompt = createCodeQuestions( projectName, projectName, content, + contentType, + targetAudience, ); const summaryLength = encoding.encode(summaryPrompt).length; const questionLength = encoding.encode(questionsPrompt).length; @@ -167,7 +180,10 @@ export const processRepository = async ( folderName, folderPath, projectName, + contentType, + folderPrompt, shouldIgnore, + linkHosted, }): Promise => { /** * For now we don't care about folders @@ -179,7 +195,8 @@ export const processRepository = async ( const contents = (await fs.readdir(folderPath)).filter( (fileName) => !shouldIgnore(fileName), ); - const url = githubFolderUrl(repositoryUrl, inputRoot, folderPath); + // eslint-disable-next-line prettier/prettier + const url = githubFolderUrl(repositoryUrl, inputRoot, folderPath, linkHosted); const allFiles: (FileSummary | null)[] = await Promise.all( contents.map(async (fileName) => { const entryPath = path.join(folderPath, fileName); @@ -223,7 +240,14 @@ export const processRepository = async ( ); const summary = await callLLM( - folderSummaryPrompt(folderPath, projectName, files, folders), + folderSummaryPrompt( + folderPath, + projectName, + files, + folders, + contentType, + folderPrompt, + ), models[LLMModels.GPT4].llm, ); @@ -252,7 
+276,7 @@ export const processRepository = async ( }; /** - * Get the numver of files and folderfs in the project + * Get the number of files and folders in the project */ const filesAndFolders = async (): Promise<{ @@ -271,6 +295,11 @@ export const processRepository = async ( return Promise.resolve(); }, ignore, + filePrompt, + folderPrompt, + contentType, + targetAudience, + linkHosted, }), traverseFileSystem({ inputPath: inputRoot, @@ -280,6 +309,11 @@ export const processRepository = async ( return Promise.resolve(); }, ignore, + filePrompt, + folderPrompt, + contentType, + targetAudience, + linkHosted, }), ]); @@ -301,6 +335,11 @@ export const processRepository = async ( projectName, processFile, ignore, + filePrompt, + folderPrompt, + contentType, + targetAudience, + linkHosted, }); spinnerSuccess(`Processing ${files} files...`); @@ -313,6 +352,11 @@ export const processRepository = async ( projectName, processFolder, ignore, + filePrompt, + folderPrompt, + contentType, + targetAudience, + linkHosted, }); spinnerSuccess(`Processing ${folders} folders... `); stopSpinner(); diff --git a/src/cli/commands/index/prompts.ts b/src/cli/commands/index/prompts.ts index 4dd3b32..09787de 100644 --- a/src/cli/commands/index/prompts.ts +++ b/src/cli/commands/index/prompts.ts @@ -4,19 +4,16 @@ export const createCodeFileSummary = ( filePath: string, projectName: string, fileContents: string, + contentType: string, + filePrompt: string, ): string => { return ` - You are acting as a code documentation expert for a project called ${projectName}. - Below is the code from a file located at \`${filePath}\`. - Write a detailed technical explanation of what this code does. - Focus on the high-level purpose of the code and how it may be used in the larger project. - Include code examples where appropriate. Keep you response between 100 and 300 words. - DO NOT RETURN MORE THAN 300 WORDS. - Output should be in markdown format. 
+ You are acting as a ${contentType} documentation expert for a project called ${projectName}. + Below is the ${contentType} from a file located at \`${filePath}\`. + ${filePrompt} Do not say "this file is a part of the ${projectName} project". - Do not just list the methods and classes in this file. - Code: + ${contentType}: ${fileContents} Response: @@ -28,14 +25,16 @@ export const createCodeQuestions = ( filePath: string, projectName: string, fileContents: string, + contentType: string, + targetAudience: string, ): string => { return ` - You are acting as a code documentation expert for a project called ${projectName}. - Below is the code from a file located at \`${filePath}\`. - What are 3 questions that a super smart developer might have about this code? + You are acting as a ${contentType} documentation expert for a project called ${projectName}. + Below is the ${contentType} from a file located at \`${filePath}\`. + What are 3 questions that a ${targetAudience} might have about this ${contentType}? Answer each question in 1-2 sentences. Output should be in markdown format. - Code: + ${contentType}: ${fileContents} Questions and Answers: @@ -48,9 +47,11 @@ export const folderSummaryPrompt = ( projectName: string, files: FileSummary[], folders: FolderSummary[], + contentType: string, + folderPrompt: string, ): string => { return ` - You are acting as a code documentation expert for a project called ${projectName}. + You are acting as a ${contentType} documentation expert for a project called ${projectName}. You are currently documenting the folder located at \`${folderPath}\`. Below is a list of the files in this folder and a summary of the contents of each file: @@ -73,15 +74,8 @@ export const folderSummaryPrompt = ( `; })} - - Write a technical explanation of what the code in this file does - and how it might fit into the larger project or work with other parts of the project. - Give examples of how this code might be used. 
Include code examples where appropriate. - Be concise. Include any information that may be relevant to a developer who is curious about this code. - Keep you response under 400 words. Output should be in markdown format. + ${folderPrompt} Do not say "this file is a part of the ${projectName} project". - Do not just list the files and folders in this folder. - Response: `; diff --git a/src/cli/commands/init/index.ts b/src/cli/commands/init/index.ts index 0aad45f..450ea8a 100644 --- a/src/cli/commands/init/index.ts +++ b/src/cli/commands/init/index.ts @@ -30,6 +30,24 @@ export const makeConfigTemplate = ( '*.toml', '*autodoc*', ], + filePrompt: + 'Write a detailed technical explanation of what this code does. \n\ + Focus on the high-level purpose of the code and how it may be used in the larger project.\n\ + Include code examples where appropriate. Keep you response between 100 and 300 words. \n\ + DO NOT RETURN MORE THAN 300 WORDS.\n\ + Output should be in markdown format.\n\ + Do not just list the methods and classes in this file.', + folderPrompt: + 'Write a technical explanation of what the code in this file does\n\ + and how it might fit into the larger project or work with other parts of the project.\n\ + Give examples of how this code might be used. Include code examples where appropriate.\n\ + Be concise. Include any information that may be relevant to a developer who is curious about this code.\n\ + Keep you response under 400 words. 
Output should be in markdown format.\n\ + Do not just list the files and folders in this folder.', + chatPrompt: '', + contentType: 'code', + targetAudience: 'smart developer', + linkHosted: false, }; }; diff --git a/src/cli/commands/query/createChatChain.ts b/src/cli/commands/query/createChatChain.ts index f626994..7ff19e7 100644 --- a/src/cli/commands/query/createChatChain.ts +++ b/src/cli/commands/query/createChatChain.ts @@ -12,19 +12,21 @@ Chat History: Follow Up Input: {question} Standalone question:`); -const makeQAPrompt = (projectName: string, repositoryUrl: string) => +// eslint-disable-next-line prettier/prettier +const makeQAPrompt = (projectName: string, repositoryUrl: string, contentType: string, chatPrompt: string, targetAudience: string) => PromptTemplate.fromTemplate( - `You are an AI assistant for a software project called ${projectName}. You are trained on all the code that makes up this project. - The code for the project is located at ${repositoryUrl}. -You are given the following extracted parts of a technical summary of files in a codebase and a question. + `You are an AI assistant for a software project called ${projectName}. You are trained on all the ${contentType} that makes up this project. + The ${contentType} for the project is located at ${repositoryUrl}. +You are given the following extracted parts of a technical summary of files in a ${contentType} and a question. Provide a conversational answer with hyperlinks back to GitHub. You should only use hyperlinks that are explicitly listed in the context. Do NOT make up a hyperlink that is not listed. -Include lots of code examples and links to the code examples, where appropriate. -Assume the reader is a technical person but is not deeply familiar with ${projectName}. -Assume th reader does not know anything about how the project is strucuted or which folders/files are provided in the context. 
+Include lots of ${contentType} examples and links to the ${contentType} examples, where appropriate. +${chatPrompt} +Assume the reader is a ${targetAudience} but is not deeply familiar with ${projectName}. +Assume the reader does not know anything about how the project is strucuted or which folders/files are provided in the context. Do not reference the context in your answer. Instead use the context to inform your answer. If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer. -If the question is not about the ${projectName}, politely inform them that you are tuned to only answer questions about the Solana validator. +If the question is not about the ${projectName}, politely inform them that you are tuned to only answer questions about the ${projectName}. Your answer should be at least 100 words and no more than 300 words. Do not include information that is not directly relevant to the question, even if the context includes it. Always include a list of reference links to GitHub from the context. Links should ONLY come from the context. 
@@ -41,6 +43,9 @@ Answer in Markdown:`, export const makeChain = ( projectName: string, repositoryUrl: string, + contentType: string, + chatPrompt: string, + targetAudience: string, vectorstore: HNSWLib, llms: LLMModels[], onTokenStream?: (token: string) => void, @@ -54,7 +59,8 @@ export const makeChain = ( prompt: CONDENSE_PROMPT, }); - const QA_PROMPT = makeQAPrompt(projectName, repositoryUrl); + // eslint-disable-next-line prettier/prettier + const QA_PROMPT = makeQAPrompt(projectName, repositoryUrl, contentType, chatPrompt, targetAudience); const docChain = loadQAChain( new OpenAIChat({ temperature: 0.2, diff --git a/src/cli/commands/query/index.ts b/src/cli/commands/query/index.ts index 42603f9..3a29bf6 100644 --- a/src/cli/commands/query/index.ts +++ b/src/cli/commands/query/index.ts @@ -28,7 +28,7 @@ const clearScreenAndMoveCursorToTop = () => { }; export const query = async ( - { name, repositoryUrl, output }: AutodocRepoConfig, + { name, repositoryUrl, output, contentType, chatPrompt, targetAudience}: AutodocRepoConfig, { llms }: AutodocUserConfig, ) => { const data = path.join(output, 'docs', 'data/'); @@ -36,6 +36,9 @@ export const query = async ( const chain = makeChain( name, repositoryUrl, + contentType, + chatPrompt, + targetAudience, vectorStore, llms, (token: string) => { diff --git a/src/cli/utils/FileUtil.ts b/src/cli/utils/FileUtil.ts index 3beb9ab..dd239e0 100644 --- a/src/cli/utils/FileUtil.ts +++ b/src/cli/utils/FileUtil.ts @@ -16,18 +16,28 @@ export const githubFileUrl = ( githubRoot: string, inputRoot: string, filePath: string, + linkHosted: boolean, ): string => { - return `${githubRoot}/blob/master/${filePath.substring( - inputRoot.length - 1, - )}`; + if (linkHosted) { + return `${githubRoot}/${filePath.substring(inputRoot.length - 1)}`; + } else { + return `${githubRoot}/blob/master/${filePath.substring( + inputRoot.length - 1, + )}`; + } }; export const githubFolderUrl = ( githubRoot: string, inputRoot: string, folderPath: string, + 
linkHosted: boolean, ): string => { - return `${githubRoot}/tree/master/${folderPath.substring( - inputRoot.length - 1, - )}`; + if (linkHosted) { + return `${githubRoot}/${folderPath.substring(inputRoot.length - 1)}`; + } else { + return `${githubRoot}/tree/master/${folderPath.substring( + inputRoot.length - 1, + )}`; + } }; diff --git a/src/cli/utils/traverseFileSystem.ts b/src/cli/utils/traverseFileSystem.ts index 14669b6..f2e15a7 100644 --- a/src/cli/utils/traverseFileSystem.ts +++ b/src/cli/utils/traverseFileSystem.ts @@ -8,8 +8,8 @@ export const traverseFileSystem = async ( params: TraverseFileSystemParams, ): Promise => { try { - const { inputPath, projectName, processFile, processFolder, ignore } = - params; + // eslint-disable-next-line prettier/prettier + const { inputPath, projectName, processFile, processFolder, ignore, filePrompt, folderPrompt, contentType, targetAudience, linkHosted } = params; try { await fs.access(inputPath); @@ -40,6 +40,10 @@ export const traverseFileSystem = async ( folderPath, projectName, shouldIgnore, + folderPrompt, + contentType, + targetAudience, + linkHosted, }); } }), @@ -55,6 +59,10 @@ export const traverseFileSystem = async ( fileName, filePath, projectName, + filePrompt, + contentType, + targetAudience, + linkHosted, }); } }), diff --git a/src/types.ts b/src/types.ts index a8f5e96..eb85968 100644 --- a/src/types.ts +++ b/src/types.ts @@ -11,6 +11,12 @@ export type AutodocRepoConfig = { output: string; llms: LLMModels[]; ignore: string[]; + filePrompt: string; + folderPrompt: string; + chatPrompt: string; + contentType: string; + targetAudience: string; + linkHosted: boolean; }; export type FileSummary = { @@ -25,6 +31,10 @@ export type ProcessFileParams = { fileName: string; filePath: string; projectName: string; + contentType: string; + filePrompt: string; + targetAudience: string; + linkHosted: boolean; }; export type ProcessFile = (params: ProcessFileParams) => Promise; @@ -43,6 +53,10 @@ export type 
ProcessFolderParams = { folderName: string; folderPath: string; projectName: string; + contentType: string; + folderPrompt: string; + targetAudience: string; + linkHosted: boolean; shouldIgnore: (fileName: string) => boolean; }; @@ -54,6 +68,11 @@ export type TraverseFileSystemParams = { processFile?: ProcessFile; processFolder?: ProcessFolder; ignore: string[]; + filePrompt: string; + folderPrompt: string; + contentType: string; + targetAudience: string; + linkHosted: boolean; }; export enum LLMModels {