Skip to content

Display name improvements #259

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .vscode/sourcebot.code-workspace
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"folders": [
{
"path": ".."
},
{
"path": "../vendor/zoekt"
}
],
"settings": {
"files.associations": {
"*.json": "jsonc",
"index.json": "json"
}
}
}
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixes
- Change connection manager upsert timeout to 5 minutes
- Fix issue with repo display names being poorly formatted, especially for Gerrit. ([#259](https://github.com/sourcebot-dev/sourcebot/pull/259))

## [3.0.1] - 2025-04-01

Expand Down
28 changes: 21 additions & 7 deletions packages/backend/src/git.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,15 @@
import { simpleGit, SimpleGitProgressEvent } from 'simple-git';

export const cloneRepository = async (cloneURL: string, path: string, gitConfig?: Record<string, string>, onProgress?: (event: SimpleGitProgressEvent) => void) => {
export const cloneRepository = async (cloneURL: string, path: string, onProgress?: (event: SimpleGitProgressEvent) => void) => {
const git = simpleGit({
progress: onProgress,
});

const configParams = Object.entries(gitConfig ?? {}).flatMap(
([key, value]) => ['--config', `${key}=${value}`]
);

try {
await git.clone(
cloneURL,
path,
[
"--bare",
...configParams
]
);

Expand Down Expand Up @@ -48,6 +42,26 @@ export const fetchRepository = async (path: string, onProgress?: (event: SimpleG
}
}

/**
 * Applies the gitConfig to the repo at the given path. Note that this will
 * override the values for any existing keys, and append new values for keys
 * that do not exist yet. It will _not_ remove any existing keys that are not
 * present in gitConfig.
 *
 * Keys are written one at a time, in the iteration order of `gitConfig`. If
 * writing a key fails, keys written before it remain applied.
 *
 * @param path - Working directory of the repo whose config is updated.
 * @param gitConfig - Key-value pairs to write via `git.addConfig`.
 * @param onProgress - Optional progress callback forwarded to simple-git.
 * @throws Error when a key cannot be written. The message names the repo
 * path, the offending key, and the underlying failure reason.
 */
export const upsertGitConfig = async (path: string, gitConfig: Record<string, string>, onProgress?: (event: SimpleGitProgressEvent) => void) => {
    const git = simpleGit({
        progress: onProgress,
    }).cwd(path);

    for (const [key, value] of Object.entries(gitConfig)) {
        try {
            await git.addConfig(key, value);
        } catch (error: unknown) {
            // Keep the underlying reason: without it, the caller cannot tell
            // which key failed or why (missing repo, invalid key, etc).
            const reason = error instanceof Error ? error.message : String(error);
            throw new Error(`Failed to set git config ${path}: could not set '${key}' (${reason})`);
        }
    }
}

export const getBranches = async (path: string) => {
const git = simpleGit();
const branches = await git.cwd({
Expand Down
77 changes: 50 additions & 27 deletions packages/backend/src/repoCompileUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { WithRequired } from "./types.js"
import { marshalBool } from "./utils.js";
import { GerritConnectionConfig, GiteaConnectionConfig, GitlabConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
import { RepoMetadata } from './types.js';
import path from 'path';

export type RepoData = WithRequired<Prisma.RepoCreateInput, 'connections'>;

Expand All @@ -29,10 +30,13 @@ export const compileGithubConfig = async (
const notFound = gitHubReposResult.notFound;

const hostUrl = config.url ?? 'https://github.com';
const hostname = new URL(hostUrl).hostname;
const repoNameRoot = new URL(hostUrl)
.toString()
.replace(/^https?:\/\//, '');

const repos = gitHubRepos.map((repo) => {
const repoName = `${hostname}/${repo.full_name}`;
const repoDisplayName = repo.full_name;
const repoName = path.join(repoNameRoot, repoDisplayName);
const cloneUrl = new URL(repo.clone_url!);

const record: RepoData = {
Expand All @@ -42,6 +46,7 @@ export const compileGithubConfig = async (
cloneUrl: cloneUrl.toString(),
webUrl: repo.html_url,
name: repoName,
displayName: repoDisplayName,
imageUrl: repo.owner.avatar_url,
isFork: repo.fork,
isArchived: !!repo.archived,
Expand All @@ -67,6 +72,7 @@ export const compileGithubConfig = async (
'zoekt.archived': marshalBool(repo.archived),
'zoekt.fork': marshalBool(repo.fork),
'zoekt.public': marshalBool(repo.private === false),
'zoekt.display-name': repoDisplayName,
},
branches: config.revisions?.branches ?? undefined,
tags: config.revisions?.tags ?? undefined,
Expand All @@ -93,13 +99,16 @@ export const compileGitlabConfig = async (
const notFound = gitlabReposResult.notFound;

const hostUrl = config.url ?? 'https://gitlab.com';
const hostname = new URL(hostUrl).hostname;

const repoNameRoot = new URL(hostUrl)
.toString()
.replace(/^https?:\/\//, '');

const repos = gitlabRepos.map((project) => {
const projectUrl = `${hostUrl}/${project.path_with_namespace}`;
const cloneUrl = new URL(project.http_url_to_repo);
const isFork = project.forked_from_project !== undefined;
const repoName = `${hostname}/${project.path_with_namespace}`;
const repoDisplayName = project.path_with_namespace;
const repoName = path.join(repoNameRoot, repoDisplayName);

const record: RepoData = {
external_id: project.id.toString(),
Expand All @@ -108,6 +117,7 @@ export const compileGitlabConfig = async (
cloneUrl: cloneUrl.toString(),
webUrl: projectUrl,
name: repoName,
displayName: repoDisplayName,
imageUrl: project.avatar_url,
isFork: isFork,
isArchived: !!project.archived,
Expand All @@ -130,7 +140,8 @@ export const compileGitlabConfig = async (
'zoekt.gitlab-forks': (project.forks_count ?? 0).toString(),
'zoekt.archived': marshalBool(project.archived),
'zoekt.fork': marshalBool(isFork),
'zoekt.public': marshalBool(project.private === false)
'zoekt.public': marshalBool(project.private === false),
'zoekt.display-name': repoDisplayName,
},
branches: config.revisions?.branches ?? undefined,
tags: config.revisions?.tags ?? undefined,
Expand All @@ -157,11 +168,14 @@ export const compileGiteaConfig = async (
const notFound = giteaReposResult.notFound;

const hostUrl = config.url ?? 'https://gitea.com';
const hostname = new URL(hostUrl).hostname;
const repoNameRoot = new URL(hostUrl)
.toString()
.replace(/^https?:\/\//, '');

const repos = giteaRepos.map((repo) => {
const cloneUrl = new URL(repo.clone_url!);
const repoName = `${hostname}/${repo.full_name!}`;
const repoDisplayName = repo.full_name!;
const repoName = path.join(repoNameRoot, repoDisplayName);

const record: RepoData = {
external_id: repo.id!.toString(),
Expand All @@ -170,6 +184,7 @@ export const compileGiteaConfig = async (
cloneUrl: cloneUrl.toString(),
webUrl: repo.html_url,
name: repoName,
displayName: repoDisplayName,
imageUrl: repo.owner?.avatar_url,
isFork: repo.fork!,
isArchived: !!repo.archived,
Expand All @@ -191,6 +206,7 @@ export const compileGiteaConfig = async (
'zoekt.archived': marshalBool(repo.archived),
'zoekt.fork': marshalBool(repo.fork!),
'zoekt.public': marshalBool(repo.internal === false && repo.private === false),
'zoekt.display-name': repoDisplayName,
},
branches: config.revisions?.branches ?? undefined,
tags: config.revisions?.tags ?? undefined,
Expand All @@ -212,35 +228,41 @@ export const compileGerritConfig = async (
orgId: number) => {

const gerritRepos = await getGerritReposFromConfig(config);
const hostUrl = (config.url ?? 'https://gerritcodereview.com').replace(/\/$/, ''); // Remove trailing slash
const hostname = new URL(hostUrl).hostname;
const hostUrl = config.url;
const repoNameRoot = new URL(hostUrl)
.toString()
.replace(/^https?:\/\//, '');

const repos = gerritRepos.map((project) => {
const repoId = `${hostname}/${project.name}`;
const cloneUrl = new URL(`${config.url}/${encodeURIComponent(project.name)}`);
const cloneUrl = new URL(path.join(hostUrl, encodeURIComponent(project.name)));
const repoDisplayName = project.name;
const repoName = path.join(repoNameRoot, repoDisplayName);

let webUrl = "https://www.gerritcodereview.com/";
// Gerrit projects can have multiple web links; use the first one
if (project.web_links) {
const webLink = project.web_links[0];
if (webLink) {
webUrl = webLink.url;
const webUrl = (() => {
if (!project.web_links || project.web_links.length === 0) {
return null;
}
}

// Handle case where webUrl is just a gitiles path
// https://github.com/GerritCodeReview/plugins_gitiles/blob/5ee7f57/src/main/java/com/googlesource/gerrit/plugins/gitiles/GitilesWeblinks.java#L50
if (webUrl.startsWith('/plugins/gitiles/')) {
webUrl = `${hostUrl}${webUrl}`;
}
const webLink = project.web_links[0];
const webUrl = webLink.url;

// Handle case where webUrl is just a gitiles path
// https://github.com/GerritCodeReview/plugins_gitiles/blob/5ee7f57/src/main/java/com/googlesource/gerrit/plugins/gitiles/GitilesWeblinks.java#L50
if (webUrl.startsWith('/plugins/gitiles/')) {
return path.join(hostUrl, webUrl);
} else {
return webUrl;
}
})();

const record: RepoData = {
external_id: project.id.toString(),
external_codeHostType: 'gerrit',
external_codeHostUrl: hostUrl,
cloneUrl: cloneUrl.toString(),
webUrl: webUrl,
name: project.name,
name: repoName,
displayName: repoDisplayName,
isFork: false,
isArchived: false,
org: {
Expand All @@ -256,11 +278,12 @@ export const compileGerritConfig = async (
metadata: {
gitConfig: {
'zoekt.web-url-type': 'gitiles',
'zoekt.web-url': webUrl,
'zoekt.name': repoId,
'zoekt.web-url': webUrl ?? '',
'zoekt.name': repoName,
'zoekt.archived': marshalBool(false),
'zoekt.fork': marshalBool(false),
'zoekt.public': marshalBool(true),
'zoekt.display-name': repoDisplayName,
},
} satisfies RepoMetadata,
};
Expand Down
16 changes: 11 additions & 5 deletions packages/backend/src/repoManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ import { Redis } from 'ioredis';
import { createLogger } from "./logger.js";
import { Connection, PrismaClient, Repo, RepoToConnection, RepoIndexingStatus, StripeSubscriptionStatus } from "@sourcebot/db";
import { GithubConnectionConfig, GitlabConnectionConfig, GiteaConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
import { AppContext, Settings, RepoMetadata } from "./types.js";
import { AppContext, Settings, repoMetadataSchema } from "./types.js";
import { getRepoPath, getTokenFromConfig, measure, getShardPrefix } from "./utils.js";
import { cloneRepository, fetchRepository } from "./git.js";
import { cloneRepository, fetchRepository, upsertGitConfig } from "./git.js";
import { existsSync, readdirSync, promises } from 'fs';
import { indexGitRepository } from "./zoekt.js";
import { PromClient } from './promClient.js';
Expand Down Expand Up @@ -200,8 +200,7 @@ export class RepoManager implements IRepoManager {
let cloneDuration_s: number | undefined = undefined;

const repoPath = getRepoPath(repo, this.ctx);
const metadata = repo.metadata as RepoMetadata;

const metadata = repoMetadataSchema.parse(repo.metadata);

// If the repo was already in the indexing state, this job was likely killed and picked up again. As a result,
// to ensure the repo state is valid, we delete the repo if it exists so we get a fresh clone
Expand Down Expand Up @@ -240,7 +239,7 @@ export class RepoManager implements IRepoManager {
}
}

const { durationMs } = await measure(() => cloneRepository(cloneUrl.toString(), repoPath, metadata.gitConfig, ({ method, stage, progress }) => {
const { durationMs } = await measure(() => cloneRepository(cloneUrl.toString(), repoPath, ({ method, stage, progress }) => {
this.logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.id}`)
}));
cloneDuration_s = durationMs / 1000;
Expand All @@ -249,6 +248,13 @@ export class RepoManager implements IRepoManager {
this.logger.info(`Cloned ${repo.id} in ${cloneDuration_s}s`);
}

// Regardless of clone or fetch, always upsert the git config for the repo.
// This ensures that the git config is always up to date for whatever we
// have in the DB.
if (metadata.gitConfig) {
await upsertGitConfig(repoPath, metadata.gitConfig);
}

this.logger.info(`Indexing ${repo.id}...`);
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, this.ctx));
const indexDuration_s = durationMs / 1000;
Expand Down
21 changes: 13 additions & 8 deletions packages/backend/src/types.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Settings as SettingsSchema } from "@sourcebot/schemas/v3/index.type";
import { z } from "zod";

export type AppContext = {
/**
Expand All @@ -16,28 +17,32 @@ export type AppContext = {

export type Settings = Required<SettingsSchema>;

/**
* Structure of the `metadata` field in the `Repo` table.
*/
export type RepoMetadata = {
// Structure of the `metadata` field in the `Repo` table.
//
// @WARNING: If you modify this schema, please make sure it is backwards
// compatible with any prior versions of the schema!!
// @NOTE: If you move this schema, please update the comment in schema.prisma
// to point to the new location.
export const repoMetadataSchema = z.object({
/**
* A set of key-value pairs that will be applied as git config
* variables on the repo after it has been cloned or fetched.
* @see: https://git-scm.com/docs/git-clone#Documentation/git-clone.txt-code--configcodecodeltkeygtltvaluegtcode
*/
gitConfig?: Record<string, string>;
gitConfig: z.record(z.string(), z.string()).optional(),

/**
* A list of branches to index. Glob patterns are supported.
*/
branches?: string[];
branches: z.array(z.string()).optional(),

/**
* A list of tags to index. Glob patterns are supported.
*/
tags?: string[];
}
tags: z.array(z.string()).optional(),
});

export type RepoMetadata = z.infer<typeof repoMetadataSchema>;

// @see : https://stackoverflow.com/a/61132308
export type DeepPartial<T> = T extends object ? {
Expand Down
16 changes: 13 additions & 3 deletions packages/backend/src/zoekt.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { exec } from "child_process";
import { AppContext, RepoMetadata, Settings } from "./types.js";
import { AppContext, repoMetadataSchema, Settings } from "./types.js";
import { Repo } from "@sourcebot/db";
import { getRepoPath } from "./utils.js";
import { getShardPrefix } from "./utils.js";
Expand All @@ -17,7 +17,7 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, ctx: Ap

const repoPath = getRepoPath(repo, ctx);
const shardPrefix = getShardPrefix(repo.orgId, repo.id);
const metadata = repo.metadata as RepoMetadata;
const metadata = repoMetadataSchema.parse(repo.metadata);

if (metadata.branches) {
const branchGlobs = metadata.branches
Expand Down Expand Up @@ -57,7 +57,17 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, ctx: Ap
revisions = revisions.slice(0, 64);
}

const command = `zoekt-git-index -allow_missing_branches -index ${ctx.indexPath} -max_trigram_count ${settings.maxTrigramCount} -file_limit ${settings.maxFileSize} -branches ${revisions.join(',')} -tenant_id ${repo.orgId} -shard_prefix ${shardPrefix} ${repoPath}`;
const command = [
'zoekt-git-index',
'-allow_missing_branches',
`-index ${ctx.indexPath}`,
`-max_trigram_count ${settings.maxTrigramCount}`,
`-file_limit ${settings.maxFileSize}`,
`-branches ${revisions.join(',')}`,
`-tenant_id ${repo.orgId}`,
`-shard_prefix ${shardPrefix}`,
repoPath
].join(' ');

return new Promise<{ stdout: string, stderr: string }>((resolve, reject) => {
exec(command, (error, stdout, stderr) => {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- AlterTable
ALTER TABLE "Repo" ADD COLUMN "displayName" TEXT;
Loading