Skip to content

Commit ae839b0

Browse files
authored
[clangd] [C++20] [Modules] Add scanning cache (#125988)
Previously, every time we wanted to find the source file declaring a specific module, we had to scan the whole project again and again, which made performance very poor. This patch improves the situation by introducing a simple cache.
1 parent a522c22 commit ae839b0

File tree

4 files changed

+164
-20
lines changed

4 files changed

+164
-20
lines changed

clang-tools-extra/clangd/ModulesBuilder.cpp

Lines changed: 89 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -357,19 +357,89 @@ void ModuleFileCache::remove(StringRef ModuleName) {
357357
ModuleFiles.erase(ModuleName);
358358
}
359359

360+
class ModuleNameToSourceCache {
361+
public:
362+
std::string getSourceForModuleName(llvm::StringRef ModuleName) {
363+
std::lock_guard<std::mutex> Lock(CacheMutex);
364+
auto Iter = ModuleNameToSourceCache.find(ModuleName);
365+
if (Iter != ModuleNameToSourceCache.end())
366+
return Iter->second;
367+
return "";
368+
}
369+
370+
void addEntry(llvm::StringRef ModuleName, PathRef Source) {
371+
std::lock_guard<std::mutex> Lock(CacheMutex);
372+
ModuleNameToSourceCache[ModuleName] = Source.str();
373+
}
374+
375+
void eraseEntry(llvm::StringRef ModuleName) {
376+
std::lock_guard<std::mutex> Lock(CacheMutex);
377+
ModuleNameToSourceCache.erase(ModuleName);
378+
}
379+
380+
private:
381+
std::mutex CacheMutex;
382+
llvm::StringMap<std::string> ModuleNameToSourceCache;
383+
};
384+
385+
class CachingProjectModules : public ProjectModules {
386+
public:
387+
CachingProjectModules(std::unique_ptr<ProjectModules> MDB,
388+
ModuleNameToSourceCache &Cache)
389+
: MDB(std::move(MDB)), Cache(Cache) {
390+
assert(this->MDB && "CachingProjectModules should only be created with a "
391+
"valid underlying ProjectModules");
392+
}
393+
394+
std::vector<std::string> getRequiredModules(PathRef File) override {
395+
return MDB->getRequiredModules(File);
396+
}
397+
398+
std::string getModuleNameForSource(PathRef File) override {
399+
return MDB->getModuleNameForSource(File);
400+
}
401+
402+
std::string getSourceForModuleName(llvm::StringRef ModuleName,
403+
PathRef RequiredSrcFile) override {
404+
std::string CachedResult = Cache.getSourceForModuleName(ModuleName);
405+
406+
// Verify Cached Result by seeing if the source declaring the same module
407+
// as we query.
408+
if (!CachedResult.empty()) {
409+
std::string ModuleNameOfCachedSource =
410+
MDB->getModuleNameForSource(CachedResult);
411+
if (ModuleNameOfCachedSource == ModuleName)
412+
return CachedResult;
413+
414+
// Cached Result is invalid. Clear it.
415+
Cache.eraseEntry(ModuleName);
416+
}
417+
418+
auto Result = MDB->getSourceForModuleName(ModuleName, RequiredSrcFile);
419+
Cache.addEntry(ModuleName, Result);
420+
421+
return Result;
422+
}
423+
424+
private:
425+
std::unique_ptr<ProjectModules> MDB;
426+
ModuleNameToSourceCache &Cache;
427+
};
428+
360429
/// Collect the directly and indirectly required module names for \param
361430
/// ModuleName in topological order. The \param ModuleName is guaranteed to
362431
/// be the last element in \param ModuleNames.
363-
llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB,
432+
llvm::SmallVector<StringRef> getAllRequiredModules(PathRef RequiredSource,
433+
CachingProjectModules &MDB,
364434
StringRef ModuleName) {
365435
llvm::SmallVector<llvm::StringRef> ModuleNames;
366436
llvm::StringSet<> ModuleNamesSet;
367437

368438
auto VisitDeps = [&](StringRef ModuleName, auto Visitor) -> void {
369439
ModuleNamesSet.insert(ModuleName);
370440

371-
for (StringRef RequiredModuleName :
372-
MDB.getRequiredModules(MDB.getSourceForModuleName(ModuleName)))
441+
for (StringRef RequiredModuleName : MDB.getRequiredModules(
442+
MDB.getSourceForModuleName(ModuleName, RequiredSource)))
373443
if (ModuleNamesSet.insert(RequiredModuleName).second)
374444
Visitor(RequiredModuleName, Visitor);
375445

@@ -386,24 +456,29 @@ class ModulesBuilder::ModulesBuilderImpl {
386456
public:
387457
ModulesBuilderImpl(const GlobalCompilationDatabase &CDB) : Cache(CDB) {}
388458

459+
/// Gives access to the module-name-to-source cache owned by this object.
ModuleNameToSourceCache &getProjectModulesCache() { return ProjectModulesCache; }
389462
/// Returns the compilation database reported by the module file cache.
const GlobalCompilationDatabase &getCDB() const {
  return Cache.getCDB();
}
390463

391464
llvm::Error
392-
getOrBuildModuleFile(StringRef ModuleName, const ThreadsafeFS &TFS,
393-
ProjectModules &MDB,
465+
getOrBuildModuleFile(PathRef RequiredSource, StringRef ModuleName,
466+
const ThreadsafeFS &TFS, CachingProjectModules &MDB,
394467
ReusablePrerequisiteModules &BuiltModuleFiles);
395468

396469
private:
397470
ModuleFileCache Cache;
471+
ModuleNameToSourceCache ProjectModulesCache;
398472
};
399473

400474
llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
401-
StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB,
402-
ReusablePrerequisiteModules &BuiltModuleFiles) {
475+
PathRef RequiredSource, StringRef ModuleName, const ThreadsafeFS &TFS,
476+
CachingProjectModules &MDB, ReusablePrerequisiteModules &BuiltModuleFiles) {
403477
if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
404478
return llvm::Error::success();
405479

406-
PathRef ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName);
480+
std::string ModuleUnitFileName =
481+
MDB.getSourceForModuleName(ModuleName, RequiredSource);
407482
/// It is possible that we're meeting third party modules (modules whose
408483
/// source are not in the project. e.g, the std module may be a third-party
409484
/// module for most project) or something wrong with the implementation of
@@ -416,7 +491,7 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
416491
llvm::formatv("Don't get the module unit for module {0}", ModuleName));
417492

418493
// Get Required modules in topological order.
419-
auto ReqModuleNames = getAllRequiredModules(MDB, ModuleName);
494+
auto ReqModuleNames = getAllRequiredModules(RequiredSource, MDB, ModuleName);
420495
for (llvm::StringRef ReqModuleName : ReqModuleNames) {
421496
if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
422497
continue;
@@ -454,16 +529,19 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
454529
elog("Failed to get Project Modules information for {0}", File);
455530
return std::make_unique<FailedPrerequisiteModules>();
456531
}
532+
CachingProjectModules CachedMDB(std::move(MDB),
533+
Impl->getProjectModulesCache());
457534

458-
std::vector<std::string> RequiredModuleNames = MDB->getRequiredModules(File);
535+
std::vector<std::string> RequiredModuleNames =
536+
CachedMDB.getRequiredModules(File);
459537
if (RequiredModuleNames.empty())
460538
return std::make_unique<ReusablePrerequisiteModules>();
461539

462540
auto RequiredModules = std::make_unique<ReusablePrerequisiteModules>();
463541
for (llvm::StringRef RequiredModuleName : RequiredModuleNames) {
464542
// Return early if there is any error.
465543
if (llvm::Error Err = Impl->getOrBuildModuleFile(
466-
RequiredModuleName, TFS, *MDB.get(), *RequiredModules.get())) {
544+
File, RequiredModuleName, TFS, CachedMDB, *RequiredModules.get())) {
467545
elog("Failed to build module {0}; due to {1}", RequiredModuleName,
468546
toString(std::move(Err)));
469547
return std::make_unique<FailedPrerequisiteModules>();

clang-tools-extra/clangd/ProjectModules.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ class ProjectModules {
4242
llvm::unique_function<void(tooling::CompileCommand &, PathRef) const>;
4343

4444
/// Returns the names of the modules required by \p File.
virtual std::vector<std::string> getRequiredModules(PathRef File) = 0;
45-
virtual PathRef
46-
getSourceForModuleName(llvm::StringRef ModuleName,
47-
PathRef RequiredSrcFile = PathRef()) = 0;
45+
/// Returns the name of the module declared by the source \p File.
/// NOTE(review): the scanning implementation returns an empty string when
/// scanning fails or yields no module name -- confirm that other
/// implementations follow the same convention.
virtual std::string getModuleNameForSource(PathRef File) = 0;
46+
/// Returns the path of the source file declaring \p ModuleName.
/// \p RequiredSrcFile is the file on whose behalf the module is looked up.
/// NOTE(review): the scanning implementation intentionally ignores
/// \p RequiredSrcFile -- confirm how other implementations use it.
virtual std::string getSourceForModuleName(llvm::StringRef ModuleName,
47+
PathRef RequiredSrcFile) = 0;
4848

4949
/// Installs a callback that receives a compile command and a path. The
/// default implementation ignores \p Mangler; implementations that need it
/// override this.
virtual void setCommandMangler(CommandMangler Mangler) {}
5050

clang-tools-extra/clangd/ScanningProjectModules.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,9 @@ ModuleDependencyScanner::scan(PathRef FilePath,
134134

135135
void ModuleDependencyScanner::globalScan(
136136
const ProjectModules::CommandMangler &Mangler) {
137+
if (GlobalScanned)
138+
return;
139+
137140
for (auto &File : CDB->getAllFiles())
138141
scan(File, Mangler);
139142

@@ -189,11 +192,18 @@ class ScanningAllProjectModules : public ProjectModules {
189192

190193
/// RequiredSourceFile is not used intentionally. See the comments of
191194
/// ModuleDependencyScanner for detail.
192-
PathRef
193-
getSourceForModuleName(llvm::StringRef ModuleName,
194-
PathRef RequiredSourceFile = PathRef()) override {
195+
std::string getSourceForModuleName(llvm::StringRef ModuleName,
196+
PathRef RequiredSourceFile) override {
195197
Scanner.globalScan(Mangler);
196-
return Scanner.getSourceForModuleName(ModuleName);
198+
return Scanner.getSourceForModuleName(ModuleName).str();
199+
}
200+
201+
/// Scans \p File and returns the module name the scan reports for it. An
/// empty string is returned when the scan produces no result or the result
/// carries no module name.
std::string getModuleNameForSource(PathRef File) override {
  auto Scanned = Scanner.scan(File, Mangler);
  if (Scanned && Scanned->ModuleName)
    return *Scanned->ModuleName;

  return std::string();
}
198208

199209
private:

clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,22 +27,54 @@
2727
namespace clang::clangd {
2828
namespace {
2929

30+
class GlobalScanningCounterProjectModules : public ProjectModules {
31+
public:
32+
GlobalScanningCounterProjectModules(
33+
std::unique_ptr<ProjectModules> Underlying, std::atomic<unsigned> &Count)
34+
: Underlying(std::move(Underlying)), Count(Count) {}
35+
36+
std::vector<std::string> getRequiredModules(PathRef File) override {
37+
return Underlying->getRequiredModules(File);
38+
}
39+
40+
std::string getModuleNameForSource(PathRef File) override {
41+
return Underlying->getModuleNameForSource(File);
42+
}
43+
44+
void setCommandMangler(CommandMangler Mangler) override {
45+
Underlying->setCommandMangler(std::move(Mangler));
46+
}
47+
48+
std::string getSourceForModuleName(llvm::StringRef ModuleName,
49+
PathRef RequiredSrcFile) override {
50+
Count++;
51+
return Underlying->getSourceForModuleName(ModuleName, RequiredSrcFile);
52+
}
53+
54+
private:
55+
std::unique_ptr<ProjectModules> Underlying;
56+
std::atomic<unsigned> &Count;
57+
};
58+
3059
class MockDirectoryCompilationDatabase : public MockCompilationDatabase {
3160
public:
3261
MockDirectoryCompilationDatabase(StringRef TestDir, const ThreadsafeFS &TFS)
3362
: MockCompilationDatabase(TestDir),
3463
MockedCDBPtr(std::make_shared<MockClangCompilationDatabase>(*this)),
35-
TFS(TFS) {
64+
TFS(TFS), GlobalScanningCount(0) {
3665
this->ExtraClangFlags.push_back("-std=c++20");
3766
this->ExtraClangFlags.push_back("-c");
3867
}
3968

4069
void addFile(llvm::StringRef Path, llvm::StringRef Contents);
4170

4271
std::unique_ptr<ProjectModules> getProjectModules(PathRef) const override {
43-
return scanningProjectModules(MockedCDBPtr, TFS);
72+
return std::make_unique<GlobalScanningCounterProjectModules>(
73+
scanningProjectModules(MockedCDBPtr, TFS), GlobalScanningCount);
4474
}
4575

76+
/// Returns the current value of the counter handed to the ProjectModules
/// objects created by getProjectModules().
unsigned getGlobalScanningCount() const {
  return GlobalScanningCount.load();
}
77+
4678
private:
4779
class MockClangCompilationDatabase : public tooling::CompilationDatabase {
4880
public:
@@ -68,6 +100,8 @@ class MockDirectoryCompilationDatabase : public MockCompilationDatabase {
68100

69101
std::shared_ptr<MockClangCompilationDatabase> MockedCDBPtr;
70102
const ThreadsafeFS &TFS;
103+
104+
mutable std::atomic<unsigned> GlobalScanningCount;
71105
};
72106

73107
// Add files to the working testing directory and the compilation database.
@@ -590,6 +624,28 @@ export constexpr int M = 43;
590624
EXPECT_NE(NewHSOptsA.PrebuiltModuleFiles, HSOptsA.PrebuiltModuleFiles);
591625
}
592626

627+
// Builds the prerequisite modules of two files that import the same module
// through one ModulesBuilder, and expects the counting ProjectModules wrapper
// to see getSourceForModuleName() only once in total.
TEST_F(PrerequisiteModulesTests, ScanningCacheTest) {
628+
MockDirectoryCompilationDatabase CDB(TestDir, FS);
629+
630+
// M is the module that both A and B import.
CDB.addFile("M.cppm", R"cpp(
631+
export module M;
632+
)cpp");
633+
CDB.addFile("A.cppm", R"cpp(
634+
export module A;
635+
import M;
636+
)cpp");
637+
CDB.addFile("B.cppm", R"cpp(
638+
export module B;
639+
import M;
640+
)cpp");
641+
642+
ModulesBuilder Builder(CDB);
643+
644+
// Both requests go through the same Builder; the returned prerequisite
// module objects are not inspected here.
Builder.buildPrerequisiteModulesFor(getFullPath("A.cppm"), FS);
645+
Builder.buildPrerequisiteModulesFor(getFullPath("B.cppm"), FS);
646+
// The counter is incremented by GlobalScanningCounterProjectModules on every
// getSourceForModuleName() call that reaches it.
EXPECT_EQ(CDB.getGlobalScanningCount(), 1u);
647+
}
648+
593649
} // namespace
594650
} // namespace clang::clangd
595651

0 commit comments

Comments
 (0)