Skip to content

Commit 0f38c54

Browse files
authored
[llvm-debuginfo-analyzer] Add support for parsing DWARF / CodeView SourceLanguage (#137223)
This pull request adds support for parsing the source language in both DWARF and CodeView. Specifically: (1) the `LVSourceLanguage` struct is introduced to represent any language supported by any of the debug info representations; (2) `LVDWARFReader.cpp` and `LVCodeViewVisitor.cpp` are updated to parse the source language where it applies; (3) a new `--attribute=language` attribute is added, and `getAttributeLanguage()` is used internally to control whether this information is printed.
1 parent 7809b14 commit 0f38c54

32 files changed

+202
-22
lines changed

llvm/docs/CommandGuide/llvm-debuginfo-analyzer.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,12 +134,13 @@ toolchain name, binary file format, etc.
134134
The following attributes describe the most common information for a
135135
logical element. They help to identify the lexical scope level; the
136136
element visibility across modules (global, local); the toolchain name
137-
that produced the binary file.
137+
and source language that produced the binary file.
138138

139139
.. code-block:: text
140140
141141
=global: Element referenced across Compile Units.
142142
=format: Object file format name.
143+
=language: Source language name.
143144
=level: Lexical scope level (File=0, Compile Unit=1).
144145
=local: Element referenced only in the Compile Unit.
145146
=producer: Toolchain identification name.
@@ -231,6 +232,7 @@ toolchain name, binary file format, etc.
231232
=filename
232233
=files
233234
=format
235+
=language
234236
=level
235237
=producer
236238
=publics

llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#define LLVM_DEBUGINFO_LOGICALVIEW_CORE_LVELEMENT_H
1616

1717
#include "llvm/DebugInfo/LogicalView/Core/LVObject.h"
18+
#include "llvm/DebugInfo/LogicalView/Core/LVSourceLanguage.h"
1819
#include "llvm/Support/Casting.h"
1920
#include "llvm/Support/Compiler.h"
2021
#include "llvm/Support/MathExtras.h"
@@ -221,6 +222,9 @@ class LLVM_ABI LVElement : public LVObject {
221222
virtual StringRef getProducer() const { return StringRef(); }
222223
virtual void setProducer(StringRef ProducerName) {}
223224

225+
// Source language associated with this element. The base implementation
// returns a default-constructed LVSourceLanguage; subclasses that record a
// language override it.
virtual LVSourceLanguage getSourceLanguage() const { return {}; }
226+
// Records the source language for this element. The base implementation
// is a no-op and discards SL; subclasses that store a language override it.
virtual void setSourceLanguage(LVSourceLanguage SL) {}
227+
224228
virtual bool isCompileUnit() const { return false; }
225229
virtual bool isRoot() const { return false; }
226230

llvm/include/llvm/DebugInfo/LogicalView/Core/LVOptions.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ enum class LVAttributeKind {
107107
Generated, // --attribute=generated
108108
Global, // --attribute=global
109109
Inserted, // --attribute=inserted
110+
Language, // --attribute=language
110111
Level, // --attribute=level
111112
Linkage, // --attribute=linkage
112113
Local, // --attribute=local
@@ -338,6 +339,7 @@ class LVOptions {
338339
ATTRIBUTE_OPTION(Generated);
339340
ATTRIBUTE_OPTION(Global);
340341
ATTRIBUTE_OPTION(Inserted);
342+
ATTRIBUTE_OPTION(Language);
341343
ATTRIBUTE_OPTION(Level);
342344
ATTRIBUTE_OPTION(Linkage);
343345
ATTRIBUTE_OPTION(Location);

llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,9 @@ class LLVM_ABI LVScopeCompileUnit final : public LVScope {
419419
// Compilation directory name.
420420
size_t CompilationDirectoryIndex = 0;
421421

422+
// Source language.
423+
LVSourceLanguage SourceLanguage{};
424+
422425
// Used by the CodeView Reader.
423426
codeview::CPUType CompilationCPUType = codeview::CPUType::X64;
424427

@@ -549,6 +552,9 @@ class LLVM_ABI LVScopeCompileUnit final : public LVScope {
549552
ProducerIndex = getStringPool().getIndex(ProducerName);
550553
}
551554

555+
// Returns the source language stored in the SourceLanguage member.
LVSourceLanguage getSourceLanguage() const override { return SourceLanguage; }
556+
// Stores SL in the SourceLanguage member, replacing any previous value.
void setSourceLanguage(LVSourceLanguage SL) override { SourceLanguage = SL; }
557+
552558
void setCPUType(codeview::CPUType Type) { CompilationCPUType = Type; }
553559
codeview::CPUType getCPUType() { return CompilationCPUType; }
554560

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
//===-- LVSourceLanguage.h --------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file defines the LVSourceLanguage struct, a unified representation of
10+
// the source language used in a compile unit.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#ifndef LLVM_DEBUGINFO_LOGICALVIEW_CORE_LVSOURCELANGUAGE_H
15+
#define LLVM_DEBUGINFO_LOGICALVIEW_CORE_LVSOURCELANGUAGE_H
16+
17+
#include "llvm/ADT/StringRef.h"
18+
#include "llvm/BinaryFormat/Dwarf.h"
19+
#include "llvm/DebugInfo/CodeView/CodeView.h"
20+
21+
namespace llvm {
22+
namespace logicalview {
23+
24+
/// A source language supported by any of the debug info representations.
25+
/// A source language supported by any of the debug info representations.
///
/// The language is stored as one 32-bit value: the upper 16 bits hold a tag
/// naming the debug info format (TagDwarf or TagCodeView) and the lower 16
/// bits hold the language identifier defined by that format.
struct LVSourceLanguage {
  static constexpr unsigned TagDwarf = 0x00;
  static constexpr unsigned TagCodeView = 0x01;

  enum TaggedLanguage : uint32_t {
    // Value held by a default-constructed LVSourceLanguage.
    Invalid = -1U,

  // DWARF
#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR)                 \
  DW_LANG_##NAME = (TagDwarf << 16) | ID,
#include "llvm/BinaryFormat/Dwarf.def"
  // CodeView
#define CV_LANGUAGE(NAME, ID) CV_LANG_##NAME = (TagCodeView << 16) | ID,
#include "llvm/DebugInfo/CodeView/CodeViewLanguages.def"
  };

  /// Creates an invalid language (isValid() returns false).
  LVSourceLanguage() = default;
  /// Wraps a DWARF language identifier.
  LVSourceLanguage(llvm::dwarf::SourceLanguage SL)
      : LVSourceLanguage(TagDwarf, SL) {}
  /// Wraps a CodeView language identifier.
  LVSourceLanguage(llvm::codeview::SourceLanguage SL)
      : LVSourceLanguage(TagCodeView, SL) {}

  /// Two languages are equal when both the format tag and the identifier
  /// match.
  bool operator==(const LVSourceLanguage &SL) const {
    return get() == SL.get();
  }
  bool operator==(const LVSourceLanguage::TaggedLanguage &TL) const {
    return get() == TL;
  }

  /// Returns true unless this object holds the Invalid sentinel.
  bool isValid() const { return Language != Invalid; }
  /// Returns the combined (tag, identifier) value.
  TaggedLanguage get() const { return Language; }
  /// Returns the language name; defined out of line.
  StringRef getName() const;

private:
  TaggedLanguage Language = Invalid;

  /// Combines \p Tag and \p Lang into a tagged value.
  ///
  /// An identifier that does not fit in the low 16 bits (for example one read
  /// from malformed debug info) would overwrite the tag bits and make the
  /// value look like a language of a different, possibly unknown, format.
  /// Such identifiers are mapped to Invalid instead.
  LVSourceLanguage(unsigned Tag, unsigned Lang)
      : Language(Lang <= 0xffff
                     ? static_cast<TaggedLanguage>((Tag << 16) | Lang)
                     : Invalid) {}
  /// Format tag stored in the upper 16 bits.
  unsigned getTag() const { return Language >> 16; }
  /// Format-specific language identifier stored in the lower 16 bits.
  unsigned getLang() const { return Language & 0xffff; }
};
65+
66+
} // end namespace logicalview
67+
} // end namespace llvm
68+
69+
#endif // LLVM_DEBUGINFO_LOGICALVIEW_CORE_LVSOURCELANGUAGE_H

llvm/lib/DebugInfo/LogicalView/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ add_lv_impl_folder(Core
1414
Core/LVReader.cpp
1515
Core/LVScope.cpp
1616
Core/LVSort.cpp
17+
Core/LVSourceLanguage.cpp
1718
Core/LVSupport.cpp
1819
Core/LVSymbol.cpp
1920
Core/LVType.cpp

llvm/lib/DebugInfo/LogicalView/Core/LVOptions.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ void LVOptions::resolveDependencies() {
3939
setAttributeFilename();
4040
setAttributeFiles();
4141
setAttributeFormat();
42+
setAttributeLanguage();
4243
setAttributeLevel();
4344
setAttributeProducer();
4445
setAttributePublics();

llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1717,11 +1717,19 @@ void LVScopeCompileUnit::print(raw_ostream &OS, bool Full) const {
17171717

17181718
void LVScopeCompileUnit::printExtra(raw_ostream &OS, bool Full) const {
17191719
OS << formattedKind(kind()) << " '" << getName() << "'\n";
1720-
if (options().getPrintFormatting() && options().getAttributeProducer())
1721-
printAttributes(OS, Full, "{Producer} ",
1722-
const_cast<LVScopeCompileUnit *>(this), getProducer(),
1723-
/*UseQuotes=*/true,
1724-
/*PrintRef=*/false);
1720+
if (options().getPrintFormatting()) {
1721+
if (options().getAttributeProducer())
1722+
printAttributes(OS, Full, "{Producer} ",
1723+
const_cast<LVScopeCompileUnit *>(this), getProducer(),
1724+
/*UseQuotes=*/true,
1725+
/*PrintRef=*/false);
1726+
if (options().getAttributeLanguage())
1727+
if (auto SL = getSourceLanguage(); SL.isValid())
1728+
printAttributes(OS, Full, "{Language} ",
1729+
const_cast<LVScopeCompileUnit *>(this), SL.getName(),
1730+
/*UseQuotes=*/true,
1731+
/*PrintRef=*/false);
1732+
}
17251733

17261734
// Reset file index, to allow its children to print the correct filename.
17271735
options().resetFilenameIndex();
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
//===-- LVSourceLanguage.cpp ----------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file implements LVSourceLanguage.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "llvm/DebugInfo/LogicalView/Core/LVSourceLanguage.h"
14+
#include "llvm/DebugInfo/CodeView/EnumTables.h"
15+
#include "llvm/Support/ScopedPrinter.h"
16+
17+
using namespace llvm;
18+
using namespace llvm::logicalview;
19+
20+
/// Returns the name of the language, or an empty StringRef when the language
/// is invalid or has no entry in the CodeView name table.
StringRef LVSourceLanguage::getName() const {
  if (!isValid())
    return {};
  switch (getTag()) {
  case LVSourceLanguage::TagDwarf:
    return llvm::dwarf::LanguageString(getLang());
  case LVSourceLanguage::TagCodeView: {
    // The name table is a list of (name, value) entries, not an array indexed
    // by language value: CodeView identifiers are sparse (e.g. D is 'D') and
    // the identifier comes straight from the input, so look the entry up by
    // value instead of indexing with it.
    static const auto LangNames = llvm::codeview::getSourceLanguageNames();
    const unsigned Lang = getLang();
    for (const auto &Entry : LangNames)
      if (static_cast<unsigned>(Entry.Value) == Lang)
        return Entry.Name;
    return {};
  }
  default:
    llvm_unreachable("Unsupported language");
  }
}

llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -949,6 +949,9 @@ Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
949949
Scope->setName(CurrentObjectName);
950950
if (options().getAttributeProducer())
951951
Scope->setProducer(Compile2.Version);
952+
if (options().getAttributeLanguage())
953+
Scope->setSourceLanguage(LVSourceLanguage{
954+
static_cast<llvm::codeview::SourceLanguage>(Compile2.getLanguage())});
952955
getReader().isSystemEntry(Scope, CurrentObjectName);
953956

954957
// The line records in CodeView are recorded per Module ID. Update
@@ -994,6 +997,9 @@ Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
994997
Scope->setName(CurrentObjectName);
995998
if (options().getAttributeProducer())
996999
Scope->setProducer(Compile3.Version);
1000+
if (options().getAttributeLanguage())
1001+
Scope->setSourceLanguage(LVSourceLanguage{
1002+
static_cast<llvm::codeview::SourceLanguage>(Compile3.getLanguage())});
9971003
getReader().isSystemEntry(Scope, CurrentObjectName);
9981004

9991005
// The line records in CodeView are recorded per Module ID. Update

llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,11 @@ void LVDWARFReader::processOneAttribute(const DWARFDie &Die,
172172
if (options().getAttributeProducer())
173173
CurrentElement->setProducer(dwarf::toStringRef(FormValue));
174174
break;
175+
case dwarf::DW_AT_language:
176+
if (options().getAttributeLanguage())
177+
CurrentElement->setSourceLanguage(LVSourceLanguage{
178+
static_cast<llvm::dwarf::SourceLanguage>(GetAsUnsignedConstant())});
179+
break;
175180
case dwarf::DW_AT_upper_bound:
176181
CurrentElement->setUpperBound(GetBoundValue(FormValue));
177182
break;

llvm/test/tools/llvm-debuginfo-analyzer/COFF/02-coff-logical-lines.test

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
; The logical views shows the intermixed lines and assembler instructions,
1616
; allowing to compare the code generated by the different toolchains.
1717

18-
; RUN: llvm-debuginfo-analyzer --attribute=level,format,producer \
18+
; RUN: llvm-debuginfo-analyzer --attribute=language,level,format,producer \
1919
; RUN: --print=lines,instructions \
2020
; RUN: %p/Inputs/hello-world-codeview-clang.o \
2121
; RUN: %p/Inputs/hello-world-codeview-msvc.o 2>&1 | \
@@ -26,6 +26,7 @@
2626
; ONE-EMPTY:
2727
; ONE-NEXT: [001] {CompileUnit} 'hello-world.cpp'
2828
; ONE-NEXT: [002] {Producer} 'clang version 15.0.0 {{.*}}'
29+
; ONE-NEXT: [002] {Language} 'Cpp'
2930
; ONE-NEXT: [002] {Function} extern not_inlined 'main' -> 'int'
3031
; ONE-NEXT: [003] 4 {Line}
3132
; ONE-NEXT: [003] {Code} 'subq $0x28, %rsp'
@@ -43,6 +44,7 @@
4344
; ONE-EMPTY:
4445
; ONE-NEXT: [001] {CompileUnit} 'hello-world.cpp'
4546
; ONE-NEXT: [002] {Producer} 'Microsoft (R) Optimizing Compiler'
47+
; ONE-NEXT: [002] {Language} 'Cpp'
4648
; ONE-NEXT: [002] {Function} extern not_inlined 'main' -> 'int'
4749
; ONE-NEXT: [003] 4 {Line}
4850
; ONE-NEXT: [003] {Code} 'subq $0x28, %rsp'

llvm/test/tools/llvm-debuginfo-analyzer/COFF/03-coff-incorrect-lexical-scope-typedef.test

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
; emits both typedefs at the same lexical scope (3), which is wrong.
3131
; GCC and MSVC emit correct lexical scope for both typedefs.
3232

33-
; RUN: llvm-debuginfo-analyzer --attribute=level,format,producer \
33+
; RUN: llvm-debuginfo-analyzer --attribute=language,level,format,producer \
3434
; RUN: --output-sort=kind \
3535
; RUN: --print=symbols,types,lines \
3636
; RUN: %p/Inputs/pr-44884-codeview-clang.o \
@@ -42,6 +42,7 @@
4242
; ONE-EMPTY:
4343
; ONE-NEXT: [001] {CompileUnit} 'pr-44884.cpp'
4444
; ONE-NEXT: [002] {Producer} 'clang version 15.0.0 {{.*}}'
45+
; ONE-NEXT: [002] {Language} 'Cpp'
4546
; ONE-NEXT: [002] {Function} extern not_inlined 'bar' -> 'int'
4647
; ONE-NEXT: [003] {Parameter} 'Input' -> 'float'
4748
; ONE-NEXT: [003] 1 {Line}
@@ -63,6 +64,7 @@
6364
; ONE-EMPTY:
6465
; ONE-NEXT: [001] {CompileUnit} 'pr-44884.cpp'
6566
; ONE-NEXT: [002] {Producer} 'Microsoft (R) Optimizing Compiler'
67+
; ONE-NEXT: [002] {Language} 'Cpp'
6668
; ONE-NEXT: [002] {Function} extern not_inlined 'bar' -> 'int'
6769
; ONE-NEXT: [003] {Variable} 'Input' -> 'float'
6870
; ONE-NEXT: [003] 1 {Line}

llvm/test/tools/llvm-debuginfo-analyzer/COFF/04-coff-missing-nested-enumerators.test

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
; references to the enumerators 'RED' and 'BLUE'. The CodeView generated
2626
; by GCC and MSVC, does include such references.
2727

28-
; RUN: llvm-debuginfo-analyzer --attribute=level,format,producer,size \
28+
; RUN: llvm-debuginfo-analyzer --attribute=language,level,format,producer,size \
2929
; RUN: --output-sort=name \
3030
; RUN: --print=symbols,types \
3131
; RUN: %p/Inputs/pr-46466-codeview-clang.o \
@@ -37,6 +37,7 @@
3737
; ONE-EMPTY:
3838
; ONE-NEXT: [001] {CompileUnit} 'pr-46466.cpp'
3939
; ONE-NEXT: [002] {Producer} 'clang version 15.0.0 {{.*}}'
40+
; ONE-NEXT: [002] {Language} 'Cpp'
4041
; ONE-NEXT: [002] {Variable} extern 'S' -> 'Struct'
4142
; ONE-NEXT: [002] 1 {Struct} 'Struct' [Size = 1]
4243
; ONE-NEXT: [003] {Member} public 'U' -> 'Union'
@@ -50,6 +51,7 @@
5051
; ONE-EMPTY:
5152
; ONE-NEXT: [001] {CompileUnit} 'pr-46466.cpp'
5253
; ONE-NEXT: [002] {Producer} 'Microsoft (R) Optimizing Compiler'
54+
; ONE-NEXT: [002] {Language} 'Cpp'
5355
; ONE-NEXT: [002] {Variable} extern 'S' -> 'Struct'
5456
; ONE-NEXT: [002] 1 {Struct} 'Struct' [Size = 1]
5557
; ONE-NEXT: [003] {Member} public 'U' -> 'Union'

llvm/test/tools/llvm-debuginfo-analyzer/COFF/05-coff-incorrect-lexical-scope-variable.test

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
; The CodeView generated by MSVC, show those variables at the correct
3232
; lexical scope: '3' and '4' respectively.
3333

34-
; RUN: llvm-debuginfo-analyzer --attribute=level,format,producer \
34+
; RUN: llvm-debuginfo-analyzer --attribute=language,level,format,producer \
3535
; RUN: --output-sort=name \
3636
; RUN: --print=symbols \
3737
; RUN: %p/Inputs/pr-43860-codeview-clang.o \
@@ -43,6 +43,7 @@
4343
; ONE-EMPTY:
4444
; ONE-NEXT: [001] {CompileUnit} 'pr-43860.cpp'
4545
; ONE-NEXT: [002] {Producer} 'clang version 15.0.0 {{.*}}'
46+
; ONE-NEXT: [002] {Language} 'Cpp'
4647
; ONE-NEXT: [002] 2 {Function} inlined 'InlineFunction' -> 'int'
4748
; ONE-NEXT: [003] {Parameter} '' -> 'int'
4849
; ONE-NEXT: [002] {Function} extern not_inlined 'test' -> 'int'
@@ -59,6 +60,7 @@
5960
; ONE-EMPTY:
6061
; ONE-NEXT: [001] {CompileUnit} 'pr-43860.cpp'
6162
; ONE-NEXT: [002] {Producer} 'Microsoft (R) Optimizing Compiler'
63+
; ONE-NEXT: [002] {Language} 'Cpp'
6264
; ONE-NEXT: [002] {Function} extern declared_inlined 'InlineFunction' -> 'int'
6365
; ONE-NEXT: [003] {Block}
6466
; ONE-NEXT: [004] {Variable} 'Var_2' -> 'int'

llvm/test/tools/llvm-debuginfo-analyzer/COFF/06-coff-full-logical-view.test

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
; ONE-EMPTY:
2929
; ONE-NEXT: [0x0000000000][001] {CompileUnit} 'test.cpp'
3030
; ONE-NEXT: [0x0000000000][002] {Producer} 'clang version 15.0.0 {{.*}}'
31+
; ONE-NEXT: [0x0000000000][002] {Language} 'Cpp'
3132
; ONE-NEXT: {Directory} 'test.cpp'
3233
; ONE-NEXT: {Directory} 'x:/tests/input'
3334
; ONE-NEXT: {File} 'general'

llvm/test/tools/llvm-debuginfo-analyzer/DWARF/02-dwarf-logical-lines.test

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
; The logical views shows the intermixed lines and assembler instructions,
1616
; allowing to compare the code generated by the different toolchains.
1717

18-
; RUN: llvm-debuginfo-analyzer --attribute=level,format,producer \
18+
; RUN: llvm-debuginfo-analyzer --attribute=language,level,format,producer \
1919
; RUN: --print=lines,instructions \
2020
; RUN: %p/Inputs/hello-world-dwarf-clang.o \
2121
; RUN: %p/Inputs/hello-world-dwarf-gcc.o 2>&1 | \
@@ -26,6 +26,7 @@
2626
; ONE-EMPTY:
2727
; ONE-NEXT: [001] {CompileUnit} 'hello-world.cpp'
2828
; ONE-NEXT: [002] {Producer} 'clang version 15.0.0 {{.*}}'
29+
; ONE-NEXT: [002] {Language} 'DW_LANG_C_plus_plus_14'
2930
; ONE-NEXT: [002] 3 {Function} extern not_inlined 'main' -> 'int'
3031
; ONE-NEXT: [003] 4 {Line}
3132
; ONE-NEXT: [003] {Code} 'pushq %rbp'
@@ -48,6 +49,7 @@
4849
; ONE-EMPTY:
4950
; ONE-NEXT: [001] {CompileUnit} 'hello-world.cpp'
5051
; ONE-NEXT: [002] {Producer} 'GNU C++14 10.3.0 {{.*}}'
52+
; ONE-NEXT: [002] {Language} 'DW_LANG_C_plus_plus'
5153
; ONE-NEXT: [002] 3 {Function} extern not_inlined 'main' -> 'int'
5254
; ONE-NEXT: [003] 4 {Line}
5355
; ONE-NEXT: [003] {Code} 'endbr64'

0 commit comments

Comments
 (0)