Skip to content

Commit 4835572

Browse files
authored
[analyzer] Check the correct first and last elements in cstring.UninitializedRead (llvm#95408)
I intend to fix this checker up so that we can move it out of alpha. I made a bunch of analyses, and found many similar false positives: ```c++ int t[] = {1,2,3}; memcpy(dst, t, sizeof(t) / sizeof(t[0])); // warn ``` The problem here is the way CStringChecker checks whether the destination and source buffers are initialized: heuristically, it only checks the first and last element. This is fine, however, it retrieves these elements as characters, even if the underlaying object is not a character array. Reading the last byte of an integer is undefined, so the checker emits a bug here. A quick search tells you the rationale: "Both objects are reinterpreted as arrays of unsigned char.". But the static analyzer right now can't check byte-by-byte if a memory region is _initialized_, it can only check if its a well-defined character or not. In this patch, I pry the original array out of the arguments to memcpy (and similar functions), and retrieve the actual first and last elements according to the array's actual element type. Currently, my improvements reduced the number of reports to 29 on these projects: memcached,tmux,curl,twin,vim,openssl,sqlite,ffmpeg,postgres https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?detection-status=New&detection-status=Reopened&detection-status=Unresolved&is-unique=on&run=%2acstring_uninit_upper_bound_patched&newcheck=%2acstring_uninit_upper_bounds_patched&diff-type=New&checker-name=alpha.unix.cstring.UninitializedRead&items-per-page=100 Before my patch, there were 87. https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?detection-status=New&detection-status=Reopened&detection-status=Unresolved&is-unique=on&run=%2acstring_uninit_baseline&newcheck=%2acstring_uninit_upper_bounds_patched&diff-type=New&checker-name=alpha.unix.cstring.UninitializedRead&items-per-page=100
1 parent d6af73e commit 4835572

File tree

4 files changed

+280
-66
lines changed

4 files changed

+280
-66
lines changed

clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "llvm/ADT/iterator_range.h"
3535
#include "llvm/Support/Allocator.h"
3636
#include "llvm/Support/Casting.h"
37+
#include "llvm/Support/ErrorHandling.h"
3738
#include <cassert>
3839
#include <cstdint>
3940
#include <limits>
@@ -99,6 +100,8 @@ class MemRegion : public llvm::FoldingSetNode {
99100
#define REGION(Id, Parent) Id ## Kind,
100101
#define REGION_RANGE(Id, First, Last) BEGIN_##Id = First, END_##Id = Last,
101102
#include "clang/StaticAnalyzer/Core/PathSensitive/Regions.def"
103+
#undef REGION
104+
#undef REGION_RANGE
102105
};
103106

104107
private:
@@ -171,6 +174,8 @@ class MemRegion : public llvm::FoldingSetNode {
171174

172175
Kind getKind() const { return kind; }
173176

177+
StringRef getKindStr() const;
178+
174179
template<typename RegionTy> const RegionTy* getAs() const;
175180
template <typename RegionTy>
176181
LLVM_ATTRIBUTE_RETURNS_NONNULL const RegionTy *castAs() const;

clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp

Lines changed: 168 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
//===----------------------------------------------------------------------===//
1313

1414
#include "InterCheckerAPI.h"
15+
#include "clang/AST/OperationKinds.h"
1516
#include "clang/Basic/Builtins.h"
1617
#include "clang/Basic/CharInfo.h"
1718
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
@@ -22,10 +23,13 @@
2223
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
2324
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
2425
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
26+
#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
2527
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
28+
#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
29+
#include "llvm/ADT/APSInt.h"
2630
#include "llvm/ADT/STLExtras.h"
27-
#include "llvm/ADT/SmallString.h"
2831
#include "llvm/ADT/StringExtras.h"
32+
#include "llvm/Support/Casting.h"
2933
#include "llvm/Support/raw_ostream.h"
3034
#include <functional>
3135
#include <optional>
@@ -304,6 +308,10 @@ class CStringChecker : public Checker< eval::Call,
304308
// Re-usable checks
305309
ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
306310
AnyArgExpr Arg, SVal l) const;
311+
// Check whether the origin region behind \p Element (like the actual array
312+
// region \p Element is from) is initialized.
313+
ProgramStateRef checkInit(CheckerContext &C, ProgramStateRef state,
314+
AnyArgExpr Buffer, SVal Element, SVal Size) const;
307315
ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
308316
AnyArgExpr Buffer, SVal Element,
309317
AccessKind Access,
@@ -329,7 +337,7 @@ class CStringChecker : public Checker< eval::Call,
329337
const Stmt *S, StringRef WarningMsg) const;
330338
void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;
331339
void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
332-
const Expr *E) const;
340+
const Expr *E, StringRef Msg) const;
333341
ProgramStateRef checkAdditionOverflow(CheckerContext &C,
334342
ProgramStateRef state,
335343
NonLoc left,
@@ -351,16 +359,16 @@ REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
351359
// Individual checks and utility methods.
352360
//===----------------------------------------------------------------------===//
353361

354-
std::pair<ProgramStateRef , ProgramStateRef >
355-
CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
362+
std::pair<ProgramStateRef, ProgramStateRef>
363+
CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef State, SVal V,
356364
QualType Ty) {
357365
std::optional<DefinedSVal> val = V.getAs<DefinedSVal>();
358366
if (!val)
359-
return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
367+
return std::pair<ProgramStateRef, ProgramStateRef>(State, State);
360368

361369
SValBuilder &svalBuilder = C.getSValBuilder();
362370
DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
363-
return state->assume(svalBuilder.evalEQ(state, *val, zero));
371+
return State->assume(svalBuilder.evalEQ(State, *val, zero));
364372
}
365373

366374
ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
@@ -393,6 +401,149 @@ ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
393401
return stateNonNull;
394402
}
395403

404+
static std::optional<NonLoc> getIndex(ProgramStateRef State,
405+
const ElementRegion *ER, CharKind CK) {
406+
SValBuilder &SVB = State->getStateManager().getSValBuilder();
407+
ASTContext &Ctx = SVB.getContext();
408+
409+
if (CK == CharKind::Regular) {
410+
if (ER->getValueType() != Ctx.CharTy)
411+
return {};
412+
return ER->getIndex();
413+
}
414+
415+
if (ER->getValueType() != Ctx.WideCharTy)
416+
return {};
417+
418+
QualType SizeTy = Ctx.getSizeType();
419+
NonLoc WideSize =
420+
SVB.makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
421+
SizeTy)
422+
.castAs<NonLoc>();
423+
SVal Offset =
424+
SVB.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
425+
if (Offset.isUnknown())
426+
return {};
427+
return Offset.castAs<NonLoc>();
428+
}
429+
430+
// Basically 1 -> 1st, 12 -> 12th, etc.
431+
static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {
432+
Os << Idx << llvm::getOrdinalSuffix(Idx);
433+
}
434+
435+
ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
436+
ProgramStateRef State,
437+
AnyArgExpr Buffer, SVal Element,
438+
SVal Size) const {
439+
440+
// If a previous check has failed, propagate the failure.
441+
if (!State)
442+
return nullptr;
443+
444+
const MemRegion *R = Element.getAsRegion();
445+
const auto *ER = dyn_cast_or_null<ElementRegion>(R);
446+
if (!ER)
447+
return State;
448+
449+
const auto *SuperR = ER->getSuperRegion()->getAs<TypedValueRegion>();
450+
if (!SuperR)
451+
return State;
452+
453+
// FIXME: We ought to able to check objects as well. Maybe
454+
// UninitializedObjectChecker could help?
455+
if (!SuperR->getValueType()->isArrayType())
456+
return State;
457+
458+
SValBuilder &SVB = C.getSValBuilder();
459+
ASTContext &Ctx = SVB.getContext();
460+
461+
const QualType ElemTy = Ctx.getBaseElementType(SuperR->getValueType());
462+
const NonLoc Zero = SVB.makeZeroArrayIndex();
463+
464+
std::optional<Loc> FirstElementVal =
465+
State->getLValue(ElemTy, Zero, loc::MemRegionVal(SuperR)).getAs<Loc>();
466+
if (!FirstElementVal)
467+
return State;
468+
469+
// Ensure that we wouldn't read uninitialized value.
470+
if (Filter.CheckCStringUninitializedRead &&
471+
State->getSVal(*FirstElementVal).isUndef()) {
472+
llvm::SmallString<258> Buf;
473+
llvm::raw_svector_ostream OS(Buf);
474+
OS << "The first element of the ";
475+
printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
476+
OS << " argument is undefined";
477+
emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
478+
return nullptr;
479+
}
480+
481+
// We won't check whether the entire region is fully initialized -- lets just
482+
// check that the first and the last element is. So, onto checking the last
483+
// element:
484+
const QualType IdxTy = SVB.getArrayIndexType();
485+
486+
NonLoc ElemSize =
487+
SVB.makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
488+
.castAs<NonLoc>();
489+
490+
// FIXME: Check that the size arg to the cstring function is divisible by
491+
// size of the actual element type?
492+
493+
// The type of the argument to the cstring function is either char or wchar,
494+
// but thats not the type of the original array (or memory region).
495+
// Suppose the following:
496+
// int t[5];
497+
// memcpy(dst, t, sizeof(t) / sizeof(t[0]));
498+
// When checking whether t is fully initialized, we see it as char array of
499+
// size sizeof(int)*5. If we check the last element as a character, we read
500+
// the last byte of an integer, which will be undefined. But just because
501+
// that value is undefined, it doesn't mean that the element is uninitialized!
502+
// For this reason, we need to retrieve the actual last element with the
503+
// correct type.
504+
505+
// Divide the size argument to the cstring function by the actual element
506+
// type. This value will be size of the array, or the index to the
507+
// past-the-end element.
508+
std::optional<NonLoc> Offset =
509+
SVB.evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
510+
IdxTy)
511+
.getAs<NonLoc>();
512+
513+
// Retrieve the index of the last element.
514+
const NonLoc One = SVB.makeIntVal(1, IdxTy).castAs<NonLoc>();
515+
SVal LastIdx = SVB.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
516+
517+
if (!Offset)
518+
return State;
519+
520+
SVal LastElementVal =
521+
State->getLValue(ElemTy, LastIdx, loc::MemRegionVal(SuperR));
522+
if (!isa<Loc>(LastElementVal))
523+
return State;
524+
525+
if (Filter.CheckCStringUninitializedRead &&
526+
State->getSVal(LastElementVal.castAs<Loc>()).isUndef()) {
527+
const llvm::APSInt *IdxInt = LastIdx.getAsInteger();
528+
// If we can't get emit a sensible last element index, just bail out --
529+
// prefer to emit nothing in favour of emitting garbage quality reports.
530+
if (!IdxInt) {
531+
C.addSink();
532+
return nullptr;
533+
}
534+
llvm::SmallString<258> Buf;
535+
llvm::raw_svector_ostream OS(Buf);
536+
OS << "The last accessed element (at index ";
537+
OS << IdxInt->getExtValue();
538+
OS << ") in the ";
539+
printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
540+
OS << " argument is undefined";
541+
emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
542+
return nullptr;
543+
}
544+
return State;
545+
}
546+
396547
// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
397548
ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
398549
ProgramStateRef state,
@@ -413,38 +564,17 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
413564
if (!ER)
414565
return state;
415566

416-
SValBuilder &svalBuilder = C.getSValBuilder();
417-
ASTContext &Ctx = svalBuilder.getContext();
418-
419567
// Get the index of the accessed element.
420-
NonLoc Idx = ER->getIndex();
421-
422-
if (CK == CharKind::Regular) {
423-
if (ER->getValueType() != Ctx.CharTy)
424-
return state;
425-
} else {
426-
if (ER->getValueType() != Ctx.WideCharTy)
427-
return state;
428-
429-
QualType SizeTy = Ctx.getSizeType();
430-
NonLoc WideSize =
431-
svalBuilder
432-
.makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
433-
SizeTy)
434-
.castAs<NonLoc>();
435-
SVal Offset = svalBuilder.evalBinOpNN(state, BO_Mul, Idx, WideSize, SizeTy);
436-
if (Offset.isUnknown())
437-
return state;
438-
Idx = Offset.castAs<NonLoc>();
439-
}
568+
std::optional<NonLoc> Idx = getIndex(state, ER, CK);
569+
if (!Idx)
570+
return state;
440571

441572
// Get the size of the array.
442573
const auto *superReg = cast<SubRegion>(ER->getSuperRegion());
443574
DefinedOrUnknownSVal Size =
444575
getDynamicExtent(state, superReg, C.getSValBuilder());
445576

446-
ProgramStateRef StInBound, StOutBound;
447-
std::tie(StInBound, StOutBound) = state->assumeInBoundDual(Idx, Size);
577+
auto [StInBound, StOutBound] = state->assumeInBoundDual(*Idx, Size);
448578
if (StOutBound && !StInBound) {
449579
// These checks are either enabled by the CString out-of-bounds checker
450580
// explicitly or implicitly by the Malloc checker.
@@ -459,15 +589,6 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
459589
return nullptr;
460590
}
461591

462-
// Ensure that we wouldn't read uninitialized value.
463-
if (Access == AccessKind::read) {
464-
if (Filter.CheckCStringUninitializedRead &&
465-
StInBound->getSVal(ER).isUndef()) {
466-
emitUninitializedReadBug(C, StInBound, Buffer.Expression);
467-
return nullptr;
468-
}
469-
}
470-
471592
// Array bound check succeeded. From this point forward the array bound
472593
// should always succeed.
473594
return StInBound;
@@ -502,6 +623,7 @@ CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
502623

503624
// Check if the first byte of the buffer is accessible.
504625
State = CheckLocation(C, State, Buffer, BufStart, Access, CK);
626+
505627
if (!State)
506628
return nullptr;
507629

@@ -526,6 +648,8 @@ CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
526648
SVal BufEnd =
527649
svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
528650
State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);
651+
if (Access == AccessKind::read)
652+
State = checkInit(C, State, Buffer, BufEnd, *Length);
529653

530654
// If the buffer isn't large enough, abort.
531655
if (!State)
@@ -694,16 +818,17 @@ void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
694818

695819
void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
696820
ProgramStateRef State,
697-
const Expr *E) const {
821+
const Expr *E,
822+
StringRef Msg) const {
698823
if (ExplodedNode *N = C.generateErrorNode(State)) {
699-
const char *Msg =
700-
"Bytes string function accesses uninitialized/garbage values";
701824
if (!BT_UninitRead)
702825
BT_UninitRead.reset(new BugType(Filter.CheckNameCStringUninitializedRead,
703826
"Accessing unitialized/garbage values"));
704827

705828
auto Report =
706829
std::make_unique<PathSensitiveBugReport>(*BT_UninitRead, Msg, N);
830+
Report->addNote("Other elements might also be undefined",
831+
Report->getLocation());
707832
Report->addRange(E->getSourceRange());
708833
bugreporter::trackExpressionValue(N, E, *Report);
709834
C.emitReport(std::move(Report));

clang/lib/StaticAnalyzer/Core/MemRegion.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,17 @@ bool MemRegion::canPrintPrettyAsExpr() const {
630630
return false;
631631
}
632632

633+
StringRef MemRegion::getKindStr() const {
634+
switch (getKind()) {
635+
#define REGION(Id, Parent) \
636+
case Id##Kind: \
637+
return #Id;
638+
#include "clang/StaticAnalyzer/Core/PathSensitive/Regions.def"
639+
#undef REGION
640+
}
641+
llvm_unreachable("Unkown kind!");
642+
}
643+
633644
void MemRegion::printPretty(raw_ostream &os) const {
634645
assert(canPrintPretty() && "This region cannot be printed pretty.");
635646
os << "'";

0 commit comments

Comments
 (0)