function Convert-DurationStringToMs {
    <#
    .SYNOPSIS
        Converts a CodeQL evaluator duration string into milliseconds.
    .DESCRIPTION
        Duration strings are a concatenation of decimal values with units,
        e.g. "1h2m3.5s4ms". Each component is converted to milliseconds and
        the components are summed.

        Fixes in this revision: the original hand-rolled scanner iterated one
        past the end of both the input string and the unit stack (`-le` where
        `-lt` was intended) and silently added the raw value for unknown
        units; this version tokenizes with a regex and only accepts the
        documented units (h, m, s, ms).
    .PARAMETER DurationString
        The duration string to convert, e.g. "4m20.1s".
    .OUTPUTS
        The total duration in milliseconds as a decimal.
    #>
    param(
        [Parameter(Mandatory)]
        [string]
        $DurationString
    )

    # One token per value/unit pair. "ms" must precede "m" in the
    # alternation so that "4ms" is not read as "4m" + stray "s".
    $TokenPattern = '(?<value>\d+(?:\.\d+)?)(?<unit>ms|h|m|s)'

    # Milliseconds per unit.
    $UnitToMs = @{
        'h'  = 3600000
        'm'  = 60000
        's'  = 1000
        'ms' = 1
    }

    $totalMs = 0
    foreach ($token in [regex]::Matches($DurationString, $TokenPattern)) {
        $value = [System.Convert]::ToDecimal($token.Groups['value'].Value)
        $unit = $token.Groups['unit'].Value
        $totalMs += $value * $UnitToMs[$unit]
    }

    return $totalMs
}
function Get-QueryString {
    <#
    .SYNOPSIS
        Extracts the query results path (without the .bqrs extension) from a
        CodeQL "Evaluation done" log line.
    .PARAMETER LogLine
        A single line of CodeQL evaluator log output containing the text
        "Evaluation done; writing results to <path>.bqrs".
    .OUTPUTS
        The substring between the marker and ".bqrs", i.e. the query path.
    #>
    param(
        [Parameter(Mandatory)]
        [string]
        $LogLine
    )
    # Derive the offset from the marker itself rather than hard-coding 36,
    # so the two cannot drift apart.
    $Marker = 'Evaluation done; writing results to '
    $In = $LogLine.IndexOf($Marker) + $Marker.Length
    $Out = $LogLine.IndexOf('.bqrs')

    return $LogLine.Substring($In, $Out - $In)
}
+ +## Usage + +``` +NAME + .\scripts\performance_testing\Test-ReleasePerformance.ps1 + +SYNOPSIS + Test release performance. Generates outputs 2 csv files containing the slowest predicates as well as the queries + causing work. Note that the method of computing query execution time is inaccurate due to the shared nature of + predicates. + + +SYNTAX + C:\Projects\codeql-coding-standards\scripts\performance_testing\Test-ReleasePerformance.ps1 -RunTests [-Threads ] -DatabaseArchive + [-TestTimestamp ] [-CodingStandardsPath ] [-ResultsDirectory ] [-ReleaseTag ] -Suite [-Platform ] -Language + [] + + C:\Projects\codeql-coding-standards\scripts\performance_testing\Test-ReleasePerformance.ps1 -ProcessResults -ResultsFile [-ResultsDirectory ] + [-ReleaseTag ] -Suite [-Platform ] -Language [] + + +DESCRIPTION + Test release performance. Generates outputs 2 csv files containing the slowest predicates as well as the queries + causing work. Note that the method of computing query execution time is inaccurate due to the shared nature of + predicates. + + +PARAMETERS + -RunTests [] + Configures tool to run tests. + + Required? true + Position? named + Default value False + Accept pipeline input? false + Accept wildcard characters? false + + -Threads + Specifies the number of threads to use. + + Required? false + Position? named + Default value 5 + Accept pipeline input? false + Accept wildcard characters? false + + -DatabaseArchive + Specifies the database to use for testing. Should be a zipped database + directory. + + Required? true + Position? named + Default value + Accept pipeline input? false + Accept wildcard characters? false + + -TestTimestamp + The timestamp to use for the test. + + Required? false + Position? named + Default value (Get-Date -Format "yyyy-MM-dd_HH-mm-ss") + Accept pipeline input? false + Accept wildcard characters? false + + -CodingStandardsPath + The path to the coding standards root directory. 
This can be either the + root of the repository or the root of the coding standards directory. + + Required? false + Position? named + Default value "$PSScriptRoot../../" + Accept pipeline input? false + Accept wildcard characters? false + + -ProcessResults [] + + Required? true + Position? named + Default value False + Accept pipeline input? false + Accept wildcard characters? false + + -ResultsFile + Configures tool to process results. + + Required? true + Position? named + Default value + Accept pipeline input? false + Accept wildcard characters? false + + -ResultsDirectory + Where results should be written to. + + Required? false + Position? named + Default value (Get-Location) + Accept pipeline input? false + Accept wildcard characters? false + + -ReleaseTag + The release tag to use for the test. + + Required? false + Position? named + Default value current + Accept pipeline input? false + Accept wildcard characters? false + + -Suite + Which suite to run. + + Required? true + Position? named + Default value + Accept pipeline input? false + Accept wildcard characters? false + + -Platform + The platform to run on. This is just a descriptive string. + + Required? false + Position? named + Default value $PSVersionTable.Platform + Accept pipeline input? false + Accept wildcard characters? false + + -Language + The language to run on. + + Required? true + Position? named + Default value + Accept pipeline input? false + Accept wildcard characters? false + + + This cmdlet supports the common parameters: Verbose, Debug, + ErrorAction, ErrorVariable, WarningAction, WarningVariable, + OutBuffer, PipelineVariable, and OutVariable. For more information, see + about_CommonParameters (https://go.microsoft.com/fwlink/?LinkID=113216). + +INPUTS + +OUTPUTS + + +RELATED LINKS + +``` +## Example Usage + +Run the `cert` suite for `c` from within the Coding Standards repository. 
+ +``` +.\scripts\performance_testing\Test-ReleasePerformance.ps1 -RunTests -DatabaseArchive ..\codeql-coding-standards-release-engineering\data\commaai-openpilot-72d1744d830bc249d8761a1d843a98fb0ced49fe-cpp.zip -Suite cert -Language c +``` + +Run the `cert` suite for `c` on an external release, specifying a `-ReleaseTag` as well. The `-ReleaseTag` parameter does not have to match the code you are testing, it is for organization purposes only. + +``` +.\scripts\performance_testing\Test-ReleasePerformance.ps1 -RunTests -DatabaseArchive ..\codeql-coding-standards-release-engineering\data\commaai-openpilot-72d1744d830bc249d8761a1d843a98fb0ced49fe-cpp.zip -Suite cert -Language c -ReleaseTag "2.16.0" -CodingStandardsPath "Downloads\code-scanning-cpp-query-pack-2.16.0\codeql-coding-standards\" +``` + + + +## Outputs + +The `Test-ReleasePerformance.ps1` produces three files in the `ResultsDirectory` location, which defaults `performance_tests` within the current working directory. + +- `suite=$Suite,datum=queries.csv` - Which contains the run time for each query. +- `suite=$Suite,datum=evaluator-log.json` - Which contains the evaluator log. +- `suite=$Suite,datum=sarif.sarif` - The sarif log file for the run. + +## Profiling Predicates + +If you wish to extract predicate-level profiling information, you may use the script `profile_predicates.py` located in this directory. It requires Python3 with `pandas` and `numpy` to work. If you wish to use a virtual environment you may create one as follows on a Unix-based platform: + +``` +python -mvenv venv +source venv/bin/activate +pip install pandas numpy +``` + +The script works by summarizing ALL of the csv and json files within a given directory. Thus, if you want to profile multiple suites or multiple releases you may place the files within that directory by repeatedly invoking `Test-ReleasePerformance.ps1.` Make sure to supply the same output directory each time so that the results accumulate in the correct location. 
<#
.SYNOPSIS
    Test release performance. Generates outputs 2 csv files containing the slowest predicates as well as the queries
    causing work. Note that the method of computing query execution time is inaccurate due to the shared nature of
    predicates.

.DESCRIPTION
    Test release performance. Generates outputs 2 csv files containing the slowest predicates as well as the queries
    causing work. Note that the method of computing query execution time is inaccurate due to the shared nature of
    predicates.
#>
param(
    # Configures tool to run tests.
    [Parameter(Mandatory, ParameterSetName = 'RunTests')]
    [switch]
    $RunTests,

    # Specifies the number of threads to use.
    # NOTE: was previously typed [string]; it is a count, so [int] — string
    # arguments such as "5" still coerce, keeping callers compatible.
    [Parameter(Mandatory = $false, ParameterSetName = 'RunTests')]
    [int]
    $Threads = 5,

    # Specifies the database to use for testing. Should be a zipped database
    # directory.
    [Parameter(Mandatory, ParameterSetName = 'RunTests')]
    [string]
    $DatabaseArchive,

    # The timestamp to use for the test.
    [Parameter(Mandatory = $false, ParameterSetName = 'RunTests')]
    [string]
    $TestTimestamp = (Get-Date -Format "yyyy-MM-dd_HH-mm-ss"),

    # The path to the coding standards root directory. This can be either the
    # root of the repository or the root of the coding standards directory.
    [Parameter(Mandatory = $false, ParameterSetName = 'RunTests')]
    [string]
    $CodingStandardsPath = "$PSScriptRoot/../../",

    # Configures tool to process results.
    # (Comment was previously attached to -ResultsFile by mistake.)
    [Parameter(Mandatory, ParameterSetName = 'ProcessResults')]
    [switch]
    $ProcessResults,

    # The previously generated run log to process instead of running tests.
    [Parameter(Mandatory, ParameterSetName = 'ProcessResults')]
    [string]
    $ResultsFile,

    # Where results should be written to.
    [Parameter(Mandatory = $false)]
    [string]
    $ResultsDirectory = (Join-Path (Get-Location) "performance_tests"),

    # The release tag to use for the test.
    [Parameter(Mandatory = $false)]
    [string]
    $ReleaseTag = "current",

    # Which suite to run.
    [Parameter(Mandatory)]
    [ValidateSet('cert', 'misra', 'autosar')]
    [string]
    $Suite,

    # The platform to run on. This is just a descriptive string.
    [Parameter(Mandatory = $false)]
    [string]
    $Platform = $PSVersionTable.Platform,

    # The language to run on.
    [Parameter(Mandatory)]
    [ValidateSet('c', 'cpp')]
    [string]
    $Language
)

Import-Module -Name "$PSScriptRoot/../PSCodingStandards/CodingStandards"

. "$PSScriptRoot/Config.ps1"
. "$PSScriptRoot/Get-TestTmpDirectory.ps1"
. "$PSScriptRoot/Convert-DurationStringToMs.ps1"
. "$PSScriptRoot/Get-DurationString.ps1"
. "$PSScriptRoot/Get-QueryString.ps1"

# Test Programs
Write-Host "Checking 'codeql' program...." -NoNewline
Test-ProgramInstalled -Program "codeql"
Write-Host -ForegroundColor ([ConsoleColor]2) "OK"

$CODEQL_VERSION = (codeql version --format json | ConvertFrom-Json).version

Write-Host "Checking 'codeql' version = $REQUIRED_CODEQL_VERSION...." -NoNewline
if (-Not ($CODEQL_VERSION -eq $REQUIRED_CODEQL_VERSION)) {
    throw "Invalid CodeQL version $CODEQL_VERSION. Please install $REQUIRED_CODEQL_VERSION."
}
Write-Host -ForegroundColor ([ConsoleColor]2) "OK"

# Create the results/work directory
$RESULTS_DIRECTORY = Get-TestTmpDirectory
New-Item -Path $RESULTS_DIRECTORY -ItemType Directory | Out-Null

Write-Host "Writing Results to $RESULTS_DIRECTORY"

if (-Not $ProcessResults) {

    $DB_UNPACKED_TMP = Join-Path $RESULTS_DIRECTORY db-unpacked
    $DB_UNPACKED = Join-Path $RESULTS_DIRECTORY db
    $DB_FILENAME = (Get-Item $DatabaseArchive).Name
    Write-Host "Copying database to $RESULTS_DIRECTORY..."
    # Copy and unpack the dataset
    Copy-Item -Path $DatabaseArchive -Destination $RESULTS_DIRECTORY

    Expand-Archive -LiteralPath $RESULTS_DIRECTORY\$DB_FILENAME -DestinationPath $DB_UNPACKED_TMP

    # The archive contains a single wrapping directory; hoist its contents so
    # $DB_UNPACKED is the database root codeql expects.
    foreach ($f in Get-ChildItem $DB_UNPACKED_TMP) {
        Move-Item -Path $f -Destination $DB_UNPACKED
    }

    $SARIF_OUT = Join-Path $RESULTS_DIRECTORY "suite=$Suite,datum=sarif.sarif"
    $EvaluatorLog = Join-Path $RESULTS_DIRECTORY "evaluator-log.json"
    $EvaluatorResults = Join-Path $RESULTS_DIRECTORY "evaluator-results.json"

    $stdOut = Join-Path ([System.IO.Path]::GetTempPath()) ([System.Guid]::NewGuid())
    $stdErr = Join-Path ([System.IO.Path]::GetTempPath()) ([System.Guid]::NewGuid())

    Write-Host "Standard Out Buffered to: $stdOut"
    Write-Host "Standard Error Buffered to: $stdErr"

    $SuiteRoot = Join-Path $Language $Suite "src" "codeql-suites"
    # For some reason nothing is written to stdout so we use stderr
    $SuitePath = Join-Path $CodingStandardsPath $SuiteRoot ($Suite + "-default.qls")
    $procDetails = Start-Process -FilePath "codeql" -PassThru -NoNewWindow -Wait -ArgumentList "database analyze --rerun --threads $Threads --debug --tuple-counting --evaluator-log=$EvaluatorLog --format sarif-latest --search-path $(Resolve-Path $CodingStandardsPath) --output $SARIF_OUT $DB_UNPACKED $SuitePath" -RedirectStandardOutput $stdOut -RedirectStandardError $stdErr

    if (-Not $procDetails.ExitCode -eq 0) {
        Get-Content $stdErr | Out-String | Write-Host
        Write-Host -ForegroundColor ([ConsoleColor]4) "FAILED"
        throw "Performance suite failed to run. Will not report data."
    }
    else {
        Write-Host -ForegroundColor ([ConsoleColor]2) "OK"
        $runData = $stdErr
    }

}
else {
    $runData = $ResultsFile
}

# Step 1: Compile data from queries
#
$PERFORMANCE_DATA = @()

foreach ($l in Get-Content $runData) {

    # skip lines that aren't ones we can process
    if (-Not $l.Contains("Evaluation done;")) {
        continue
    }

    $durationString = Get-DurationString -LogLine $l
    $queryString = Get-QueryString -LogLine $l
    $timeInMs = Convert-DurationStringToMs -DurationString $durationString

    $row = @{
        "Query"    = $queryString;
        "TimeInMs" = $timeInMs;
    }

    Write-Host "LOG: Duration=$durationString; TimeInMs=$timeInMs; Query=$queryString"

    $PERFORMANCE_DATA += $row
}

# Step 2: Compile predicate data
#
# Only possible when we ran the tests ourselves: in -ProcessResults mode
# there is no evaluator log to summarize. (Previously this step ran
# unconditionally and referenced variables that were never set in that mode.)
if (-Not $ProcessResults) {
    # the data must first be transformed
    $procDetails = Start-Process -FilePath "codeql" -PassThru -NoNewWindow -Wait -ArgumentList "generate log-summary $EvaluatorLog $EvaluatorResults"

    if (-Not $procDetails.ExitCode -eq 0) {
        Write-Host -ForegroundColor ([ConsoleColor]4) "FAILED"
        throw "Did not find performance results summary."
    }
    else {
        Write-Host -ForegroundColor ([ConsoleColor]2) "OK"
    }
}

# Step 3: Write out granular performance data
#
# We root this in $ResultsDirectory/release-$Release-/platform-/$Suite.csv

# Create the Directory (and it's parents)
$outputDirectory = (Join-Path $ResultsDirectory "release=$ReleaseTag,testedOn=$TestTimestamp" "platform=$Platform" "language=$Language")
$outputDirectorySARIF = $outputDirectory

$queryOutputFile = Join-Path $outputDirectory "suite=$Suite,datum=queries.csv"
$evaluatorResultsFile = Join-Path $outputDirectory "suite=$Suite,datum=evaluator-log.json"

# Create the output directory.
# note there is no need to create the sarif out directory -- it will be created
# by the copy command, below.
New-Item -Type Directory -Path $outputDirectory -ErrorAction Ignore | Out-Null

# Copy processed results out -- these artifacts only exist when we ran the
# tests in this invocation.
if (-Not $ProcessResults) {
    Copy-Item -Path $EvaluatorResults -Destination $evaluatorResultsFile
    Copy-Item -Path $SARIF_OUT -Destination $outputDirectorySARIF
}

# Write out the report
Write-Host "Writing report to $queryOutputFile"
foreach ($r in $PERFORMANCE_DATA) {
    [PSCustomObject]$r | Export-CSV -Path $queryOutputFile -Append -NoTypeInformation
}
def path_to_tuple(path):
    """Decode run metadata from a results-file path.

    Expected layout (relative to the results root):
        release=<tag>,testedOn=<ts>/platform=<p>/language=<l>/suite=<s>,datum=evaluator-log.json

    Returns (release, testedOn, platform, language, suite) as strings.
    """
    parts = path.parts

    part_suite = parts[-1]
    part_language = parts[-2]
    part_platform = parts[-3]
    part_release = parts[-4]

    release = part_release.split(",")[0].split("=")[1]
    testedOn = part_release.split(",")[1].split("=")[1]
    platform = part_platform.split("=")[1]
    language = part_language.split("=")[1]
    suite = part_suite.split(".")[0].split("=")[1].split(",")[0]

    return release, testedOn, platform, language, suite


def select_latest_runs(root):
    """Map (release, platform, suite, language) -> run info for the NEWEST run only.

    We only process the LATEST run for a given release x suite x platform, so
    loop over all candidate files and keep a file only if its `testedOn`
    timestamp is newer than the one already recorded.

    BUG FIX vs. the original: the membership test compared the `release`
    string against tuple keys (always false, so every file overwrote the
    previous entry), and the timestamp comparison was inverted (it would have
    replaced a newer run with an older one).
    """
    datafiles = {}
    for f in root.glob("release*/**/*datum=evaluator-log.json"):
        release, testedOn, platform, language, suite = path_to_tuple(f)

        hashEntry = {
            "release": release,
            "testedOn": testedOn,
            "platform": platform,
            "language": language,
            "suite": suite,
            "dataFile": f,
        }

        key = (release, platform, suite, language)
        # Timestamps are "yyyy-MM-dd_HH-mm-ss", so lexicographic comparison
        # is chronological.
        if key not in datafiles or testedOn > datafiles[key]["testedOn"]:
            datafiles[key] = hashEntry
    return datafiles


def collect_predicate_rows(datafiles):
    """Build a per-predicate timing DataFrame from the selected evaluator logs.

    Each evaluator log is a stream of JSON objects separated by blank lines;
    only fully-computed predicates (COMPUTE_SIMPLE, excluding the synthetic
    "output" predicate) contribute rows.
    """
    rows = {
        'Release': [],
        'Run': [],
        'Platform': [],
        'Language': [],
        'Suite': [],
        'Predicate': [],
        'Execution_Time_Ms': [],
    }

    for entry in datafiles.values():
        print(f"Loading {str(entry['dataFile'])}...")

        # Load the whole log and split into individual JSON documents.
        with open(entry['dataFile'], 'r') as f:
            json_line_objects = f.read().split('\n\n')

        print("Done.")

        for json_line_object in json_line_objects:
            # Cheap substring screen before paying for json.loads.
            if not ("predicateName" in json_line_object and "COMPUTE_SIMPLE" in json_line_object):
                continue

            json_object = json.loads(json_line_object)

            if "predicateName" not in json_object:
                continue
            if json_object["predicateName"] == "output":
                continue
            if json_object["evaluationStrategy"] != "COMPUTE_SIMPLE":
                continue

            rows['Release'].append(entry["release"])
            rows['Run'].append(entry["testedOn"])
            rows['Platform'].append(entry["platform"])
            rows['Language'].append(entry["language"])
            rows['Suite'].append(entry["suite"])
            rows['Predicate'].append(json_object["predicateName"])
            rows['Execution_Time_Ms'].append(json_object["millis"])

    return pd.DataFrame(rows)


def summarize(summary_df):
    """Aggregate predicate timings per (Release, Platform, Language)."""
    columns = [
        'Release',
        'Platform',
        'Language',
        'Total_Serialized_Execution_Time_Ms',
        'Mean_Predicate_Execution_Time_Ms',
        'Median_Predicate_Execution_Time_Ms',
        'Standard_Deviation_Ms',
        'Total_Serialized_Execution_Time_s',
        'Mean_Query_Execution_Time_s',
        'Median_Predicate_Execution_Time_s',
        'Percentile95_Ms',
        'Number_of_Predicates',
    ]

    frames = []
    for _, df_group in summary_df.groupby(['Release', 'Platform', 'Language']):
        release = df_group["Release"].iloc[0]
        platform = df_group["Platform"].iloc[0]
        language = df_group["Language"].iloc[0]

        print(f"Processing Platform={platform}, Language={language}, Release={release}")

        times = df_group["Execution_Time_Ms"]
        execution_time = times.sum()
        execution_time_mean = times.mean()
        execution_time_median = times.median()

        frames.append(pd.DataFrame({
            'Release': [release],
            'Platform': [platform],
            'Language': [language],
            'Total_Serialized_Execution_Time_Ms': [execution_time],
            'Mean_Predicate_Execution_Time_Ms': [execution_time_mean],
            'Median_Predicate_Execution_Time_Ms': [execution_time_median],
            'Standard_Deviation_Ms': [times.std()],
            'Total_Serialized_Execution_Time_s': [execution_time / 1000],
            'Mean_Query_Execution_Time_s': [execution_time_mean / 1000],
            'Median_Predicate_Execution_Time_s': [execution_time_median / 1000],
            'Percentile95_Ms': [times.quantile(.95)],
            'Number_of_Predicates': [len(df_group)],
        }))

    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)


def write_slow_logs(root, summary_df, performance_df):
    """Write, per release/platform/language, the predicates at or above the 95th percentile."""
    for _, row in performance_df.iterrows():
        release = row["Release"]
        platform = row["Platform"]
        language = row["Language"]
        percentile_95 = row["Percentile95_Ms"]

        rpl_df = summary_df[(summary_df["Release"] == release)
                            & (summary_df["Platform"] == platform)
                            & (summary_df["Language"] == language)]
        g95 = rpl_df[rpl_df["Execution_Time_Ms"] >= percentile_95]
        g95 = g95.sort_values(by='Execution_Time_Ms', ascending=False)

        g95.to_csv(root.joinpath(
            f"slow-log,datum=predicates,release={release},platform={platform},language={language}.csv"),
            index=False)


def main(root):
    """Profile predicate execution times for all result sets under `root`."""
    datafiles = select_latest_runs(root)
    summary_df = collect_predicate_rows(datafiles)
    performance_df = summarize(summary_df)

    # write out the high level performance summary
    performance_df.to_csv(root.joinpath('performance-history,datum=predicate.csv'), index=False)

    # write out all predicates for every suite that are greater than the 95th
    # percentile
    write_slow_logs(root, summary_df, performance_df)


if __name__ == "__main__":
    # root_path is parsed from sys.argv at the top of this script.
    main(root_path)
It may be run on external builds of Coding Standards through the `-CodingStandardsPath` flag, but it should be run from a fresh checkout of this repository. +Performance testing may be accomplished by using the performance testing tool found in this directory, `Test-ReleasePerformance.ps1`. These results may be further processed to provide predicate level performance details by using the script `profile_predicates.py`, which is documented in the [Profiling Predicates section.](#profiling-predicates), below. + +Note that this script depends on other files from this repository. It may be run on external builds of Coding Standards through the `-CodingStandardsPath` flag, but it should be run from a fresh checkout of this repository. This script requires `pwsh` to be installed. Note that the Windows native Powershell is not sufficient and you should download PowerShell Core. @@ -169,7 +171,7 @@ Run the `cert` suite for `c` from within the Coding Standards repository. .\scripts\performance_testing\Test-ReleasePerformance.ps1 -RunTests -DatabaseArchive ..\codeql-coding-standards-release-engineering\data\commaai-openpilot-72d1744d830bc249d8761a1d843a98fb0ced49fe-cpp.zip -Suite cert -Language c ``` -Run the `cert` suite for `c` on an external release, specifying a `-ReleaseTag` as well. The `-ReleaseTag` parameter does not have to match the code you are testing, it is for organization purposes only. +Run the `cert` suite for `c` on an external release, specifying a `-ReleaseTag` as well. The `-ReleaseTag` parameter is used for configuring performance tool to generate files within subdirectories with the `-ReleaseTag` as a prefix. For example, specifying `-ReleaseTag "2.16.0"` will cause files to be generated in the `release=2.16.0` directory. 
``` .\scripts\performance_testing\Test-ReleasePerformance.ps1 -RunTests -DatabaseArchive ..\codeql-coding-standards-release-engineering\data\commaai-openpilot-72d1744d830bc249d8761a1d843a98fb0ced49fe-cpp.zip -Suite cert -Language c -ReleaseTag "2.16.0" -CodingStandardsPath "Downloads\code-scanning-cpp-query-pack-2.16.0\codeql-coding-standards\"