From e56f3c3c0d8966bc5769f048d1097427fc0064b1 Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Mon, 14 Aug 2023 12:28:26 -0600 Subject: [PATCH 1/6] RUBY-3314 Implement variable iterations for benchmarks --- .gitignore | 1 + profile/benchmarking.rb | 97 +++++++++------------------------- profile/benchmarking/helper.rb | 51 +++++++++++++++--- profile/benchmarking/micro.rb | 35 ++++-------- 4 files changed, 79 insertions(+), 105 deletions(-) diff --git a/.gitignore b/.gitignore index a60fb2cf76..7af488f10d 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,4 @@ gemfiles/*.gemfile.lock .env.private* .env build +profile/benchmarking/data diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb index a054b3906b..43797982b2 100644 --- a/profile/benchmarking.rb +++ b/profile/benchmarking.rb @@ -1,5 +1,4 @@ # frozen_string_literal: true -# rubocop:todo all # Copyright (C) 2015-2020 MongoDB Inc. # @@ -23,107 +22,59 @@ require_relative 'benchmarking/parallel' module Mongo - # Module with all functionality for running driver benchmark tests. # # @since 2.2.3 module Benchmarking - extend self - # The current path. - # - # @return [ String ] The current path. - # - # @since 2.2.3 - CURRENT_PATH = File.expand_path(File.dirname(__FILE__)).freeze - - # The path to data files used in Benchmarking tests. - # # @return [ String ] Path to Benchmarking test files. - # - # @since 2.2.3 - DATA_PATH = [CURRENT_PATH, 'benchmarking', 'data'].join('/').freeze + DATA_PATH = [ __dir__, 'benchmarking', 'data' ].join('/').freeze - # The file containing the single tweet document. - # - # @return [ String ] The file containing the tweet document. - # - # @since 2.2.3 - TWEET_DOCUMENT_FILE = [DATA_PATH, 'TWEET.json'].join('/').freeze + # @return [ String ] The file containing the single tweet document. + TWEET_DOCUMENT_FILE = [ DATA_PATH, 'TWEET.json' ].join('/').freeze - # The file containing the single small document. - # - # @return [ String ] The file containing the small document. - # - # @since 2.2.3 - SMALL_DOCUMENT_FILE = [DATA_PATH, 'SMALL_DOC.json'].join('/').freeze + # @return [ String ] The file containing the single small document. + SMALL_DOCUMENT_FILE = [ DATA_PATH, 'SMALL_DOC.json' ].join('/').freeze - # The file containing the single large document. - # - # @return [ String ] The file containing the large document. - # - # @since 2.2.3 - LARGE_DOCUMENT_FILE = [DATA_PATH, 'LARGE_DOC.json'].join('/').freeze + # @return [ String ] The file containing the single large document. + LARGE_DOCUMENT_FILE = [ DATA_PATH, 'LARGE_DOC.json' ].join('/').freeze - # The file to upload when testing GridFS. - # - # @return [ String ] The file containing the GridFS test data. - # - # @since 2.2.3 - GRIDFS_FILE = [DATA_PATH, 'GRIDFS_LARGE'].join('/').freeze + # @return [ String ] The file to upload when testing GridFS. + GRIDFS_FILE = [ DATA_PATH, 'GRIDFS_LARGE' ].join('/').freeze - # The file path and base name for the LDJSON files. - # # @return [ String ] The file path and base name for the LDJSON files. - # - # @since 2.2.3 - LDJSON_FILE_BASE = [DATA_PATH, 'LDJSON_MULTI', 'LDJSON'].join('/').freeze + LDJSON_FILE_BASE = [ DATA_PATH, 'LDJSON_MULTI', 'LDJSON' ].join('/').freeze - # The file path and base name for the outputted LDJSON files. - # - # @return [ String ] The file path and base name for the outputted LDJSON files. - # - # @since 2.2.3 - LDJSON_FILE_OUTPUT_BASE = [DATA_PATH, 'LDJSON_MULTI', 'output', 'LDJSON'].join('/').freeze + # @return [ String ] The file path and base name for the emitted LDJSON files. + LDJSON_FILE_OUTPUT_BASE = [ DATA_PATH, 'LDJSON_MULTI', 'output', 'LDJSON' ].join('/').freeze - # The file path and base name for the GRIDFS files to upload. - # # @return [ String ] The file path and base name for the GRIDFS files to upload. - # - # @since 2.2.3 - GRIDFS_MULTI_BASE = [DATA_PATH, 'GRIDFS_MULTI', 'file'].join('/').freeze + GRIDFS_MULTI_BASE = [ DATA_PATH, 'GRIDFS_MULTI', 'file' ].join('/').freeze - # The file path and base name for the outputted GRIDFS downloaded files. - # - # @return [ String ] The file path and base name for the outputted GRIDFS downloaded files. - # - # @since 2.2.3 - GRIDFS_MULTI_OUTPUT_BASE = [DATA_PATH, 'GRIDFS_MULTI', 'output', 'file-output'].join('/').freeze + # @return [ String ] The file path and base name for the emitted GRIDFS downloaded files. + GRIDFS_MULTI_OUTPUT_BASE = [ DATA_PATH, 'GRIDFS_MULTI', 'output', 'file-output' ].join('/').freeze - # The default number of test repetitions. - # # @return [ Integer ] The number of test repetitions. - # - # @since 2.2.3 - TEST_REPETITIONS = 100.freeze + TEST_REPETITIONS = 100 - # The number of default warmup repetitions of the test to do before - # recording times. - # - # @return [ Integer ] The default number of warmup repetitions. + # Convenience helper for loading the single tweet document. # - # @since 2.2.3 - WARMUP_REPETITIONS = 10.freeze - + # @return [ Hash ] a single parsed JSON document def tweet_document Benchmarking.load_file(TWEET_DOCUMENT_FILE).first end + # Convenience helper for loading the single small document. + # + # @return [ Hash ] a single parsed JSON document def small_document Benchmarking.load_file(SMALL_DOCUMENT_FILE).first end + # Convenience helper for loading the single large document. + # + # @return [ Hash ] a single parsed JSON document def large_document Benchmarking.load_file(LARGE_DOCUMENT_FILE).first end diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb index 11d9f670ab..654eddbfd9 100644 --- a/profile/benchmarking/helper.rb +++ b/profile/benchmarking/helper.rb @@ -1,11 +1,8 @@ # frozen_string_literal: true -# rubocop:todo all module Mongo - # Helper functions used by benchmarking tasks module Benchmarking - extend self # Load a json file and represent each document as a Hash. @@ -19,7 +16,7 @@ module Benchmarking # # @since 2.2.3 def load_file(file_name) - File.open(file_name, "r") do |f| + File.open(file_name, 'r') do |f| f.each_line.collect do |line| parse_json(line) end @@ -39,8 +36,47 @@ def load_file(file_name) # @since 2.2.3 def parse_json(document) JSON.parse(document).tap do |doc| - if doc['_id'] && doc['_id']['$oid'] - doc['_id'] = BSON::ObjectId.from_string(doc['_id']['$oid']) + doc['_id'] = BSON::ObjectId.from_string(doc['_id']['$oid']) if doc['_id'] && doc['_id']['$oid'] + end + end + + # The spec requires that most benchmarks use a variable number of + # iterations, defined as follows: + # + # * iterations should loop for at least 1 minute cumulative execution + # time + # * iterations should stop after 100 iterations or 5 minutes cumulative + # execution time, whichever is shorter + # + # This method will yield once for each iteration. + # + # @param [ Integer ] max_iterations the maximum number of iterations to + # attempt (default: 100) + # @param [ Integer ] min_time the minimum number of seconds to spend + # iterating + # @param [ Integer ] max_time the maximum number of seconds to spend + # iterating. + # + # @return [ Array ] the timings for each iteration + def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_time: 5 * 60, &block) + [].tap do |results| + iteration_count = 0 + cumulative_time = 0 + + loop do + timing = Benchmark.realtime(&block) + + iteration_count += 1 + cumulative_time += timing + results.push timing + + # always stop after the maximum time has elapsed, regardless of + # iteration count. + break if cumulative_time > max_time + + # otherwise, break if the minimum time has elapsed, and the maximum + # number of iterations have been reached. + break if cumulative_time >= min_time && iteration_count >= max_iterations end end end @@ -56,7 +92,8 @@ def parse_json(document) # # @since 2.2.3 def median(values) - values.sort![values.size/2-1] + i = (values.size / 2) - 1 + values.sort[i] end end end diff --git a/profile/benchmarking/micro.rb b/profile/benchmarking/micro.rb index 2f560bb310..99edf2ae10 100644 --- a/profile/benchmarking/micro.rb +++ b/profile/benchmarking/micro.rb @@ -1,5 +1,4 @@ # frozen_string_literal: true -# rubocop:todo all # Copyright (C) 2015-2020 MongoDB Inc. # @@ -17,13 +16,11 @@ module Mongo module Benchmarking - # These tests focus on BSON encoding and decoding; they are client-side only and # do not involve any transmission of data to or from the server. # # @since 2.2.3 module Micro - extend self # Run a micro benchmark test. @@ -38,10 +35,11 @@ module Micro # # @since 2.2.3 def run(type, action, repetitions = Benchmarking::TEST_REPETITIONS) - file_name = type.to_s << "_bson.json" + file_name = type.to_s << '_bson.json' GC.disable - file_path = [Benchmarking::DATA_PATH, file_name].join('/') + file_path = [ Benchmarking::DATA_PATH, file_name ].join('/') puts "#{action} : #{send(action, file_path, repetitions)}" + GC.enable end # Run an encoding micro benchmark test. @@ -59,16 +57,8 @@ def encode(file_name, repetitions) data = Benchmarking.load_file(file_name) document = BSON::Document.new(data.first) - # WARMUP_REPETITIONS.times do - # doc.to_bson - # end - - results = repetitions.times.collect do - Benchmark.realtime do - 10_000.times do - document.to_bson - end - end + results = Benchmarking.benchmark(max_iterations: repetitions) do + 10_000.times { document.to_bson } end Benchmarking.median(results) end @@ -88,18 +78,13 @@ def decode(file_name, repetitions) data = Benchmarking.load_file(file_name) buffer = BSON::Document.new(data.first).to_bson - # WARMUP_REPETITIONS.times do - # BSON::Document.from_bson(buffers.shift) - # end - - results = repetitions.times.collect do - Benchmark.realtime do - 10_000.times do - BSON::Document.from_bson(buffer) - buffer.rewind! - end + results = Benchmarking.benchmark(max_iterations: repetitions) do + 10_000.times do + BSON::Document.from_bson(buffer) + buffer.rewind! end end + Benchmarking.median(results) end end From 897fc1c58c1188da0d19746767b28f7381a7406b Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Mon, 14 Aug 2023 13:52:28 -0600 Subject: [PATCH 2/6] report percentiles along with the median --- Rakefile | 10 +++--- profile/benchmarking/helper.rb | 60 ++++++++++++++++++++++++++++++++-- profile/benchmarking/micro.rb | 58 ++++++++++++++++++-------------- 3 files changed, 94 insertions(+), 34 deletions(-) diff --git a/Rakefile b/Rakefile index f64a790c12..e954ca3f9c 100644 --- a/Rakefile +++ b/Rakefile @@ -137,22 +137,20 @@ require_relative "profile/benchmarking" namespace :benchmark do desc "Run the driver benchmark tests." - namespace :micro do - desc "Run the common driver micro benchmarking tests" + namespace :bson do + desc "Run the bson benchmarking tests" namespace :flat do desc "Benchmarking for flat bson documents." # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json. task :encode do - puts "MICRO BENCHMARK:: FLAT:: ENCODE" - Mongo::Benchmarking::Micro.run(:flat, :encode) + Mongo::Benchmarking.report({ bson: { flat: { encode: Mongo::Benchmarking::Micro.run(:flat, :encode) } } }) end # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json. task :decode do - puts "MICRO BENCHMARK:: FLAT:: DECODE" - Mongo::Benchmarking::Micro.run(:flat, :decode) + Mongo::Benchmarking.report({ bson: { flat: { decode: Mongo::Benchmarking::Micro.run(:flat, :encode) } } }) end end diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb index 654eddbfd9..73b4d8bb84 100644 --- a/profile/benchmarking/helper.rb +++ b/profile/benchmarking/helper.rb @@ -81,19 +81,73 @@ def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_ end end + # Formats and displays a report of the given results. + # + # @param [ Hash ] results the results of a benchmarking run. + # @param [ Integer ] indent how much the report should be indented. + # @param [ Array ] percentiles the percentile values to report + def report(results, indent: 0, percentiles: [ 10, 25, 50, 75, 90, 95, 98, 99 ]) + indentation = ' ' * indent + results.each do |key, value| + puts "#{indentation}#{key}:" + if value.is_a?(Hash) + report(value, indent: indent + 2, percentiles: percentiles) + else + ps = Percentiles.new(value) + puts "#{indentation} median: %g" % [ ps[50] ] + puts "#{indentation} percentiles:" + percentiles.each do |pct| + puts "#{indentation} %g: %g" % [ pct, ps[pct] ] + end + end + end + end + + # A utility class for returning the list item at a given percentile + # value. + class Percentiles + # @return [ Array ] the sorted list of numbers to consider + attr_reader :list + + # Create a new Percentiles object that encapsulates the given list of + # numbers. + # + # @param [ Array ] list the list of numbers to considier + def initialize(list) + @list = list.sort + end + + # Finds and returns the element in the list that represents the given + # percentile value. + # + # @param [ Number ] percentile a number in the range [1,100] + # + # @return [ Number ] the element of the list for the given percentile. + def [](percentile) + i = (list.size * percentile / 100.0).ceil - 1 + list[i] + end + end + # Get the median of values in a list. # # @example Get the median. # Benchmarking.median(values) # - # @param [ Array ] The values to get the median of. + # @param [ Array ] values The values to get the median of. # # @return [ Numeric ] The median of the list. - # - # @since 2.2.3 def median(values) i = (values.size / 2) - 1 values.sort[i] end + + # Runs a given block with GC disabled. + def without_gc + GC.disable + yield + ensure + GC.enable + end end end diff --git a/profile/benchmarking/micro.rb b/profile/benchmarking/micro.rb index 99edf2ae10..80899983c9 100644 --- a/profile/benchmarking/micro.rb +++ b/profile/benchmarking/micro.rb @@ -18,28 +18,43 @@ module Mongo module Benchmarking # These tests focus on BSON encoding and decoding; they are client-side only and # do not involve any transmission of data to or from the server. - # - # @since 2.2.3 module Micro extend self + # Runs all of the benchmarks specified by the given mapping. + # + # @example Run a collection of benchmarks. + # Benchmarking::Micro.run_all( + # flat: %i[ encode decode ], + # deep: %i[ encode decode ], + # full: %i[ encode decode ] + # ) + # + # @return [ Hash ] a hash of the results for each benchmark + def run_all(map) + {}.tap do |results| + map.each do |type, actions| + results[type] = {} + + actions.each do |action| + results[type][action] = run(type, action) + end + end + end + end + # Run a micro benchmark test. # # @example Run a test. # Benchmarking::Micro.run(:flat) # # @param [ Symbol ] type The type of test to run. - # @param [ Integer ] repetitions The number of test repetitions. - # - # @return [ Numeric ] The test results. + # @param [ :encode | :decode ] action The action to perform. # - # @since 2.2.3 - def run(type, action, repetitions = Benchmarking::TEST_REPETITIONS) - file_name = type.to_s << '_bson.json' - GC.disable - file_path = [ Benchmarking::DATA_PATH, file_name ].join('/') - puts "#{action} : #{send(action, file_path, repetitions)}" - GC.enable + # @return [ Array ] The test results for each iteration + def run(type, action) + file_path = File.join(Benchmarking::DATA_PATH, "#{type}_bson.json") + Benchmarking.without_gc { send(action, file_path) } end # Run an encoding micro benchmark test. @@ -50,17 +65,14 @@ def run(type, action, repetitions = Benchmarking::TEST_REPETITIONS) # @param [ String ] file_name The name of the file with data for the test. # @param [ Integer ] repetitions The number of test repetitions. # - # @return [ Numeric ] The median of the results. - # - # @since 2.2.3 - def encode(file_name, repetitions) + # @return [ Array ] The list of the results for each iteration + def encode(file_name) data = Benchmarking.load_file(file_name) document = BSON::Document.new(data.first) - results = Benchmarking.benchmark(max_iterations: repetitions) do + Benchmarking.benchmark do 10_000.times { document.to_bson } end - Benchmarking.median(results) end # Run a decoding micro benchmark test. @@ -71,21 +83,17 @@ def encode(file_name, repetitions) # @param [ String ] file_name The name of the file with data for the test. # @param [ Integer ] repetitions The number of test repetitions. # - # @return [ Numeric ] The median of the results. - # - # @since 2.2.3 - def decode(file_name, repetitions) + # @return [ Array ] The list of the results for each iteration + def decode(file_name) data = Benchmarking.load_file(file_name) buffer = BSON::Document.new(data.first).to_bson - results = Benchmarking.benchmark(max_iterations: repetitions) do + Benchmarking.benchmark do 10_000.times do BSON::Document.from_bson(buffer) buffer.rewind! end end - - Benchmarking.median(results) end end end From 072b56a3cbc122eaa223e50ca226b9198ca1639c Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Mon, 14 Aug 2023 15:10:30 -0600 Subject: [PATCH 3/6] rename Benchmarking::Micro to Benchmarking::BSON --- Rakefile | 36 ++++++++++++++-------- profile/benchmarking.rb | 2 +- profile/benchmarking/{micro.rb => bson.rb} | 22 ++++++------- profile/benchmarking/helper.rb | 2 +- 4 files changed, 36 insertions(+), 26 deletions(-) rename profile/benchmarking/{micro.rb => bson.rb} (85%) diff --git a/Rakefile b/Rakefile index e954ca3f9c..49733ed4af 100644 --- a/Rakefile +++ b/Rakefile @@ -135,22 +135,32 @@ require_relative "profile/benchmarking" # Some require data files, available from the drivers team. See the comments above each task for details." namespace :benchmark do - desc "Run the driver benchmark tests." + desc "Run the bson benchmarking tests" + task :bson do + puts "BSON BENCHMARK" + Mongo::Benchmarking.report({ + bson: Mongo::Benchmarking::BSON.run_all({ + flat: %i[ encode decode ], + deep: %i[ encode decode ], + full: %i[ encode decode ], + }) + }) + end namespace :bson do - desc "Run the bson benchmarking tests" - namespace :flat do desc "Benchmarking for flat bson documents." # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json. task :encode do - Mongo::Benchmarking.report({ bson: { flat: { encode: Mongo::Benchmarking::Micro.run(:flat, :encode) } } }) + puts "BSON BENCHMARK :: FLAT :: ENCODE" + Mongo::Benchmarking.report({ bson: { flat: { encode: Mongo::Benchmarking::BSON.run(:flat, :encode) } } }) end # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json. task :decode do - Mongo::Benchmarking.report({ bson: { flat: { decode: Mongo::Benchmarking::Micro.run(:flat, :encode) } } }) + puts "BSON BENCHMARK :: FLAT :: DECODE" + Mongo::Benchmarking.report({ bson: { flat: { decode: Mongo::Benchmarking::BSON.run(:flat, :decode) } } }) end end @@ -159,14 +169,14 @@ namespace :benchmark do # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called deep_bson.json. task :encode do - puts "MICRO BENCHMARK:: DEEP:: ENCODE" - Mongo::Benchmarking::Micro.run(:deep, :encode) + puts "BSON BENCHMARK :: DEEP :: ENCODE" + Mongo::Benchmarking.report({ bson: { deep: { encode: Mongo::Benchmarking::BSON.run(:deep, :encode) } } }) end # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called deep_bson.json. task :decode do - puts "MICRO BENCHMARK:: DEEP:: DECODE" - Mongo::Benchmarking::Micro.run(:deep, :decode) + puts "BSON BENCHMARK :: DEEP :: DECODE" + Mongo::Benchmarking.report({ bson: { deep: { decode: Mongo::Benchmarking::BSON.run(:deep, :decode) } } }) end end @@ -175,14 +185,14 @@ namespace :benchmark do # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called full_bson.json. task :encode do - puts "MICRO BENCHMARK:: FULL:: ENCODE" - Mongo::Benchmarking::Micro.run(:full, :encode) + puts "BSON BENCHMARK :: FULL :: ENCODE" + Mongo::Benchmarking.report({ bson: { full: { encode: Mongo::Benchmarking::BSON.run(:full, :encode) } } }) end # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called full_bson.json. task :decode do - puts "MICRO BENCHMARK:: FULL:: DECODE" - Mongo::Benchmarking::Micro.run(:full, :decode) + puts "BSON BENCHMARK :: FULL :: DECODE" + Mongo::Benchmarking.report({ bson: { full: { decode: Mongo::Benchmarking::BSON.run(:full, :decode) } } }) end end end diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb index 43797982b2..08f6b20423 100644 --- a/profile/benchmarking.rb +++ b/profile/benchmarking.rb @@ -16,7 +16,7 @@ require 'benchmark' require_relative 'benchmarking/helper' -require_relative 'benchmarking/micro' +require_relative 'benchmarking/bson' require_relative 'benchmarking/single_doc' require_relative 'benchmarking/multi_doc' require_relative 'benchmarking/parallel' diff --git a/profile/benchmarking/micro.rb b/profile/benchmarking/bson.rb similarity index 85% rename from profile/benchmarking/micro.rb rename to profile/benchmarking/bson.rb index 80899983c9..0a3667b261 100644 --- a/profile/benchmarking/micro.rb +++ b/profile/benchmarking/bson.rb @@ -18,13 +18,13 @@ module Mongo module Benchmarking # These tests focus on BSON encoding and decoding; they are client-side only and # do not involve any transmission of data to or from the server. - module Micro + module BSON extend self # Runs all of the benchmarks specified by the given mapping. # # @example Run a collection of benchmarks. - # Benchmarking::Micro.run_all( + # Benchmarking::BSON.run_all( # flat: %i[ encode decode ], # deep: %i[ encode decode ], # full: %i[ encode decode ] @@ -43,10 +43,10 @@ def run_all(map) end end - # Run a micro benchmark test. + # Run a BSON benchmark test. # # @example Run a test. - # Benchmarking::Micro.run(:flat) + # Benchmarking::BSON.run(:flat) # # @param [ Symbol ] type The type of test to run. # @param [ :encode | :decode ] action The action to perform. @@ -57,10 +57,10 @@ def run(type, action) Benchmarking.without_gc { send(action, file_path) } end - # Run an encoding micro benchmark test. + # Run an encoding BSON benchmark test. # # @example Run an encoding test. - # Benchmarking::Micro.encode(file_name) + # Benchmarking::BSON.encode(file_name) # # @param [ String ] file_name The name of the file with data for the test. # @param [ Integer ] repetitions The number of test repetitions. @@ -68,17 +68,17 @@ def run(type, action) # @return [ Array ] The list of the results for each iteration def encode(file_name) data = Benchmarking.load_file(file_name) - document = BSON::Document.new(data.first) + document = ::BSON::Document.new(data.first) Benchmarking.benchmark do 10_000.times { document.to_bson } end end - # Run a decoding micro benchmark test. + # Run a decoding BSON benchmark test. # # @example Run an decoding test. - # Benchmarking::Micro.decode(file_name) + # Benchmarking::BSON.decode(file_name) # # @param [ String ] file_name The name of the file with data for the test. # @param [ Integer ] repetitions The number of test repetitions. @@ -86,11 +86,11 @@ def encode(file_name) # @return [ Array ] The list of the results for each iteration def decode(file_name) data = Benchmarking.load_file(file_name) - buffer = BSON::Document.new(data.first).to_bson + buffer = ::BSON::Document.new(data.first).to_bson Benchmarking.benchmark do 10_000.times do - BSON::Document.from_bson(buffer) + ::BSON::Document.from_bson(buffer) buffer.rewind! end end diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb index 73b4d8bb84..e66b292c5b 100644 --- a/profile/benchmarking/helper.rb +++ b/profile/benchmarking/helper.rb @@ -36,7 +36,7 @@ def load_file(file_name) # @since 2.2.3 def parse_json(document) JSON.parse(document).tap do |doc| - doc['_id'] = BSON::ObjectId.from_string(doc['_id']['$oid']) if doc['_id'] && doc['_id']['$oid'] + doc['_id'] = ::BSON::ObjectId.from_string(doc['_id']['$oid']) if doc['_id'] && doc['_id']['$oid'] end end From 126e87d13dc2434d1b9c38535d1c13ba221da5bc Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Mon, 14 Aug 2023 16:47:19 -0600 Subject: [PATCH 4/6] refactoring to appease rubocop --- .rubocop.yml | 3 +++ profile/benchmarking/helper.rb | 28 +++++++++++++++++++--------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index 2b7a7f90f7..6cb04cd597 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -83,6 +83,9 @@ Style/Documentation: Exclude: - 'spec/**/*' +Style/FormatStringToken: + Enabled: false + Style/ModuleFunction: EnforcedStyle: extend_self diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb index e66b292c5b..309932964c 100644 --- a/profile/benchmarking/helper.rb +++ b/profile/benchmarking/helper.rb @@ -85,20 +85,14 @@ def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_ # # @param [ Hash ] results the results of a benchmarking run. # @param [ Integer ] indent how much the report should be indented. - # @param [ Array ] percentiles the percentile values to report + # @param [ Array ] percentiles the percentile values to report def report(results, indent: 0, percentiles: [ 10, 25, 50, 75, 90, 95, 98, 99 ]) - indentation = ' ' * indent results.each do |key, value| - puts "#{indentation}#{key}:" + puts format('%*s%s:', indent, '', key) if value.is_a?(Hash) report(value, indent: indent + 2, percentiles: percentiles) else - ps = Percentiles.new(value) - puts "#{indentation} median: %g" % [ ps[50] ] - puts "#{indentation} percentiles:" - percentiles.each do |pct| - puts "#{indentation} %g: %g" % [ pct, ps[pct] ] - end + report_result(value, indent, percentiles) end end end @@ -149,5 +143,21 @@ def without_gc ensure GC.enable end + + private + + # Formats and displays the results of a single benchmark run. + # + # @param [ Array ] results the results to report + # @param [ Integer ] indent how much the report should be indented + # @param [ Array ] percentiles the percentiles to report + def report_result(results, indent, percentiles) + ps = Percentiles.new(results) + puts format('%*smedian: %g', indent + 2, '', ps[50]) + puts format('%*spercentiles:', indent + 2, '') + percentiles.each do |pct| + puts format('%*s%g: %g', indent + 4, '', pct, ps[pct]) + end + end end end From e86addc24eee503e43cc5b96da4660d8526de290 Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Tue, 15 Aug 2023 09:41:11 -0600 Subject: [PATCH 5/6] RUBY-3315 benchmark scoring --- profile/benchmarking/bson.rb | 57 +++++++++++++++++++++++++++-- profile/benchmarking/helper.rb | 49 ++----------------------- profile/benchmarking/percentiles.rb | 31 ++++++++++++++++ profile/benchmarking/summary.rb | 56 ++++++++++++++++++++++++++++ 4 files changed, 145 insertions(+), 48 deletions(-) create mode 100644 profile/benchmarking/percentiles.rb create mode 100644 profile/benchmarking/summary.rb diff --git a/profile/benchmarking/bson.rb b/profile/benchmarking/bson.rb index 0a3667b261..88cb979ab2 100644 --- a/profile/benchmarking/bson.rb +++ b/profile/benchmarking/bson.rb @@ -14,6 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +require_relative 'percentiles' +require_relative 'summary' + module Mongo module Benchmarking # These tests focus on BSON encoding and decoding; they are client-side only and @@ -43,6 +46,20 @@ def run_all(map) end end + # As defined by the spec, the score for a given benchmark is the + # size of the task (in MB) divided by the median wall clock time. + # + # @param [ Symbol ] type the type of the task + # @param [ Mongo::Benchmarking::Percentiles ] percentiles the Percentiles + # object to query for the median time. + # @param [ Numeric ] scale the number of times the operation is performed + # per iteration, used to scale the task size. + # + # @return [ Numeric ] the score for the given task. + def score_for(type, percentiles, scale: 10_000) + task_size(type, scale) / percentiles[50] + end + # Run a BSON benchmark test. # # @example Run a test. @@ -51,10 +68,14 @@ def run_all(map) # @param [ Symbol ] type The type of test to run. # @param [ :encode | :decode ] action The action to perform. # - # @return [ Array ] The test results for each iteration + # @return [ Hash<:timings,:percentiles,:score> ] The test results for + # the requested benchmark. def run(type, action) - file_path = File.join(Benchmarking::DATA_PATH, "#{type}_bson.json") - Benchmarking.without_gc { send(action, file_path) } + timings = Benchmarking.without_gc { send(action, file_for(type)) } + percentiles = Percentiles.new(timings) + score = score_for(type, percentiles) + + Summary.new(timings, percentiles, score) end # Run an encoding BSON benchmark test. @@ -95,6 +116,36 @@ def decode(file_name) end end end + + private + + # The path to the source file for the given task type. + # + # @param [ Symbol ] type the task type + # + # @return [ String ] the path to the source file. + def file_for(type) + File.join(Benchmarking::DATA_PATH, "#{type}_bson.json") + end + + # As defined by the spec, the size of a BSON task is the size of the + # file, multipled by the scale (the number of times the file is processed + # per iteration), divided by a million. + # + # "the dataset size for a task is the size of the single-document source + # file...times 10,000 operations" + # + # "Each task will have defined for it an associated size in + # megabytes (MB)" + # + # @param [ Symbol ] type the type of the task + # @param [ Numeric ] scale the number of times the operation is performed + # per iteration (e.g. 10,000) + # + # @return [ Numeric ] the score for the task, reported in MB + def task_size(type, scale) + File.size(file_for(type)) * scale / 1_000_000.0 + end end end end diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb index 309932964c..b7a09d6e91 100644 --- a/profile/benchmarking/helper.rb +++ b/profile/benchmarking/helper.rb @@ -89,40 +89,15 @@ def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_ def report(results, indent: 0, percentiles: [ 10, 25, 50, 75, 90, 95, 98, 99 ]) results.each do |key, value| puts format('%*s%s:', indent, '', key) - if value.is_a?(Hash) - report(value, indent: indent + 2, percentiles: percentiles) + + if value.respond_to?(:summary) + puts value.summary(indent + 2, percentiles) else - report_result(value, indent, percentiles) + report(value, indent: indent + 2, percentiles: percentiles) end end end - # A utility class for returning the list item at a given percentile - # value. - class Percentiles - # @return [ Array ] the sorted list of numbers to consider - attr_reader :list - - # Create a new Percentiles object that encapsulates the given list of - # numbers. - # - # @param [ Array ] list the list of numbers to considier - def initialize(list) - @list = list.sort - end - - # Finds and returns the element in the list that represents the given - # percentile value. - # - # @param [ Number ] percentile a number in the range [1,100] - # - # @return [ Number ] the element of the list for the given percentile. - def [](percentile) - i = (list.size * percentile / 100.0).ceil - 1 - list[i] - end - end - # Get the median of values in a list. # # @example Get the median. @@ -143,21 +118,5 @@ def without_gc ensure GC.enable end - - private - - # Formats and displays the results of a single benchmark run. - # - # @param [ Array ] results the results to report - # @param [ Integer ] indent how much the report should be indented - # @param [ Array ] percentiles the percentiles to report - def report_result(results, indent, percentiles) - ps = Percentiles.new(results) - puts format('%*smedian: %g', indent + 2, '', ps[50]) - puts format('%*spercentiles:', indent + 2, '') - percentiles.each do |pct| - puts format('%*s%g: %g', indent + 4, '', pct, ps[pct]) - end - end end end diff --git a/profile/benchmarking/percentiles.rb b/profile/benchmarking/percentiles.rb new file mode 100644 index 0000000000..aeebe9d1d9 --- /dev/null +++ b/profile/benchmarking/percentiles.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +module Mongo + module Benchmarking + # A utility class for returning the list item at a given percentile + # value. + class Percentiles + # @return [ Array ] the sorted list of numbers to consider + attr_reader :list + + # Create a new Percentiles object that encapsulates the given list of + # numbers. + # + # @param [ Array ] list the list of numbers to considier + def initialize(list) + @list = list.sort + end + + # Finds and returns the element in the list that represents the given + # percentile value. + # + # @param [ Number ] percentile a number in the range [1,100] + # + # @return [ Number ] the element of the list for the given percentile. + def [](percentile) + i = (list.size * percentile / 100.0).ceil - 1 + list[i] + end + end + end +end diff --git a/profile/benchmarking/summary.rb b/profile/benchmarking/summary.rb new file mode 100644 index 0000000000..93fddf5435 --- /dev/null +++ b/profile/benchmarking/summary.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +module Mongo + module Benchmarking + # A utility class for encapsulating the summary information for a + # benchmark, including behaviors for reporting on the summary. + class Summary + # @return [ Array ] the timings of each iteration in the + # benchmark + attr_reader :timings + + # @return [ Percentiles ] the percentiles object for querying the + # timing at a given percentile value. + attr_reader :percentiles + + # @return [ Numeric ] the composite score for the benchmark + attr_reader :score + + # Construct a new Summary object with the given timings, percentiles, + # and score. + # + # @param [ Array ] timings the timings of each iteration in the + # benchmark + # @param [ Percentiles ] percentiles the percentiles object for querying + # the timing at a given percentile value + # @param [ Numeric ] score the composite score for the benchmark + def initialize(timings, percentiles, score) + @timings = timings + @percentiles = percentiles + @score = score + end + + # @return [ Numeric ] the median timing for the benchmark. + def median + percentiles[50] + end + + # Formats and displays the results of a single benchmark run. + # + # @param [ Integer ] indent how much the report should be indented + # @param [ Array ] points the percentile points to report + # + # @return [ String ] a YAML-formatted summary + def summary(indent, points) + [].tap do |lines| + lines << format('%*sscore: %g', indent, '', score) + lines << format('%*smedian: %g', indent, '', median) + lines << format('%*spercentiles:', indent, '') + points.each do |pct| + lines << format('%*s%g: %g', indent + 2, '', pct, percentiles[pct]) + end + end.join("\n") + end + end + end +end From 62d81d9b9875aa1d4391e73ff2dc888430ecba98 Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Mon, 21 Aug 2023 10:29:38 -0600 Subject: [PATCH 6/6] fix merge artifact --- profile/benchmarking/helper.rb | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb index e58e1d3a92..856cb7659b 100644 --- a/profile/benchmarking/helper.rb +++ b/profile/benchmarking/helper.rb @@ -98,22 +98,6 @@ def report(results, indent: 0, percentiles: [ 10, 25, 50, 75, 90, 95, 98, 99 ]) end end - # Formats and displays a report of the given results. - # - # @param [ Hash ] results the results of a benchmarking run. - # @param [ Integer ] indent how much the report should be indented. - # @param [ Array ] percentiles the percentile values to report - def report(results, indent: 0, percentiles: [ 10, 25, 50, 75, 90, 95, 98, 99 ]) - results.each do |key, value| - puts format('%*s%s:', indent, '', key) - if value.is_a?(Hash) - report(value, indent: indent + 2, percentiles: percentiles) - else - puts value.summary(indent, percentiles) - end - end - end - # A utility class for returning the list item at a given percentile # value. class Percentiles