Josh's drivers #14


Merged 108 commits on Jan 22, 2024
Commits
794e42d
update the list of prompts
Dando18 Dec 1, 2023
ea0a335
add checkpointing
Dando18 Dec 2, 2023
067fcaf
add throughput
Dando18 Dec 3, 2023
0093aa2
update generation
Dando18 Dec 3, 2023
adbde05
add stack analysis script
Dando18 Dec 8, 2023
508f1e9
update throughput scripts
Dando18 Dec 8, 2023
70e0f58
Update gitignore
jhdavis8 Dec 17, 2023
3bd5888
add sum of prefix sum gpu driver
jhdavis8 Dec 17, 2023
d45b402
Fixes to scan 28 gpu driver, add test outputs, subprocess import
jhdavis8 Dec 18, 2023
b347497
add implementations for drivers that have been tested
Dando18 Dec 19, 2023
fd6ca30
update analysis scripts
Dando18 Dec 19, 2023
5541a02
updated set of currently working drivers
Dando18 Dec 20, 2023
64e175a
Fix indent in scan-28 gpu.cu
jhdavis8 Dec 22, 2023
c23b930
Change scan 28 gpu to use copy macros instead of memcpy symbol call
jhdavis8 Dec 22, 2023
793d8da
fix kernel call in gpu.cu scan 28, update test output for same
jhdavis8 Dec 22, 2023
c7e0c8c
Merge branch 'update-prompts' into josh-drivers
jhdavis8 Dec 22, 2023
0a35e46
Add scan 27
jhdavis8 Jan 3, 2024
a417cee
Update scan benchmark names to match current ids
jhdavis8 Jan 3, 2024
064afa7
Some fixes for scan 31
jhdavis8 Jan 3, 2024
e20c5fa
Add scan 30 drivers
jhdavis8 Jan 3, 2024
a55cdc8
Add MPI support for cpu.cc in scan 30, 31, 32
jhdavis8 Jan 5, 2024
7e9cf6e
Update gpu.cu for scan 30-32
jhdavis8 Jan 5, 2024
56ba05d
Small updates to scan 30-32 kokkos
jhdavis8 Jan 5, 2024
8514bfc
Complete scan 33
jhdavis8 Jan 5, 2024
e3dfe00
Add scan 34, small changes to template generator
jhdavis8 Jan 8, 2024
fbb1750
Update test outputs with right numbers for scan prompts, add script t…
jhdavis8 Jan 8, 2024
ff0282d
Various updates to scan drivers fixing minor bugs
jhdavis8 Jan 8, 2024
dc987fd
Make create driver template and run all executable scripts
jhdavis8 Jan 9, 2024
cd0a722
update template, add reduce 25 drivers
jhdavis8 Jan 9, 2024
b452c53
Add reduce 27 drivers
jhdavis8 Jan 10, 2024
7c9137d
Add reduce 26 drivers
jhdavis8 Jan 10, 2024
32ebedf
Add reduce 28
jhdavis8 Jan 10, 2024
1e136fc
Add reduce 29
jhdavis8 Jan 12, 2024
5f30824
Merge branch 'main' into josh-drivers
jhdavis8 Jan 17, 2024
669b6af
Add stencil 50 drivers
jhdavis8 Jan 17, 2024
eb79d47
Add stencil 51 driver
jhdavis8 Jan 17, 2024
7150ca5
Add stencil 53 driver
jhdavis8 Jan 17, 2024
dd5f617
update formatting and problem sizes for scan and reduce
Dando18 Jan 17, 2024
0ffb59a
Add stencil 52 drivers
jhdavis8 Jan 17, 2024
18cec05
Merge branch 'josh-drivers' of github.com:pssg-int/llms-for-hpc into …
jhdavis8 Jan 17, 2024
23998a1
Add stencil 54 drivers
jhdavis8 Jan 17, 2024
348e34d
bug fixes for scan
Dando18 Jan 17, 2024
080d37d
reduce bug fixes
Dando18 Jan 17, 2024
18f63ac
fix bugs in stencil drivers and set problem sizes
Dando18 Jan 17, 2024
344b453
Merge branch 'main' into josh-drivers
jhdavis8 Jan 17, 2024
f20100d
some minor updates after prompt changes
Dando18 Jan 17, 2024
3bba3a2
newest updates to prompts
Dando18 Jan 17, 2024
53db441
update prompts json
Dando18 Jan 17, 2024
3980af2
update generate scripts
Dando18 Jan 18, 2024
bda31f7
add run scripts
Dando18 Jan 18, 2024
e54b88c
add updated model outputs
Dando18 Jan 18, 2024
b22bd1d
update runs scripts
Dando18 Jan 18, 2024
e6400bc
Merge branch 'josh-drivers' into update-outputs-with-new-prompts
Dando18 Jan 18, 2024
65dae9e
add openai outputs
Dando18 Jan 18, 2024
9079bf4
update generation script
Dando18 Jan 18, 2024
7c75f1f
update generate
Dando18 Jan 18, 2024
c81af08
update how scripts are run
Dando18 Jan 18, 2024
5633007
outputs
Dando18 Jan 18, 2024
8076ab9
update generation
Dando18 Jan 18, 2024
ac257d6
add more outputs
Dando18 Jan 19, 2024
770ad8f
update gpt-4 outputs
Dando18 Jan 19, 2024
8ca2278
update search benchmarks
Dando18 Jan 19, 2024
2d2bbd6
update analysis scripts
Dando18 Jan 19, 2024
b5853fd
update model outputs
Dando18 Jan 19, 2024
99c90c2
update computed results
Dando18 Jan 19, 2024
107b584
update model collection
Dando18 Jan 19, 2024
5a262b3
update gpt-4 results
Dando18 Jan 19, 2024
9556370
Add geometry 10 drivers
jhdavis8 Jan 20, 2024
0cd64a2
Geometry 10 fixes for struct decl
jhdavis8 Jan 20, 2024
32765b1
Merge branch 'update-outputs-with-new-prompts' into josh-drivers
jhdavis8 Jan 20, 2024
e50ec3d
Add missing points setup in geometry 10 gpu
jhdavis8 Jan 20, 2024
4340160
Add log-runs option
jhdavis8 Jan 20, 2024
f4cce9b
Geometry 10 formatting
jhdavis8 Jan 20, 2024
d72656a
Add geometry 11 drivers
jhdavis8 Jan 20, 2024
145d21e
Add input validation for geo 11 cpu for testing
jhdavis8 Jan 20, 2024
f336235
Try circle generation for geometry 11
jhdavis8 Jan 20, 2024
123e8fd
Remove restricted validation data generation for geo 11
jhdavis8 Jan 20, 2024
9b45c3d
Modify baseline for geo 11 to provide own distance lambda
jhdavis8 Jan 20, 2024
ac9fead
update gpt 3 and 4 outputs
Dando18 Jan 20, 2024
1b28e21
update metric defaults
Dando18 Jan 21, 2024
4771142
update some driver utility scripts
Dando18 Jan 21, 2024
c8e552c
update result data
Dando18 Jan 21, 2024
e12d9a2
update all.json files
Dando18 Jan 21, 2024
e66934c
update non-openai outputs
Dando18 Jan 21, 2024
4f7c941
Correct position of baseline include in geo 11 kokkos
jhdavis8 Jan 21, 2024
a3dc7b2
Small format/convenience changes for debugging tools, adjust problem …
jhdavis8 Jan 21, 2024
d0e953a
Add storage of return value in geo 11 gpu best
jhdavis8 Jan 21, 2024
aaa990c
Add geo 12 drivers
jhdavis8 Jan 21, 2024
5ef71a7
Add geo 13 drivers
jhdavis8 Jan 21, 2024
bdc5d3d
Geometry 14 drivers
jhdavis8 Jan 21, 2024
9f20132
Adjust floating point error bound geo 13 cpu
jhdavis8 Jan 21, 2024
18b79fa
update geo problem sizes
jhdavis8 Jan 21, 2024
7000999
update hip results
Dando18 Jan 21, 2024
19783e9
update some scripts to handle hip better
Dando18 Jan 21, 2024
250035f
update 59
Dando18 Jan 21, 2024
82161f8
Add cfloat to utilities, needed for DBL_MAX
jhdavis8 Jan 21, 2024
3bc5da2
Merge branch 'update-outputs-with-new-prompts' into josh-drivers
jhdavis8 Jan 21, 2024
25e30b5
update geo 13 and 14 prompts distance function name conflict
jhdavis8 Jan 21, 2024
22b750c
Rename distance function in cuda and hip outputs for geo 13,14
jhdavis8 Jan 21, 2024
bf2e3f6
Update output prompts with distance fn rename
jhdavis8 Jan 21, 2024
45b1db3
add results changes from main
Dando18 Jan 21, 2024
64e830a
Merge branch 'main' into josh-drivers
Dando18 Jan 21, 2024
ad5c844
update for geometry runs
Dando18 Jan 21, 2024
e4bc081
update run scripts
Dando18 Jan 21, 2024
c0e73e7
update hip geometry results
Dando18 Jan 22, 2024
139fe5a
update recorded results with geometry problems
Dando18 Jan 22, 2024
9fe3fce
update driver job scripts
Dando18 Jan 22, 2024
a2d6315
update analysis scripts
Dando18 Jan 22, 2024
37 changes: 32 additions & 5 deletions analysis/all-metrics.sh
@@ -1,13 +1,40 @@
#!/bin/sh


# main metrics
python metrics.py ../results/a8724ee8/codellama-7b-hf_prompted_temp0.2/results.csv --model-name CodeLlama-7B --output ../results/a8724ee8/codellama-7b-hf_prompted_temp0.2/metrics.csv
python metrics.py ../results/a8724ee8/codellama-13b-hf_prompted_temp0.2/results.csv --model-name CodeLlama-13B --output ../results/a8724ee8/codellama-13b-hf_prompted_temp0.2/metrics.csv
python metrics.py ../results/a8724ee8/codellama-34b-hf_prompted_temp0.2/results.csv --model-name CodeLlama-34B --output ../results/a8724ee8/codellama-34b-hf_prompted_temp0.2/metrics.csv

python metrics.py ../results/a8724ee8/starcoderbase_prompted_temp0.2/results.csv --model-name StarCoderBase --output ../results/a8724ee8/starcoderbase_prompted_temp0.2/metrics.csv

python metrics.py ../results/a8724ee8/phind-v2_prompted_temp0.2/results.csv --model-name Phind-V2 --output ../results/a8724ee8/phind-v2_prompted_temp0.2/metrics.csv

python metrics.py ../results/a8724ee8/gpt-3.5_temp0.2/results.csv --model-name GPT-3.5 --output ../results/a8724ee8/gpt-3.5_temp0.2/metrics.csv
python metrics.py ../results/a8724ee8/gpt-4_temp0.2/results.csv --model-name GPT-4 --output ../results/a8724ee8/gpt-4_temp0.2/metrics.csv


# mpi scaling metrics
python metrics-scaling.py ../results/a8724ee8/codellama-7b-hf_prompted_temp0.2/results.csv --model-name CodeLlama-7B -k 1 -n 1 2 4 8 16 32 64 128 256 512 --execution-model mpi --output ../results/a8724ee8/codellama-7b-hf_prompted_temp0.2/metrics-scaling-mpi.csv
python metrics-scaling.py ../results/a8724ee8/codellama-13b-hf_prompted_temp0.2/results.csv --model-name CodeLlama-13B -k 1 -n 1 2 4 8 16 32 64 128 256 512 --execution-model mpi --output ../results/a8724ee8/codellama-13b-hf_prompted_temp0.2/metrics-scaling-mpi.csv
python metrics-scaling.py ../results/a8724ee8/codellama-34b-hf_prompted_temp0.2/results.csv --model-name CodeLlama-34B -k 1 -n 1 2 4 8 16 32 64 128 256 512 --execution-model mpi --output ../results/a8724ee8/codellama-34b-hf_prompted_temp0.2/metrics-scaling-mpi.csv
python metrics-scaling.py ../results/a8724ee8/starcoderbase_prompted_temp0.2/results.csv --model-name StarCoderBase -k 1 -n 1 2 4 8 16 32 64 128 256 512 --execution-model mpi --output ../results/a8724ee8/starcoderbase_prompted_temp0.2/metrics-scaling-mpi.csv
python metrics-scaling.py ../results/a8724ee8/phind-v2_prompted_temp0.2/results.csv --model-name Phind-V2 -k 1 -n 1 2 4 8 16 32 64 128 256 512 --execution-model mpi --output ../results/a8724ee8/phind-v2_prompted_temp0.2/metrics-scaling-mpi.csv
python metrics-scaling.py ../results/a8724ee8/gpt-3.5_temp0.2/results.csv --model-name GPT-3.5 -k 1 -n 1 2 4 8 16 32 64 128 256 512 --execution-model mpi --output ../results/a8724ee8/gpt-3.5_temp0.2/metrics-scaling-mpi.csv
python metrics-scaling.py ../results/a8724ee8/gpt-4_temp0.2/results.csv --model-name GPT-4 -k 1 -n 1 2 4 8 16 32 64 128 256 512 --execution-model mpi --output ../results/a8724ee8/gpt-4_temp0.2/metrics-scaling-mpi.csv


# omp scaling metrics
python metrics-scaling.py ../results/a8724ee8/codellama-7b-hf_prompted_temp0.2/results.csv --model-name CodeLlama-7B -k 1 -n 1 2 4 8 16 32 64 --execution-model omp --output ../results/a8724ee8/codellama-7b-hf_prompted_temp0.2/metrics-scaling-omp.csv
python metrics-scaling.py ../results/a8724ee8/codellama-13b-hf_prompted_temp0.2/results.csv --model-name CodeLlama-13B -k 1 -n 1 2 4 8 16 32 64 --execution-model omp --output ../results/a8724ee8/codellama-13b-hf_prompted_temp0.2/metrics-scaling-omp.csv
python metrics-scaling.py ../results/a8724ee8/codellama-34b-hf_prompted_temp0.2/results.csv --model-name CodeLlama-34B -k 1 -n 1 2 4 8 16 32 64 --execution-model omp --output ../results/a8724ee8/codellama-34b-hf_prompted_temp0.2/metrics-scaling-omp.csv
python metrics-scaling.py ../results/a8724ee8/starcoderbase_prompted_temp0.2/results.csv --model-name StarCoderBase -k 1 -n 1 2 4 8 16 32 64 --execution-model omp --output ../results/a8724ee8/starcoderbase_prompted_temp0.2/metrics-scaling-omp.csv
python metrics-scaling.py ../results/a8724ee8/phind-v2_prompted_temp0.2/results.csv --model-name Phind-V2 -k 1 -n 1 2 4 8 16 32 64 --execution-model omp --output ../results/a8724ee8/phind-v2_prompted_temp0.2/metrics-scaling-omp.csv
python metrics-scaling.py ../results/a8724ee8/gpt-3.5_temp0.2/results.csv --model-name GPT-3.5 -k 1 -n 1 2 4 8 16 32 64 --execution-model omp --output ../results/a8724ee8/gpt-3.5_temp0.2/metrics-scaling-omp.csv
python metrics-scaling.py ../results/a8724ee8/gpt-4_temp0.2/results.csv --model-name GPT-4 -k 1 -n 1 2 4 8 16 32 64 --execution-model omp --output ../results/a8724ee8/gpt-4_temp0.2/metrics-scaling-omp.csv


# kokkos scaling metrics
python metrics-scaling.py ../results/a8724ee8/codellama-7b-hf_prompted_temp0.2/results.csv --model-name CodeLlama-7B -k 1 -n 1 2 4 8 16 32 --execution-model kokkos --output ../results/a8724ee8/codellama-7b-hf_prompted_temp0.2/metrics-scaling-kokkos.csv
python metrics-scaling.py ../results/a8724ee8/codellama-13b-hf_prompted_temp0.2/results.csv --model-name CodeLlama-13B -k 1 -n 1 2 4 8 16 32 --execution-model kokkos --output ../results/a8724ee8/codellama-13b-hf_prompted_temp0.2/metrics-scaling-kokkos.csv
python metrics-scaling.py ../results/a8724ee8/codellama-34b-hf_prompted_temp0.2/results.csv --model-name CodeLlama-34B -k 1 -n 1 2 4 8 16 32 --execution-model kokkos --output ../results/a8724ee8/codellama-34b-hf_prompted_temp0.2/metrics-scaling-kokkos.csv
python metrics-scaling.py ../results/a8724ee8/starcoderbase_prompted_temp0.2/results.csv --model-name StarCoderBase -k 1 -n 1 2 4 8 16 32 --execution-model kokkos --output ../results/a8724ee8/starcoderbase_prompted_temp0.2/metrics-scaling-kokkos.csv
python metrics-scaling.py ../results/a8724ee8/phind-v2_prompted_temp0.2/results.csv --model-name Phind-V2 -k 1 -n 1 2 4 8 16 32 --execution-model kokkos --output ../results/a8724ee8/phind-v2_prompted_temp0.2/metrics-scaling-kokkos.csv
python metrics-scaling.py ../results/a8724ee8/gpt-3.5_temp0.2/results.csv --model-name GPT-3.5 -k 1 -n 1 2 4 8 16 32 --execution-model kokkos --output ../results/a8724ee8/gpt-3.5_temp0.2/metrics-scaling-kokkos.csv
python metrics-scaling.py ../results/a8724ee8/gpt-4_temp0.2/results.csv --model-name GPT-4 -k 1 -n 1 2 4 8 16 32 --execution-model kokkos --output ../results/a8724ee8/gpt-4_temp0.2/metrics-scaling-kokkos.csv
207 changes: 207 additions & 0 deletions analysis/metrics-scaling.py
@@ -0,0 +1,207 @@
""" Compute the metrics over the data for various resource counts.
"""
# std imports
import argparse
import json
from math import comb
from typing import Union

# tpl imports
import numpy as np
import pandas as pd


def get_args():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("input_csv", type=str, help="Input CSV file containing the test cases.")
    parser.add_argument("-k", "--k", type=int, default=1, help="K value for speedup@k and efficiency@k")
    parser.add_argument("-n", "--n", type=int, nargs='+', default=[1,2,4,8,16,32,64,128,256,512], help="Number of resources for speedup@k and efficiency@k")
    parser.add_argument("--execution-model", choices=['mpi', 'mpi+omp', 'omp', 'kokkos'], default='mpi', help="Execution model to use for speedup@k and efficiency@k")
    parser.add_argument("-o", "--output", type=str, help="Output csv file containing the results.")
    parser.add_argument("--problem-sizes", type=str, default='../drivers/problem-sizes.json', help="Json with problem sizes. Used for calculating GPU efficiency.")
    parser.add_argument("--model-name", type=str, help="Add model name column with this value")
    return parser.parse_args()

def nCr(n: int, r: int) -> int:
    # note: returns 1, not the mathematical 0, when n < r
    if n < r:
        return 1
    return comb(n, r)

def _speedupk(runtimes: Union[pd.Series, np.ndarray], baseline_runtime: float, k: int, n: int) -> float:
    """ Compute the speedup@k metric """
    # create a copy of the runtimes
    if isinstance(runtimes, pd.Series):
        runtimes = runtimes.values.copy()
    else:
        runtimes = runtimes.copy()

    # sort the runtimes
    runtimes.sort()

    # compute expected value
    sum = 0.0
    num_samples = runtimes.shape[0]
    for j in range(1, num_samples+1):
        num = nCr(j-1, k-1) * baseline_runtime
        den = nCr(num_samples, k) * max(runtimes[j-1], 1e-8)
        sum += num / den
    return pd.Series({f"speedup_{n}@{k}": sum})

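# Note: with the default k == 1 (the value used in analysis/all-metrics.sh),
# nCr(j-1, 0) == 1 and nCr(N, 1) == N, so the weighted sum in _speedupk
# reduces to the mean of baseline_runtime / runtime over the N valid samples
# at resource count n.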
def speedupk(df: pd.DataFrame, k: int, n: int) -> pd.DataFrame:
    """ Compute the speedup@k metric """
    df = df.copy()

    # get all runs where is_valid is true
    df = df[df["is_valid"] == True]

    # choose processor count
    df = df[df["n"] == n]
    df = df.copy()

    # use min best_sequential_runtime
    df["best_sequential_runtime"] = df.groupby(["name", "parallelism_model", "output_idx"])["best_sequential_runtime"].transform("min")

    # group by name, parallelism_model, and problem_type and call _speedupk
    df = df.groupby(["name", "parallelism_model", "problem_type"]).apply(
        lambda row: _speedupk(row["runtime"], np.min(row["best_sequential_runtime"]), k, n)
    ).reset_index()

    # compute the mean speedup@k
    df = df.groupby(["parallelism_model", "problem_type"]).agg({f"speedup_{n}@{k}": "mean"})

    return df

def _efficiencyk(runtimes: Union[pd.Series, np.ndarray], baseline_runtime: float, k: int, n_resources: Union[pd.Series, np.ndarray]) -> float:
    """ Compute the efficiency@k metric """
    # create a copy of the runtimes
    if isinstance(runtimes, pd.Series):
        runtimes = runtimes.values.copy()
    else:
        runtimes = runtimes.copy()

    if isinstance(n_resources, pd.Series):
        n_resources = n_resources.values.copy()
    else:
        n_resources = n_resources.copy()

    # sort the runtimes
    runtimes.sort()

    # make sure n_resources is all the same value and get that value
    assert np.all(n_resources == n_resources[0])
    n = int(n_resources[0])

    # compute expected value
    sum = 0.0
    num_samples = runtimes.shape[0]
    for j in range(1, num_samples+1):
        num = nCr(j-1, k-1) * baseline_runtime
        den = nCr(num_samples, k) * max(runtimes[j-1], 1e-8) * n_resources[j-1]
        sum += num / den
    return pd.Series({f"efficiency_{n}@{k}": sum})

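# Similarly, for k == 1 the weighted sum in _efficiencyk reduces to the mean of
# baseline_runtime / (runtime * n) over the N valid samples, i.e. the mean
# speedup divided by the resource count n.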
def efficiencyk(df: pd.DataFrame, k: int, n: int) -> pd.DataFrame:
    """ Compute the efficiency@k metric """
    df = df.copy()

    # get all runs where is_valid is true
    df = df[df["is_valid"] == True]

    # choose processor count
    df = df[df["n"] == n]
    df = df.copy()

    # use min best_sequential_runtime
    df["best_sequential_runtime"] = df.groupby(["name", "parallelism_model", "output_idx"])["best_sequential_runtime"].transform("min")

    # group by name, parallelism_model, and problem_type and call _efficiencyk
    df = df.groupby(["name", "parallelism_model", "problem_type"]).apply(
        lambda row: _efficiencyk(row["runtime"], np.min(row["best_sequential_runtime"]), k, row["n"])
    ).reset_index()

    # compute the mean efficiency@k
    df = df.groupby(["parallelism_model", "problem_type"]).agg({f"efficiency_{n}@{k}": "mean"})

    return df

def parse_problem_size(problem_size: str) -> int:
    """ problem size is of format '(1<<n)' """
    num = problem_size.split("<<")[1][:-1]
    return 2 ** int(num)

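# Example: parse_problem_size("(1<<20)") == 2 ** 20 == 1048576.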
def main():
    args = get_args()

    # read in input
    df = pd.read_csv(args.input_csv)

    # read in problem sizes
    with open(args.problem_sizes, "r") as f:
        problem_sizes = json.load(f)
    for problem in problem_sizes:
        for parallelism_model, problem_size in problem_sizes[problem].items():
            df.loc[(df["name"] == problem) & (df["parallelism_model"] == parallelism_model), "problem_size"] = parse_problem_size(problem_size)

    # remove rows where parallelism_model is kokkos and num_threads is 64
    #df = df[~((df["parallelism_model"] == "kokkos") & (df["num_threads"] == 64))]

    # filter/aggregate
    df["did_run"] = df["did_run"].fillna(False)    # if it didn't build, then this will be nan; overwrite
    df["is_valid"] = df["is_valid"].fillna(False)  # if it didn't build, then this will be nan; overwrite

    if args.execution_model == "mpi":
        df = df[df["parallelism_model"] == "mpi"]
        df["n"] = df["num_procs"]
    elif args.execution_model == "mpi+omp":
        df = df[df["parallelism_model"] == "mpi+omp"]
        df["n"] = df["num_procs"] * df["num_threads"]
    elif args.execution_model == "omp":
        df = df[df["parallelism_model"] == "omp"]
        df["n"] = df["num_threads"]
    elif args.execution_model == "kokkos":
        df = df[df["parallelism_model"] == "kokkos"]
        df["n"] = df["num_threads"]
    else:
        raise NotImplementedError(f"Unsupported execution model {args.execution_model}")

    # get values for each resource count n
    all_results = []
    for n in args.n:
        speedup_values = speedupk(df, args.k, n)
        efficiency_values = efficiencyk(df, args.k, n)
        all_results.extend([speedup_values, efficiency_values])

    # merge all_results; each df has one column and the same index,
    # so build a new df with all the columns and the same index
    merged_df = pd.concat(all_results, axis=1).reset_index()

    # if there were no successful builds or runs, then speedup@k will be nan after merging;
    # replace NaN speedup@k values with 0.0
    for n in args.n:
        merged_df[f"speedup_{n}@{args.k}"] = merged_df[f"speedup_{n}@{args.k}"].fillna(0.0)
        merged_df[f"efficiency_{n}@{args.k}"] = merged_df[f"efficiency_{n}@{args.k}"].fillna(0.0)

    # add model name column
    if args.model_name:
        merged_df.insert(0, "model_name", args.model_name)

    # clean up column names
    column_name_map = {
        "model_name": "model",
        "parallelism_model": "execution model",
        "problem_type": "problem type",
    }
    merged_df = merged_df.rename(columns=column_name_map)

    # write to csv
    if args.output:
        merged_df.to_csv(args.output, index=False)
    else:
        pd.set_option('display.max_columns', merged_df.shape[1]+1)
        pd.set_option('display.max_rows', merged_df.shape[0]+1)
        print(merged_df)


if __name__ == "__main__":
    main()
@@ -0,0 +1,54 @@
#pragma once
#include <vector>
#include <algorithm>

/* Find the set of points that define the smallest convex polygon that contains all the points in the vector points. Store the result in `hull`.
Example:

input: [{0, 3}, {1, 1}, {2, 2}, {4, 4}, {0, 0}, {1, 2}, {3, 1}, {3, 3}]
output: [{0, 3}, {4, 4}, {3, 1}, {0, 0}]
*/
void NO_INLINE correctConvexHull(std::vector<Point> const& points, std::vector<Point> &hull) {
    // The polygon needs to have at least three points
    if (points.size() < 3) {
        hull = points;
        return;
    }

    std::vector<Point> pointsSorted = points;

    std::sort(pointsSorted.begin(), pointsSorted.end(), [](Point const& a, Point const& b) {
        return a.x < b.x || (a.x == b.x && a.y < b.y);
    });

    auto CrossProduct = [](Point const& a, Point const& b, Point const& c) {
        return (c.x - a.x) * (b.y - a.y) - (c.y - a.y) * (b.x - a.x) > 0;
    };

    std::vector<Point> upperHull;
    std::vector<Point> lowerHull;
    upperHull.push_back(pointsSorted[0]);
    upperHull.push_back(pointsSorted[1]);

    for (size_t i = 2; i < pointsSorted.size(); i++) {
        while (upperHull.size() > 1
               && !CrossProduct(upperHull[upperHull.size() - 2],
                                upperHull[upperHull.size() - 1],
                                pointsSorted[i])) {
            upperHull.pop_back();
        }
        upperHull.push_back(pointsSorted[i]);

        while (lowerHull.size() > 1
               && !CrossProduct(lowerHull[lowerHull.size() - 2],
                                lowerHull[lowerHull.size() - 1],
                                pointsSorted[pointsSorted.size() - i - 1])) {
            lowerHull.pop_back();
        }
        lowerHull.push_back(pointsSorted[pointsSorted.size() - i - 1]);
    }
    upperHull.insert(upperHull.end(), lowerHull.begin(), lowerHull.end());

    hull = upperHull;
    return;
}
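
// Illustrative call of the baseline above (a sketch, not part of the driver;
// it assumes the driver-provided `Point` type is an aggregate with `x` and `y`
// members, as suggested by the comparator and cross-product lambdas):
//
//   std::vector<Point> points = {{0, 3}, {1, 1}, {2, 2}, {4, 4},
//                                {0, 0}, {1, 2}, {3, 1}, {3, 3}};
//   std::vector<Point> hull;
//   correctConvexHull(points, hull);
//   // hull now holds the convex hull vertices, e.g. {0, 3}, {4, 4}, {3, 1}, {0, 0}
//   // as in the example in the header comment.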