Commit 9911eb2

Translation (#17)
* finalize prompts
* translation scripts
* gpt translate outputs
* add translation outputs
* translation results
1 parent 1a55b01 commit 9911eb2

File tree

76 files changed: +1822696 -37 lines changed


drivers/problem-sizes.json

Lines changed: 36 additions & 36 deletions
@@ -4,7 +4,7 @@
     "omp": "(1<<10)",
     "mpi": "(1<<10)",
     "mpi+omp": "(1<<10)",
-    "kokkos": "(1<<10)",
+    "kokkos": "(1<<9)",
     "cuda": "(1<<10)",
     "hip": "(1<<10)"
   },
@@ -18,13 +18,13 @@
     "hip": "(1<<10)"
   },
   "02_dense_la_gemm": {
-    "serial": "(1<<10)",
-    "omp": "(1<<10)",
-    "mpi": "(1<<10)",
-    "mpi+omp": "(1<<10)",
-    "kokkos": "(1<<10)",
-    "cuda": "(1<<10)",
-    "hip": "(1<<10)"
+    "serial": "(1<<9)",
+    "omp": "(1<<9)",
+    "mpi": "(1<<9)",
+    "mpi+omp": "(1<<9)",
+    "kokkos": "(1<<9)",
+    "cuda": "(1<<9)",
+    "hip": "(1<<9)"
   },
   "03_dense_la_axpy": {
     "serial": "(1<<24)",
@@ -47,8 +47,8 @@
   "05_fft_inverse_fft": {
     "serial": "(1<<17)",
     "omp": "(1<<17)",
-    "mpi": "(1<<18)",
-    "mpi+omp": "(1<<18)",
+    "mpi": "(1<<17)",
+    "mpi+omp": "(1<<17)",
     "kokkos": "(1<<17)",
     "cuda": "(1<<17)",
     "hip": "(1<<17)"
@@ -72,22 +72,22 @@
     "hip": "(1<<18)"
   },
   "08_fft_split_fft": {
-    "serial": "(1<<18)",
-    "omp": "(1<<18)",
-    "mpi": "(1<<19)",
-    "mpi+omp": "(1<<19)",
-    "kokkos": "(1<<18)",
-    "cuda": "(1<<18)",
-    "hip": "(1<<18)"
+    "serial": "(1<<17)",
+    "omp": "(1<<17)",
+    "mpi": "(1<<17)",
+    "mpi+omp": "(1<<17)",
+    "kokkos": "(1<<17)",
+    "cuda": "(1<<17)",
+    "hip": "(1<<17)"
   },
   "09_fft_fft_out_of_place": {
-    "serial": "(1<<18)",
-    "omp": "(1<<18)",
-    "mpi": "(1<<19)",
-    "mpi+omp": "(1<<19)",
-    "kokkos": "(1<<18)",
-    "cuda": "(1<<18)",
-    "hip": "(1<<18)"
+    "serial": "(1<<17)",
+    "omp": "(1<<17)",
+    "mpi": "(1<<17)",
+    "mpi+omp": "(1<<17)",
+    "kokkos": "(1<<17)",
+    "cuda": "(1<<17)",
+    "hip": "(1<<17)"
   },
   "10_geometry_convex_hull": {
     "serial": "(1<<14)",
@@ -109,28 +109,28 @@
   },
   "12_geometry_smallest_triangle": {
     "serial": "(1<<8)",
-    "omp": "(1<<8)",
+    "omp": "(1<<7)",
     "mpi": "(1<<8)",
     "mpi+omp": "(1<<8)",
-    "kokkos": "(1<<8)",
+    "kokkos": "(1<<7)",
     "cuda": "(1<<10)",
     "hip": "(1<<10)"
   },
   "13_geometry_closest_pair_2d": {
     "serial": "(1<<14)",
-    "omp": "(1<<14)",
+    "omp": "(1<<13)",
     "mpi": "(1<<14)",
     "mpi+omp": "(1<<14)",
-    "kokkos": "(1<<14)",
+    "kokkos": "(1<<13)",
     "cuda": "(1<<14)",
     "hip": "(1<<14)"
   },
   "14_geometry_closest_pair_1d": {
     "serial": "(1<<14)",
-    "omp": "(1<<14)",
+    "omp": "(1<<13)",
     "mpi": "(1<<14)",
     "mpi+omp": "(1<<14)",
-    "kokkos": "(1<<14)",
+    "kokkos": "(1<<13)",
     "cuda": "(1<<14)",
     "hip": "(1<<14)"
   },
@@ -153,13 +153,13 @@
     "hip": "(1<<12)"
   },
   "17_graph_highest_degree": {
-    "serial": "(1<<13)",
-    "omp": "(1<<13)",
-    "mpi": "(1<<13)",
-    "mpi+omp": "(1<<13)",
+    "serial": "(1<<12)",
+    "omp": "(1<<12)",
+    "mpi": "(1<<12)",
+    "mpi+omp": "(1<<12)",
     "kokkos": "(1<<12)",
-    "cuda": "(1<<13)",
-    "hip": "(1<<13)"
+    "cuda": "(1<<12)",
+    "hip": "(1<<12)"
   },
   "18_graph_count_components": {
     "serial": "(1<<12)",

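Note: each value in problem-sizes.json is a bit-shift expression, so lowering a shift by one (for example (1<<10) to (1<<9)) halves the default problem size for that benchmark and execution model. A minimal Python sketch of what a few of the changed entries work out to numerically; it is purely illustrative and does not show how the benchmark drivers actually consume these strings.

# Illustrative only: evaluate a few of the shift expressions changed in this commit.
old_sizes = {"02_dense_la_gemm serial": 1 << 10, "08_fft_split_fft serial": 1 << 18}
new_sizes = {"02_dense_la_gemm serial": 1 << 9,  "08_fft_split_fft serial": 1 << 17}

for name in old_sizes:
    # e.g. 02_dense_la_gemm serial: 1024 -> 512
    print(f"{name}: {old_sizes[name]} -> {new_sizes[name]}")
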
generate/generate-openai.py

Lines changed: 1 addition & 1 deletion
@@ -100,7 +100,7 @@ def get_max_requests_per_day(model: str) -> Optional[int]:
 def postprocess(prompt: str, output: str) -> str:
     """ Postprocess the output. """
     # remove leading ```, ```cpp, and trailing ```
-    output = output.lstrip("```cpp").lstrip("```").rstrip("```")
+    output = output.strip().lstrip("```cpp").lstrip("```").rstrip("```")

     # remove prompt if it included it
     if output.startswith(prompt):
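
Note: the added strip() matters because str.lstrip and str.rstrip remove a set of characters, not a literal prefix or suffix, so any leading whitespace before the opening fence stops the old chain from removing it. A small Python sketch of the difference; the raw string below is a made-up model output, not taken from the repository.

# Hypothetical model output that starts with a newline before the code fence.
raw = "\n```cpp\nint x = 1;\n```"

# Old chain: the leading "\n" is not in the stripped character set, so the opening fence survives.
old = raw.lstrip("```cpp").lstrip("```").rstrip("```")
print(repr(old))  # '\n```cpp\nint x = 1;\n'

# New chain: strip() drops the surrounding whitespace first, so the fence characters are removed.
new = raw.strip().lstrip("```cpp").lstrip("```").rstrip("```")
print(repr(new))  # '\nint x = 1;\n'
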
Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+#!/bin/bash
+#SBATCH -n 1
+#SBATCH -c 8
+#SBATCH --ntasks-per-node=1
+#SBATCH --gpus-per-task=1
+#SBATCH --mem=128000
+#SBATCH -t 12:00:0
+#SBATCH -A m2404
+#SBATCH -C gpu&hbm80g
+#SBATCH -q regular
+#SBATCH -J translate-codellama-13b
+#SBATCH -o translation-job-logs/translate-codellama-13b-%A.out
+
+# settings
+MODEL="codellama/CodeLlama-13b-hf"
+TEMP=0.2
+TOPP=0.95
+MAX_NEW_TKNS=1024
+SAMPLES_PER_PROMPT=20
+BATCH_SIZE=8
+hash=$(md5sum ../prompts/translation-prompts.json | cut -d' ' -f1)
+OUTPUT="../outputs/output_translate_${hash:0:8}_${MODEL//\//--}_temp${TEMP}.json"
+CACHE="../outputs/cache/cache_translate_${hash:0:8}_${MODEL//\//--}_temp${TEMP}.jsonl"
+echo "Writing to $OUTPUT"
+echo "model=$MODEL MAX_NEW_TKNS=$MAX_NEW_TKNS SAMPLES_PER_PROMPT=$SAMPLES_PER_PROMPT BATCH_SIZE=$BATCH_SIZE"
+
+# setup
+#ml cuda/11.8.0
+source .env/bin/activate
+export HF_HOME=/pscratch/sd/d/dnicho/.cache/huggingface
+export OMP_NUM_THREADS=8
+export SLURM_CPU_BIND="cores"
+
+# generate
+srun python translate.py \
+    --model $MODEL \
+    --prompts ../prompts/translation-prompts.json \
+    --cache $CACHE \
+    --output $OUTPUT \
+    --temperature $TEMP \
+    --top_p $TOPP \
+    --do_sample \
+    --max_new_tokens $MAX_NEW_TKNS \
+    --num_samples_per_prompt $SAMPLES_PER_PROMPT \
+    --batch_size $BATCH_SIZE
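
Note: the OUTPUT and CACHE paths are keyed on the first eight hex characters of the prompt file's md5 checksum (${hash:0:8}) and on the model name with each "/" replaced by "--" (${MODEL//\//--}), presumably so reruns against the same prompts and model point at the same cache file. A rough Python equivalent of the naming scheme, for reference only; the example hash in the final comment is invented.

import hashlib

# Same idea as: hash=$(md5sum ../prompts/translation-prompts.json | cut -d' ' -f1)
with open("../prompts/translation-prompts.json", "rb") as f:
    digest = hashlib.md5(f.read()).hexdigest()

model = "codellama/CodeLlama-13b-hf"
temp = 0.2

# ${hash:0:8} -> first 8 hex chars; ${MODEL//\//--} -> replace every "/" with "--"
output = f"../outputs/output_translate_{digest[:8]}_{model.replace('/', '--')}_temp{temp}.json"
print(output)
# e.g. ../outputs/output_translate_1a2b3c4d_codellama--CodeLlama-13b-hf_temp0.2.json
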
Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+#!/bin/bash
+#SBATCH -n 1
+#SBATCH -c 8
+#SBATCH --ntasks-per-node=1
+#SBATCH --gpus-per-task=1
+#SBATCH --mem=128000
+#SBATCH -t 14:00:0
+#SBATCH -A m2404
+#SBATCH -C gpu&hbm80g
+#SBATCH -q regular
+#SBATCH -J translate-codellama-34b
+#SBATCH -o translation-job-logs/translate-codellama-34b-%A.out
+
+# settings
+MODEL="codellama/CodeLlama-34b-hf"
+TEMP=0.2
+TOPP=0.95
+MAX_NEW_TKNS=1024
+SAMPLES_PER_PROMPT=20
+BATCH_SIZE=8
+hash=$(md5sum ../prompts/translation-prompts.json | cut -d' ' -f1)
+OUTPUT="../outputs/output_translate_${hash:0:8}_${MODEL//\//--}_temp${TEMP}.json"
+CACHE="../outputs/cache/cache_translate_${hash:0:8}_${MODEL//\//--}_temp${TEMP}.jsonl"
+echo "Writing to $OUTPUT"
+echo "model=$MODEL MAX_NEW_TKNS=$MAX_NEW_TKNS SAMPLES_PER_PROMPT=$SAMPLES_PER_PROMPT BATCH_SIZE=$BATCH_SIZE"
+
+# setup
+#ml cuda/11.8.0
+source .env/bin/activate
+export HF_HOME=/pscratch/sd/d/dnicho/.cache/huggingface
+export OMP_NUM_THREADS=8
+export SLURM_CPU_BIND="cores"
+
+# generate
+srun python translate.py \
+    --model $MODEL \
+    --prompts ../prompts/translation-prompts.json \
+    --cache $CACHE \
+    --output $OUTPUT \
+    --temperature $TEMP \
+    --top_p $TOPP \
+    --do_sample \
+    --max_new_tokens $MAX_NEW_TKNS \
+    --num_samples_per_prompt $SAMPLES_PER_PROMPT \
+    --batch_size $BATCH_SIZE
Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+#!/bin/bash
+#SBATCH -n 1
+#SBATCH -c 8
+#SBATCH --ntasks-per-node=1
+#SBATCH --gpus-per-task=1
+#SBATCH --mem=128000
+#SBATCH -t 06:00:0
+#SBATCH -A m2404
+#SBATCH -C gpu&hbm80g
+#SBATCH -q regular
+#SBATCH -J translate-codellama-7b
+#SBATCH -o translation-job-logs/translate-codellama-7b-%A.out
+
+# settings
+MODEL="codellama/CodeLlama-7b-hf"
+TEMP=0.2
+TOPP=0.95
+MAX_NEW_TKNS=1024
+SAMPLES_PER_PROMPT=20
+BATCH_SIZE=8
+hash=$(md5sum ../prompts/translation-prompts.json | cut -d' ' -f1)
+OUTPUT="../outputs/output_translate_${hash:0:8}_${MODEL//\//--}_temp${TEMP}.json"
+CACHE="../outputs/cache/cache_translate_${hash:0:8}_${MODEL//\//--}_temp${TEMP}.jsonl"
+echo "Writing to $OUTPUT"
+echo "model=$MODEL MAX_NEW_TKNS=$MAX_NEW_TKNS SAMPLES_PER_PROMPT=$SAMPLES_PER_PROMPT BATCH_SIZE=$BATCH_SIZE"
+
+# setup
+#ml cuda/11.8.0
+source .env/bin/activate
+export HF_HOME=/pscratch/sd/d/dnicho/.cache/huggingface
+export OMP_NUM_THREADS=8
+export SLURM_CPU_BIND="cores"
+
+# generate
+srun python translate.py \
+    --model $MODEL \
+    --prompts ../prompts/translation-prompts.json \
+    --cache $CACHE \
+    --output $OUTPUT \
+    --temperature $TEMP \
+    --top_p $TOPP \
+    --do_sample \
+    --max_new_tokens $MAX_NEW_TKNS \
+    --num_samples_per_prompt $SAMPLES_PER_PROMPT \
+    --batch_size $BATCH_SIZE
