@@ -1,12 +1,71 @@
# std imports
from abc import ABC, abstractmethod
+import re

# tpl imports
import torch
from torch.utils.data import Dataset
from transformers import StoppingCriteria


+def clean_output(output : str, prompt : str) -> str:
+ """ Remove `prompt` from the begging of `output`.
+        Also truncate at the end of the function definition (i.e. matching closing brace).
+    """
+    # replace up to the end of the first instance of prompt
+    prompt_loc = output.find(prompt)
+    if prompt_loc == -1:
+        raise ValueError(f"Prompt not found in output: {prompt}")
+    output = output[prompt_loc + len(prompt):].strip()
+
+    # temporarily add opening brace to the beginning
+    output = '{' + output
+
+    # find the matching brace to output[0]
+    stack = []
+    index = 0
+    while index < len(output):
+        token = output[index]
+        if token == '{':
+            stack.append(token)
+        elif token == '}':
+            stack.pop()
+            if len(stack) == 0:
+                break
+
+        index += 1
+
+    # truncate at the matching brace
+    output = output[1:index + 1]
+    return output
+
+GPU_FUNCTION_NAME_PATTERN = re.compile(r"__global__ void ([a-zA-Z0-9_]+)\(")
+CPU_FUNCTION_NAME_PATTERN = re.compile(r"\s*[a-zA-Z_]+ ([a-zA-Z0-9_]+)\(")
+def get_function_name(prompt : str, execution_model : str) -> str:
+    if execution_model in ['cuda', 'hip']:
+        match = GPU_FUNCTION_NAME_PATTERN.match(prompt.splitlines()[-1])
+    else:
+        match = CPU_FUNCTION_NAME_PATTERN.match(prompt.splitlines()[-1])
+    if match is None:
+        raise ValueError(f"Could not find function name in prompt: {prompt}")
+    return match.group(1)
+
+
+def find_matching_brace_index(code : str, open_brace_index : int) -> int:
+    """Finds the index of the closing brace that matches the opening brace at the given index."""
+
+    brace_count = 1
+    for i in range(open_brace_index + 1, len(code)):
+        if code[i] == "{":
+            brace_count += 1
+        elif code[i] == "}":
+            brace_count -= 1
+            if brace_count == 0:
+                return i
+
+    raise ValueError("Unmatched opening brace")
+
+
class InferenceConfig(ABC):

    def __init__(self, prompted : bool = False):
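For illustration (this block is not part of the commit above): a minimal sketch of how the relocated clean_output helper behaves, assuming it is imported from the module modified here (the file path is not shown in this diff); the prompt and completion strings are invented.

# clean_output strips the echoed prompt and truncates at the brace that closes
# the function, discarding anything else the model kept generating.
prompt = "int sum(int a, int b) {"
completion = prompt + " return a + b; }\n\nint main() { return 0; }"
print(clean_output(completion, prompt))
# -> 'return a + b; }'   (the trailing main() is dropped)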
@@ -36,6 +95,10 @@ def trust_remote_code(self) -> bool:
    def format_prompt(self, prompt : str) -> str:
        pass

+    @abstractmethod
+    def clean_output(self, output : str, prompt : str) -> str:
+        pass
+

class StarCoderConfig(InferenceConfig):

@@ -63,6 +126,9 @@ def format_prompt(self, prompt : str) -> str:
            return f"<filename>solutions/solution_1.cpp\n// here is the correct implementation of the coding exercise\n\n{prompt}"
        return prompt.strip()

+    def clean_output(self, output : str, prompt : str) -> str:
+        return clean_output(output, prompt)
+

class CodeLlamaConfig(InferenceConfig):

    def __init__(self, prompted : bool = False):
@@ -90,6 +156,8 @@ def format_prompt(self, prompt : str) -> str:
            return f"// filename: solutions/solution_1.cpp\n// here is the correct implementation of the coding exercise\n\n{prompt}"
        return prompt.strip()

+    def clean_output(self, output : str, prompt : str) -> str:
+        return clean_output(output, prompt)

class PolyCoderConfig(InferenceConfig):

@@ -116,6 +184,9 @@ def format_prompt(self, prompt : str) -> str:
        if self.prompted:
            return f"// filename: solutions/solution_1.cpp\n// here is the correct implementation of the coding exercise\n\n{prompt}"
        return prompt.strip()
+
+    def clean_output(self, output : str, prompt : str) -> str:
+        return clean_output(output, prompt)


class PhindConfig(InferenceConfig):
@@ -144,6 +215,9 @@ def format_prompt(self, prompt : str) -> str:
            return f"// filename: solutions/solution_1.cpp\n// here is the correct implementation of the coding exercise\n\n{prompt}"
        return prompt.strip()

+    def clean_output(self, output : str, prompt : str) -> str:
+        return clean_output(output, prompt)
+

class ReplitConfig(InferenceConfig):

@@ -174,6 +248,92 @@ def format_prompt(self, prompt : str) -> str:
            return f"// filename: solutions/solution_1.cpp\n// here is the correct implementation of the coding exercise\n\n{prompt}"
        return prompt.strip()

+    def clean_output(self, output : str, prompt : str) -> str:
+        return clean_output(output, prompt)
+
+
+class MagicoderConfig(InferenceConfig):
+
+    PROMPT_TEMPLATE = """You are an exceptionally intelligent coding assistant that consistently delivers accurate and reliable responses to user instructions.
+
+@@ Instruction
+{instruction}
+
+@@ Response
+"""
+
+    def __init__(self, prompted : bool = False):
+        super().__init__(prompted=prompted)
+
+    def get_dtype(self):
+        return torch.bfloat16
+
+    def init_padding(self, tokenizer):
+        tokenizer.pad_token_id = tokenizer.eos_token_id  # for batching
+        tokenizer.padding_side = "left"  # for decoder-only models
+        pass
+
+    def get_pad_token_id(self, tokenizer) -> int:
+        return tokenizer.pad_token_id
+
+    def get_eos_token_id(self, tokenizer) -> int:
+        return tokenizer.eos_token_id
+
+    def trust_remote_code(self) -> bool:
+        return False
+
+    def format_prompt(self, prompt : str) -> str:
+        if self.prompted:
+            function_name = get_function_name(prompt, "cuda" if "__global__" in prompt else "serial")
+            prompt = f"Complete the following c++ function.\n```c++{prompt.strip()}```\nWrite only the function {function_name} and no other code. Enclose your solution in ```c++ and ```."
+            return self.PROMPT_TEMPLATE.format(instruction=prompt)
+        return prompt.strip()
+
+    def clean_output(self, output : str, prompt : str) -> str:
+        """ Clean LLM output to find code solution. The output should be in a ```c++ ``` code block. If there are
+            multiple, then it tries to find the block with the function definition (as contained in the prompt).
+            The code block itself may include the function definition and body OR just the body. This will try
+            to parse both.
+        """
+        # 0. replace up to the end of the first instance of prompt
+        prompt_loc = output.find("@@ Response")
+        if prompt_loc == -1:
+            raise ValueError(f"Prompt not found in output: {prompt}")
+        output = output[prompt_loc + len("@@ Response"):].strip()
+
+        # 1. Find all code blocks enclosed in triple backticks with "c++" language tag
+        code_blocks = re.findall(r"```c\+\+\n(.*?)\n```", output, flags=re.DOTALL)
+        code_blocks = [block.lstrip('```c++').rstrip('```') for block in code_blocks]
+
+        # 2. Prioritize code blocks containing the function definition from the prompt
+        sub_prompt = prompt.rstrip().removesuffix("@@ Response").rstrip().removesuffix("```").split("```")[-1]
+        function_name = get_function_name(sub_prompt, "cuda" if "__global__" in sub_prompt else "serial")
+        prioritized_blocks = [block for block in code_blocks if function_name in block]
+
+        # 3. Choose the first block if multiple match, or any block if none match
+        if len(code_blocks) > 0:
+            selected_block = prioritized_blocks[0] if prioritized_blocks else code_blocks[0]
+        else:
+            if '```c++' in output:  # starts with ```c++ but it didn't finish
+                code_idx = output.find('```c++')
+                selected_block = output[code_idx:].removeprefix('```c++')
+            else:
+                selected_block = output

+        # 4. Handle cases where the block contains only the function body
+        if function_name not in selected_block:
+            return selected_block
+        else:
+            function_start_index = selected_block.index(function_name)
+            open_brace_index = selected_block.find("{", function_start_index)
+            try:
+                close_brace_index = find_matching_brace_index(selected_block, open_brace_index)
+            except ValueError:
+                close_brace_index = len(selected_block)
+
+            function_body = selected_block[open_brace_index + 1 : close_brace_index]
+            return function_body + "}"
+

def get_inference_config(model_name : str, **kwargs) -> InferenceConfig:
    if model_name == "bigcode/starcoderbase":
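For illustration (not part of the commit): a minimal sketch of the extraction steps MagicoderConfig.clean_output performs, namely keep the text after "@@ Response", collect the ```c++ fenced blocks, prefer the block containing the prompt's function name, and trim to the function body. The response string below is invented, and get_function_name / find_matching_brace_index from the hunk above are assumed to be in scope.

import re

response = (
    "@@ Response\nHere is the solution:\n"
    "```c++\n__global__ void add(int *a, int *b, int *c) {\n"
    "    int i = threadIdx.x;\n    c[i] = a[i] + b[i];\n}\n```"
)

# Step 0: keep only the text that follows the "@@ Response" marker.
tail = response[response.find("@@ Response") + len("@@ Response"):].strip()

# Step 1: collect every ```c++ fenced block.
blocks = re.findall(r"```c\+\+\n(.*?)\n```", tail, flags=re.DOTALL)
assert len(blocks) == 1 and blocks[0].startswith("__global__ void add")

# Steps 2-4 in the real method: pick the block that mentions the function name
# from the prompt, then use find_matching_brace_index() to keep only the body
# between the function's braces, re-appending the closing "}".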
@@ -186,41 +346,12 @@ def get_inference_config(model_name : str, **kwargs) -> InferenceConfig:
        return PhindConfig(**kwargs)
    elif model_name == 'replit/replit-code-v1_5-3b':
        return ReplitConfig(**kwargs)
+    elif model_name.startswith('ise-uiuc/Magicoder'):
+        return MagicoderConfig(**kwargs)
    else:
        raise ValueError(f"Unknown model name: {model_name}")


-def clean_output(output : str, prompt : str) -> str:
- """ Remove `prompt` from the begging of `output`.
-        Also truncate at the end of the function definition (i.e. matching closing brace).
-    """
-    # replace up to the end of the first instance of prompt
-    prompt_loc = output.find(prompt)
-    if prompt_loc == -1:
-        raise ValueError(f"Prompt not found in output: {prompt}")
-    output = output[prompt_loc + len(prompt):].strip()
-
-    # temporarily add opening brace to the beginning
-    output = '{' + output
-
-    # find the matching brace to output[0]
-    stack = []
-    index = 0
-    while index < len(output):
-        token = output[index]
-        if token == '{':
-            stack.append(token)
-        elif token == '}':
-            stack.pop()
-            if len(stack) == 0:
-                break
-
-        index += 1
-
-    # truncate at the matching brace
-    output = output[1:index + 1]
-    return output
-
class PromptDataset(Dataset):
    ''' PyTorch dataset that simply wraps a list of strings. They do not have to have the same length.
    '''
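For orientation (not part of the commit): a sketch of how the new per-config clean_output hook appears intended to be used; the model id, the prompted flag, and the generated-text variable below are placeholders, not taken from the diff.

config = get_inference_config("ise-uiuc/Magicoder-S-DS-6.7B", prompted=True)

raw_prompt = "int sum(int a, int b) {"
model_prompt = config.format_prompt(raw_prompt)

# ... run the model on model_prompt and decode the result into `generated` ...

cleaned = config.clean_output(generated, model_prompt)  # new abstract hook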