diff --git a/bigframes/operations/semantics.py b/bigframes/operations/semantics.py index f4b9d85103..3b22f19539 100644 --- a/bigframes/operations/semantics.py +++ b/bigframes/operations/semantics.py @@ -807,13 +807,17 @@ def top_k( >>> import bigframes.ml.llm as llm >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001") - >>> df = bpd.DataFrame({"Animals": ["Dog", "Bird", "Cat", "Horse"]}) + >>> df = bpd.DataFrame( + ... { + ... "Animals": ["Dog", "Bird", "Cat", "Horse"], + ... "Sounds": ["Woof", "Chirp", "Meow", "Neigh"], + ... }) >>> df.semantics.top_k("{Animals} are more popular as pets", model=model, k=2) - Animals - 0 Dog - 2 Cat + Animals Sounds + 0 Dog Woof + 2 Cat Meow - [2 rows x 1 columns] + [2 rows x 2 columns] Args: instruction (str): @@ -911,14 +915,8 @@ def top_k( ) num_selected += num_new_selected - df = ( - df[df[status_column] > 0] - .drop(["index", status_column], axis=1) - .rename(columns={"old_index": "index"}) - .set_index("index") - ) - df.index.name = None - return df + result_df: bigframes.dataframe.DataFrame = self._df.copy() + return result_df[df.set_index("old_index")[status_column] > 0.0] @staticmethod def _topk_partition(