Skip to content

Commit b586746

Browse files
authored
docs: document how to use ai.map() for information extraction (#1808)
* doc: document how to use ai.map() for information extraction * fix lint
1 parent 582bbaf commit b586746

File tree

2 files changed

+64
-33
lines changed

2 files changed

+64
-33
lines changed

bigframes/operations/ai.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,8 @@ def map(
117117
attach_logprobs=False,
118118
):
119119
"""
120-
Maps the DataFrame with the semantics of the user instruction.
120+
Maps the DataFrame with the semantics of the user instruction. The names of the keys in the output_schema parameter carry
121+
semantic meaning, and can be used for information extraction.
121122
122123
**Examples:**
123124
@@ -139,6 +140,22 @@ def map(
139140
<BLANKLINE>
140141
[2 rows x 3 columns]
141142
143+
144+
>>> import bigframes.pandas as bpd
145+
>>> bpd.options.display.progress_bar = None
146+
>>> bpd.options.experiments.ai_operators = True
147+
>>> bpd.options.compute.ai_ops_confirmation_threshold = 25
148+
149+
>>> import bigframes.ml.llm as llm
150+
>>> model = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001")
151+
152+
>>> df = bpd.DataFrame({"text": ["Elmo lives at 123 Sesame Street."]})
153+
>>> df.ai.map("{text}", model=model, output_schema={"person": "string", "address": "string"})
154+
text person address
155+
0 Elmo lives at 123 Sesame Street. Elmo 123 Sesame Street
156+
<BLANKLINE>
157+
[1 rows x 3 columns]
158+
142159
Args:
143160
instruction (str):
144161
An instruction on how to map the data. This value must contain

notebooks/experimental/ai_operators.ipynb

Lines changed: 46 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@
264264
"id": "hQft3o3OiouS"
265265
},
266266
"source": [
267-
"# API Samples"
267+
"# API Examples"
268268
]
269269
},
270270
{
@@ -403,7 +403,7 @@
403403
"name": "stderr",
404404
"output_type": "stream",
405405
"text": [
406-
"/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:109: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n",
406+
"/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:108: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n",
407407
"`db_dtypes` is a preview feature and subject to change.\n",
408408
" warnings.warn(msg, bfe.PreviewWarning)\n"
409409
]
@@ -594,7 +594,7 @@
594594
"name": "stderr",
595595
"output_type": "stream",
596596
"text": [
597-
"/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:109: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n",
597+
"/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:108: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n",
598598
"`db_dtypes` is a preview feature and subject to change.\n",
599599
" warnings.warn(msg, bfe.PreviewWarning)\n"
600600
]
@@ -676,7 +676,7 @@
676676
},
677677
{
678678
"cell_type": "code",
679-
"execution_count": null,
679+
"execution_count": 14,
680680
"metadata": {
681681
"colab": {
682682
"base_uri": "https://localhost:8080/",
@@ -685,12 +685,30 @@
685685
"id": "PpL24AQFiouS",
686686
"outputId": "e7aff038-bf4b-4833-def8-fe2648e8885b"
687687
},
688+
"outputs": [],
689+
"source": [
690+
"# df.ai.map(\"What is the food made from {ingredient_1} and {ingredient_2}? One word only.\", output_column=\"food\", model=gemini_model)"
691+
]
692+
},
693+
{
694+
"cell_type": "markdown",
695+
"metadata": {},
696+
"source": [
697+
"### AI Extraction\n",
698+
"\n",
699+
"AI mapping is also able to extract multiple pieces of information based on your prompt, because the output schema keys can carry semantic meanings:"
700+
]
701+
},
702+
{
703+
"cell_type": "code",
704+
"execution_count": 15,
705+
"metadata": {},
688706
"outputs": [
689707
{
690708
"name": "stderr",
691709
"output_type": "stream",
692710
"text": [
693-
"/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:114: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n",
711+
"/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/array_value.py:108: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n",
694712
"`db_dtypes` is a preview feature and subject to change.\n",
695713
" warnings.warn(msg, bfe.PreviewWarning)\n"
696714
]
@@ -716,54 +734,50 @@
716734
" <thead>\n",
717735
" <tr style=\"text-align: right;\">\n",
718736
" <th></th>\n",
719-
" <th>ingredient_1</th>\n",
720-
" <th>ingredient_2</th>\n",
721-
" <th>food</th>\n",
737+
" <th>text</th>\n",
738+
" <th>person</th>\n",
739+
" <th>address</th>\n",
722740
" </tr>\n",
723741
" </thead>\n",
724742
" <tbody>\n",
725743
" <tr>\n",
726744
" <th>0</th>\n",
727-
" <td>Bun</td>\n",
728-
" <td>Beef Patty</td>\n",
729-
" <td>Burger</td>\n",
745+
" <td>Elmo lives at 123 Sesame Street.</td>\n",
746+
" <td>Elmo</td>\n",
747+
" <td>123 Sesame Street</td>\n",
730748
" </tr>\n",
731749
" <tr>\n",
732750
" <th>1</th>\n",
733-
" <td>Soy Bean</td>\n",
734-
" <td>Bittern</td>\n",
735-
" <td>Tofu</td>\n",
736-
" </tr>\n",
737-
" <tr>\n",
738-
" <th>2</th>\n",
739-
" <td>Sausage</td>\n",
740-
" <td>Long Bread</td>\n",
741-
" <td>Hotdog</td>\n",
751+
" <td>124 Conch Street is SpongeBob's home</td>\n",
752+
" <td>SpongeBob</td>\n",
753+
" <td>124 Conch Street</td>\n",
742754
" </tr>\n",
743755
" </tbody>\n",
744756
"</table>\n",
745-
"<p>3 rows × 3 columns</p>\n",
746-
"</div>[3 rows x 3 columns in total]"
757+
"<p>2 rows × 3 columns</p>\n",
758+
"</div>[2 rows x 3 columns in total]"
747759
],
748760
"text/plain": [
749-
" ingredient_1 ingredient_2 food\n",
750-
"0 Bun Beef Patty Burger\n",
751-
"\n",
752-
"1 Soy Bean Bittern Tofu\n",
753-
"\n",
754-
"2 Sausage Long Bread Hotdog\n",
755-
"\n",
761+
" text person address\n",
762+
"0 Elmo lives at 123 Sesame Street. Elmo 123 Sesame Street\n",
763+
"1 124 Conch Street is SpongeBob's home SpongeBob 124 Conch Street\n",
756764
"\n",
757-
"[3 rows x 3 columns]"
765+
"[2 rows x 3 columns]"
758766
]
759767
},
760-
"execution_count": 13,
768+
"execution_count": 15,
761769
"metadata": {},
762770
"output_type": "execute_result"
763771
}
764772
],
765773
"source": [
766-
"# df.ai.map(\"What is the food made from {ingredient_1} and {ingredient_2}? One word only.\", output_column=\"food\", model=gemini_model)"
774+
"df = bpd.DataFrame({\n",
775+
" \"text\": [\n",
776+
" \"Elmo lives at 123 Sesame Street.\", \n",
777+
" \"124 Conch Street is SpongeBob's home\",\n",
778+
" ]\n",
779+
"})\n",
780+
"df.ai.map(\"{text}\", model=gemini_model, output_schema={\"person\": \"string\", \"address\": \"string\"})"
767781
]
768782
},
769783
{

0 commit comments

Comments
 (0)