From 45b7b061c7210e541c3c2abd05d719455c40dad9 Mon Sep 17 00:00:00 2001 From: Miao Date: Tue, 20 May 2025 19:17:13 +0200 Subject: [PATCH 1/8] Update docs and examples for new standalone CLI (#524) * Update docs for standalond CLI usage * Update examples for standalond CLI usage * Refine some expressions to make docs clearer --- docs/docs/core/cli.mdx | 63 ++++++++---------------- docs/docs/core/flow_methods.mdx | 16 ++---- docs/docs/core/initialization.mdx | 54 ++++++++++---------- docs/docs/getting_started/quickstart.md | 39 +++------------ examples/amazon_s3_embedding/main.py | 4 -- examples/code_embedding/main.py | 4 -- examples/docs_to_knowledge_graph/main.py | 3 -- examples/fastapi_server_docker/main.py | 3 -- examples/gdrive_text_embedding/main.py | 4 -- examples/image_search_example/main.py | 6 --- examples/manuals_llm_extraction/main.py | 3 -- examples/pdf_embedding/main.py | 3 -- examples/product_recommendation/main.py | 3 -- examples/text_embedding/main.py | 3 -- examples/text_embedding_qdrant/main.py | 4 -- 15 files changed, 61 insertions(+), 151 deletions(-) diff --git a/docs/docs/core/cli.mdx b/docs/docs/core/cli.mdx index 832ab459..c0c31017 100644 --- a/docs/docs/core/cli.mdx +++ b/docs/docs/core/cli.mdx @@ -8,51 +8,30 @@ import TabItem from '@theme/TabItem'; # CocoIndex CLI -CocoIndex CLI embeds CLI functionality in your program. -It provides a bunch of commands for easily managing and inspecting your flows and indexes. +CocoIndex CLI is a standalone tool for easily managing and inspecting your flows and indexes. -## Enable CocoIndex CLI +## Invoking the CLI -### Use Packaged Main +Once CocoIndex is installed, you can invoke the CLI directly using the `cocoindex` command. Most commands require an `APP_TARGET` argument, which tells the CLI where your flow definitions are located. -The easiest way is to use a packaged main function: +**APP_TARGET Format:** - - +The `APP_TARGET` can be: +1. 
A **path to a Python file** defining your flows (e.g., `main.py`, `path/to/my_flows.py`). +2. An **installed Python module name** that contains your flow definitions (e.g., `my_package.flows`). +3. For commands that operate on a *specific flow* (like `show`, `update`, `evaluate`), you can combine the application reference with a flow name: + * `path/to/my_flows.py:MyFlow` + * `my_package.flows:MyFlow` -```python title="main.py" -import cocoindex +**Global Options:** -@cocoindex.main_fn() -def main(): - ... -``` +* `--env-file <path>`: Load environment variables from a specified `.env` file. If not provided, `.env` in the current directory is loaded if it exists. +* `--version`: Show the CocoIndex version and exit. +* `--help`: Show the main help message and exit. - - - -With this, when the program is executed with `cocoindex` as its first argument, CocoIndex CLI will take over the control. For example: - -```sh -$ python main.py cocoindex ls # Run "ls" subcommand: list all flows -``` - -You may also provide a `cocoindex_cmd` argument to the `main_fn` decorator to change the command from `cocoindex` to something else. - -### Explicitly CLI Invoke - -An alternative way is to use `cocoindex.cli.cli` (with type [`click.Group`](https://click.palletsprojects.com/en/stable/api/#click.Group)). -For example, you may invoke the CLI explicitly with additional arguments: - - - - -```python -cocoindex.cli.cli.main(args) -``` - - - +:::caution Deprecated Usage +The old method of invoking the CLI using `python main.py cocoindex ...` via the `@cocoindex.main_fn()` decorator is now deprecated. Please remove `@cocoindex.main_fn()` from your scripts and use the standalone cocoindex command as described. +::: ## Subcommands @@ -60,8 +39,8 @@ The following subcommands are available: | Subcommand | Description | | ---------- | ----------- | -| `ls` | List all flows present in the current process. Or list all persisted flows under the current app namespace if `--all` is specified. 
| -| `show` | Show the spec for a specific flow. | +| `ls` | List all flows present in the given file/module. Or list all persisted flows under the current app namespace if no file/module specified. | +| `show` | Show the spec and schema for a specific flow. | | `setup` | Check and apply backend setup changes for flows, including the internal and target storage (to export). | | `drop` | Drop the backend setup for specified flows. | | `update` | Update the index defined by the flow. | @@ -71,6 +50,6 @@ The following subcommands are available: Use `--help` to see the full list of subcommands, and `subcommand --help` to see the usage of a specific one. ```sh -python main.py cocoindex --help # Show all subcommands -python main.py cocoindex show --help # Show usage of "show" subcommand +cocoindex --help # Show all subcommands +cocoindex show --help # Show usage of "show" subcommand ``` \ No newline at end of file diff --git a/docs/docs/core/flow_methods.mdx b/docs/docs/core/flow_methods.mdx index 3235cd0a..88fcc46a 100644 --- a/docs/docs/core/flow_methods.mdx +++ b/docs/docs/core/flow_methods.mdx @@ -30,17 +30,7 @@ def demo_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataSco ``` It creates a `demo_flow` object in `cocoindex.Flow` type. -To enable CLI, you also need to make sure you have a main function decorated with `@cocoindex.main_fn()`: - -```python title="main.py" -@cocoindex.main_fn() -def main(): - ... - -if __name__ == "__main__": - main() -``` @@ -78,7 +68,7 @@ The `cocoindex update` subcommand creates/updates data in the target storage. Once it's done, the target data is fresh up to the moment when the function is called. 
```sh -python main.py cocoindex update +cocoindex update main.py ``` #### Library API @@ -115,7 +105,7 @@ Change capture mechanisms enable CocoIndex to continuously capture changes from To perform live update, run the `cocoindex update` subcommand with `-L` option: ```sh -python main.py cocoindex update -L +cocoindex update main.py -L ``` If there's at least one data source with change capture mechanism enabled, it will keep running until the aborted (e.g. by `Ctrl-C`). @@ -232,7 +222,7 @@ It takes the following options: Example: ```sh -python main.py cocoindex evaluate --output-dir ./eval_output +cocoindex evaluate main.py --output-dir ./eval_output ``` ### Library API diff --git a/docs/docs/core/initialization.mdx b/docs/docs/core/initialization.mdx index d2384d58..ae99ff0c 100644 --- a/docs/docs/core/initialization.mdx +++ b/docs/docs/core/initialization.mdx @@ -15,42 +15,42 @@ We'll talk about the code skeleton to initialize the library in your code, and t There're two options to initialize in your code: -* Use packaged main function. It's easier to start with. +* Use CocoIndex CLI. It's easier to start with. * Explicit initialization. It's more flexible. -### Packaged Main +### CLI-Based Initialization -The easiest way is to use a packaged main function: +When you use the `cocoindex` command-line tool, the library is automatically initialized for you: - - +1. **Environment File Loading**: + * By default, the `cocoindex` CLI searches upward from the current directory for a `.env` file. + * You can use `--env-file <path>` to specify one explicitly: -The `@cocoindex.main_fn` decorator wraps your main function for CocoIndex: - -```python -import cocoindex + ```sh + cocoindex --env-file path/to/custom.env ... + ``` -@cocoindex.main_fn() -def main(): - ... + * If no file is found, only existing system environment variables are used. + * Loaded variables do **not** override existing system ones. -if __name__ == "__main__": - main() -``` +2. 
**Automatic Library Initialization**: + * Then, the CLI automatically prepares everything using loaded environment variables — no manual setup required. + * Your script (e.g. `main.py`) is just used to discover defined flows. - - + See [Environment Variables](#environment-variables) for supported variables. -This takes care of the following effects: + The primary way to interact with CocoIndex in this setup is via CLI commands that operate on your script: + You interact with CocoIndex via CLI commands that operate on your script: -1. Initialize the library with settings loaded from environment variables, if not explicitly provided. -2. If the program is executed with the `cocoindex` command, CocoIndex CLI will take over the control. - It provides a bunch of commands for easily managing and inspecting indexes. - See [CocoIndex CLI](/docs/core/cli) for more details. -3. Otherwise, it will run the main function. + ```sh + # Example: List flows defined in my_app.py + cocoindex ls my_app.py -See [Environment Variables](#environment-variables) for supported environment variables. + # Example: Update a specific flow in my_app.py + cocoindex update my_app.py:MyFlowName + ``` + See [CocoIndex CLI](/docs/core/cli) for more details. ### Explicit Initialization @@ -123,7 +123,11 @@ If you use the Postgres database hosted by [Supabase](https://supabase.com/), pl ## Environment Variables -When you use the packaged main function, settings will be loaded from environment variables. +When using the CLI, settings are primarily loaded from environment variables. The CLI will: + +* Use the `--env-file` option if provided. +* Otherwise, try to locate a `.env` file by searching upward from the current directory. + Each setting field has a corresponding environment variable: | environment variable | corresponding field in `Settings` | required? 
| diff --git a/docs/docs/getting_started/quickstart.md b/docs/docs/getting_started/quickstart.md index 7fb4a550..7e4c6b0a 100644 --- a/docs/docs/getting_started/quickstart.md +++ b/docs/docs/getting_started/quickstart.md @@ -46,7 +46,7 @@ We'll need to install a bunch of dependencies for this project. 2. Prepare input files for the index. Put them in a directory, e.g. `markdown_files`. If you don't have any files at hand, you may download the example [markdown_files.zip](markdown_files.zip) and unzip it in the current directory. -## Step 2: Create the Python file `quickstart.py` +## Step 2: Define the indexing flow Create a new file `quickstart.py` and import the `cocoindex` library: @@ -54,11 +54,7 @@ Create a new file `quickstart.py` and import the `cocoindex` library: import cocoindex ``` -Then we'll create the indexing flow. - -### Step 2.1: Define the indexing flow - -Starting from the indexing flow: +Then we'll create the indexing flow as follows. ```python title="quickstart.py" @cocoindex.flow_def(name="TextEmbedding") @@ -117,24 +113,6 @@ Notes: 6. In CocoIndex, a *collector* collects multiple entries of data together. In this example, the `doc_embeddings` collector collects data from all `chunk`s across all `doc`s, and using the collected data to build a vector index `"doc_embeddings"`, using `Postgres`. -### Step 2.2: Define the main function - -We can provide an empty main function for now, with a `@cocoindex.main_fn()` decorator: - -```python title="quickstart.py" -@cocoindex.main_fn() -def _main(): - pass - -if __name__ == "__main__": - _main() -``` - -The `@cocoindex.main_fn` declares a function as the main function for an indexing application. This achieves the following effects: - -* Initialize the CocoIndex library states. Settings (e.g. database URL) are loaded from environment variables by default. -* When the CLI is invoked with `cocoindex` subcommand, `cocoindex CLI` takes over the control, which provides convenient ways to manage the index. 
See the next step for more details. - ## Step 3: Run the indexing pipeline and queries Specify the database URL by environment variable: @@ -148,7 +126,7 @@ export COCOINDEX_DATABASE_URL="postgresql://cocoindex:cocoindex@localhost:5432/c We need to setup the index: ```bash -python quickstart.py cocoindex setup +cocoindex setup quickstart.py ``` Enter `yes` and it will automatically create a few tables in the database. @@ -160,7 +138,7 @@ Now we have tables needed by this CocoIndex flow. Now we're ready to build the index: ```bash -python quickstart.py cocoindex update +cocoindex update quickstart.py ``` It will run for a few seconds and output the following statistics: @@ -260,13 +238,12 @@ There're two CocoIndex-specific logic: It's done by the `eval()` method of the transform flow `text_to_embedding`. The return type of this method is `list[float]` as declared in the `text_to_embedding()` function (`cocoindex.DataSlice[list[float]]`). -### Step 4.3: Update the main function +### Step 4.3: Add the main script logic -Now we can update the main function to use the query function we just defined: +Now we can add the main logic to the program. It uses the query function we just defined: ```python title="quickstart.py" -@cocoindex.main_fn() -def _run(): +if __name__ == "__main__": # Initialize the database connection pool. pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL")) # Run queries in a loop to demonstrate the query capabilities. 
@@ -291,7 +268,7 @@ It interacts with users and search the database by calling the `search()` method ### Step 4.4: Run queries against the index -Now we can run the same Python file, which will run the new main function: +Now we can run the same Python file, which will run the newly added main logic: ```bash python quickstart.py diff --git a/examples/amazon_s3_embedding/main.py b/examples/amazon_s3_embedding/main.py index d3730399..25bf0d61 100644 --- a/examples/amazon_s3_embedding/main.py +++ b/examples/amazon_s3_embedding/main.py @@ -1,5 +1,3 @@ -from dotenv import load_dotenv - import cocoindex import os @@ -52,7 +50,6 @@ def amazon_s3_text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scop model="sentence-transformers/all-MiniLM-L6-v2")), default_similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY) -@cocoindex.main_fn() def _run(): # Use a `FlowLiveUpdater` to keep the flow data updated. with cocoindex.FlowLiveUpdater(amazon_s3_text_embedding_flow): @@ -73,5 +70,4 @@ def _run(): break if __name__ == "__main__": - load_dotenv(override=True) _run() diff --git a/examples/code_embedding/main.py b/examples/code_embedding/main.py index abd6d7b0..ec23a0f5 100644 --- a/examples/code_embedding/main.py +++ b/examples/code_embedding/main.py @@ -1,5 +1,3 @@ -from dotenv import load_dotenv - import cocoindex import os @@ -54,7 +52,6 @@ def code_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind query_transform_flow=code_to_embedding, default_similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY) -@cocoindex.main_fn() def _run(): # Run queries in a loop to demonstrate the query capabilities. 
while True: @@ -73,5 +70,4 @@ def _run(): break if __name__ == "__main__": - load_dotenv(override=True) _run() diff --git a/examples/docs_to_knowledge_graph/main.py b/examples/docs_to_knowledge_graph/main.py index 45fa184d..8c0ec9e9 100644 --- a/examples/docs_to_knowledge_graph/main.py +++ b/examples/docs_to_knowledge_graph/main.py @@ -2,7 +2,6 @@ This example shows how to extract relationships from documents and build a knowledge graph. """ import dataclasses -from dotenv import load_dotenv import cocoindex @dataclasses.dataclass @@ -148,10 +147,8 @@ def docs_to_kg_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.D primary_key_fields=["id"], ) -@cocoindex.main_fn() def _run(): pass if __name__ == "__main__": - load_dotenv(override=True) _run() diff --git a/examples/fastapi_server_docker/main.py b/examples/fastapi_server_docker/main.py index 48c84eb7..8cb42b62 100644 --- a/examples/fastapi_server_docker/main.py +++ b/examples/fastapi_server_docker/main.py @@ -2,7 +2,6 @@ import uvicorn from fastapi import FastAPI -from dotenv import load_dotenv from src.cocoindex_funs import code_embedding_flow, code_to_embedding @@ -21,10 +20,8 @@ def query_endpoint(string: str): results, _ = query_handler.search(string, 10) return results -@cocoindex.main_fn() def _run(): uvicorn.run(fastapi_app, host="0.0.0.0", port=8080) if __name__ == "__main__": - load_dotenv(override=True) _run() diff --git a/examples/gdrive_text_embedding/main.py b/examples/gdrive_text_embedding/main.py index 6e82181c..2ed24916 100644 --- a/examples/gdrive_text_embedding/main.py +++ b/examples/gdrive_text_embedding/main.py @@ -1,5 +1,3 @@ -from dotenv import load_dotenv - import cocoindex import datetime import os @@ -51,7 +49,6 @@ def gdrive_text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: model="sentence-transformers/all-MiniLM-L6-v2")), default_similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY) -@cocoindex.main_fn() def _run(): # Use a 
`FlowLiveUpdater` to keep the flow data updated. with cocoindex.FlowLiveUpdater(gdrive_text_embedding_flow): @@ -72,5 +69,4 @@ def _run(): break if __name__ == "__main__": - load_dotenv(override=True) _run() diff --git a/examples/image_search_example/main.py b/examples/image_search_example/main.py index 80a80ca0..cf0448d5 100644 --- a/examples/image_search_example/main.py +++ b/examples/image_search_example/main.py @@ -1,4 +1,3 @@ -from dotenv import load_dotenv import cocoindex import datetime import os @@ -7,9 +6,6 @@ from fastapi import FastAPI, Query from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles -from cocoindex.lib import main_fn - -load_dotenv(override=True) OLLAMA_URL = "http://localhost:11434/api/generate" OLLAMA_MODEL = "gemma3" @@ -120,8 +116,6 @@ def search(q: str = Query(..., description="Search query"), limit: int = Query(5 }) return {"results": out} -# --- CLI entrypoint --- -@main_fn() def _run(): pass diff --git a/examples/manuals_llm_extraction/main.py b/examples/manuals_llm_extraction/main.py index 94e816de..0337dd54 100644 --- a/examples/manuals_llm_extraction/main.py +++ b/examples/manuals_llm_extraction/main.py @@ -1,7 +1,6 @@ import tempfile import dataclasses -from dotenv import load_dotenv from marker.converters.pdf import PdfConverter from marker.models import create_model_dict from marker.output import text_from_rendered @@ -117,10 +116,8 @@ def manual_extraction_flow(flow_builder: cocoindex.FlowBuilder, data_scope: coco primary_key_fields=["filename"], ) -@cocoindex.main_fn() def _run(): pass if __name__ == "__main__": - load_dotenv(override=True) _run() diff --git a/examples/pdf_embedding/main.py b/examples/pdf_embedding/main.py index 00b1ae51..2520cadb 100644 --- a/examples/pdf_embedding/main.py +++ b/examples/pdf_embedding/main.py @@ -1,6 +1,5 @@ import tempfile -from dotenv import load_dotenv from marker.converters.pdf import PdfConverter from marker.models import create_model_dict from 
marker.output import text_from_rendered @@ -75,7 +74,6 @@ def pdf_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoinde query_transform_flow=text_to_embedding, default_similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY) -@cocoindex.main_fn() def _run(): # Run queries in a loop to demonstrate the query capabilities. while True: @@ -94,5 +92,4 @@ def _run(): break if __name__ == "__main__": - load_dotenv(override=True) _run() diff --git a/examples/product_recommendation/main.py b/examples/product_recommendation/main.py index 11568d23..df678637 100644 --- a/examples/product_recommendation/main.py +++ b/examples/product_recommendation/main.py @@ -3,7 +3,6 @@ """ import dataclasses import datetime -from dotenv import load_dotenv import cocoindex from jinja2 import Template @@ -176,10 +175,8 @@ def store_product_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoinde primary_key_fields=["id"], ) -@cocoindex.main_fn() def _run(): pass if __name__ == "__main__": - load_dotenv(override=True) _run() diff --git a/examples/text_embedding/main.py b/examples/text_embedding/main.py index e69e1e7c..6461ac48 100644 --- a/examples/text_embedding/main.py +++ b/examples/text_embedding/main.py @@ -1,4 +1,3 @@ -from dotenv import load_dotenv from psycopg_pool import ConnectionPool import cocoindex import os @@ -60,7 +59,6 @@ def search(pool: ConnectionPool, query: str, top_k: int = 5): for row in cur.fetchall() ] -@cocoindex.main_fn() def _run(): # Initialize the database connection pool. 
pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL")) @@ -82,5 +80,4 @@ def _run(): break if __name__ == "__main__": - load_dotenv(override=True) _run() diff --git a/examples/text_embedding_qdrant/main.py b/examples/text_embedding_qdrant/main.py index 57f27a45..f84cdb24 100644 --- a/examples/text_embedding_qdrant/main.py +++ b/examples/text_embedding_qdrant/main.py @@ -1,5 +1,3 @@ -from dotenv import load_dotenv - import cocoindex @@ -66,7 +64,6 @@ def text_embedding_flow( ) -@cocoindex.main_fn() def _run(): # Run queries in a loop to demonstrate the query capabilities. while True: @@ -86,5 +83,4 @@ def _run(): if __name__ == "__main__": - load_dotenv(override=True) _run() From e76adb442ddce1ccd3e93bf2be51c49e3986799b Mon Sep 17 00:00:00 2001 From: LJ Date: Tue, 20 May 2025 12:20:52 -0700 Subject: [PATCH 2/8] docs: update to `cocoindex main.py ...` --- docs/docs/core/cli.mdx | 2 +- examples/amazon_s3_embedding/README.md | 6 +++--- examples/code_embedding/README.md | 6 +++--- examples/docs_to_knowledge_graph/README.md | 6 +++--- examples/fastapi_server_docker/dockerfile | 2 +- examples/gdrive_text_embedding/README.md | 6 +++--- examples/image_search_example/README.md | 2 +- examples/manuals_llm_extraction/README.md | 6 +++--- examples/pdf_embedding/README.md | 6 +++--- examples/product_recommendation/README.md | 6 +++--- examples/text_embedding/README.md | 6 +++--- examples/text_embedding/Text_Embedding.ipynb | 4 ++-- examples/text_embedding_qdrant/README.md | 6 +++--- 13 files changed, 32 insertions(+), 32 deletions(-) diff --git a/docs/docs/core/cli.mdx b/docs/docs/core/cli.mdx index c0c31017..20ee162f 100644 --- a/docs/docs/core/cli.mdx +++ b/docs/docs/core/cli.mdx @@ -30,7 +30,7 @@ The `APP_TARGET` can be: * `--help`: Show the main help message and exit. :::caution Deprecated Usage -The old method of invoking the CLI using `python main.py cocoindex ...` via the `@cocoindex.main_fn()` decorator is now deprecated. 
Please remove `@cocoindex.main_fn()` from your scripts and use the standalone cocoindex command as described. +The old method of invoking the CLI using `cocoindex main.py ...` via the `@cocoindex.main_fn()` decorator is now deprecated. Please remove `@cocoindex.main_fn()` from your scripts and use the standalone cocoindex command as described. ::: ## Subcommands diff --git a/examples/amazon_s3_embedding/README.md b/examples/amazon_s3_embedding/README.md index d79a73b9..ec351f49 100644 --- a/examples/amazon_s3_embedding/README.md +++ b/examples/amazon_s3_embedding/README.md @@ -40,7 +40,7 @@ pip install -e . Setup: ```sh -python main.py cocoindex setup +cocoindex main.py setup ``` Run: @@ -59,13 +59,13 @@ CocoInsight is in Early Access now (Free) 😊 You found us! A quick 3 minute vi Run CocoInsight to understand your RAG data pipeline: ```sh -python main.py cocoindex server -ci +cocoindex main.py server -ci ``` You can also add a `-L` flag to make the server keep updating the index to reflect source changes at the same time: ```sh -python main.py cocoindex server -ci -L +cocoindex main.py server -ci -L ``` Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight). \ No newline at end of file diff --git a/examples/code_embedding/README.md b/examples/code_embedding/README.md index dd7b9ee0..5219f165 100644 --- a/examples/code_embedding/README.md +++ b/examples/code_embedding/README.md @@ -42,13 +42,13 @@ We will match against user-provided text by a SQL query, reusing the embedding o - Setup: ```bash - python main.py cocoindex setup + cocoindex main.py setup ``` - Update index: ```bash - python main.py cocoindex update + cocoindex main.py update ``` - Run: @@ -62,7 +62,7 @@ I used CocoInsight (Free beta now) to troubleshoot the index generation and unde It just connects to your local CocoIndex server, with Zero pipeline data retention. 
Run the following command to start CocoInsight: ``` -python main.py cocoindex server -ci +cocoindex main.py server -ci ``` Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight). diff --git a/examples/docs_to_knowledge_graph/README.md b/examples/docs_to_knowledge_graph/README.md index 4cc3de12..01d917a3 100644 --- a/examples/docs_to_knowledge_graph/README.md +++ b/examples/docs_to_knowledge_graph/README.md @@ -34,13 +34,13 @@ pip install -e . Setup: ```bash -python main.py cocoindex setup +cocoindex main.py setup ``` Update index: ```bash -python main.py cocoindex update +cocoindex main.py update ``` ### Browse the knowledge graph @@ -66,7 +66,7 @@ I used CocoInsight (Free beta now) to troubleshoot the index generation and unde It just connects to your local CocoIndex server, with Zero pipeline data retention. Run following command to start CocoInsight: ```bash -python main.py cocoindex server -ci +cocoindex main.py server -ci ``` And then open the url https://cocoindex.io/cocoinsight. diff --git a/examples/fastapi_server_docker/dockerfile b/examples/fastapi_server_docker/dockerfile index c1bb35bd..9afd0fa3 100644 --- a/examples/fastapi_server_docker/dockerfile +++ b/examples/fastapi_server_docker/dockerfile @@ -10,4 +10,4 @@ COPY . . RUN cat .env -CMD ["sh", "-c", "echo yes | python main.py cocoindex setup && python main.py cocoindex update && python main.py"] +CMD ["sh", "-c", "echo yes | cocoindex main.py setup && cocoindex main.py update && python main.py"] diff --git a/examples/gdrive_text_embedding/README.md b/examples/gdrive_text_embedding/README.md index b0106a1f..29d1aeb1 100644 --- a/examples/gdrive_text_embedding/README.md +++ b/examples/gdrive_text_embedding/README.md @@ -34,7 +34,7 @@ pip install -e . Setup: ```sh -python main.py cocoindex setup +cocoindex main.py setup ``` Run: @@ -53,13 +53,13 @@ CocoInsight is in Early Access now (Free) 😊 You found us! 
A quick 3 minute vi Run CocoInsight to understand your RAG data pipeline: ```sh -python main.py cocoindex server -ci +cocoindex main.py server -ci ``` You can also add a `-L` flag to make the server keep updating the index to reflect source changes at the same time: ```sh -python main.py cocoindex server -ci -L +cocoindex main.py server -ci -L ``` Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight). \ No newline at end of file diff --git a/examples/image_search_example/README.md b/examples/image_search_example/README.md index 5a1b4005..39073f67 100644 --- a/examples/image_search_example/README.md +++ b/examples/image_search_example/README.md @@ -48,7 +48,7 @@ pip install -r requirements.txt ## Run Backend ``` -python main.py cocoindex setup +cocoindex main.py setup uvicorn main:app --reload --host 0.0.0.0 --port 8000 ``` diff --git a/examples/manuals_llm_extraction/README.md b/examples/manuals_llm_extraction/README.md index a512aa92..a6a371e6 100644 --- a/examples/manuals_llm_extraction/README.md +++ b/examples/manuals_llm_extraction/README.md @@ -32,13 +32,13 @@ pip install -e . Setup: ```bash -python main.py cocoindex setup +cocoindex main.py setup ``` Update index: ```bash -python main.py cocoindex update +cocoindex main.py update ``` ### Query the index @@ -65,7 +65,7 @@ CocoInsight is a tool to help you understand your data pipeline and data index. Run CocoInsight to understand your RAG data pipeline: ``` -python main.py cocoindex server -ci +cocoindex main.py server -ci ``` Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight). It connects to your local CocoIndex server with zero data retention. diff --git a/examples/pdf_embedding/README.md b/examples/pdf_embedding/README.md index 3dde765d..10cd537d 100644 --- a/examples/pdf_embedding/README.md +++ b/examples/pdf_embedding/README.md @@ -14,13 +14,13 @@ pip install -e . 
Setup: ```bash -python main.py cocoindex setup +cocoindex main.py setup ``` Update index: ```bash -python main.py cocoindex update +cocoindex main.py update ``` Run: @@ -35,7 +35,7 @@ CocoInsight is in Early Access now (Free) 😊 You found us! A quick 3 minute vi Run CocoInsight to understand your RAG data pipeline: ``` -python main.py cocoindex server -ci +cocoindex main.py server -ci ``` Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight). \ No newline at end of file diff --git a/examples/product_recommendation/README.md b/examples/product_recommendation/README.md index 96565782..9a60d6a8 100644 --- a/examples/product_recommendation/README.md +++ b/examples/product_recommendation/README.md @@ -28,13 +28,13 @@ pip install -e . Setup: ```bash -python main.py cocoindex setup +cocoindex main.py setup ``` Update index: ```bash -python main.py cocoindex update +cocoindex main.py update ``` ### Browse the knowledge graph @@ -58,7 +58,7 @@ I used CocoInsight (Free beta now) to troubleshoot the index generation and unde It just connects to your local CocoIndex server, with Zero pipeline data retention. Run following command to start CocoInsight: ```bash -python main.py cocoindex server -ci +cocoindex main.py server -ci ``` And then open the url https://cocoindex.io/cocoinsight. diff --git a/examples/text_embedding/README.md b/examples/text_embedding/README.md index 2dd1dbb8..a8718540 100644 --- a/examples/text_embedding/README.md +++ b/examples/text_embedding/README.md @@ -35,13 +35,13 @@ pip install -e . Setup: ```bash -python main.py cocoindex setup +cocoindex main.py setup ``` Update index: ```bash -python main.py cocoindex update +cocoindex main.py update ``` Run: @@ -56,7 +56,7 @@ I used CocoInsight (Free beta now) to troubleshoot the index generation and unde It just connects to your local CocoIndex server, with Zero pipeline data retention. 
Run following command to start CocoInsight: ``` -python main.py cocoindex server -ci +cocoindex main.py server -ci ``` Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight). diff --git a/examples/text_embedding/Text_Embedding.ipynb b/examples/text_embedding/Text_Embedding.ipynb index 9bc26bd5..c9f58709 100644 --- a/examples/text_embedding/Text_Embedding.ipynb +++ b/examples/text_embedding/Text_Embedding.ipynb @@ -340,7 +340,7 @@ { "cell_type": "code", "source": [ - "!yes yes | python main.py cocoindex setup" + "!yes yes | cocoindex main.py setup" ], "metadata": { "id": "oBStjaI0Cli_" @@ -360,7 +360,7 @@ { "cell_type": "code", "source": [ - "!python main.py cocoindex update" + "!cocoindex main.py update" ], "metadata": { "id": "M9g6xIZHCn5T" diff --git a/examples/text_embedding_qdrant/README.md b/examples/text_embedding_qdrant/README.md index 3f91dc95..39ce04a8 100644 --- a/examples/text_embedding_qdrant/README.md +++ b/examples/text_embedding_qdrant/README.md @@ -59,13 +59,13 @@ We use Qdrant client to query the index, and reuse the embedding operation in th - Setup: ```bash - python main.py cocoindex setup + cocoindex main.py setup ``` - Update index: ```bash - python main.py cocoindex update + cocoindex main.py update ``` - Run: @@ -79,7 +79,7 @@ I used CocoInsight (Free beta now) to troubleshoot the index generation and unde It just connects to your local CocoIndex server, with Zero pipeline data retention. Run following command to start CocoInsight: ```bash -python main.py cocoindex server -ci +cocoindex main.py server -ci ``` Open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight). 
From a48c83116fc10ca489c9fd4acd7318acebb68f94 Mon Sep 17 00:00:00 2001 From: LJ Date: Tue, 20 May 2025 12:37:39 -0700 Subject: [PATCH 3/8] docs: fix ordering --- docs/docs/core/cli.mdx | 2 +- examples/amazon_s3_embedding/README.md | 6 +++--- examples/code_embedding/README.md | 6 +++--- examples/docs_to_knowledge_graph/README.md | 6 +++--- examples/fastapi_server_docker/dockerfile | 2 +- examples/gdrive_text_embedding/README.md | 6 +++--- examples/image_search_example/README.md | 2 +- examples/manuals_llm_extraction/README.md | 6 +++--- examples/pdf_embedding/README.md | 6 +++--- examples/product_recommendation/README.md | 6 +++--- examples/text_embedding/README.md | 6 +++--- examples/text_embedding/Text_Embedding.ipynb | 4 ++-- examples/text_embedding_qdrant/README.md | 6 +++--- 13 files changed, 32 insertions(+), 32 deletions(-) diff --git a/docs/docs/core/cli.mdx b/docs/docs/core/cli.mdx index 20ee162f..c0c31017 100644 --- a/docs/docs/core/cli.mdx +++ b/docs/docs/core/cli.mdx @@ -30,7 +30,7 @@ The `APP_TARGET` can be: * `--help`: Show the main help message and exit. :::caution Deprecated Usage -The old method of invoking the CLI using `cocoindex main.py ...` via the `@cocoindex.main_fn()` decorator is now deprecated. Please remove `@cocoindex.main_fn()` from your scripts and use the standalone cocoindex command as described. +The old method of invoking the CLI using `python main.py cocoindex ...` via the `@cocoindex.main_fn()` decorator is now deprecated. Please remove `@cocoindex.main_fn()` from your scripts and use the standalone cocoindex command as described. ::: ## Subcommands diff --git a/examples/amazon_s3_embedding/README.md b/examples/amazon_s3_embedding/README.md index ec351f49..4f56ed58 100644 --- a/examples/amazon_s3_embedding/README.md +++ b/examples/amazon_s3_embedding/README.md @@ -40,7 +40,7 @@ pip install -e . 
Setup: ```sh -cocoindex main.py setup +cocoindex setup main.py ``` Run: @@ -59,13 +59,13 @@ CocoInsight is in Early Access now (Free) 😊 You found us! A quick 3 minute vi Run CocoInsight to understand your RAG data pipeline: ```sh -cocoindex main.py server -ci +cocoindex server -ci main.py ``` You can also add a `-L` flag to make the server keep updating the index to reflect source changes at the same time: ```sh -cocoindex main.py server -ci -L +cocoindex server -ci -L main.py ``` Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight). \ No newline at end of file diff --git a/examples/code_embedding/README.md b/examples/code_embedding/README.md index 5219f165..09bb0cc1 100644 --- a/examples/code_embedding/README.md +++ b/examples/code_embedding/README.md @@ -42,13 +42,13 @@ We will match against user-provided text by a SQL query, reusing the embedding o - Setup: ```bash - cocoindex main.py setup + cocoindex setup main.py ``` - Update index: ```bash - cocoindex main.py update + cocoindex update main.py ``` - Run: @@ -62,7 +62,7 @@ I used CocoInsight (Free beta now) to troubleshoot the index generation and unde It just connects to your local CocoIndex server, with Zero pipeline data retention. Run the following command to start CocoInsight: ``` -cocoindex main.py server -ci +cocoindex server -ci main.py ``` Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight). diff --git a/examples/docs_to_knowledge_graph/README.md b/examples/docs_to_knowledge_graph/README.md index 01d917a3..e297db36 100644 --- a/examples/docs_to_knowledge_graph/README.md +++ b/examples/docs_to_knowledge_graph/README.md @@ -34,13 +34,13 @@ pip install -e . 
Setup: ```bash -cocoindex main.py setup +cocoindex setup main.py ``` Update index: ```bash -cocoindex main.py update +cocoindex update main.py ``` ### Browse the knowledge graph @@ -66,7 +66,7 @@ I used CocoInsight (Free beta now) to troubleshoot the index generation and unde It just connects to your local CocoIndex server, with Zero pipeline data retention. Run following command to start CocoInsight: ```bash -cocoindex main.py server -ci +cocoindex server -ci main.py ``` And then open the url https://cocoindex.io/cocoinsight. diff --git a/examples/fastapi_server_docker/dockerfile b/examples/fastapi_server_docker/dockerfile index 9afd0fa3..70a041cb 100644 --- a/examples/fastapi_server_docker/dockerfile +++ b/examples/fastapi_server_docker/dockerfile @@ -10,4 +10,4 @@ COPY . . RUN cat .env -CMD ["sh", "-c", "echo yes | cocoindex main.py setup && cocoindex main.py update && python main.py"] +CMD ["sh", "-c", "echo yes | cocoindex setup main.py && cocoindex update main.py && python main.py"] diff --git a/examples/gdrive_text_embedding/README.md b/examples/gdrive_text_embedding/README.md index 29d1aeb1..73167ab8 100644 --- a/examples/gdrive_text_embedding/README.md +++ b/examples/gdrive_text_embedding/README.md @@ -34,7 +34,7 @@ pip install -e . Setup: ```sh -cocoindex main.py setup +cocoindex setup main.py ``` Run: @@ -53,13 +53,13 @@ CocoInsight is in Early Access now (Free) 😊 You found us! A quick 3 minute vi Run CocoInsight to understand your RAG data pipeline: ```sh -cocoindex main.py server -ci +cocoindex server -ci main.py ``` You can also add a `-L` flag to make the server keep updating the index to reflect source changes at the same time: ```sh -cocoindex main.py server -ci -L +cocoindex server -ci -L main.py ``` Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight). 
\ No newline at end of file diff --git a/examples/image_search_example/README.md b/examples/image_search_example/README.md index 39073f67..3299ab45 100644 --- a/examples/image_search_example/README.md +++ b/examples/image_search_example/README.md @@ -48,7 +48,7 @@ pip install -r requirements.txt ## Run Backend ``` -cocoindex main.py setup +cocoindex setup main.py uvicorn main:app --reload --host 0.0.0.0 --port 8000 ``` diff --git a/examples/manuals_llm_extraction/README.md b/examples/manuals_llm_extraction/README.md index a6a371e6..22ef240a 100644 --- a/examples/manuals_llm_extraction/README.md +++ b/examples/manuals_llm_extraction/README.md @@ -32,13 +32,13 @@ pip install -e . Setup: ```bash -cocoindex main.py setup +cocoindex setup main.py ``` Update index: ```bash -cocoindex main.py update +cocoindex update main.py ``` ### Query the index @@ -65,7 +65,7 @@ CocoInsight is a tool to help you understand your data pipeline and data index. Run CocoInsight to understand your RAG data pipeline: ``` -cocoindex main.py server -ci +cocoindex server -ci main.py ``` Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight). It connects to your local CocoIndex server with zero data retention. diff --git a/examples/pdf_embedding/README.md b/examples/pdf_embedding/README.md index 10cd537d..44771b94 100644 --- a/examples/pdf_embedding/README.md +++ b/examples/pdf_embedding/README.md @@ -14,13 +14,13 @@ pip install -e . Setup: ```bash -cocoindex main.py setup +cocoindex setup main.py ``` Update index: ```bash -cocoindex main.py update +cocoindex update main.py ``` Run: @@ -35,7 +35,7 @@ CocoInsight is in Early Access now (Free) 😊 You found us! A quick 3 minute vi Run CocoInsight to understand your RAG data pipeline: ``` -cocoindex main.py server -ci +cocoindex server -ci main.py ``` Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight). 
\ No newline at end of file diff --git a/examples/product_recommendation/README.md b/examples/product_recommendation/README.md index 9a60d6a8..98e9be55 100644 --- a/examples/product_recommendation/README.md +++ b/examples/product_recommendation/README.md @@ -28,13 +28,13 @@ pip install -e . Setup: ```bash -cocoindex main.py setup +cocoindex setup main.py ``` Update index: ```bash -cocoindex main.py update +cocoindex update main.py ``` ### Browse the knowledge graph @@ -58,7 +58,7 @@ I used CocoInsight (Free beta now) to troubleshoot the index generation and unde It just connects to your local CocoIndex server, with Zero pipeline data retention. Run following command to start CocoInsight: ```bash -cocoindex main.py server -ci +cocoindex server -ci main.py ``` And then open the url https://cocoindex.io/cocoinsight. diff --git a/examples/text_embedding/README.md b/examples/text_embedding/README.md index a8718540..ace7a716 100644 --- a/examples/text_embedding/README.md +++ b/examples/text_embedding/README.md @@ -35,13 +35,13 @@ pip install -e . Setup: ```bash -cocoindex main.py setup +cocoindex setup main.py ``` Update index: ```bash -cocoindex main.py update +cocoindex update main.py ``` Run: @@ -56,7 +56,7 @@ I used CocoInsight (Free beta now) to troubleshoot the index generation and unde It just connects to your local CocoIndex server, with Zero pipeline data retention. Run following command to start CocoInsight: ``` -cocoindex main.py server -ci +cocoindex server -ci main.py ``` Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight). 
diff --git a/examples/text_embedding/Text_Embedding.ipynb b/examples/text_embedding/Text_Embedding.ipynb index c9f58709..ff6c77e4 100644 --- a/examples/text_embedding/Text_Embedding.ipynb +++ b/examples/text_embedding/Text_Embedding.ipynb @@ -340,7 +340,7 @@ { "cell_type": "code", "source": [ - "!yes yes | cocoindex main.py setup" + "!yes yes | cocoindex setup main.py" ], "metadata": { "id": "oBStjaI0Cli_" @@ -360,7 +360,7 @@ { "cell_type": "code", "source": [ - "!cocoindex main.py update" + "!cocoindex update main.py" ], "metadata": { "id": "M9g6xIZHCn5T" diff --git a/examples/text_embedding_qdrant/README.md b/examples/text_embedding_qdrant/README.md index 39ce04a8..42bce2f3 100644 --- a/examples/text_embedding_qdrant/README.md +++ b/examples/text_embedding_qdrant/README.md @@ -59,13 +59,13 @@ We use Qdrant client to query the index, and reuse the embedding operation in th - Setup: ```bash - cocoindex main.py setup + cocoindex setup main.py ``` - Update index: ```bash - cocoindex main.py update + cocoindex update main.py ``` - Run: @@ -79,7 +79,7 @@ I used CocoInsight (Free beta now) to troubleshoot the index generation and unde It just connects to your local CocoIndex server, with Zero pipeline data retention. Run following command to start CocoInsight: ```bash -cocoindex main.py server -ci +cocoindex server -ci main.py ``` Open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight). 
From d42735b739a8c117042a3c0fbb7580a60b5d347b Mon Sep 17 00:00:00 2001 From: LJ Date: Tue, 20 May 2025 12:57:38 -0700 Subject: [PATCH 4/8] chore: update example main function to call `cocoindex.init()` if needed --- examples/amazon_s3_embedding/main.py | 8 ++++++-- examples/code_embedding/main.py | 14 +++++++++----- examples/docs_to_knowledge_graph/main.py | 6 ------ examples/fastapi_server_docker/main.py | 8 ++++---- examples/gdrive_text_embedding/main.py | 8 ++++++-- examples/image_search_example/main.py | 11 +++-------- examples/pdf_embedding/main.py | 7 +++++-- examples/text_embedding/main.py | 7 +++++-- examples/text_embedding_qdrant/main.py | 2 +- 9 files changed, 39 insertions(+), 32 deletions(-) diff --git a/examples/amazon_s3_embedding/main.py b/examples/amazon_s3_embedding/main.py index 25bf0d61..9bcc8e3e 100644 --- a/examples/amazon_s3_embedding/main.py +++ b/examples/amazon_s3_embedding/main.py @@ -1,3 +1,5 @@ +from dotenv import load_dotenv + import cocoindex import os @@ -50,7 +52,7 @@ def amazon_s3_text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scop model="sentence-transformers/all-MiniLM-L6-v2")), default_similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY) -def _run(): +def _main(): # Use a `FlowLiveUpdater` to keep the flow data updated. with cocoindex.FlowLiveUpdater(amazon_s3_text_embedding_flow): # Run queries in a loop to demonstrate the query capabilities. 
@@ -70,4 +72,6 @@ def _run(): break if __name__ == "__main__": - _run() + load_dotenv() + cocoindex.init() + _main() diff --git a/examples/code_embedding/main.py b/examples/code_embedding/main.py index d87b173c..b7bd68f7 100644 --- a/examples/code_embedding/main.py +++ b/examples/code_embedding/main.py @@ -8,7 +8,8 @@ def extract_extension(filename: str) -> str: """Extract the extension of a filename.""" return os.path.splitext(filename)[1] -def code_to_embedding(text: cocoindex.DataSlice) -> cocoindex.DataSlice: +@cocoindex.transform_flow() +def code_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[list[float]]: """ Embed the text using a SentenceTransformer model. """ @@ -65,23 +66,26 @@ def search(pool: ConnectionPool, query: str, top_k: int = 5): ] def _main(): + # Initialize the database connection pool. + pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL")) # Run queries in a loop to demonstrate the query capabilities. while True: try: query = input("Enter search query (or Enter to quit): ") if query == '': break - results, _ = query_handler.search(query, 10) + # Run the query function with the database connection pool and the query. 
+ results = search(pool, query) print("\nSearch results:") for result in results: - print(f"[{result.score:.3f}] {result.data['filename']}") - print(f" {result.data['code']}") + print(f"[{result['score']:.3f}] {result['filename']}") + print(f" {result['code']}") print("---") print() except KeyboardInterrupt: break if __name__ == "__main__": - load_dotenv(override=True) + load_dotenv() cocoindex.init() _main() diff --git a/examples/docs_to_knowledge_graph/main.py b/examples/docs_to_knowledge_graph/main.py index 8c0ec9e9..ef4b9ed2 100644 --- a/examples/docs_to_knowledge_graph/main.py +++ b/examples/docs_to_knowledge_graph/main.py @@ -146,9 +146,3 @@ def docs_to_kg_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.D ), primary_key_fields=["id"], ) - -def _run(): - pass - -if __name__ == "__main__": - _run() diff --git a/examples/fastapi_server_docker/main.py b/examples/fastapi_server_docker/main.py index 8cb42b62..7ff9d943 100644 --- a/examples/fastapi_server_docker/main.py +++ b/examples/fastapi_server_docker/main.py @@ -2,6 +2,7 @@ import uvicorn from fastapi import FastAPI +from dotenv import load_dotenv from src.cocoindex_funs import code_embedding_flow, code_to_embedding @@ -20,8 +21,7 @@ def query_endpoint(string: str): results, _ = query_handler.search(string, 10) return results -def _run(): - uvicorn.run(fastapi_app, host="0.0.0.0", port=8080) - if __name__ == "__main__": - _run() + load_dotenv() + cocoindex.init() + uvicorn.run(fastapi_app, host="0.0.0.0", port=8080) diff --git a/examples/gdrive_text_embedding/main.py b/examples/gdrive_text_embedding/main.py index 2ed24916..7e37ca7e 100644 --- a/examples/gdrive_text_embedding/main.py +++ b/examples/gdrive_text_embedding/main.py @@ -1,3 +1,5 @@ +from dotenv import load_dotenv + import cocoindex import datetime import os @@ -49,7 +51,7 @@ def gdrive_text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: model="sentence-transformers/all-MiniLM-L6-v2")), 
default_similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY) -def _run(): +def _main(): # Use a `FlowLiveUpdater` to keep the flow data updated. with cocoindex.FlowLiveUpdater(gdrive_text_embedding_flow): # Run queries in a loop to demonstrate the query capabilities. @@ -69,4 +71,6 @@ def _run(): break if __name__ == "__main__": - _run() + load_dotenv() + cocoindex.init() + _main() diff --git a/examples/image_search_example/main.py b/examples/image_search_example/main.py index cf0448d5..7ea2e9eb 100644 --- a/examples/image_search_example/main.py +++ b/examples/image_search_example/main.py @@ -1,3 +1,4 @@ +from dotenv import load_dotenv import cocoindex import datetime import os @@ -90,8 +91,8 @@ def image_object_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: # --- CocoIndex initialization on startup --- @app.on_event("startup") def startup_event(): - settings = cocoindex.Settings.from_env() - cocoindex.init(settings) + load_dotenv() + cocoindex.init() app.state.query_handler = cocoindex.query.SimpleSemanticsQueryHandler( name="ImageObjectSearch", flow=image_object_embedding_flow, @@ -115,9 +116,3 @@ def search(q: str = Query(..., description="Search query"), limit: int = Query(5 "score": result.score }) return {"results": out} - -def _run(): - pass - -if __name__ == "__main__": - _run() \ No newline at end of file diff --git a/examples/pdf_embedding/main.py b/examples/pdf_embedding/main.py index 2520cadb..94a56401 100644 --- a/examples/pdf_embedding/main.py +++ b/examples/pdf_embedding/main.py @@ -1,5 +1,6 @@ import tempfile +from dotenv import load_dotenv from marker.converters.pdf import PdfConverter from marker.models import create_model_dict from marker.output import text_from_rendered @@ -74,7 +75,7 @@ def pdf_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoinde query_transform_flow=text_to_embedding, default_similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY) -def _run(): +def _main(): # 
Run queries in a loop to demonstrate the query capabilities. while True: try: @@ -92,4 +93,6 @@ def _run(): break if __name__ == "__main__": - _run() + load_dotenv() + cocoindex.init() + _main() diff --git a/examples/text_embedding/main.py b/examples/text_embedding/main.py index 6461ac48..db867f43 100644 --- a/examples/text_embedding/main.py +++ b/examples/text_embedding/main.py @@ -1,3 +1,4 @@ +from dotenv import load_dotenv from psycopg_pool import ConnectionPool import cocoindex import os @@ -59,7 +60,7 @@ def search(pool: ConnectionPool, query: str, top_k: int = 5): for row in cur.fetchall() ] -def _run(): +def _main(): # Initialize the database connection pool. pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL")) # Run queries in a loop to demonstrate the query capabilities. @@ -80,4 +81,6 @@ def _run(): break if __name__ == "__main__": - _run() + load_dotenv() + cocoindex.init() + _main() diff --git a/examples/text_embedding_qdrant/main.py b/examples/text_embedding_qdrant/main.py index 2f53c9a3..81a621f2 100644 --- a/examples/text_embedding_qdrant/main.py +++ b/examples/text_embedding_qdrant/main.py @@ -92,6 +92,6 @@ def _main(): if __name__ == "__main__": - load_dotenv(override=True) + load_dotenv() cocoindex.init() _main() From fda611c65c043bc27628c238d0b0a6d5ea7941c5 Mon Sep 17 00:00:00 2001 From: LJ Date: Tue, 20 May 2025 13:00:04 -0700 Subject: [PATCH 5/8] docs: add `cocoindex.init()` call for quickstart --- docs/docs/getting_started/quickstart.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/docs/getting_started/quickstart.md b/docs/docs/getting_started/quickstart.md index 7e4c6b0a..13338937 100644 --- a/docs/docs/getting_started/quickstart.md +++ b/docs/docs/getting_started/quickstart.md @@ -244,6 +244,9 @@ Now we can add the main logic to the program. 
It uses the query function we just ```python title="quickstart.py" if __name__ == "__main__": + # Initialize CocoIndex library states + cocoindex.init() + # Initialize the database connection pool. pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL")) # Run queries in a loop to demonstrate the query capabilities. From fc0bdb6a70463ef2aebe0fb7a44c984d2c77e526 Mon Sep 17 00:00:00 2001 From: LJ Date: Tue, 20 May 2025 16:14:07 -0700 Subject: [PATCH 6/8] docs: further update docs for new CLI --- docs/docs/core/basics.md | 2 +- docs/docs/core/cli.mdx | 26 +++++- docs/docs/core/flow_def.mdx | 2 +- docs/docs/core/flow_methods.mdx | 4 +- docs/docs/core/initialization.mdx | 138 ------------------------------ docs/docs/core/settings.mdx | 116 +++++++++++++++++++++++++ docs/docs/ops/storages.md | 4 +- docs/docusaurus.config.ts | 11 +++ docs/package.json | 1 + docs/sidebars.ts | 2 +- docs/yarn.lock | 15 ++++ 11 files changed, 173 insertions(+), 148 deletions(-) delete mode 100644 docs/docs/core/initialization.mdx create mode 100644 docs/docs/core/settings.mdx diff --git a/docs/docs/core/basics.md b/docs/docs/core/basics.md index 4c8fb243..190477ae 100644 --- a/docs/docs/core/basics.md +++ b/docs/docs/core/basics.md @@ -101,4 +101,4 @@ As an indexing flow is long-lived, it needs to store intermediate data to keep t CocoIndex uses internal storage for this purpose. Currently, CocoIndex uses Postgres database as the internal storage. -See [Initialization](initialization) for configuring its location, and `cocoindex setup` CLI command (see [CocoIndex CLI](cli)) creates tables for the internal storage. \ No newline at end of file +See [Settings](settings#databaseconnectionspec) for configuring its location, and `cocoindex setup` CLI command (see [CocoIndex CLI](cli)) creates tables for the internal storage. 
\ No newline at end of file diff --git a/docs/docs/core/cli.mdx b/docs/docs/core/cli.mdx index c0c31017..398c5c31 100644 --- a/docs/docs/core/cli.mdx +++ b/docs/docs/core/cli.mdx @@ -10,11 +10,11 @@ import TabItem from '@theme/TabItem'; CocoIndex CLI is a standalone tool for easily managing and inspecting your flows and indexes. -## Invoking the CLI +## Invoke the CLI Once CocoIndex is installed, you can invoke the CLI directly using the `cocoindex` command. Most commands require an `APP_TARGET` argument, which tells the CLI where your flow definitions are located. -**APP_TARGET Format:** +### APP_TARGET Format The `APP_TARGET` can be: 1. A **path to a Python file** defining your flows (e.g., `main.py`, `path/to/my_flows.py`). @@ -23,14 +23,34 @@ The `APP_TARGET` can be: * `path/to/my_flows.py:MyFlow` * `my_package.flows:MyFlow` -**Global Options:** +### Environment Variables + +CocoIndex reads its library settings from environment variables, as described in [CocoIndex Settings](settings#list-of-environment-variables). + +You can set environment variables in an environment file. + +* By default, the `cocoindex` CLI searches upward from the current directory for a `.env` file. +* You can use `--env-file <path>` to specify one explicitly: + + ```sh + cocoindex --env-file path/to/custom.env ... + ``` + +Loaded variables do *NOT* override existing system ones. +If no file is found, only existing system environment variables are used. + +### Global Options + +CocoIndex CLI supports the following global options: * `--env-file `: Load environment variables from a specified `.env` file. If not provided, `.env` in the current directory is loaded if it exists. * `--version`: Show the CocoIndex version and exit. * `--help`: Show the main help message and exit. :::caution Deprecated Usage + The old method of invoking the CLI using `python main.py cocoindex ...` via the `@cocoindex.main_fn()` decorator is now deprecated. 
Please remove `@cocoindex.main_fn()` from your scripts and use the standalone cocoindex command as described. + ::: ## Subcommands diff --git a/docs/docs/core/flow_def.mdx b/docs/docs/core/flow_def.mdx index 45bd7aba..a0f1c2cc 100644 --- a/docs/docs/core/flow_def.mdx +++ b/docs/docs/core/flow_def.mdx @@ -313,7 +313,7 @@ Following metrics are supported: ### Getting App Namespace -You can use the [`app_namespace` setting](initialization#app-namespace) or `COCOINDEX_APP_NAMESPACE` environment variable to specify the app namespace, +You can use the [`app_namespace` setting](settings#app-namespace) or `COCOINDEX_APP_NAMESPACE` environment variable to specify the app namespace, to organize flows across different environments (e.g., dev, staging, production), team members, etc. In the code, You can call `flow.get_app_namespace()` to get the app namespace, and use it to name certain backends. It takes the following arguments: diff --git a/docs/docs/core/flow_methods.mdx b/docs/docs/core/flow_methods.mdx index 88fcc46a..fe52bb7a 100644 --- a/docs/docs/core/flow_methods.mdx +++ b/docs/docs/core/flow_methods.mdx @@ -1,5 +1,5 @@ --- -title: Flow Running +title: Run a Flow toc_max_heading_level: 4 description: Run a CocoIndex Flow, including build / update data in the target storage and evaluate the flow without changing the target storage. --- @@ -7,7 +7,7 @@ description: Run a CocoIndex Flow, including build / update data in the target s import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# Running a CocoIndex Flow +# Run a CocoIndex Flow After a flow is defined as discussed in [Flow Definition](/docs/core/flow_def), you can start to transform data with it. 
diff --git a/docs/docs/core/initialization.mdx b/docs/docs/core/initialization.mdx deleted file mode 100644 index ae99ff0c..00000000 --- a/docs/docs/core/initialization.mdx +++ /dev/null @@ -1,138 +0,0 @@ ---- -title: Initialization -description: Initialize and set environment for CocoIndex library ---- - -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; - -# Initialize the CocoIndex Library - -Before everything starts, the CocoIndex library needs to be initialized with settings. -We'll talk about the code skeleton to initialize the library in your code, and the way to provide settings for CocoIndex. - -## Initialize the library - -There're two options to initialize in your code: - -* Use Cocoindex CLI. It's easier to start with. -* Explicit initialization. It's more flexible. - -### CLI-Based Initialization - -When you use the `cocoindex` command-line tool, the library is automatically initialized for you: - -1. **Environment File Loading**: - * By default, the `cocoindex` CLI searches upward from the current directory for a `.env` file. - * You can use `--env-file ` to specify one explicitly: - - ```sh - cocoindex --env-file path/to/custom.env ... - ``` - - * If no file is found, only existing system environment variables are used. - * Loaded variables do **not** override existing system ones. - -2. **Automatic Library Initialization**: - * Then, the CLI automatically prepares everything using loaded environment variables — no manual setup required. - * Your script (e.g. `main.py`) is just used to discover defined flows. - - See [Environment Variables](#environment-variables) for supported variables. 
- - The primary way to interact with CocoIndex in this setup is via CLI commands that operate on your script: - You interact with CocoIndex via CLI commands that operate on your script: - - ```sh - # Example: List flows defined in my_app.py - cocoindex ls my_app.py - - # Example: Update a specific flow in my_app.py - cocoindex update my_app.py:MyFlowName - ``` - - See [CocoIndex CLI](/docs/core/cli) for more details. - -### Explicit Initialization - -Alternatively, for flexibility, you can also explicitly initialize the library by the `init()` function: - - - - -```python -import cocoindex - -def main(): - ... - cocoindex.init( - cocoindex.Settings( - database=cocoindex.DatabaseConnectionSpec( - url="postgres://cocoindex:cocoindex@localhost/cocoindex" - ))) - ... - -... -if __name__ == "__main__": - main() -``` - - - - -## Settings - -`cocoindex.Settings` is used to configure the CocoIndex library. It's a dataclass that contains the following fields: - -* `app_namespace` (type: `str`, required): The namespace of the application. -* `database` (type: `DatabaseConnectionSpec`, required): The connection to the Postgres database. - -### App Namespace - -The `app_namespace` field helps organize flows across different environments (e.g., dev, staging, production), team members, etc. When set, it prefixes flow names with the namespace. - -For example, if the namespace is `Staging`, for a flow with name specified as `Flow1` in code, the full name of the flow will be `Staging.Flow1`. -You can also get the current app namespace by calling `cocoindex.get_app_namespace()` (see [Getting App Namespace](flow_def#getting-app-namespace) for more details). - -If not set, all flows are in a default unnamed namespace. - -You can also control it by the `COCOINDEX_APP_NAMESPACE` environment variable. - -### DatabaseConnectionSpec - -`DatabaseConnectionSpec` configures the connection to a database. Only Postgres is supported for now. 
It has the following fields: - -* `url` (type: `str`, required): The URL of the Postgres database to use as the internal storage, e.g. `postgres://cocoindex:cocoindex@localhost/cocoindex`. -* `user` (type: `str`, optional): The username for the Postgres database. If not provided, username will come from `url`. -* `password` (type: `str`, optional): The password for the Postgres database. If not provided, password will come from `url`. - -:::tip - -Please be careful that all values in `url` needs to be url-encoded if they contain special characters. -For this reason, prefer to use the separated `user` and `password` fields for username and password. - -::: - -:::info - -If you use the Postgres database hosted by [Supabase](https://supabase.com/), please click **Connect** on your project dashboard and find the following URL: - -* If you're on a IPv6 network, use the URL under **Direct connection**. You can visit [IPv6 test](https://test-ipv6.com/) to see if you have IPv6 Internet connection. -* Otherwise, use the URL under **Session pooler**. - -::: - -## Environment Variables - -When using the CLI, settings are primarily loaded from environment variables. The CLI will: - -* Use the `--env-file` option if provided. -* Otherwise, try to locate a `.env` file by searching upward from the current directory. - -Each setting field has a corresponding environment variable: - -| environment variable | corresponding field in `Settings` | required? 
| -|---------------------|-------------------|----------| -| `COCOINDEX_APP_NAMESPACE` | `app_namespace` | No | -| `COCOINDEX_DATABASE_URL` | `database.url` | Yes | -| `COCOINDEX_DATABASE_USER` | `database.user` | No | -| `COCOINDEX_DATABASE_PASSWORD` | `database.password` | No | diff --git a/docs/docs/core/settings.mdx b/docs/docs/core/settings.mdx new file mode 100644 index 00000000..a34af137 --- /dev/null +++ b/docs/docs/core/settings.mdx @@ -0,0 +1,116 @@ +--- +title: CocoIndex Settings +description: Provide settings for CocoIndex, e.g. database connection, app namespace, etc. +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# CocoIndex Settings + +Certain settings need to be provided for CocoIndex to work, e.g. database connections, app namespace, etc. + +## Launch CocoIndex + +You have two ways to launch CocoIndex: + +* Use [CocoIndex CLI](cli). It's handy for most routine index building and management tasks. + It will load settings from environment variables, either already set in your environment, or specified in a `.env` file. + See [CLI](cli#environment-variables) for more details. + +* Call CocoIndex functionality from your own Python application or library. + It's needed when you want to leverage CocoIndex support for query, or have your custom logic to trigger indexing, etc. + + + + + You need to explicitly call `cocoindex.init()` before doing anything with CocoIndex, and settings will be loaded during the call. + + * If it's called without any argument, it will load settings from environment variables. + Only existing environment variables already set in your environment will be used. + If you want to load environment variables from a specific `.env` file, consider calling `load_dotenv()` provided by the [`python-dotenv`](https://github.com/theskumar/python-dotenv) package. 
+ + ```py + from dotenv import load_dotenv + import cocoindex + + load_dotenv() + cocoindex.init() + ``` + + * It takes an optional `cocoindex.Settings` dataclass object as an argument, so you can also construct settings explicitly and pass them to it: + + ```py + import cocoindex + + cocoindex.init( + cocoindex.Settings( + database=cocoindex.DatabaseConnectionSpec( + url="postgres://cocoindex:cocoindex@localhost/cocoindex" + ) + ) + ) + ``` + + + +## List of Settings + +`cocoindex.Settings` is a dataclass that contains the following fields: + +* `app_namespace` (type: `str`, optional): The namespace of the application. +* `database` (type: `DatabaseConnectionSpec`, required): The connection to the Postgres database. + +### App Namespace + +The `app_namespace` field helps organize flows across different environments (e.g., dev, staging, production), team members, etc. When set, it prefixes flow names with the namespace. + +For example, if the namespace is `Staging`, for a flow with name specified as `Flow1` in code, the full name of the flow will be `Staging.Flow1`. +You can also get the current app namespace by calling `cocoindex.get_app_namespace()` (see [Getting App Namespace](flow_def#getting-app-namespace) for more details). + +If not set, all flows are in a default unnamed namespace. + +*Environment variable*: `COCOINDEX_APP_NAMESPACE` + +### DatabaseConnectionSpec + +`DatabaseConnectionSpec` configures the connection to a database. Only Postgres is supported for now. It has the following fields: + +* `url` (type: `str`, required): The URL of the Postgres database to use as the internal storage, e.g. `postgres://cocoindex:cocoindex@localhost/cocoindex`. + + *Environment variable* for `Settings.database.url`: `COCOINDEX_DATABASE_URL` + +* `user` (type: `str`, optional): The username for the Postgres database. If not provided, username will come from `url`. 
+ + *Environment variable* for `Settings.database.user`: `COCOINDEX_DATABASE_USER` + +* `password` (type: `str`, optional): The password for the Postgres database. If not provided, password will come from `url`. + + *Environment variable* for `Settings.database.password`: `COCOINDEX_DATABASE_PASSWORD` + +:::tip + +Please be careful that all values in `url` need to be URL-encoded if they contain special characters. +For this reason, prefer to use the separated `user` and `password` fields for username and password. + +::: + +:::info + +If you use the Postgres database hosted by [Supabase](https://supabase.com/), please click **Connect** on your project dashboard and find the following URL: + +* If you're on an IPv6 network, use the URL under **Direct connection**. You can visit [IPv6 test](https://test-ipv6.com/) to see if you have an IPv6 Internet connection. +* Otherwise, use the URL under **Session pooler**. + +::: + +## List of Environment Variables + +This is the list of environment variables, each of which has a corresponding field in `Settings`: + +| environment variable | corresponding field in `Settings` | required? | +|---------------------|-------------------|----------| +| `COCOINDEX_DATABASE_URL` | `database.url` | Yes | +| `COCOINDEX_DATABASE_USER` | `database.user` | No | +| `COCOINDEX_DATABASE_PASSWORD` | `database.password` | No | +| `COCOINDEX_APP_NAMESPACE` | `app_namespace` | No | \ No newline at end of file diff --git a/docs/docs/ops/storages.md b/docs/docs/ops/storages.md index 89a7196e..8c30c0be 100644 --- a/docs/docs/ops/storages.md +++ b/docs/docs/ops/storages.md @@ -37,7 +37,7 @@ It should be a unique table, meaning that no other export target should export t The spec takes the following fields: * `database` (type: [auth reference](../core/flow_def#auth-registry) to `DatabaseConnectionSpec`, optional): The connection to the Postgres database. - See [DatabaseConnectionSpec](../core/initialization#databaseconnectionspec) for its specific fields. 
+ See [DatabaseConnectionSpec](../core/settings#databaseconnectionspec) for its specific fields. If not provided, will use the same database as the [internal storage](/docs/core/basics#internal-storage). * `table_name` (type: `str`, optional): The name of the table to store to. If unspecified, will use the table name `[${AppNamespace}__]${FlowName}__${TargetName}`, e.g. `DemoFlow__doc_embeddings` or `Staging__DemoFlow__doc_embeddings`. @@ -419,7 +419,7 @@ The `Neo4j` storage exports each row as a relationship to Neo4j Knowledge Graph. Neo4j also provides a declaration spec `Neo4jDeclaration`, to configure indexing options for nodes only referenced by relationships. It has the following fields: * `connection` (type: auth reference to `Neo4jConnectionSpec`) -* Fields for [nodes to declare](#nodes-to-declare), including +* Fields for [nodes to declare](#declare-extra-node-labels), including * `nodes_label` (required) * `primary_key_fields` (required) * `vector_indexes` (optional) diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts index 290c467a..26f3c30e 100644 --- a/docs/docusaurus.config.ts +++ b/docs/docusaurus.config.ts @@ -49,6 +49,17 @@ const config: Config = { ], }), }), + [ + '@docusaurus/plugin-client-redirects', + { + redirects: [ + { + from: '/core/initialization', + to: '/core/settings', + }, + ], + }, + ], ], presets: [ diff --git a/docs/package.json b/docs/package.json index be599801..222da2ee 100644 --- a/docs/package.json +++ b/docs/package.json @@ -16,6 +16,7 @@ }, "dependencies": { "@docusaurus/core": "3.7.0", + "@docusaurus/plugin-client-redirects": "^3.7.0", "@docusaurus/preset-classic": "3.7.0", "@docusaurus/theme-mermaid": "^3.7.0", "@mdx-js/react": "^3.0.0", diff --git a/docs/sidebars.ts b/docs/sidebars.ts index 497c2c43..1c0c6d26 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -19,8 +19,8 @@ const sidebars: SidebarsConfig = { items: [ 'core/basics', 'core/data_types', - 'core/initialization', 'core/flow_def', + 
'core/settings', 'core/flow_methods', 'core/cli', 'core/custom_function', diff --git a/docs/yarn.lock b/docs/yarn.lock index 54384cb9..6bde6195 100644 --- a/docs/yarn.lock +++ b/docs/yarn.lock @@ -1604,6 +1604,21 @@ react-helmet-async "npm:@slorber/react-helmet-async@*" react-loadable "npm:@docusaurus/react-loadable@6.0.0" +"@docusaurus/plugin-client-redirects@^3.7.0": + version "3.7.0" + resolved "https://registry.yarnpkg.com/@docusaurus/plugin-client-redirects/-/plugin-client-redirects-3.7.0.tgz#b5cf92529768c457c01ad350bfc50862c6149463" + integrity sha512-6B4XAtE5ZVKOyhPgpgMkb7LwCkN+Hgd4vOnlbwR8nCdTQhLjz8MHbGlwwvZ/cay2SPNRX5KssqKAlcHVZP2m8g== + dependencies: + "@docusaurus/core" "3.7.0" + "@docusaurus/logger" "3.7.0" + "@docusaurus/utils" "3.7.0" + "@docusaurus/utils-common" "3.7.0" + "@docusaurus/utils-validation" "3.7.0" + eta "^2.2.0" + fs-extra "^11.1.1" + lodash "^4.17.21" + tslib "^2.6.0" + "@docusaurus/plugin-content-blog@3.7.0": version "3.7.0" resolved "https://registry.yarnpkg.com/@docusaurus/plugin-content-blog/-/plugin-content-blog-3.7.0.tgz#7bd69de87a1f3adb652e1473ef5b7ccc9468f47e" From 1812488771081d610a01aedbbdcbe1dcbf0b7bec Mon Sep 17 00:00:00 2001 From: LJ Date: Tue, 20 May 2025 16:16:42 -0700 Subject: [PATCH 7/8] chore: bump examples dep versions for cocoindex --- examples/amazon_s3_embedding/pyproject.toml | 2 +- examples/code_embedding/pyproject.toml | 2 +- examples/docs_to_knowledge_graph/pyproject.toml | 2 +- examples/fastapi_server_docker/requirements.txt | 2 +- examples/gdrive_text_embedding/pyproject.toml | 2 +- examples/image_search_example/requirements.txt | 2 +- examples/manuals_llm_extraction/pyproject.toml | 2 +- examples/pdf_embedding/pyproject.toml | 2 +- examples/product_recommendation/pyproject.toml | 2 +- examples/text_embedding/pyproject.toml | 2 +- examples/text_embedding_qdrant/pyproject.toml | 6 +++++- 11 files changed, 15 insertions(+), 11 deletions(-) diff --git a/examples/amazon_s3_embedding/pyproject.toml 
b/examples/amazon_s3_embedding/pyproject.toml index 340ddf03..e9562439 100644 --- a/examples/amazon_s3_embedding/pyproject.toml +++ b/examples/amazon_s3_embedding/pyproject.toml @@ -3,7 +3,7 @@ name = "amazon-s3-text-embedding" version = "0.1.0" description = "Simple example for cocoindex: build embedding index based on Amazon S3 files." requires-python = ">=3.11" -dependencies = ["cocoindex>=0.1.39", "python-dotenv>=1.0.1"] +dependencies = ["cocoindex>=0.1.42", "python-dotenv>=1.0.1"] [tool.setuptools] packages = [] diff --git a/examples/code_embedding/pyproject.toml b/examples/code_embedding/pyproject.toml index 44d9fe7b..040b5863 100644 --- a/examples/code_embedding/pyproject.toml +++ b/examples/code_embedding/pyproject.toml @@ -3,7 +3,7 @@ name = "code-embedding" version = "0.1.0" description = "Simple example for cocoindex: build embedding index based on source code." requires-python = ">=3.10" -dependencies = ["cocoindex>=0.1.39", "python-dotenv>=1.0.1"] +dependencies = ["cocoindex>=0.1.42", "python-dotenv>=1.0.1"] [tool.setuptools] packages = [] diff --git a/examples/docs_to_knowledge_graph/pyproject.toml b/examples/docs_to_knowledge_graph/pyproject.toml index 540d01b8..91c5f0d3 100644 --- a/examples/docs_to_knowledge_graph/pyproject.toml +++ b/examples/docs_to_knowledge_graph/pyproject.toml @@ -3,7 +3,7 @@ name = "manuals-to-kg" version = "0.1.0" description = "Simple example for cocoindex: extract triples from files and build knowledge graph." 
requires-python = ">=3.10" -dependencies = ["cocoindex>=0.1.39", "python-dotenv>=1.0.1"] +dependencies = ["cocoindex>=0.1.42", "python-dotenv>=1.0.1"] [tool.setuptools] packages = [] diff --git a/examples/fastapi_server_docker/requirements.txt b/examples/fastapi_server_docker/requirements.txt index 0658a483..18d7c270 100644 --- a/examples/fastapi_server_docker/requirements.txt +++ b/examples/fastapi_server_docker/requirements.txt @@ -1,4 +1,4 @@ -cocoindex>=0.1.35 +cocoindex>=0.1.42 python-dotenv>=1.0.1 fastapi==0.115.12 fastapi-cli==0.0.7 diff --git a/examples/gdrive_text_embedding/pyproject.toml b/examples/gdrive_text_embedding/pyproject.toml index 3b930b3f..5b702ff7 100644 --- a/examples/gdrive_text_embedding/pyproject.toml +++ b/examples/gdrive_text_embedding/pyproject.toml @@ -3,7 +3,7 @@ name = "gdrive-text-embedding" version = "0.1.0" description = "Simple example for cocoindex: build embedding index based on Google Drive files." requires-python = ">=3.11" -dependencies = ["cocoindex>=0.1.39", "python-dotenv>=1.0.1"] +dependencies = ["cocoindex>=0.1.42", "python-dotenv>=1.0.1"] [tool.setuptools] packages = [] diff --git a/examples/image_search_example/requirements.txt b/examples/image_search_example/requirements.txt index 73d13b2b..d701794a 100644 --- a/examples/image_search_example/requirements.txt +++ b/examples/image_search_example/requirements.txt @@ -1,4 +1,4 @@ -cocoindex>=0.1.35 +cocoindex>=0.1.42 python-dotenv>=1.0.1 requests>=2.31.0 uvicorn>=0.29.0 diff --git a/examples/manuals_llm_extraction/pyproject.toml b/examples/manuals_llm_extraction/pyproject.toml index 91316dc8..b65f8d85 100644 --- a/examples/manuals_llm_extraction/pyproject.toml +++ b/examples/manuals_llm_extraction/pyproject.toml @@ -4,7 +4,7 @@ version = "0.1.0" description = "Simple example for cocoindex: extract structured information from a Markdown file using LLM." 
requires-python = ">=3.10" dependencies = [ - "cocoindex>=0.1.39", + "cocoindex>=0.1.42", "python-dotenv>=1.0.1", "marker-pdf>=1.5.2", ] diff --git a/examples/pdf_embedding/pyproject.toml b/examples/pdf_embedding/pyproject.toml index 3f85bfe7..5250509d 100644 --- a/examples/pdf_embedding/pyproject.toml +++ b/examples/pdf_embedding/pyproject.toml @@ -4,7 +4,7 @@ version = "0.1.0" description = "Simple example for cocoindex: build embedding index based on local PDF files." requires-python = ">=3.10" dependencies = [ - "cocoindex>=0.1.39", + "cocoindex>=0.1.42", "python-dotenv>=1.0.1", "marker-pdf>=1.5.2", ] diff --git a/examples/product_recommendation/pyproject.toml b/examples/product_recommendation/pyproject.toml index 2a0cdcc1..66e1b59d 100644 --- a/examples/product_recommendation/pyproject.toml +++ b/examples/product_recommendation/pyproject.toml @@ -3,7 +3,7 @@ name = "cocoindex-ecommerce-taxonomy" version = "0.1.0" description = "Simple example for CocoIndex: extract taxonomy from e-commerce products and build knowledge graph." requires-python = ">=3.10" -dependencies = ["cocoindex>=0.1.39", "python-dotenv>=1.0.1", "jinja2>=3.1.6"] +dependencies = ["cocoindex>=0.1.42", "python-dotenv>=1.0.1", "jinja2>=3.1.6"] [tool.setuptools] packages = [] diff --git a/examples/text_embedding/pyproject.toml b/examples/text_embedding/pyproject.toml index 08129111..5eb6648d 100644 --- a/examples/text_embedding/pyproject.toml +++ b/examples/text_embedding/pyproject.toml @@ -4,7 +4,7 @@ version = "0.1.0" description = "Simple example for cocoindex: build embedding index based on local text files." 
requires-python = ">=3.10" dependencies = [ - "cocoindex>=0.1.40", + "cocoindex>=0.1.42", "python-dotenv>=1.0.1", "psycopg[binary,pool]", ] diff --git a/examples/text_embedding_qdrant/pyproject.toml b/examples/text_embedding_qdrant/pyproject.toml index 70454200..49059dd6 100644 --- a/examples/text_embedding_qdrant/pyproject.toml +++ b/examples/text_embedding_qdrant/pyproject.toml @@ -3,7 +3,11 @@ name = "text-embedding-qdrant" version = "0.1.0" description = "Simple example for cocoindex: build embedding index based on local text files." requires-python = ">=3.10" -dependencies = ["cocoindex>=0.1.39", "python-dotenv>=1.0.1", "qdrant-client>=1.6.0"] +dependencies = [ + "cocoindex>=0.1.42", + "python-dotenv>=1.0.1", + "qdrant-client>=1.6.0", +] [tool.setuptools] packages = [] From 144c3a1a2669325070868aae6ddbae3cdda39531 Mon Sep 17 00:00:00 2001 From: LJ Date: Tue, 20 May 2025 16:29:46 -0700 Subject: [PATCH 8/8] chore: further cleanups for examples --- examples/code_embedding/main.py | 2 +- examples/docs_to_knowledge_graph/pyproject.toml | 2 +- examples/manuals_llm_extraction/main.py | 8 +------- examples/manuals_llm_extraction/pyproject.toml | 6 +----- examples/product_recommendation/main.py | 8 +------- examples/product_recommendation/pyproject.toml | 2 +- 6 files changed, 6 insertions(+), 22 deletions(-) diff --git a/examples/code_embedding/main.py b/examples/code_embedding/main.py index b7bd68f7..89333176 100644 --- a/examples/code_embedding/main.py +++ b/examples/code_embedding/main.py @@ -25,7 +25,7 @@ def code_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind data_scope["files"] = flow_builder.add_source( cocoindex.sources.LocalFile(path="../..", included_patterns=["*.py", "*.rs", "*.toml", "*.md", "*.mdx"], - excluded_patterns=[".*", "target", "**/node_modules"])) + excluded_patterns=["**/.*", "target", "**/node_modules"])) code_embeddings = data_scope.add_collector() with data_scope["files"].row() as file: diff --git 
a/examples/docs_to_knowledge_graph/pyproject.toml b/examples/docs_to_knowledge_graph/pyproject.toml index 91c5f0d3..a9a06cb2 100644 --- a/examples/docs_to_knowledge_graph/pyproject.toml +++ b/examples/docs_to_knowledge_graph/pyproject.toml @@ -3,7 +3,7 @@ name = "manuals-to-kg" version = "0.1.0" description = "Simple example for cocoindex: extract triples from files and build knowledge graph." requires-python = ">=3.10" -dependencies = ["cocoindex>=0.1.42", "python-dotenv>=1.0.1"] +dependencies = ["cocoindex>=0.1.42"] [tool.setuptools] packages = [] diff --git a/examples/manuals_llm_extraction/main.py b/examples/manuals_llm_extraction/main.py index 0337dd54..31d57f05 100644 --- a/examples/manuals_llm_extraction/main.py +++ b/examples/manuals_llm_extraction/main.py @@ -114,10 +114,4 @@ def manual_extraction_flow(flow_builder: cocoindex.FlowBuilder, data_scope: coco "modules", cocoindex.storages.Postgres(table_name="modules_info"), primary_key_fields=["filename"], - ) - -def _run(): - pass - -if __name__ == "__main__": - _run() + ) \ No newline at end of file diff --git a/examples/manuals_llm_extraction/pyproject.toml b/examples/manuals_llm_extraction/pyproject.toml index b65f8d85..0c2c4c39 100644 --- a/examples/manuals_llm_extraction/pyproject.toml +++ b/examples/manuals_llm_extraction/pyproject.toml @@ -3,11 +3,7 @@ name = "manuals-llm-extraction" version = "0.1.0" description = "Simple example for cocoindex: extract structured information from a Markdown file using LLM." 
requires-python = ">=3.10" -dependencies = [ - "cocoindex>=0.1.42", - "python-dotenv>=1.0.1", - "marker-pdf>=1.5.2", -] +dependencies = ["cocoindex>=0.1.42", "marker-pdf>=1.5.2"] [tool.setuptools] packages = [] diff --git a/examples/product_recommendation/main.py b/examples/product_recommendation/main.py index df678637..63af3bb7 100644 --- a/examples/product_recommendation/main.py +++ b/examples/product_recommendation/main.py @@ -173,10 +173,4 @@ def store_product_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoinde ), ), primary_key_fields=["id"], - ) - -def _run(): - pass - -if __name__ == "__main__": - _run() + ) \ No newline at end of file diff --git a/examples/product_recommendation/pyproject.toml b/examples/product_recommendation/pyproject.toml index 66e1b59d..0ed0ab9e 100644 --- a/examples/product_recommendation/pyproject.toml +++ b/examples/product_recommendation/pyproject.toml @@ -3,7 +3,7 @@ name = "cocoindex-ecommerce-taxonomy" version = "0.1.0" description = "Simple example for CocoIndex: extract taxonomy from e-commerce products and build knowledge graph." requires-python = ">=3.10" -dependencies = ["cocoindex>=0.1.42", "python-dotenv>=1.0.1", "jinja2>=3.1.6"] +dependencies = ["cocoindex>=0.1.42", "jinja2>=3.1.6"] [tool.setuptools] packages = []