diff --git a/docs/source/conf.py b/docs/source/conf.py index 1959fc36..fad0ca01 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -16,9 +16,12 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # +import datetime import os import sys +import pandas_gbq + # sys.path.insert(0, os.path.abspath('.')) # -- General configuration ------------------------------------------------ @@ -62,7 +65,9 @@ # General information about the project. project = u"pandas-gbq" -copyright = u"2017, PyData Development Team" +copyright = u"2017-{}, PyData Development Team".format( + datetime.datetime.now().year +) author = u"PyData Development Team" # The version info for the project you're documenting, acts as replacement for @@ -70,9 +75,9 @@ # built documents. # # The short X.Y version. -version = u"0.1.0" +version = pandas_gbq.__version__ # The full version, including alpha/beta/rc tags. -release = u"0.1.0" +release = version # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/source/index.rst b/docs/source/index.rst index 8e895145..cbbdabf7 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,16 +8,24 @@ Welcome to pandas-gbq's documentation! The :mod:`pandas_gbq` module provides a wrapper for Google's BigQuery analytics web service to simplify retrieving results from BigQuery tables -using SQL-like queries. Result sets are parsed into a pandas -DataFrame with a shape and data types derived from the source table. -Additionally, DataFrames can be inserted into new BigQuery tables or appended -to existing tables. +using SQL-like queries. Result sets are parsed into a :class:`pandas.DataFrame` +with a shape and data types derived from the source table. Additionally, +DataFrames can be inserted into new BigQuery tables or appended to existing +tables. .. 
warning:: - To use this module, you will need a valid BigQuery account. Refer to the - `BigQuery Documentation `__ - for details on the service itself. + To use this module, you will need a valid BigQuery account. Use the + `BigQuery sandbox `__ to + try the service for free. + +While BigQuery uses standard SQL syntax, it has some important differences +from traditional databases both in functionality, API limitations (size and +quantity of queries or uploads), and how Google charges for use of the +service. BigQuery is best for analyzing large sets of data quickly. It is not +a direct replacement for a transactional database. Refer to the `BigQuery +Documentation `__ for +details on the service itself. Contents: @@ -29,7 +37,6 @@ Contents: howto/authentication.rst reading.rst writing.rst - tables.rst api.rst contributing.rst changelog.rst diff --git a/docs/source/tables.rst b/docs/source/tables.rst deleted file mode 100644 index dcf891ee..00000000 --- a/docs/source/tables.rst +++ /dev/null @@ -1,16 +0,0 @@ -.. _create_tables: - -Creating Tables -=============== - -.. code-block:: ipython - - In [10]: gbq.generate_bq_schema(df, default_type='STRING') - - Out[10]: {'fields': [{'name': 'my_bool1', 'type': 'BOOLEAN'}, - {'name': 'my_bool2', 'type': 'BOOLEAN'}, - {'name': 'my_dates', 'type': 'TIMESTAMP'}, - {'name': 'my_float64', 'type': 'FLOAT'}, - {'name': 'my_int64', 'type': 'INTEGER'}, - {'name': 'my_string', 'type': 'STRING'}]} - diff --git a/docs/source/writing.rst b/docs/source/writing.rst index e649d7fd..0c5b41e0 100644 --- a/docs/source/writing.rst +++ b/docs/source/writing.rst @@ -3,8 +3,8 @@ Writing DataFrames ================== -Assume we want to write a DataFrame ``df`` into a BigQuery table using -:func:`~pandas_gbq.to_gbq`. +Assume we want to write a :class:`~pandas.DataFrame` named ``df`` into a +BigQuery table using :func:`~pandas_gbq.to_gbq`. .. 
ipython:: python @@ -21,40 +21,62 @@ Assume we want to write a DataFrame ``df`` into a BigQuery table using .. code-block:: python - to_gbq(df, 'my_dataset.my_table', projectid) + import pandas_gbq + pandas_gbq.to_gbq(df, 'my_dataset.my_table', project_id=projectid) -.. note:: +The destination table and destination dataset will automatically be created +if they do not already exist. - The destination table and destination dataset will automatically be created if they do not already exist. -The ``if_exists`` argument can be used to dictate whether to ``'fail'``, ``'replace'`` -or ``'append'`` if the destination table already exists. The default value is ``'fail'``. +Writing to an Existing Table +---------------------------- + +Use the ``if_exists`` argument to dictate whether to ``'fail'``, +``'replace'`` or ``'append'`` if the destination table already exists. The +default value is ``'fail'``. For example, assume that ``if_exists`` is set to ``'fail'``. The following snippet will raise a ``TableCreationError`` if the destination table already exists. .. code-block:: python - to_gbq(df, 'my_dataset.my_table', projectid, if_exists='fail') + import pandas_gbq + pandas_gbq.to_gbq( + df, 'my_dataset.my_table', project_id=projectid, if_exists='fail', + ) + +If the ``if_exists`` argument is set to ``'append'``, the destination +dataframe will be written to the table using the defined table schema and +column types. The dataframe must contain fields (matching name and type) +currently in the destination table. + + +.. _writing-schema: + +Inferring the Table Schema +-------------------------- -.. note:: +The :func:`~pandas_gbq.to_gbq` method infers the BigQuery table schema based +on the dtypes of the uploaded :class:`~pandas.DataFrame`. - If the ``if_exists`` argument is set to ``'append'``, the destination - dataframe will be written to the table using the defined table schema and - column types. 
The dataframe must contain fields (matching name and type) - currently in the destination table. +========================= ================== +dtype BigQuery Data Type +========================= ================== +i (integer) INTEGER +b (boolean) BOOLEAN +f (float) FLOAT +O (object) STRING +S (zero-terminated bytes) STRING +U (Unicode string) STRING +M (datetime) TIMESTAMP +========================= ================== -.. note:: +If the data type inference does not suit your needs, supply a BigQuery schema +as the ``table_schema`` parameter of :func:`~pandas_gbq.to_gbq`. - If an error occurs while streaming data to BigQuery, see - `Troubleshooting BigQuery Errors `__. -.. note:: +Troubleshooting Errors +---------------------- - While BigQuery uses SQL-like syntax, it has some important differences - from traditional databases both in functionality, API limitations (size - and quantity of queries or uploads), and how Google charges for use of the - service. You should refer to `Google BigQuery documentation - `__ often as the service is always - evolving. BiqQuery is best for analyzing large sets of data quickly, but - it is not a direct replacement for a transactional database. +If an error occurs while writing data to BigQuery, see +`Troubleshooting BigQuery Errors `__. 
diff --git a/noxfile.py b/noxfile.py index 819742c3..c4c8a3c0 100644 --- a/noxfile.py +++ b/noxfile.py @@ -5,6 +5,7 @@ import os import os.path +import shutil import nox @@ -51,6 +52,28 @@ def cover(session, python=latest_python): session.run("coverage", "erase") +@nox.session(python=latest_python) +def docs(session): + """Build the docs.""" + + session.install("-r", os.path.join("docs", "requirements-docs.txt")) + session.install("-e", ".") + + shutil.rmtree(os.path.join("docs", "source", "_build"), ignore_errors=True) + session.run( + "sphinx-build", + "-W", # warnings as errors + "-T", # show full traceback on exception + "-N", # no colors + "-b", + "html", + "-d", + os.path.join("docs", "source", "_build", "doctrees", ""), + os.path.join("docs", "source", ""), + os.path.join("docs", "source", "_build", "html", ""), + ) + + @nox.session(python=supported_pythons) def system(session): session.install("pytest", "pytest-cov") diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 2fa31e4f..3967b284 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -937,14 +937,18 @@ def to_gbq( List of BigQuery table fields to which according DataFrame columns conform to, e.g. ``[{'name': 'col1', 'type': 'STRING'},...]``. - If schema is not provided, it will be - generated according to dtypes of DataFrame columns. - If schema is provided, it may contain all or a subset of DataFrame - columns. If a subset is provided, the rest will be inferred from - the DataFrame dtypes. - pandas_gbq.gbq._generate_bq_schema() may be used to create an - initial schema, though it doesn't preserve column order. - See BigQuery API documentation on available names of a field. + + - If ``table_schema`` is provided, it may contain all or a subset of + DataFrame columns. If a subset is provided, the rest will be + inferred from the DataFrame dtypes. + - If ``table_schema`` is **not** provided, it will be + generated according to dtypes of DataFrame columns. See + `Inferring the Table Schema + `__. 
+ for a description of the schema inference. + + See `BigQuery API documentation on valid column names + `__. .. versionadded:: 0.3.1 location : str, optional @@ -985,6 +989,7 @@ """ _test_google_api_imports() + from pandas_gbq import schema if verbose is not None and SHOW_VERBOSE_DEPRECATION: warnings.warn( @@ -1029,7 +1034,7 @@ if not table_schema: table_schema = default_schema else: - table_schema = _update_bq_schema( + table_schema = schema.update_schema( default_schema, dict(fields=table_schema) ) @@ -1091,15 +1096,16 @@ def generate_bq_schema(df, default_type="STRING"): def _generate_bq_schema(df, default_type="STRING"): - from pandas_gbq import schema + """DEPRECATED: Given a dataframe, generate a Google BigQuery schema. - return schema.generate_bq_schema(df, default_type=default_type) - - -def _update_bq_schema(schema_old, schema_new): + This is a private method, but was used in external code to work around + issues in the default schema generation. Now that individual columns can + be overridden: https://github.com/pydata/pandas-gbq/issues/218, this + method can be removed after there is time to migrate away from this + method. """ from pandas_gbq import schema - return schema.update_schema(schema_old, schema_new) + return schema.generate_bq_schema(df, default_type=default_type) class _Table(GbqConnector): diff --git a/pandas_gbq/schema.py b/pandas_gbq/schema.py index c59ed68e..91963b7c 100644 --- a/pandas_gbq/schema.py +++ b/pandas_gbq/schema.py @@ -11,6 +11,8 @@ def generate_bq_schema(dataframe, default_type="STRING"): does not exist in the schema. """ + # If you update this mapping, also update the table at + # `docs/source/writing.rst`. type_mapping = { "i": "INTEGER", "b": "BOOLEAN",