Skip to content

Bug when using multi-row inserts with SQLite database #29921

Closed
@jconstanzo

Description

@jconstanzo

Code Sample

import logging, sqlite3
import pandas as pd

from datetime import date, timedelta
from random import normalvariate, randint

#%% Storing function.
def store_df_data():
    """
    Create a new SQLite database, generate random data (as DataFrame) and 
    try to save it in the database.
    """
    # Defining logger.
    logging.basicConfig(format="%(levelname)-5s - %(message)s", level=logging.INFO)
    logger = logging.getLogger("sqlite_test")

    # Create SQLite database.
    conn = sqlite3.connect("financial_dataframe.db")
    cursor = conn.cursor()

    # Create table (only if it doesn't exist).
    sql = "SELECT name FROM sqlite_master WHERE type='table' AND name='financial_data'"
    cursor.execute(sql)

    if cursor.fetchone() is None:
        sql = "CREATE TABLE financial_data (id INTEGER PRIMARY KEY, "
        sql += "date TEXT NOT NULL, cp REAL NOT NULL, volume REAL NOT NULL)"
        cursor.execute(sql)

    # Generate data and adjust dataframe.
    df = simulate_goog_data(30, date(2019, 10, 1), 1205.1)

    # Insert data one row per time.
    logger.info("====================================="
              + "=====================================")
    logger.info("Inserting data one per row.")

    try:
        df.to_sql("financial_data", con=conn, if_exists="append", index=False)
        logger.info("Data inserted successfully!")
    except:
        logger.exception("Error while inserting data one per row:")

    # Generate data again.
    df = simulate_goog_data(30, date(2019, 11, 1), df.iloc[-1]["cp"])

    # Insert data with multirow method.
    logger.info("====================================="
              + "=====================================")
    logger.info("Inserting data with multirow method.")

    try:
        df.to_sql("financial_data", con=conn, if_exists="append", index=False, \
                  method="multi")
        logger.info("Data inserted successfully!")
    except:
        logger.exception("Error while inserting data with multirow method:")

    # Generate data again.
    df = simulate_goog_data(30, date(2019, 12, 1), df.iloc[-1]["cp"])

    # Insert data with multirow method and chunksize.
    logger.info("====================================="
              + "=====================================")
    logger.info("Inserting data with multirow method and chunksize.")

    try:
        df.to_sql("financial_data", con=conn, if_exists="append", index=False, \
                  method="multi", chunksize=10)
        logger.info("Data inserted successfully!")
    except:
        logger.exception("Error while inserting data with multirow and chunksize:")

    # Close pointers.
    cursor.close()
    conn.close()


#%% Simulating function.
def simulate_goog_data(n_data, initial_date, initial_close):
    """
    Simulate data from GOOG.

    :Parameters:
        n_data : Max. range of days to generate data.
        initial_date : First date for the simulation.
        initial_close : First close for the simulation.
    """
    # Mean and standard deviation from GOOG return (october 2019 daily data).
    mean_goog = 0.002240703
    std_dev_goog = 0.011957324

    # Minimum and maximum value for GOOG's volume.
    min_vol_goog = 840000
    max_vol_goog = 2615000

    # Initializing lists.
    date_list = list()
    close_list = list()
    volume_list = list()

    # First close and date.
    current_date = initial_date
    current_close = initial_close

    # Generate data (only for working days).
    for _ in range(n_data):
        if current_date.isoweekday() < 6:
            date_list.append(current_date)
            close_list.append(current_close)
            volume_list.append(randint(min_vol_goog, max_vol_goog))

            # Update goog's close.
            current_close *= (1 + normalvariate(mean_goog, std_dev_goog))

        # Move forward one day.
        current_date += timedelta(days=1)

    # Return dataframe.
    return pd.DataFrame({"date": date_list, "cp": close_list, "volume": volume_list})


#%% Executor.
if __name__ == "__main__":
    store_df_data()

Problem description

The piece of code described above tries to insert simulated financial data in a SQLite database, using method to_sql from pandas.DataFrame. It tries to make these insertions in three different ways:

  1. Inserting all data one row at a time. The insertion performs successfully.
  2. Inserting all data using attribute method="multi" to performs a multi-row insert. It crushes.
  3. Inserting all data using attribute method="multi" and setting a chunksize. It also crushes.

It seems that it's impossible insert data in a SQLite database using multi-row option, since it throws the error:

TypeError: insert() takes exactly 2 arguments (1 given)

Could you please check this?

Thank you very much in advance!

Output

INFO  - ==========================================================================
INFO  - Inserting data one per row.
INFO  - Data inserted successfully!
INFO  - ==========================================================================
INFO  - Inserting data with multirow method.
ERROR - Error while inserting data with multirow method:
Traceback (most recent call last):
  File "bug_multirow_sqlite.py", line 52, in store_df_data
    df.to_sql("financial_data", con=conn, if_exists="append", index=False, method="multi")
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\generic.py", line 2531, in to_sql
    dtype=dtype, method=method)
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 460, in to_sql
    chunksize=chunksize, dtype=dtype, method=method)
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 1547, in to_sql
    table.insert(chunksize, method)
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 686, in insert
    exec_insert(conn, keys, chunk_iter)
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 609, in _execute_insert_multi
    conn.execute(self.table.insert(data))
TypeError: insert() takes exactly 2 arguments (1 given)
INFO  - ==========================================================================
INFO  - Inserting data with multirow method and chunksize.
ERROR - Error while inserting data with multirow method and chunksize:
Traceback (most recent call last):
  File "bug_multirow_sqlite.py", line 64, in store_df_data
    df.to_sql("financial_data", con=conn, if_exists="append", index=False, method="multi", chunksize=10)
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\generic.py", line 2531, in to_sql
    dtype=dtype, method=method)
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 460, in to_sql
    chunksize=chunksize, dtype=dtype, method=method)
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 1547, in to_sql
    table.insert(chunksize, method)
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 686, in insert
    exec_insert(conn, keys, chunk_iter)
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 609, in _execute_insert_multi
    conn.execute(self.table.insert(data))
TypeError: insert() takes exactly 2 arguments (1 given)
Code was run in:
  • Windows 10 - Python 3.7.3 - pandas 0.24.2
  • Red Hat 7.6 - Python 3.6.8 - pandas 0.25.0

Metadata

Metadata

Assignees

No one assigned

    Labels

    BugIO SQLto_sql, read_sql, read_sql_query

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions