471 Update CSV datasets tutorial (#472)

Nic-Ma · web-flow · commit ec78d2f42c57 · 2021-12-07T09:51:07.000Z
* [DLMED] update csv tutorial

Signed-off-by: Nic Ma <nma@nvidia.com>

* [DLMED] update tcia

Signed-off-by: Nic Ma <nma@nvidia.com>
diff --git a/modules/csv_datasets.ipynb b/modules/csv_datasets.ipynb
@@ -263,7 +263,7 @@
     }
    ],
    "source": [
-    "dataset = CSVDataset(filename=filepath1)\n",
+    "dataset = CSVDataset(src=filepath1)\n",
     "# construct pandas table to show the data, `CSVDataset` inherits from PyTorch Dataset\n",
     "print(pd.DataFrame(dataset.data))"
    ]
@@ -308,7 +308,7 @@
     }
    ],
    "source": [
-    "dataset = CSVDataset([filepath1, filepath2, filepath3], on=\"subject_id\")\n",
+    "dataset = CSVDataset(src=[filepath1, filepath2, filepath3], on=\"subject_id\")\n",
     "# construct pandas table to show the joined data of 3 tables\n",
     "print(pd.DataFrame(dataset.data))"
    ]
@@ -339,7 +339,7 @@
    ],
    "source": [
     "dataset = CSVDataset(\n",
-    "    filename=[filepath1, filepath2, filepath3],\n",
+    "    src=[filepath1, filepath2, filepath3],\n",
     "    row_indices=[[0, 2], 3],  # load row: 0, 1, 3\n",
     "    col_names=[\"subject_id\", \"label\", \"ehr_1\", \"ehr_7\", \"meta_1\"],\n",
     ")\n",
@@ -396,7 +396,7 @@
    ],
    "source": [
     "dataset = CSVDataset(\n",
-    "    filename=[filepath1, filepath2, filepath3],\n",
+    "    src=[filepath1, filepath2, filepath3],\n",
     "    col_names=[\"subject_id\", \"image\", *[f\"ehr_{i}\" for i in range(11)], \"meta_0\", \"meta_1\", \"meta_2\"],\n",
     "    col_groups={\"ehr\": [f\"ehr_{i}\" for i in range(11)], \"meta\": [\"meta_0\", \"meta_1\", \"meta_2\"]},\n",
     ")\n",
@@ -433,7 +433,7 @@
    ],
    "source": [
     "dataset = CSVDataset(\n",
-    "    filename=[filepath1, filepath2, filepath3],\n",
+    "    src=[filepath1, filepath2, filepath3],\n",
     "    col_names=[\"subject_id\", \"label\", \"ehr_0\", \"ehr_1\", \"ehr_9\", \"meta_1\"],\n",
     "    col_types={\"label\": {\"default\": \"No label\"}, \"ehr_1\": {\"type\": int, \"default\": 0}},\n",
     "    how=\"outer\",  # will load the NaN values in this merge mode\n",
@@ -481,7 +481,7 @@
    ],
    "source": [
     "dataset = CSVDataset(\n",
-    "    filename=[filepath1, filepath2, filepath3],\n",
+    "    src=[filepath1, filepath2, filepath3],\n",
     "    col_groups={\"ehr\": [f\"ehr_{i}\" for i in range(5)]},\n",
     "    transform=Compose([LoadImaged(keys=\"image\"), ToNumpyd(keys=\"ehr\")]),\n",
     ")\n",
@@ -527,7 +527,7 @@
     }
    ],
    "source": [
-    "dataset = CSVIterableDataset(filename=[filepath1, filepath2, filepath3], shuffle=False)\n",
+    "dataset = CSVIterableDataset(src=[filepath1, filepath2, filepath3], shuffle=False)\n",
     "# set num workers = 0 for mac / win\n",
     "num_workers = 2 if sys.platform == \"linux\" else 0\n",
     "dataloader = DataLoader(dataset=dataset, num_workers=num_workers, batch_size=2)\n",
@@ -546,7 +546,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -563,7 +563,7 @@
     "dataset = CSVIterableDataset(\n",
     "    chunksize=2,\n",
     "    buffer_size=4,\n",
-    "    filename=[filepath1, filepath2, filepath3],\n",
+    "    src=[filepath1, filepath2, filepath3],\n",
     "    col_names=[\"subject_id\", \"label\", \"ehr_1\", \"ehr_7\", \"meta_1\"],\n",
     "    transform=ToNumpyd(keys=\"ehr_1\"),\n",
     "    shuffle=True,\n",
diff --git a/modules/tcia_csv_processing.ipynb b/modules/tcia_csv_processing.ipynb
@@ -175,7 +175,7 @@
     "                download_url(url=u, filepath=f)\n",
     "\n",
     "        super().__init__(\n",
-    "            filename=filename,\n",
+    "            src=filename,\n",
     "            row_indices=row_indices,\n",
     "            col_names=col_names,\n",
     "            col_types=col_types,\n",