From 759d48d3b070764ab235f75e1acb5960f4d9a9ee Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Fri, 4 Nov 2016 11:36:18 -0400 Subject: [PATCH 1/2] Create compressed salary testing data. Create compressed versions of the salary dataset for testing #14576. Rename `salary.table.csv` to `salaries.tsv` because the dataset is tab rather than comma delimited. Remove the word table because it's implied by the extension. Rename `salary.table.gz` to `salaries.tsv.gz`, since compressed files should append to not strip the original extension. Created new files by running the following commands: ```sh cd pandas/io/tests/parser/data bzip2 --keep salaries.tsv xz --keep salaries.tsv zip salaries.tsv.zip salaries.tsv ``` --- pandas/io/tests/parser/common.py | 6 +++--- .../data/{salary.table.csv => salaries.tsv} | 0 pandas/io/tests/parser/data/salaries.tsv.bz2 | Bin 0 -> 283 bytes .../data/{salary.table.gz => salaries.tsv.gz} | Bin pandas/io/tests/parser/data/salaries.tsv.xz | Bin 0 -> 336 bytes pandas/io/tests/parser/data/salaries.tsv.zip | Bin 0 -> 445 bytes pandas/io/tests/parser/test_network.py | 6 +++--- 7 files changed, 6 insertions(+), 6 deletions(-) rename pandas/io/tests/parser/data/{salary.table.csv => salaries.tsv} (100%) create mode 100644 pandas/io/tests/parser/data/salaries.tsv.bz2 rename pandas/io/tests/parser/data/{salary.table.gz => salaries.tsv.gz} (100%) create mode 100644 pandas/io/tests/parser/data/salaries.tsv.xz create mode 100644 pandas/io/tests/parser/data/salaries.tsv.zip diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index 3be02c55ea10a..8ac24a22c96ac 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -630,10 +630,10 @@ def test_read_csv_parse_simple_list(self): def test_url(self): # HTTP(S) url = ('https://raw.github.com/pandas-dev/pandas/master/' - 'pandas/io/tests/parser/data/salary.table.csv') + 'pandas/io/tests/parser/data/salaries.tsv') url_table = self.read_table(url) dirpath = tm.get_data_path() - localtable = os.path.join(dirpath, 'salary.table.csv') + localtable = os.path.join(dirpath, 'salaries.tsv') local_table = self.read_table(localtable) tm.assert_frame_equal(url_table, local_table) # TODO: ftp testing @@ -641,7 +641,7 @@ def test_url(self): @tm.slow def test_file(self): dirpath = tm.get_data_path() - localtable = os.path.join(dirpath, 'salary.table.csv') + localtable = os.path.join(dirpath, 'salaries.tsv') local_table = self.read_table(localtable) try: diff --git a/pandas/io/tests/parser/data/salary.table.csv b/pandas/io/tests/parser/data/salaries.tsv similarity index 100% rename from pandas/io/tests/parser/data/salary.table.csv rename to pandas/io/tests/parser/data/salaries.tsv diff --git a/pandas/io/tests/parser/data/salaries.tsv.bz2 b/pandas/io/tests/parser/data/salaries.tsv.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..a68b4e62bf34a64118a776c3575a848d2dee3eea GIT binary patch literal 283 zcmV+$0p$KdT4*^jL0KkKSwin59RLA)-T(kFKmdQ>0s;s?FadjPCWfk?`V564U}|{@ zw3NibCX|?(r>UmYq#z#udx$b-45?apVahHUu7f_`FRN_siIg?f?>dqH$ITaOEQHr9du@O{PIlb?An)*U(TfOTX zbpsCAbm5!s!W(C(!5NmFQ?yeZP^@L8$($VyP+WvF%a>z(y78UnoV7}xCJwa<@d0TE z#Mku-jM^4(+(0?TNRg(UtccGpkwvS9M@c>Pbnrp>s&pj? z?{#o?>=k#NICS!%dd2YXQg%@sI>ulS#?Cxh1cpnuCht<_GBz9_iVNOp*glYq|CXA11Wb zPp-Z_P{&*cfI&}IE_@P6UJ!a+r?9`gmV_zs%8-StgMBYn%D83mo{#8c-x{fIE_I}t%QC)7UlykOvN~LyGyJ4luv^P5Df@1 zSoyPi3vu5SCKqWQha7lxYqoESQ@84CB2x0;fz$N16pb5^?-^cbb$+%$5YC*l#Q*?h iBLnR-8?NpE0jmP}1pok6{p&Ze#Ao{g000001X)`D)|(0d literal 0 HcmV?d00001 diff --git a/pandas/io/tests/parser/data/salaries.tsv.zip b/pandas/io/tests/parser/data/salaries.tsv.zip new file mode 100644 index 0000000000000000000000000000000000000000..294f65b36771d2cfba97a51f9fa6df494d511fc2 GIT binary patch literal 445 zcmWIWW@Zs#U|`^2NDD~z^sm%i&(Fxf@RNywfrmkcp*S%ou_!aOSg)kGEHs3Zfq8+m zbi|95G7+T}+zgB?FPIq^z=Utm?$RR$JgaMi?{NH?ETxdT%H(GKBoCFNr>pgN8kjV0 z*q?v@`D@Mo35VbQO|aK@Y4$kqp52U(eUC+HS55Vbl9JE$pIIJGQTn>4{=-c6)LcWu zhgx?kkDTDFikp_*E^qHtw&&N!c*Y4o-l^~SKJm4`!?Dij=duNb&s;qoolBlHce#Ad zyUyyk=~M633VwcfUX4Fe^W$0WxX#o4TIbX1qxb#(_HlAUt&!S&R+Ym0^9{J->!}4{?zXs86LxE@v*6z8>q6LW?aE4 o0Ss;i24E;NENKL>Py?A263A!)9N^8$22#righ4<$6r`O205eCa6aWAK literal 0 HcmV?d00001 diff --git a/pandas/io/tests/parser/test_network.py b/pandas/io/tests/parser/test_network.py index 7e2f039853e2f..aef2e0ec0abe1 100644 --- a/pandas/io/tests/parser/test_network.py +++ b/pandas/io/tests/parser/test_network.py @@ -18,19 +18,19 @@ class TestUrlGz(tm.TestCase): def setUp(self): dirpath = tm.get_data_path() - localtable = os.path.join(dirpath, 'salary.table.csv') + localtable = os.path.join(dirpath, 'salaries.tsv') self.local_table = read_table(localtable) @tm.network def test_url_gz(self): url = ('https://raw.github.com/pandas-dev/pandas/' - 'master/pandas/io/tests/parser/data/salary.table.gz') + 'master/pandas/io/tests/parser/data/salaries.tsv.gz') url_table = read_table(url, compression="gzip", engine="python") tm.assert_frame_equal(url_table, self.local_table) @tm.network def test_url_gz_infer(self): - url = 'https://s3.amazonaws.com/pandas-test/salary.table.gz' + url = 'https://s3.amazonaws.com/pandas-test/salaries.tsv.gz' url_table = read_table(url, compression="infer", engine="python") tm.assert_frame_equal(url_table, self.local_table) From 24341b53341455433abcb6d01a2c7b4071e35316 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Wed, 9 Nov 2016 10:39:56 -0500 Subject: [PATCH 2/2] Revert to CSV from TSV extension See https://github.com/pandas-dev/pandas/pull/14587#issuecomment-259250599 --- pandas/io/tests/parser/common.py | 6 +++--- .../parser/data/{salaries.tsv => salaries.csv} | 0 .../data/{salaries.tsv.bz2 => salaries.csv.bz2} | Bin .../data/{salaries.tsv.gz => salaries.csv.gz} | Bin .../data/{salaries.tsv.xz => salaries.csv.xz} | Bin .../data/{salaries.tsv.zip => salaries.csv.zip} | Bin pandas/io/tests/parser/test_network.py | 6 +++--- 7 files changed, 6 insertions(+), 6 deletions(-) rename pandas/io/tests/parser/data/{salaries.tsv => salaries.csv} (100%) rename pandas/io/tests/parser/data/{salaries.tsv.bz2 => salaries.csv.bz2} (100%) rename pandas/io/tests/parser/data/{salaries.tsv.gz => salaries.csv.gz} (100%) rename pandas/io/tests/parser/data/{salaries.tsv.xz => salaries.csv.xz} (100%) rename pandas/io/tests/parser/data/{salaries.tsv.zip => salaries.csv.zip} (100%) diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index 8ac24a22c96ac..f0fdc9398084f 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -630,10 +630,10 @@ def test_read_csv_parse_simple_list(self): def test_url(self): # HTTP(S) url = ('https://raw.github.com/pandas-dev/pandas/master/' - 'pandas/io/tests/parser/data/salaries.tsv') + 'pandas/io/tests/parser/data/salaries.csv') url_table = self.read_table(url) dirpath = tm.get_data_path() - localtable = os.path.join(dirpath, 'salaries.tsv') + localtable = os.path.join(dirpath, 'salaries.csv') local_table = self.read_table(localtable) tm.assert_frame_equal(url_table, local_table) # TODO: ftp testing @@ -641,7 +641,7 @@ def test_url(self): @tm.slow def test_file(self): dirpath = tm.get_data_path() - localtable = os.path.join(dirpath, 'salaries.tsv') + localtable = os.path.join(dirpath, 'salaries.csv') local_table = self.read_table(localtable) try: diff --git a/pandas/io/tests/parser/data/salaries.tsv b/pandas/io/tests/parser/data/salaries.csv similarity index 100% rename from pandas/io/tests/parser/data/salaries.tsv rename to pandas/io/tests/parser/data/salaries.csv diff --git a/pandas/io/tests/parser/data/salaries.tsv.bz2 b/pandas/io/tests/parser/data/salaries.csv.bz2 similarity index 100% rename from pandas/io/tests/parser/data/salaries.tsv.bz2 rename to pandas/io/tests/parser/data/salaries.csv.bz2 diff --git a/pandas/io/tests/parser/data/salaries.tsv.gz b/pandas/io/tests/parser/data/salaries.csv.gz similarity index 100% rename from pandas/io/tests/parser/data/salaries.tsv.gz rename to pandas/io/tests/parser/data/salaries.csv.gz diff --git a/pandas/io/tests/parser/data/salaries.tsv.xz b/pandas/io/tests/parser/data/salaries.csv.xz similarity index 100% rename from pandas/io/tests/parser/data/salaries.tsv.xz rename to pandas/io/tests/parser/data/salaries.csv.xz diff --git a/pandas/io/tests/parser/data/salaries.tsv.zip b/pandas/io/tests/parser/data/salaries.csv.zip similarity index 100% rename from pandas/io/tests/parser/data/salaries.tsv.zip rename to pandas/io/tests/parser/data/salaries.csv.zip diff --git a/pandas/io/tests/parser/test_network.py b/pandas/io/tests/parser/test_network.py index aef2e0ec0abe1..964c927c3c496 100644 --- a/pandas/io/tests/parser/test_network.py +++ b/pandas/io/tests/parser/test_network.py @@ -18,19 +18,19 @@ class TestUrlGz(tm.TestCase): def setUp(self): dirpath = tm.get_data_path() - localtable = os.path.join(dirpath, 'salaries.tsv') + localtable = os.path.join(dirpath, 'salaries.csv') self.local_table = read_table(localtable) @tm.network def test_url_gz(self): url = ('https://raw.github.com/pandas-dev/pandas/' - 'master/pandas/io/tests/parser/data/salaries.tsv.gz') + 'master/pandas/io/tests/parser/data/salaries.csv.gz') url_table = read_table(url, compression="gzip", engine="python") tm.assert_frame_equal(url_table, self.local_table) @tm.network def test_url_gz_infer(self): - url = 'https://s3.amazonaws.com/pandas-test/salaries.tsv.gz' + url = 'https://s3.amazonaws.com/pandas-test/salaries.csv.gz' url_table = read_table(url, compression="infer", engine="python") tm.assert_frame_equal(url_table, self.local_table)