From 98d4c6b7bf0081b18ad8e135167efcec5e35235a Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Wed, 7 Dec 2022 15:42:43 -0500 Subject: [PATCH 1/4] Refactor case_sparql_select code This patch is code-motion to give function names to chunks of `case_sparql_select:main`. Some upcoming patch series are going to add features that, when taken together, introduce non-trivial parameter-value cross-dependencies. Moving functionality to functions enables combinatoric testing in a `pytest` space, rather than resorting to copying, pasting, and tweaking many Makefile lines. A future patch series will add the `pytest` script. Signed-off-by: Alex Nelson --- case_utils/case_sparql_select/__init__.py | 195 ++++++++++++++-------- 1 file changed, 127 insertions(+), 68 deletions(-) diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py index eaa98cb..8b25f30 100644 --- a/case_utils/case_sparql_select/__init__.py +++ b/case_utils/case_sparql_select/__init__.py @@ -49,74 +49,44 @@ _logger = logging.getLogger(os.path.basename(__file__)) -def main() -> None: - parser = argparse.ArgumentParser() - - # Configure debug logging before running parse_args, because there could be an error raised before the construction of the argument parser. - logging.basicConfig( - level=logging.DEBUG - if ("--debug" in sys.argv or "-d" in sys.argv) - else logging.INFO - ) - - parser.add_argument("-d", "--debug", action="store_true") - parser.add_argument( - "--built-version", - choices=tuple(built_version_choices_list), - default="case-" + CURRENT_CASE_VERSION, - help="Ontology version to use to supplement query, such as for subclass querying. Does not require networking to use. Default is most recent CASE release. 
Passing 'none' will mean no pre-built CASE ontology versions accompanying this tool will be included in the analysis.", - ) - parser.add_argument( - "--disallow-empty-results", - action="store_true", - help="Raise error if no results are returned for query.", - ) - parser.add_argument( - "--use-prefixes", - action="store_true", - help="Abbreviate node IDs according to graph's encoded prefixes. (This will use prefixes in the graph, not the query.)", - ) - parser.add_argument( - "out_table", - help="Expected extensions are .html for HTML tables, .md for Markdown tables, .csv for comma-separated values, and .tsv for tab-separated values.", - ) - parser.add_argument( - "in_sparql", - help="File containing a SPARQL SELECT query. Note that prefixes not mapped with a PREFIX statement will be mapped according to their first occurrence among input graphs.", - ) - parser.add_argument("in_graph", nargs="+") - args = parser.parse_args() - - graph = rdflib.Graph() - for in_graph_filename in args.in_graph: - graph.parse(in_graph_filename) - - # Inherit prefixes defined in input context dictionary. - nsdict = {k: v for (k, v) in graph.namespace_manager.namespaces()} - - select_query_text = None - with open(args.in_sparql, "r") as in_fh: - select_query_text = in_fh.read().strip() - _logger.debug("select_query_text = %r." % select_query_text) - - if "subClassOf" in select_query_text: - case_utils.ontology.load_subclass_hierarchy( - graph, built_version=args.built_version - ) - +def query_text_to_variables(select_query_text: str) -> typing.List[str]: # Build columns list from SELECT line. 
select_query_text_lines = select_query_text.split("\n") select_line = [ line for line in select_query_text_lines if line.startswith("SELECT ") ][0] variables = select_line.replace(" DISTINCT", "").replace("SELECT ", "").split(" ") + return variables + + +def graph_and_query_to_data_frame( + graph: rdflib.Graph, + select_query_text: str, + *args: typing.Any, + built_version: str = "case-" + CURRENT_CASE_VERSION, + disallow_empty_results: bool = False, + use_prefixes: bool = False, + **kwargs: typing.Any, +) -> pd.DataFrame: + # Inherit prefixes defined in input context dictionary. + nsdict = {k: v for (k, v) in graph.namespace_manager.namespaces()} + + # Avoid side-effects on input parameter. + if "subClassOf" in select_query_text: + _graph = rdflib.Graph() + _graph += graph + case_utils.ontology.load_subclass_hierarchy(_graph, built_version=built_version) + else: + _graph = graph + + variables = query_text_to_variables(select_query_text) tally = 0 records = [] select_query_object = rdflib.plugins.sparql.processor.prepareQuery( select_query_text, initNs=nsdict ) - for (row_no, row) in enumerate(graph.query(select_query_object)): + for (row_no, row) in enumerate(_graph.query(select_query_object)): tally = row_no + 1 record = [] for (column_no, column) in enumerate(row): @@ -131,7 +101,7 @@ def main() -> None: # .decode() is because hexlify returns bytes. column_value = binascii.hexlify(column.toPython()).decode() elif isinstance(column, rdflib.URIRef): - if args.use_prefixes: + if use_prefixes: column_value = graph.namespace_manager.qname(column.toPython()) else: column_value = column.toPython() @@ -141,39 +111,128 @@ def main() -> None: _logger.debug("row[0]column[%d] = %r." 
% (column_no, column_value)) record.append(column_value) records.append(record) + if tally == 0: - if args.disallow_empty_results: + if disallow_empty_results: raise ValueError("Failed to return any results.") df = pd.DataFrame(records, columns=variables) + return df + +def data_frame_to_table_text( + df: pd.DataFrame, + *args: typing.Any, + output_mode: str, + **kwargs: typing.Any, +) -> str: table_text: typing.Optional[str] = None - if args.out_table.endswith(".csv") or args.out_table.endswith(".tsv"): - # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html + + if output_mode in {"csv", "tsv"}: sep: str - if args.out_table.endswith(".csv"): + if output_mode == "csv": sep = "," - elif args.out_table.endswith(".tsv"): + elif output_mode == "tsv": sep = "\t" else: raise NotImplementedError( "Output extension not implemented in CSV-style output." ) table_text = df.to_csv(sep=sep) - elif args.out_table.endswith(".html"): + elif output_mode == "html": # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_html.html # Add CSS classes for CASE website Bootstrap support. table_text = df.to_html(classes=("table", "table-bordered", "table-condensed")) - elif args.out_table.endswith(".md"): + elif output_mode == "md": # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_markdown.html # https://pypi.org/project/tabulate/ # Assume Github-flavored Markdown. + table_text = df.to_markdown(tablefmt="github") - if table_text is None: - raise NotImplementedError( - "Unsupported output extension for output filename %r.", args.out_table - ) + else: + if table_text is None: + raise NotImplementedError("Unimplemented output mode: %r." 
% output_mode) + assert table_text is not None + + return table_text + + +def main() -> None: + parser = argparse.ArgumentParser() + + # Configure debug logging before running parse_args, because there could be an error raised before the construction of the argument parser. + logging.basicConfig( + level=logging.DEBUG + if ("--debug" in sys.argv or "-d" in sys.argv) + else logging.INFO + ) + + parser.add_argument("-d", "--debug", action="store_true") + parser.add_argument( + "--built-version", + choices=tuple(built_version_choices_list), + default="case-" + CURRENT_CASE_VERSION, + help="Ontology version to use to supplement query, such as for subclass querying. Does not require networking to use. Default is most recent CASE release. Passing 'none' will mean no pre-built CASE ontology versions accompanying this tool will be included in the analysis.", + ) + parser.add_argument( + "--disallow-empty-results", + action="store_true", + help="Raise error if no results are returned for query.", + ) + parser.add_argument( + "--use-prefixes", + action="store_true", + help="Abbreviate node IDs according to graph's encoded prefixes. (This will use prefixes in the graph, not the query.)", + ) + parser.add_argument( + "out_table", + help="Expected extensions are .html for HTML tables, .md for Markdown tables, .csv for comma-separated values, and .tsv for tab-separated values.", + ) + parser.add_argument( + "in_sparql", + help="File containing a SPARQL SELECT query. 
Note that prefixes not mapped with a PREFIX statement will be mapped according to their first occurrence among input graphs.", + ) + + parser.add_argument("in_graph", nargs="+") + args = parser.parse_args() + output_mode: str + if args.out_table.endswith(".csv"): + output_mode = "csv" + elif args.out_table.endswith(".html"): + output_mode = "html" + elif args.out_table.endswith(".json"): + output_mode = "json" + elif args.out_table.endswith(".md"): + output_mode = "md" + elif args.out_table.endswith(".tsv"): + output_mode = "tsv" + else: + raise NotImplementedError("Output file extension not implemented.") + + graph = rdflib.Graph() + for in_graph_filename in args.in_graph: + graph.parse(in_graph_filename) + + select_query_text: typing.Optional[str] = None + with open(args.in_sparql, "r") as in_fh: + select_query_text = in_fh.read().strip() + if select_query_text is None: + raise ValueError("Failed to load query.") + _logger.debug("select_query_text = %r." % select_query_text) + + df = graph_and_query_to_data_frame( + graph, + select_query_text, + built_version=args.built_version, + disallow_empty_results=args.disallow_empty_results is True, + use_prefixes=args.use_prefixes is True, + ) + + table_text = data_frame_to_table_text( + df, + output_mode=output_mode, + ) with open(args.out_table, "w") as out_fh: out_fh.write(table_text) if table_text[-1] != "\n": From e69959272993d009bc9983724a7b8a162359f8c3 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Wed, 7 Dec 2022 16:36:32 -0500 Subject: [PATCH 2/4] Add case_sparql_select JSON output No effects were observed on Make-managed files. 
Signed-off-by: Alex Nelson --- case_utils/case_sparql_select/__init__.py | 23 ++++++- .../.check-prefixed_results-indented.json | 6 ++ .../.check-prefixed_results.json | 1 + .../.check-w3-output-indented.json | 10 +++ .../case_sparql_select/.check-w3-output.json | 1 + .../case_utils/case_sparql_select/.gitignore | 2 +- tests/case_utils/case_sparql_select/Makefile | 68 +++++++++++++++++++ 7 files changed, 109 insertions(+), 2 deletions(-) create mode 100644 tests/case_utils/case_sparql_select/.check-prefixed_results-indented.json create mode 100644 tests/case_utils/case_sparql_select/.check-prefixed_results.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-indented.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output.json diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py index 8b25f30..16d3c67 100644 --- a/case_utils/case_sparql_select/__init__.py +++ b/case_utils/case_sparql_select/__init__.py @@ -123,6 +123,8 @@ def graph_and_query_to_data_frame( def data_frame_to_table_text( df: pd.DataFrame, *args: typing.Any, + json_indent: typing.Optional[int] = None, + json_orient: str, output_mode: str, **kwargs: typing.Any, ) -> str: @@ -143,6 +145,12 @@ def data_frame_to_table_text( # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_html.html # Add CSS classes for CASE website Bootstrap support. 
table_text = df.to_html(classes=("table", "table-bordered", "table-condensed")) + elif output_mode == "json": + # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_json.html + + table_text = df.to_json( + indent=json_indent, orient=json_orient, date_format="iso" + ) elif output_mode == "md": # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_markdown.html # https://pypi.org/project/tabulate/ @@ -179,6 +187,17 @@ def main() -> None: action="store_true", help="Raise error if no results are returned for query.", ) + parser.add_argument( + "--json-indent", + type=int, + help="Number of whitespace characters to use for indentation. Only applicable for JSON output.", + ) + parser.add_argument( + "--json-orient", + default="columns", + choices=("columns", "index", "records", "split", "table", "values"), + help="Orientation to use for Pandas DataFrame JSON output. Only applicable for JSON output.", + ) parser.add_argument( "--use-prefixes", action="store_true", @@ -186,7 +205,7 @@ def main() -> None: ) parser.add_argument( "out_table", - help="Expected extensions are .html for HTML tables, .md for Markdown tables, .csv for comma-separated values, and .tsv for tab-separated values.", + help="Expected extensions are .html for HTML tables, .json for JSON tables, .md for Markdown tables, .csv for comma-separated values, and .tsv for tab-separated values. 
Note that JSON is a Pandas output JSON format (chosen by '--json-orient'), and not JSON-LD.", ) parser.add_argument( "in_sparql", @@ -231,6 +250,8 @@ def main() -> None: table_text = data_frame_to_table_text( df, + json_indent=args.json_indent, + json_orient=args.json_orient, output_mode=output_mode, ) with open(args.out_table, "w") as out_fh: diff --git a/tests/case_utils/case_sparql_select/.check-prefixed_results-indented.json b/tests/case_utils/case_sparql_select/.check-prefixed_results-indented.json new file mode 100644 index 0000000..c356eca --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-prefixed_results-indented.json @@ -0,0 +1,6 @@ +{ + "?nFile":{ + "0":"kb:file-1", + "1":"kb:file-2" + } +} diff --git a/tests/case_utils/case_sparql_select/.check-prefixed_results.json b/tests/case_utils/case_sparql_select/.check-prefixed_results.json new file mode 100644 index 0000000..63a9d7f --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-prefixed_results.json @@ -0,0 +1 @@ +{"?nFile":{"0":"kb:file-1","1":"kb:file-2"}} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-indented.json b/tests/case_utils/case_sparql_select/.check-w3-output-indented.json new file mode 100644 index 0000000..8f426c6 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-indented.json @@ -0,0 +1,10 @@ +{ + "?name":{ + "0":"Johnny Lee Outlaw", + "1":"Peter Goodguy" + }, + "?mbox":{ + "0":"mailto:jlow@example.com", + "1":"mailto:peter@example.org" + } +} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output.json b/tests/case_utils/case_sparql_select/.check-w3-output.json new file mode 100644 index 0000000..840035b --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output.json @@ -0,0 +1 @@ +{"?name":{"0":"Johnny Lee Outlaw","1":"Peter Goodguy"},"?mbox":{"0":"mailto:jlow@example.com","1":"mailto:peter@example.org"}} diff --git a/tests/case_utils/case_sparql_select/.gitignore 
b/tests/case_utils/case_sparql_select/.gitignore index 9d94989..122f73b 100644 --- a/tests/case_utils/case_sparql_select/.gitignore +++ b/tests/case_utils/case_sparql_select/.gitignore @@ -1,3 +1,3 @@ prefixed* subclass-*.md -w3-output.* +w3-output* diff --git a/tests/case_utils/case_sparql_select/Makefile b/tests/case_utils/case_sparql_select/Makefile index 68f11ec..70bb0ec 100644 --- a/tests/case_utils/case_sparql_select/Makefile +++ b/tests/case_utils/case_sparql_select/Makefile @@ -20,12 +20,16 @@ tests_srcdir := $(top_srcdir)/tests all: \ prefixed_results.csv \ prefixed_results.html \ + prefixed_results.json \ + prefixed_results-indented.json \ prefixed_results.md \ prefixed_results.tsv \ subclass-explicit-none.md \ subclass-implicit-any.md \ w3-output.csv \ w3-output.html \ + w3-output.json \ + w3-output-indented.json \ w3-output.md \ w3-output.tsv @@ -35,6 +39,8 @@ all: \ check-subclass-implicit-any \ check-w3-csv \ check-w3-html \ + check-w3-json \ + check-w3-json-indented \ check-w3-markdown \ check-w3-tsv @@ -46,6 +52,7 @@ all: \ check: \ check-w3-csv \ check-w3-html \ + check-w3-json \ check-w3-markdown \ check-w3-tsv \ check-prefixed_results \ @@ -54,6 +61,7 @@ check: \ check-prefixed_results: \ check-prefixed_results-csv \ check-prefixed_results-html \ + check-prefixed_results-json \ check-prefixed_results-md \ check-prefixed_results-tsv @@ -67,6 +75,19 @@ check-prefixed_results-html: \ prefixed_results.html diff $^ +check-prefixed_results-json: \ + check-prefixed_results-json-indented \ + .check-prefixed_results.json \ + prefixed_results.json + diff \ + .check-prefixed_results.json \ + prefixed_results.json + +check-prefixed_results-json-indented: \ + .check-prefixed_results-indented.json \ + prefixed_results-indented.json + diff $^ + check-prefixed_results-md: \ .check-prefixed_results.md \ prefixed_results.md @@ -101,6 +122,19 @@ check-w3-html: \ w3-output.html diff $^ +check-w3-json: \ + .check-w3-output.json \ + check-w3-json-indented \ + 
w3-output.json + diff \ + .check-w3-output.json \ + w3-output.json + +check-w3-json-indented: \ + .check-w3-output-indented.json \ + w3-output-indented.json + diff $^ + check-w3-markdown: \ .check-w3-output.md \ w3-output.md @@ -119,6 +153,7 @@ clean: *.html \ *.md \ *.tsv \ + *output*.json \ _* prefixed_results.%: \ @@ -136,6 +171,22 @@ prefixed_results.%: \ subclass.json mv _$@ $@ +prefixed_results-indented.json: \ + $(tests_srcdir)/.venv.done.log \ + $(top_srcdir)/case_utils/case_sparql_select/__init__.py \ + $(top_srcdir)/case_utils/ontology/__init__.py \ + $(top_srcdir)/case_utils/ontology/version_info.py \ + subclass.json \ + subclass.sparql + source $(tests_srcdir)/venv/bin/activate \ + && case_sparql_select \ + --json-indent 4 \ + --use-prefixes \ + _$@ \ + subclass.sparql \ + subclass.json + mv _$@ $@ + subclass-explicit-none.md: \ $(tests_srcdir)/.venv.done.log \ $(top_srcdir)/case_utils/case_sparql_select/__init__.py \ @@ -180,3 +231,20 @@ w3-output.%: \ w3-input-2.ttl \ w3-input-3.json mv _$@ $@ + +w3-output-indented.json: \ + $(tests_srcdir)/.venv.done.log \ + $(top_srcdir)/case_utils/case_sparql_select/__init__.py \ + $(top_srcdir)/case_utils/ontology/__init__.py \ + $(top_srcdir)/case_utils/ontology/version_info.py \ + w3-input-1.sparql \ + w3-input-2.ttl \ + w3-input-3.json + source $(tests_srcdir)/venv/bin/activate \ + && case_sparql_select \ + --json-indent 4 \ + _$@ \ + w3-input-1.sparql \ + w3-input-2.ttl \ + w3-input-3.json + mv _$@ $@ From b1bcad32ec2aa8939f38272d3252e8f642706f43 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Wed, 7 Dec 2022 17:13:45 -0500 Subject: [PATCH 3/4] Add options to case_sparql_select to disable headers and index numbers The test pattern for these features encourages an enumerative approach, so a `pytest` script exercising the freshly broken-out functions is now included. No effects were observed on Make-managed files. 
Signed-off-by: Alex Nelson --- case_utils/case_sparql_select/__init__.py | 70 ++++++++++++++++- tests/case_utils/Makefile | 1 + ...check-w3-output-with_header-with_index.csv | 3 + ...heck-w3-output-with_header-with_index.html | 21 ++++++ ....check-w3-output-with_header-with_index.md | 4 + ...check-w3-output-with_header-with_index.tsv | 3 + ...ck-w3-output-with_header-without_index.csv | 3 + ...k-w3-output-with_header-without_index.html | 18 +++++ ...eck-w3-output-with_header-without_index.md | 4 + ...ck-w3-output-with_header-without_index.tsv | 3 + ...ck-w3-output-without_header-with_index.csv | 2 + ...k-w3-output-without_header-with_index.html | 14 ++++ ...eck-w3-output-without_header-with_index.md | 3 + ...ck-w3-output-without_header-with_index.tsv | 2 + ...w3-output-without_header-without_index.csv | 2 + ...3-output-without_header-without_index.html | 12 +++ ...-w3-output-without_header-without_index.md | 3 + ...w3-output-without_header-without_index.tsv | 2 + tests/case_utils/case_sparql_select/Makefile | 3 + .../test_data_frame_to_table_text_json.py | 75 +++++++++++++++++++ 20 files changed, 245 insertions(+), 3 deletions(-) create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.csv create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.html create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.md create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.tsv create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.csv create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.html create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.md create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.tsv create mode 100644 
tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.csv create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.html create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.md create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.tsv create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.csv create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.html create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.md create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.tsv create mode 100644 tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py index 8b25f30..2d8581c 100644 --- a/case_utils/case_sparql_select/__init__.py +++ b/case_utils/case_sparql_select/__init__.py @@ -124,10 +124,25 @@ def data_frame_to_table_text( df: pd.DataFrame, *args: typing.Any, output_mode: str, + use_header: bool, + use_index: bool, **kwargs: typing.Any, ) -> str: table_text: typing.Optional[str] = None + # Set up kwargs dicts. One kwarg behaves slightly differently for Markdown vs. other formats. + general_kwargs: typing.Dict[str, typing.Any] = dict() + md_kwargs: typing.Dict[str, typing.Any] = dict() + + # Note some output modes will drop 'header' from general_kwargs, due to alternate support or lack of support. 
+ if use_header: + general_kwargs["header"] = True + else: + general_kwargs["header"] = False + md_kwargs["headers"] = tuple() + + general_kwargs["index"] = use_index + if output_mode in {"csv", "tsv"}: sep: str if output_mode == "csv": @@ -138,17 +153,22 @@ def data_frame_to_table_text( raise NotImplementedError( "Output extension not implemented in CSV-style output." ) - table_text = df.to_csv(sep=sep) + table_text = df.to_csv(sep=sep, **general_kwargs) elif output_mode == "html": # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_html.html # Add CSS classes for CASE website Bootstrap support. - table_text = df.to_html(classes=("table", "table-bordered", "table-condensed")) + table_text = df.to_html( + classes=("table", "table-bordered", "table-condensed"), **general_kwargs + ) elif output_mode == "md": # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_markdown.html # https://pypi.org/project/tabulate/ # Assume Github-flavored Markdown. - table_text = df.to_markdown(tablefmt="github") + # Drop unsupported kwarg. + del general_kwargs["header"] + + table_text = df.to_markdown(tablefmt="github", **general_kwargs, **md_kwargs) else: if table_text is None: raise NotImplementedError("Unimplemented output mode: %r." % output_mode) @@ -193,6 +213,30 @@ def main() -> None: help="File containing a SPARQL SELECT query. Note that prefixes not mapped with a PREFIX statement will be mapped according to their first occurrence among input graphs.", ) + parser_header_group = parser.add_mutually_exclusive_group(required=False) + parser_header_group.add_argument( + "--header", + action="store_true", + help="Print column labels. 
This is the default behavior.", + ) + parser_header_group.add_argument( + "--no-header", + action="store_true", + help="Do not print column labels.", + ) + + parser_index_group = parser.add_mutually_exclusive_group(required=False) + parser_index_group.add_argument( + "--index", + action="store_true", + help="Print index (auto-incrementing row labels as left untitled column). This is the default behavior.", + ) + parser_index_group.add_argument( + "--no-index", + action="store_true", + help="Do not print index.", + ) + parser.add_argument("in_graph", nargs="+") args = parser.parse_args() @@ -221,6 +265,24 @@ def main() -> None: raise ValueError("Failed to load query.") _logger.debug("select_query_text = %r." % select_query_text) + # Process --header and --no-header. + use_header: bool + if args.header is True: + use_header = True + if args.no_header is True: + use_header = False + else: + use_header = True + + # Process --index and --no-index. + use_index: bool + if args.index is True: + use_index = True + if args.no_index is True: + use_index = False + else: + use_index = True + df = graph_and_query_to_data_frame( graph, select_query_text, @@ -232,6 +294,8 @@ def main() -> None: table_text = data_frame_to_table_text( df, output_mode=output_mode, + use_header=use_header, + use_index=use_index, ) with open(args.out_table, "w") as out_fh: out_fh.write(table_text) diff --git a/tests/case_utils/Makefile b/tests/case_utils/Makefile index e77c927..3c65a40 100644 --- a/tests/case_utils/Makefile +++ b/tests/case_utils/Makefile @@ -65,6 +65,7 @@ check: \ && pytest \ --ignore case_file \ --ignore case_sparql_construct \ + --ignore case_sparql_select \ --ignore case_validate \ --log-level=DEBUG diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.csv b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.csv new file mode 100644 index 0000000..063e950 --- /dev/null +++ 
b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.csv @@ -0,0 +1,3 @@ +,?name,?mbox +0,Johnny Lee Outlaw,mailto:jlow@example.com +1,Peter Goodguy,mailto:peter@example.org diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.html b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.html new file mode 100644 index 0000000..bee5944 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.html @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + +
?name?mbox
0Johnny Lee Outlawmailto:jlow@example.com
1Peter Goodguymailto:peter@example.org
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.md b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.md new file mode 100644 index 0000000..af68b84 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.md @@ -0,0 +1,4 @@ +| | ?name | ?mbox | +|----|-------------------|--------------------------| +| 0 | Johnny Lee Outlaw | mailto:jlow@example.com | +| 1 | Peter Goodguy | mailto:peter@example.org | diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.tsv b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.tsv new file mode 100644 index 0000000..a4fdfca --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.tsv @@ -0,0 +1,3 @@ + ?name ?mbox +0 Johnny Lee Outlaw mailto:jlow@example.com +1 Peter Goodguy mailto:peter@example.org diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.csv b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.csv new file mode 100644 index 0000000..6bd60fb --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.csv @@ -0,0 +1,3 @@ +?name,?mbox +Johnny Lee Outlaw,mailto:jlow@example.com +Peter Goodguy,mailto:peter@example.org diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.html b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.html new file mode 100644 index 0000000..041fd3b --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.html @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + +
?name?mbox
Johnny Lee Outlawmailto:jlow@example.com
Peter Goodguymailto:peter@example.org
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.md b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.md new file mode 100644 index 0000000..3aa8a01 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.md @@ -0,0 +1,4 @@ +| ?name | ?mbox | +|-------------------|--------------------------| +| Johnny Lee Outlaw | mailto:jlow@example.com | +| Peter Goodguy | mailto:peter@example.org | diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.tsv b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.tsv new file mode 100644 index 0000000..dd1e81d --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.tsv @@ -0,0 +1,3 @@ +?name ?mbox +Johnny Lee Outlaw mailto:jlow@example.com +Peter Goodguy mailto:peter@example.org diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.csv b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.csv new file mode 100644 index 0000000..7933d39 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.csv @@ -0,0 +1,2 @@ +0,Johnny Lee Outlaw,mailto:jlow@example.com +1,Peter Goodguy,mailto:peter@example.org diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.html b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.html new file mode 100644 index 0000000..b6a842b --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.html @@ -0,0 +1,14 @@ + + + + + + + + + + + + + +
0Johnny Lee Outlawmailto:jlow@example.com
1Peter Goodguymailto:peter@example.org
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.md b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.md new file mode 100644 index 0000000..c5ee8c8 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.md @@ -0,0 +1,3 @@ +|---|-------------------|--------------------------| +| 0 | Johnny Lee Outlaw | mailto:jlow@example.com | +| 1 | Peter Goodguy | mailto:peter@example.org | diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.tsv b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.tsv new file mode 100644 index 0000000..992efe2 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.tsv @@ -0,0 +1,2 @@ +0 Johnny Lee Outlaw mailto:jlow@example.com +1 Peter Goodguy mailto:peter@example.org diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.csv b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.csv new file mode 100644 index 0000000..a4c2c82 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.csv @@ -0,0 +1,2 @@ +Johnny Lee Outlaw,mailto:jlow@example.com +Peter Goodguy,mailto:peter@example.org diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.html b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.html new file mode 100644 index 0000000..6dbc7c3 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.html @@ -0,0 +1,12 @@ + + + + + + + + + + + +
Johnny Lee Outlawmailto:jlow@example.com
Peter Goodguymailto:peter@example.org
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.md b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.md new file mode 100644 index 0000000..6ad505c --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.md @@ -0,0 +1,3 @@ +|-------------------|--------------------------| +| Johnny Lee Outlaw | mailto:jlow@example.com | +| Peter Goodguy | mailto:peter@example.org | diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.tsv b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.tsv new file mode 100644 index 0000000..833da47 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.tsv @@ -0,0 +1,2 @@ +Johnny Lee Outlaw mailto:jlow@example.com +Peter Goodguy mailto:peter@example.org diff --git a/tests/case_utils/case_sparql_select/Makefile b/tests/case_utils/case_sparql_select/Makefile index 68f11ec..0523c8a 100644 --- a/tests/case_utils/case_sparql_select/Makefile +++ b/tests/case_utils/case_sparql_select/Makefile @@ -50,6 +50,9 @@ check: \ check-w3-tsv \ check-prefixed_results \ check-subclass + source $(tests_srcdir)/venv/bin/activate \ + && pytest \ + --log-level=DEBUG check-prefixed_results: \ check-prefixed_results-csv \ diff --git a/tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py b/tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py new file mode 100644 index 0000000..00d20c3 --- /dev/null +++ b/tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 + +# This software was developed at the National Institute of Standards +# and Technology by employees of the Federal Government in the course +# of their official duties. 
Pursuant to title 17 Section 105 of the +# United States Code this software is not subject to copyright +# protection and is in the public domain. NIST assumes no +# responsibility whatsoever for its use by other parties, and makes +# no guarantees, expressed or implied, about its quality, +# reliability, or any other characteristic. +# +# We would appreciate acknowledgement if the software is used. + +import pathlib +import typing + +import pytest +import rdflib + +import case_utils.case_sparql_select + +SRCDIR = pathlib.Path(__file__).parent + +GRAPH = rdflib.Graph() +GRAPH.parse(str(SRCDIR / "w3-input-2.ttl")) +GRAPH.parse(str(SRCDIR / "w3-input-3.json")) +assert len(GRAPH) > 0 + +SELECT_QUERY_TEXT: typing.Optional[str] = None +with (SRCDIR / "w3-input-1.sparql").open("r") as _fh: + SELECT_QUERY_TEXT = _fh.read().strip() +assert SELECT_QUERY_TEXT is not None + +DATA_FRAME = case_utils.case_sparql_select.graph_and_query_to_data_frame( + GRAPH, SELECT_QUERY_TEXT +) + + +def make_data_frame_to_json_table_text_parameters() -> typing.Iterator[ + typing.Tuple[str, bool, bool] +]: + for use_header in [False, True]: + for use_index in [False, True]: + for output_mode in ["csv", "html", "md", "tsv"]: + yield (output_mode, use_header, use_index) + + +@pytest.mark.parametrize( + "output_mode, use_header, use_index", + make_data_frame_to_json_table_text_parameters(), +) +def test_data_frame_to_table_text_json( + output_mode: str, + use_header: bool, + use_index: bool, +) -> None: + table_text = case_utils.case_sparql_select.data_frame_to_table_text( + DATA_FRAME, + output_mode=output_mode, + use_header=use_header, + use_index=use_index, + ) + + output_filename_template = ".check-w3-output-%s_header-%s_index.%s" + header_part = "with" if use_header else "without" + index_part = "with" if use_index else "without" + output_filename = output_filename_template % ( + header_part, + index_part, + output_mode, + ) + with (SRCDIR / output_filename).open("w") as out_fh: + 
out_fh.write(table_text) + if table_text[-1] != "\n": + out_fh.write("\n") From ed44477f1ae35747b963d40b4ab61087273ac0cc Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Wed, 7 Dec 2022 17:26:03 -0500 Subject: [PATCH 4/4] Merge JSON output and --no-(header,index) flag branches This patch addresses issues with some parameter-values being compatible with some of the DataFrame table rendering functions, but not with certain `orient` parameters of the JSON rendering function. The incompatible pairings are now caught at parameter-parsing time. Signed-off-by: Alex Nelson --- case_utils/case_sparql_select/__init__.py | 16 +++++++-- ...with_header-with_index-orient-columns.json | 1 + ...t-with_header-with_index-orient-index.json | 1 + ...with_header-with_index-orient-records.json | 1 + ...t-with_header-with_index-orient-split.json | 1 + ...t-with_header-with_index-orient-table.json | 1 + ...-with_header-with_index-orient-values.json | 1 + ...ith_header-without_index-orient-split.json | 1 + ...ith_header-without_index-orient-table.json | 1 + ...hout_header-with_index-orient-columns.json | 1 + ...ithout_header-with_index-orient-index.json | 1 + ...hout_header-with_index-orient-records.json | 1 + ...ithout_header-with_index-orient-split.json | 1 + ...ithout_header-with_index-orient-table.json | 1 + ...thout_header-with_index-orient-values.json | 1 + ...out_header-without_index-orient-split.json | 1 + ...out_header-without_index-orient-table.json | 1 + .../test_data_frame_to_table_text_json.py | 33 ++++++++++++++++--- 18 files changed, 58 insertions(+), 7 deletions(-) create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-columns.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-index.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-records.json create mode 100644 
tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-split.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-table.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-values.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index-orient-split.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index-orient-table.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-columns.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-index.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-records.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-split.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-table.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-values.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index-orient-split.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index-orient-table.json diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py index eb41f96..870e44c 100644 --- a/case_utils/case_sparql_select/__init__.py +++ b/case_utils/case_sparql_select/__init__.py @@ -165,8 +165,11 @@ def data_frame_to_table_text( elif output_mode == "json": # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_json.html + # Drop unsupported kwarg. 
+ del general_kwargs["header"] + table_text = df.to_json( - indent=json_indent, orient=json_orient, date_format="iso" + indent=json_indent, orient=json_orient, date_format="iso", **general_kwargs ) elif output_mode == "md": # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_markdown.html @@ -253,7 +256,7 @@ def main() -> None: parser_index_group.add_argument( "--no-index", action="store_true", - help="Do not print index.", + help="Do not print index. If output is JSON, --json-orient must be 'split' or 'table'.", ) parser.add_argument("in_graph", nargs="+") @@ -302,6 +305,15 @@ def main() -> None: else: use_index = True + if ( + output_mode == "json" + and use_index is False + and args.json_orient not in {"split", "table"} + ): + raise ValueError( + "For JSON output, --no-index flag requires --json-orient to be either 'split' or 'table'." + ) + df = graph_and_query_to_data_frame( graph, select_query_text, diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-columns.json b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-columns.json new file mode 100644 index 0000000..840035b --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-columns.json @@ -0,0 +1 @@ +{"?name":{"0":"Johnny Lee Outlaw","1":"Peter Goodguy"},"?mbox":{"0":"mailto:jlow@example.com","1":"mailto:peter@example.org"}} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-index.json b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-index.json new file mode 100644 index 0000000..717d3ae --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-index.json @@ -0,0 +1 @@ +{"0":{"?name":"Johnny Lee Outlaw","?mbox":"mailto:jlow@example.com"},"1":{"?name":"Peter Goodguy","?mbox":"mailto:peter@example.org"}} diff --git 
a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-records.json b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-records.json new file mode 100644 index 0000000..f053616 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-records.json @@ -0,0 +1 @@ +[{"?name":"Johnny Lee Outlaw","?mbox":"mailto:jlow@example.com"},{"?name":"Peter Goodguy","?mbox":"mailto:peter@example.org"}] diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-split.json b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-split.json new file mode 100644 index 0000000..0401af1 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-split.json @@ -0,0 +1 @@ +{"columns":["?name","?mbox"],"index":[0,1],"data":[["Johnny Lee Outlaw","mailto:jlow@example.com"],["Peter Goodguy","mailto:peter@example.org"]]} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-table.json b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-table.json new file mode 100644 index 0000000..92f005c --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-table.json @@ -0,0 +1 @@ +{"schema":{"fields":[{"name":"index","type":"integer"},{"name":"?name","type":"string"},{"name":"?mbox","type":"string"}],"primaryKey":["index"],"pandas_version":"1.4.0"},"data":[{"index":0,"?name":"Johnny Lee Outlaw","?mbox":"mailto:jlow@example.com"},{"index":1,"?name":"Peter Goodguy","?mbox":"mailto:peter@example.org"}]} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-values.json b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-values.json new file mode 100644 index 0000000..e49c3eb --- /dev/null +++ 
b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-values.json @@ -0,0 +1 @@ +[["Johnny Lee Outlaw","mailto:jlow@example.com"],["Peter Goodguy","mailto:peter@example.org"]] diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index-orient-split.json b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index-orient-split.json new file mode 100644 index 0000000..885bc91 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index-orient-split.json @@ -0,0 +1 @@ +{"columns":["?name","?mbox"],"data":[["Johnny Lee Outlaw","mailto:jlow@example.com"],["Peter Goodguy","mailto:peter@example.org"]]} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index-orient-table.json b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index-orient-table.json new file mode 100644 index 0000000..c9e7c13 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index-orient-table.json @@ -0,0 +1 @@ +{"schema":{"fields":[{"name":"?name","type":"string"},{"name":"?mbox","type":"string"}],"pandas_version":"1.4.0"},"data":[{"?name":"Johnny Lee Outlaw","?mbox":"mailto:jlow@example.com"},{"?name":"Peter Goodguy","?mbox":"mailto:peter@example.org"}]} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-columns.json b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-columns.json new file mode 100644 index 0000000..840035b --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-columns.json @@ -0,0 +1 @@ +{"?name":{"0":"Johnny Lee Outlaw","1":"Peter Goodguy"},"?mbox":{"0":"mailto:jlow@example.com","1":"mailto:peter@example.org"}} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-index.json 
b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-index.json new file mode 100644 index 0000000..717d3ae --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-index.json @@ -0,0 +1 @@ +{"0":{"?name":"Johnny Lee Outlaw","?mbox":"mailto:jlow@example.com"},"1":{"?name":"Peter Goodguy","?mbox":"mailto:peter@example.org"}} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-records.json b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-records.json new file mode 100644 index 0000000..f053616 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-records.json @@ -0,0 +1 @@ +[{"?name":"Johnny Lee Outlaw","?mbox":"mailto:jlow@example.com"},{"?name":"Peter Goodguy","?mbox":"mailto:peter@example.org"}] diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-split.json b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-split.json new file mode 100644 index 0000000..0401af1 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-split.json @@ -0,0 +1 @@ +{"columns":["?name","?mbox"],"index":[0,1],"data":[["Johnny Lee Outlaw","mailto:jlow@example.com"],["Peter Goodguy","mailto:peter@example.org"]]} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-table.json b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-table.json new file mode 100644 index 0000000..92f005c --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-table.json @@ -0,0 +1 @@ 
+{"schema":{"fields":[{"name":"index","type":"integer"},{"name":"?name","type":"string"},{"name":"?mbox","type":"string"}],"primaryKey":["index"],"pandas_version":"1.4.0"},"data":[{"index":0,"?name":"Johnny Lee Outlaw","?mbox":"mailto:jlow@example.com"},{"index":1,"?name":"Peter Goodguy","?mbox":"mailto:peter@example.org"}]} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-values.json b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-values.json new file mode 100644 index 0000000..e49c3eb --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-values.json @@ -0,0 +1 @@ +[["Johnny Lee Outlaw","mailto:jlow@example.com"],["Peter Goodguy","mailto:peter@example.org"]] diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index-orient-split.json b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index-orient-split.json new file mode 100644 index 0000000..885bc91 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index-orient-split.json @@ -0,0 +1 @@ +{"columns":["?name","?mbox"],"data":[["Johnny Lee Outlaw","mailto:jlow@example.com"],["Peter Goodguy","mailto:peter@example.org"]]} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index-orient-table.json b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index-orient-table.json new file mode 100644 index 0000000..c9e7c13 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index-orient-table.json @@ -0,0 +1 @@ +{"schema":{"fields":[{"name":"?name","type":"string"},{"name":"?mbox","type":"string"}],"pandas_version":"1.4.0"},"data":[{"?name":"Johnny Lee Outlaw","?mbox":"mailto:jlow@example.com"},{"?name":"Peter Goodguy","?mbox":"mailto:peter@example.org"}]} diff --git 
a/tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py b/tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py index 00d20c3..9e5cc24 100644 --- a/tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py +++ b/tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py @@ -37,36 +37,59 @@ def make_data_frame_to_json_table_text_parameters() -> typing.Iterator[ - typing.Tuple[str, bool, bool] + typing.Tuple[str, str, bool, bool] ]: for use_header in [False, True]: for use_index in [False, True]: - for output_mode in ["csv", "html", "md", "tsv"]: - yield (output_mode, use_header, use_index) + for output_mode in ["csv", "html", "json", "md", "tsv"]: + if output_mode == "json": + for json_orient in [ + "columns", + "index", + "records", + "split", + "table", + "values", + ]: + # Handle incompatible parameter pairings for JSON mode. + if use_index is False: + if json_orient not in {"split", "table"}: + continue + + yield (json_orient, output_mode, use_header, use_index) + else: + yield ("columns", output_mode, use_header, use_index) @pytest.mark.parametrize( - "output_mode, use_header, use_index", + "json_orient, output_mode, use_header, use_index", make_data_frame_to_json_table_text_parameters(), ) def test_data_frame_to_table_text_json( + json_orient: str, output_mode: str, use_header: bool, use_index: bool, ) -> None: table_text = case_utils.case_sparql_select.data_frame_to_table_text( DATA_FRAME, + json_orient=json_orient, output_mode=output_mode, use_header=use_header, use_index=use_index, ) - output_filename_template = ".check-w3-output-%s_header-%s_index.%s" + output_filename_template = ".check-w3-output-%s_header-%s_index%s.%s" header_part = "with" if use_header else "without" index_part = "with" if use_index else "without" + if output_mode == "json": + json_orient_part = "-orient-" + json_orient + else: + json_orient_part = "" output_filename = output_filename_template % ( header_part, 
index_part, + json_orient_part, output_mode, ) with (SRCDIR / output_filename).open("w") as out_fh: