From 4436f495a699ce49a763b3af9707d2c563e916e2 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Mon, 10 May 2021 11:08:19 -0400 Subject: [PATCH] Add tests for hexBinary References: * [AC-139] FAQ question - Does CASE require hex data be entered uppercase or lowercase? Signed-off-by: Alex Nelson --- tests/hexbinary/test_hexbinary.py | 367 ++++++++++++++++++++++++++++++ 1 file changed, 367 insertions(+) create mode 100644 tests/hexbinary/test_hexbinary.py diff --git a/tests/hexbinary/test_hexbinary.py b/tests/hexbinary/test_hexbinary.py new file mode 100644 index 0000000..04784bc --- /dev/null +++ b/tests/hexbinary/test_hexbinary.py @@ -0,0 +1,367 @@ +#!/usr/bin/env python3 + +# This software was developed at the National Institute of Standards +# and Technology by employees of the Federal Government in the course +# of their official duties. Pursuant to title 17 Section 105 of the +# United States Code this software is not subject to copyright +# protection and is in the public domain. NIST assumes no +# responsibility whatsoever for its use by other parties, and makes +# no guarantees, expressed or implied, about its quality, +# reliability, or any other characteristic. +# +# We would appreciate acknowledgement if the software is used. + +""" +This test suite tests some assumptions that might be made about hexBinary value comparison in Python's rdflib and its SPARQL engine. + +This script is expected to have pytest exit in a success state, reporting some tests passing, and some tests XFailing (i.e. being expected to fail). + +The overall finding is: in rdflib and rdflib's SPARQL engine, xsd:hexBinaryCanonical receives no support beyond what is given to arbitrary, unrecognized string datatypes. This finding, and more specific ones, are affirmed by the tests: + +* Some of the tests serve as syntax reminders for SPARQL and pytest. 
+ - test_sparql_syntax_bind_boolean + - test_pytest_syntax_xfail + - test_sparql_syntax_integer_coercion + - test_sparql_syntax_integer_cast +* SPARQL Literal datatype-casting can coerce known types, but will not cast strings of unknown datatypes. + - test_sparql_syntax_integer_cast + - test_sparql_cast_custom_type +* rdflib WILL match xsd:hexBinary data as casing-insensitive. So, Literals with values "ab" and "AB" match if both have the datatype xsd:hexBinary. + - test_rdflib_literal_hexbinary +* rdflib WILL NOT match xsd:hexBinaryCanonical data with xsd:hexBinary data, either as Literal objects or with a call to .toPython(). + - test_rdflib_literal_hexbinarycanonical + - test_rdflib_literal_topython_hexbinarycanonical +* The rdflib SPARQL engine WILL match xsd:hexBinary data as casing-insensitive. So, "ab" and "AB" match if both have the datatype xsd:hexBinary. + - test_sparql_compare_hexbinary_matchcase + - test_sparql_compare_hexbinary_mixcase + - test_graph_repeat + - test_graph_all_hexbinary_literals +* The rdflib SPARQL engine WILL match xsd:hexBinaryCanonical data with xsd:hexBinaryCanonical data, when casing matches. + - test_sparql_compare_hexbinarycanonical_matchcase +* The rdflib SPARQL engine WILL NOT match xsd:hexBinaryCanonical data with xsd:hexBinaryCanonical data, when casing does not match. + - test_sparql_compare_hexbinarycanonical_mixcase +* The rdflib SPARQL engine WILL NOT compare xsd:hexBinaryCanonical data with xsd:hexBinary data. + - test_sparql_compare_hb_hbc_mixcase + - test_sparql_compare_hb_hbc_mixcase_cast + - test_graph_hexbinarycanonical +""" + +import logging +import os + +import pytest +import rdflib.plugins.sparql + +_logger = logging.getLogger(os.path.basename(__file__)) + +# Variables used in several tests. 
l_hb_lowercase = rdflib.Literal("ab", datatype=rdflib.XSD.hexBinary)
l_hb_uppercase = rdflib.Literal("AB", datatype=rdflib.XSD.hexBinary)
l_hbc_uppercase = rdflib.Literal("AB", datatype=rdflib.XSD.hexBinaryCanonical)
n_canonical1 = rdflib.URIRef("urn:example:canonical1")
n_lowercase1 = rdflib.URIRef("urn:example:lowercase1")
n_lowercase2 = rdflib.URIRef("urn:example:lowercase2")
n_uppercase1 = rdflib.URIRef("urn:example:uppercase1")
p_predicate = rdflib.URIRef("urn:example:predicate1")


def _query_bind_boolean(query):
    """
    Run a SELECT query that binds a single variable against an empty graph,
    and return the bound value converted with .toPython().

    Returns None when the query yields no rows, or when the variable is left
    unbound in the row (so callers fail on their own assertion rather than on
    an incidental AttributeError raised by calling .toPython() on None).
    """
    confirmed = None
    for (l_value,) in rdflib.Graph().query(query):
        confirmed = None if l_value is None else l_value.toPython()
    return confirmed


def _graph_with_values(*node_values):
    """
    Build a graph holding one (node, p_predicate, literal) triple for each
    (node, literal) pair given.
    """
    graph = rdflib.Graph()
    for n_node, l_value in node_values:
        graph.add((n_node, p_predicate, l_value))
    return graph


def test_sparql_syntax_bind_boolean():
    """
    This test serves as a syntax reminder for binding boolean values.
    """
    assert _query_bind_boolean("""\
SELECT ?lValue
WHERE {
  BIND( 1 = 1 AS ?lValue )
}
""")


@pytest.mark.xfail(reason="hard-coded failure")
def test_pytest_syntax_xfail():
    """
    This test serves as a syntax reminder for the XFail decorator.
    """
    assert _query_bind_boolean("""\
SELECT ?lValue
WHERE {
  BIND( 1 = 2 AS ?lValue )
}
""")


def test_sparql_syntax_integer_coercion():
    """
    This test serves as a syntax reminder for type coercions.
    """
    assert _query_bind_boolean("""\
SELECT ?lValue
WHERE {
  BIND( 1 = "1"^^xsd:integer AS ?lValue )
}
""")


def test_sparql_syntax_integer_cast():
    """
    This test serves as a syntax reminder for the casting form of type coercions.
    """
    assert _query_bind_boolean("""\
SELECT ?lValue
WHERE {
  BIND( 1 = xsd:integer("1") AS ?lValue )
}
""")


@pytest.mark.xfail(reason="SPARQL casting will not cast strings of unknown datatypes")
def test_sparql_cast_custom_type():
    """
    This test checks for nonexistent literal-datatype assignments.
    """
    assert _query_bind_boolean("""\
SELECT ?lValue
WHERE {
  BIND( 1 = xsd:integer("1"^^xsd:hexBinaryTypoXXXX) AS ?lValue )
}
""")


def test_sparql_compare_hexbinary_mixcase():
    """
    This test shows xsd:hexBinary values that differ only in casing compare as equal in SPARQL.
    """
    assert _query_bind_boolean("""\
SELECT ?lValue
WHERE {
  BIND( "ab"^^xsd:hexBinary = "AB"^^xsd:hexBinary AS ?lValue )
}
""")


def test_sparql_compare_hexbinary_matchcase():
    """
    This test shows identically-cased xsd:hexBinary values compare as equal in SPARQL.
    """
    assert _query_bind_boolean("""\
SELECT ?lValue
WHERE {
  BIND( "AB"^^xsd:hexBinary = "AB"^^xsd:hexBinary AS ?lValue )
}
""")


def test_sparql_compare_hexbinarycanonical_matchcase():
    """
    This test shows identically-cased xsd:hexBinaryCanonical values compare as equal in SPARQL.
    """
    assert _query_bind_boolean("""\
SELECT ?lValue
WHERE {
  BIND( "AB"^^xsd:hexBinaryCanonical = "AB"^^xsd:hexBinaryCanonical AS ?lValue )
}
""")


@pytest.mark.xfail(reason="hexBinaryCanonical comparison is not casing-insensitive")
def test_sparql_compare_hexbinarycanonical_mixcase():
    """
    This test shows hexBinaryCanonical does not induce a casing-insensitive comparison.
    """
    assert _query_bind_boolean("""\
SELECT ?lValue
WHERE {
  BIND( "ab"^^xsd:hexBinaryCanonical = "AB"^^xsd:hexBinaryCanonical AS ?lValue )
}
""")


@pytest.mark.xfail(reason="hexBinaryCanonical is not compared with hexBinary")
def test_sparql_compare_hb_hbc_mixcase():
    """
    This test confirms that literal-comparison takes into account datatype when one type is unknown.

    Note that both operands use the same lexical form ("AB"); only the datatypes differ.
    """
    assert _query_bind_boolean("""\
SELECT ?lValue
WHERE {
  BIND( "AB"^^xsd:hexBinary = "AB"^^xsd:hexBinaryCanonical AS ?lValue )
}
""")


@pytest.mark.xfail(reason="hexBinaryCanonical is not cast to hexBinary")
def test_sparql_compare_hb_hbc_mixcase_cast():
    """
    This test is a bit redundant with test_sparql_cast_custom_type, but is here as an explicit demonstration of failure to cast a hexBinary value.
    """
    assert _query_bind_boolean("""\
SELECT ?lValue
WHERE {
  BIND( "ab"^^xsd:hexBinary = xsd:hexBinary("AB"^^xsd:hexBinaryCanonical) AS ?lValue )
}
""")


def test_rdflib_literal_hexbinary():
    """
    This test shows xsd:hexBinary Literals match regardless of casing, both as Literal objects and after .toPython().
    """
    _logger.debug("l_hb_lowercase = %r.", l_hb_lowercase)
    _logger.debug("l_hb_uppercase = %r.", l_hb_uppercase)
    _logger.debug("l_hb_lowercase.toPython() = %r.", l_hb_lowercase.toPython())
    _logger.debug("l_hb_uppercase.toPython() = %r.", l_hb_uppercase.toPython())

    assert l_hb_lowercase == l_hb_lowercase
    assert l_hb_lowercase.toPython() == l_hb_lowercase.toPython()

    assert l_hb_lowercase == l_hb_uppercase
    assert l_hb_lowercase.toPython() == l_hb_uppercase.toPython()


@pytest.mark.xfail(reason="hexBinaryCanonical Literal does not match hexBinary Literal")
def test_rdflib_literal_hexbinarycanonical():
    """
    This test compares an xsd:hexBinary Literal with an xsd:hexBinaryCanonical Literal of the same lexical form.
    """
    _logger.debug("l_hb_uppercase = %r.", l_hb_uppercase)
    _logger.debug("l_hbc_uppercase = %r.", l_hbc_uppercase)

    assert l_hb_uppercase == l_hbc_uppercase


@pytest.mark.xfail(reason="hexBinaryCanonical .toPython() does not match hexBinary .toPython()")
def test_rdflib_literal_topython_hexbinarycanonical():
    """
    This test compares the .toPython() values of an xsd:hexBinary Literal and an xsd:hexBinaryCanonical Literal of the same lexical form.
    """
    # Log the two values that are actually compared below.
    _logger.debug("l_hb_uppercase.toPython() = %r.", l_hb_uppercase.toPython())
    _logger.debug("l_hbc_uppercase.toPython() = %r.", l_hbc_uppercase.toPython())

    assert l_hb_uppercase.toPython() == l_hbc_uppercase.toPython()


def _query_all_value_matches(graph):
    """
    Return set of all node names (as strings) that have a matching value, where
    "matching" is determined by the SPARQL engine's type and data coercions.
    """
    results = graph.query("""\
SELECT ?nNode1 ?nNode2
WHERE {
  ?nNode1 ?p ?lValue .
  ?nNode2 ?p ?lValue .
  FILTER ( ?nNode1 != ?nNode2 )
}""")
    # Each row is a (?nNode1, ?nNode2) pair; collect both members.
    return {n_node.toPython() for row in results for n_node in row}


def test_graph_repeat():
    """
    Two nodes are given the same literal value, and are found to match on literal values.
    """
    graph = _graph_with_values(
        (n_lowercase1, l_hb_lowercase),
        (n_lowercase2, l_hb_lowercase),
    )
    expected = {
        "urn:example:lowercase1",
        "urn:example:lowercase2",
    }
    computed = _query_all_value_matches(graph)
    assert computed == expected


def test_graph_all_hexbinary_literals():
    """
    Two nodes with the same literal value, and another node with the uppercase of the literal hexBinary value, are found to match on literal values.
    """
    graph = _graph_with_values(
        (n_lowercase1, l_hb_lowercase),
        (n_lowercase2, l_hb_lowercase),
        (n_uppercase1, l_hb_uppercase),
    )
    expected = {
        "urn:example:lowercase1",
        "urn:example:lowercase2",
        "urn:example:uppercase1",
    }
    computed = _query_all_value_matches(graph)
    assert computed == expected


@pytest.mark.xfail(reason="hexBinaryCanonical node does not match hexBinary nodes")
def test_graph_hexbinarycanonical():
    """
    A node with an xsd:hexBinaryCanonical value is added alongside nodes with xsd:hexBinary values, and all four are (incorrectly) expected to match on literal values.
    """
    graph = _graph_with_values(
        (n_lowercase1, l_hb_lowercase),
        (n_lowercase2, l_hb_lowercase),
        (n_uppercase1, l_hb_uppercase),
        (n_canonical1, l_hbc_uppercase),
    )
    expected = {
        "urn:example:canonical1",
        "urn:example:lowercase1",
        "urn:example:lowercase2",
        "urn:example:uppercase1",
    }
    computed = _query_all_value_matches(graph)
    assert computed == expected