Add Python code formatting with Black as Make steps #35

Merged (2 commits, Mar 21, 2022)
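
This pull request applies Black's formatting to the package's Python sources; per the title, the formatter runs as Make steps, though the Makefile change itself is not part of the excerpt below. As a sketch of what the formatting step does, the snippet below feeds the old guess_format signature (adapted from the first diff, with the body shortened to a bare return) through Black's documented Python API; the src string is the only assumption here.

import black

# The old, pre-PR signature of case_utils.guess_format, as a string.
src = '''def guess_format(
        fpath : str,
        fmap : typing.Optional[typing.Dict[str, str]] = None
) -> typing.Optional[str]:
    return None
'''

# format_str applies the same rules as the black command line (default
# line length 88); the output matches the reformatted signature below.
print(black.format_str(src, mode=black.Mode()))
# def guess_format(
#     fpath: str, fmap: typing.Optional[typing.Dict[str, str]] = None
# ) -> typing.Optional[str]:
#     return None
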
case_utils/__init__.py (9 changes: 6 additions & 3 deletions)

@@ -20,10 +20,13 @@

 from . import local_uuid

+
 def guess_format(
-    fpath : str,
-    fmap : typing.Optional[typing.Dict[str, str]] = None
+    fpath: str, fmap: typing.Optional[typing.Dict[str, str]] = None
 ) -> typing.Optional[str]:
-    warnings.warn("The functionality in case_utils.guess_format is now upstream. Please revise your code to use rdflib.util.guess_format. The function arguments remain the same. case_utils.guess_format will be removed in case_utils 0.4.0.", DeprecationWarning)
+    warnings.warn(
+        "The functionality in case_utils.guess_format is now upstream. Please revise your code to use rdflib.util.guess_format. The function arguments remain the same. case_utils.guess_format will be removed in case_utils 0.4.0.",
+        DeprecationWarning,
+    )

     return rdflib.util.guess_format(fpath, fmap)  # type: ignore
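
The new deprecation message points callers at rdflib.util.guess_format, which takes the same arguments. A minimal migration sketch (file names are illustrative; the extension map and the fmap override are rdflib's documented behavior):

import rdflib.util

# Extension-based guessing, same as the deprecated wrapper.
assert rdflib.util.guess_format("evidence.ttl") == "turtle"
assert rdflib.util.guess_format("evidence.rdf") == "xml"

# The optional fmap argument supplies a replacement extension-to-format map.
assert rdflib.util.guess_format("evidence.dat", {"dat": "nt"}) == "nt"
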
case_utils/case_file/__init__.py (192 changes: 91 additions & 101 deletions)

@@ -31,28 +31,33 @@

 NS_RDF = rdflib.RDF
 NS_UCO_CORE = rdflib.Namespace("https://unifiedcyberontology.org/ontology/uco/core#")
-NS_UCO_OBSERVABLE = rdflib.Namespace("https://unifiedcyberontology.org/ontology/uco/observable#")
+NS_UCO_OBSERVABLE = rdflib.Namespace(
+    "https://unifiedcyberontology.org/ontology/uco/observable#"
+)
 NS_UCO_TYPES = rdflib.Namespace("https://unifiedcyberontology.org/ontology/uco/types#")
-NS_UCO_VOCABULARY = rdflib.Namespace("https://unifiedcyberontology.org/ontology/uco/vocabulary#")
+NS_UCO_VOCABULARY = rdflib.Namespace(
+    "https://unifiedcyberontology.org/ontology/uco/vocabulary#"
+)
 NS_XSD = rdflib.XSD

 # Shortcut syntax for defining an immutable named tuple is noted here:
 # https://docs.python.org/3/library/typing.html#typing.NamedTuple
 # via the "See also" box here: https://docs.python.org/3/library/collections.html#collections.namedtuple
 class HashDict(typing.NamedTuple):
-    filesize : int
-    md5 : str
-    sha1 : str
-    sha256 : str
-    sha512 : str
+    filesize: int
+    md5: str
+    sha1: str
+    sha256: str
+    sha512: str
+

 def create_file_node(
-    graph : rdflib.Graph,
-    filepath : str,
-    node_iri : typing.Optional[str] = None,
-    node_prefix : str = DEFAULT_PREFIX,
-    disable_hashes : bool = False,
-    disable_mtime : bool = False
+    graph: rdflib.Graph,
+    filepath: str,
+    node_iri: typing.Optional[str] = None,
+    node_prefix: str = DEFAULT_PREFIX,
+    disable_hashes: bool = False,
+    disable_mtime: bool = False,
 ) -> rdflib.URIRef:
     r"""
     This function characterizes the file at filepath.
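
An aside on the typing.NamedTuple shortcut cited in the comment above: instances are immutable, field-typed, and enumerable by field name, which is what the hash-recording loop later in this file relies on via _fields. A self-contained sketch; HashDictDemo is hypothetical and not part of this module:

import typing

class HashDictDemo(typing.NamedTuple):
    filesize: int
    md5: str

d = HashDictDemo(4, "098f6bcd4621d373cade4e832627b4f6")  # MD5 of b"test"
print(d.filesize)  # 4, readable as an attribute
print(d._fields)   # ('filesize', 'md5'), the hook create_file_node uses below
# Assigning d.md5 = "0" * 32 would raise AttributeError: fields are read-only.
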
@@ -82,65 +87,49 @@ def create_file_node(
         node_slug = "file-" + case_utils.local_uuid.local_uuid()
         node_iri = rdflib.Namespace(node_prefix)[node_slug]
     n_file = rdflib.URIRef(node_iri)
-    graph.add((
-        n_file,
-        NS_RDF.type,
-        NS_UCO_OBSERVABLE.File
-    ))
+    graph.add((n_file, NS_RDF.type, NS_UCO_OBSERVABLE.File))

     basename = os.path.basename(filepath)
     literal_basename = rdflib.Literal(basename)

     file_stat = os.stat(filepath)
     n_file_facet = rdflib.BNode()
-    graph.add((
-        n_file_facet,
-        NS_RDF.type,
-        NS_UCO_OBSERVABLE.FileFacet,
-    ))
-    graph.add((
-        n_file_facet,
-        NS_UCO_OBSERVABLE.fileName,
-        literal_basename
-    ))
-    graph.add((
-        n_file_facet,
-        NS_UCO_OBSERVABLE.sizeInBytes,
-        rdflib.Literal(int(file_stat.st_size))
-    ))
-    graph.add((
-        n_file,
-        NS_UCO_CORE.hasFacet,
-        n_file_facet
-    ))
+    graph.add(
+        (
+            n_file_facet,
+            NS_RDF.type,
+            NS_UCO_OBSERVABLE.FileFacet,
+        )
+    )
+    graph.add((n_file_facet, NS_UCO_OBSERVABLE.fileName, literal_basename))
+    graph.add(
+        (
+            n_file_facet,
+            NS_UCO_OBSERVABLE.sizeInBytes,
+            rdflib.Literal(int(file_stat.st_size)),
+        )
+    )
+    graph.add((n_file, NS_UCO_CORE.hasFacet, n_file_facet))

     if not disable_mtime:
-        mtime_datetime = datetime.datetime.fromtimestamp(file_stat.st_mtime, tz=datetime.timezone.utc)
+        mtime_datetime = datetime.datetime.fromtimestamp(
+            file_stat.st_mtime, tz=datetime.timezone.utc
+        )
         str_mtime = mtime_datetime.isoformat()
         literal_mtime = rdflib.Literal(str_mtime, datatype=NS_XSD.dateTime)
-        graph.add((
-            n_file_facet,
-            NS_UCO_OBSERVABLE.modifiedTime,
-            literal_mtime
-        ))
+        graph.add((n_file_facet, NS_UCO_OBSERVABLE.modifiedTime, literal_mtime))

     if not disable_hashes:
         n_contentdata_facet = rdflib.BNode()
-        graph.add((
-            n_file,
-            NS_UCO_CORE.hasFacet,
-            n_contentdata_facet
-        ))
-        graph.add((
-            n_contentdata_facet,
-            NS_RDF.type,
-            NS_UCO_OBSERVABLE.ContentDataFacet
-        ))
+        graph.add((n_file, NS_UCO_CORE.hasFacet, n_contentdata_facet))
+        graph.add(
+            (n_contentdata_facet, NS_RDF.type, NS_UCO_OBSERVABLE.ContentDataFacet)
+        )

         # Compute hashes until they are re-computed and match once. (This is a lesson learned from working with a NAS that had a subtly faulty network cable.)

-        successful_hashdict : typing.Optional[HashDict] = None
-        last_hashdict : typing.Optional[HashDict] = None
+        successful_hashdict: typing.Optional[HashDict] = None
+        last_hashdict: typing.Optional[HashDict] = None
         for attempt_no in [0, 1, 2, 3]:
             # Hash file's contents.
             # This hashing logic was partially copied from DFXML's walk_to_dfxml.py.
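
The comment above ("Compute hashes until they are re-computed and match once") describes a hash-until-stable loop. A standalone sketch of the same pattern, reduced to SHA-256 only (the function name, chunk size, and attempt count are illustrative):

import hashlib
import typing

def hash_file_until_stable(filepath: str, max_attempts: int = 4) -> typing.Optional[str]:
    last_digest: typing.Optional[str] = None
    for _attempt in range(max_attempts):
        sha256obj = hashlib.sha256()
        with open(filepath, "rb") as in_fh:
            # Stream in chunks so large files need not fit in memory.
            for chunk in iter(lambda: in_fh.read(2**22), b""):
                sha256obj.update(chunk)
        current_digest = sha256obj.hexdigest()
        if current_digest == last_digest:
            # Two consecutive reads agreed; accept the digest.
            return current_digest
        last_digest = current_digest
    # The file never hashed identically twice in a row; treat the read as unreliable.
    return None

create_file_node applies the same idea across MD5, SHA-1, SHA-256, and SHA-512 at once by comparing whole HashDict instances, as the next hunk shows.
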
@@ -169,11 +158,11 @@ def create_file_node(
             if not stashed_error is None:
                 raise stashed_error
             current_hashdict = HashDict(
-              byte_tally,
-              md5obj.hexdigest(),
-              sha1obj.hexdigest(),
-              sha256obj.hexdigest(),
-              sha512obj.hexdigest()
+                byte_tally,
+                md5obj.hexdigest(),
+                sha1obj.hexdigest(),
+                sha256obj.hexdigest(),
+                sha512obj.hexdigest(),
             )
             if last_hashdict == current_hashdict:
                 successful_hashdict = current_hashdict
@@ -187,54 +176,56 @@
         if successful_hashdict.filesize != file_stat.st_size:
             # TODO - Discuss with AC whether this should be something stronger, like an assertion error.
             warnings.warn(
-                "Inode file size and hashed file sizes disagree: %d vs. %d." % (
-                    file_stat.st_size,
-                    successful_hashdict.filesize
-                )
+                "Inode file size and hashed file sizes disagree: %d vs. %d."
+                % (file_stat.st_size, successful_hashdict.filesize)
             )
         # TODO - Discuss whether this property should be recorded even if hashes are not attempted.
-        graph.add((
-            n_contentdata_facet,
-            NS_UCO_OBSERVABLE.sizeInBytes,
-            rdflib.Literal(successful_hashdict.filesize)
-        ))
+        graph.add(
+            (
+                n_contentdata_facet,
+                NS_UCO_OBSERVABLE.sizeInBytes,
+                rdflib.Literal(successful_hashdict.filesize),
+            )
+        )

         # Add confirmed hashes into graph.
         for key in successful_hashdict._fields:
             if not key in ("md5", "sha1", "sha256", "sha512"):
                 continue
             n_hash = rdflib.BNode()
-            graph.add((
-                n_contentdata_facet,
-                NS_UCO_OBSERVABLE.hash,
-                n_hash
-            ))
-            graph.add((
-                n_hash,
-                NS_RDF.type,
-                NS_UCO_TYPES.Hash
-            ))
-            graph.add((
-                n_hash,
-                NS_UCO_TYPES.hashMethod,
-                rdflib.Literal(key.upper(), datatype=NS_UCO_VOCABULARY.HashNameVocab)
-            ))
+            graph.add((n_contentdata_facet, NS_UCO_OBSERVABLE.hash, n_hash))
+            graph.add((n_hash, NS_RDF.type, NS_UCO_TYPES.Hash))
+            graph.add(
+                (
+                    n_hash,
+                    NS_UCO_TYPES.hashMethod,
+                    rdflib.Literal(
+                        key.upper(), datatype=NS_UCO_VOCABULARY.HashNameVocab
+                    ),
+                )
+            )
             hash_value = getattr(successful_hashdict, key)
-            graph.add((
-                n_hash,
-                NS_UCO_TYPES.hashValue,
-                rdflib.Literal(hash_value.upper(), datatype=NS_XSD.hexBinary)
-            ))
+            graph.add(
+                (
+                    n_hash,
+                    NS_UCO_TYPES.hashValue,
+                    rdflib.Literal(hash_value.upper(), datatype=NS_XSD.hexBinary),
+                )
+            )

     return n_file


 def main() -> None:
     import argparse

     parser = argparse.ArgumentParser()
     parser.add_argument("--base-prefix", default=DEFAULT_PREFIX)
     parser.add_argument("--disable-hashes", action="store_true")
     parser.add_argument("--disable-mtime", action="store_true")
-    parser.add_argument("--output-format", help="Override extension-based format guesser.")
+    parser.add_argument(
+        "--output-format", help="Override extension-based format guesser."
+    )
     parser.add_argument("out_graph")
     parser.add_argument("in_file")
     args = parser.parse_args()
@@ -257,24 +248,23 @@ def main() -> None:
     else:
         output_format = args.output_format

-    serialize_kwargs : typing.Dict[str, typing.Any] = {
-        "format": output_format
-    }
+    serialize_kwargs: typing.Dict[str, typing.Any] = {"format": output_format}
     if output_format == "json-ld":
-        context_dictionary = {k:v for (k,v) in graph.namespace_manager.namespaces()}
+        context_dictionary = {k: v for (k, v) in graph.namespace_manager.namespaces()}
         serialize_kwargs["context"] = context_dictionary

     node_iri = NS_BASE["file-" + case_utils.local_uuid.local_uuid()]
     n_file = create_file_node(
-      graph,
-      args.in_file,
-      node_iri=node_iri,
-      node_prefix=args.base_prefix,
-      disable_hashes=args.disable_hashes,
-      disable_mtime=args.disable_mtime
+        graph,
+        args.in_file,
+        node_iri=node_iri,
+        node_prefix=args.base_prefix,
+        disable_hashes=args.disable_hashes,
+        disable_mtime=args.disable_mtime,
     )

     graph.serialize(args.out_graph, **serialize_kwargs)


 if __name__ == "__main__":
     main()
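
A usage sketch for the reformatted function above (the prefix and file name are illustrative, and sample.txt must exist; the facet structure named in the comment reflects the graph.add calls shown in this diff):

import rdflib

from case_utils.case_file import create_file_node

graph = rdflib.Graph()
n_file = create_file_node(graph, "sample.txt", node_prefix="http://example.org/kb/")
# n_file is the URIRef of the new File node; the graph now also holds its
# FileFacet (name, size, mtime) and ContentDataFacet (size, hash nodes).
print(graph.serialize(format="turtle"))
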
case_utils/case_sparql_construct/__init__.py (52 changes: 30 additions & 22 deletions)

@@ -31,36 +31,39 @@

 _logger = logging.getLogger(os.path.basename(__file__))

+
 def main() -> None:
     parser = argparse.ArgumentParser()

     # Configure debug logging before running parse_args, because there could be an error raised before the construction of the argument parser.
-    logging.basicConfig(level=logging.DEBUG if ("--debug" in sys.argv or "-d" in sys.argv) else logging.INFO)
+    logging.basicConfig(
+        level=logging.DEBUG
+        if ("--debug" in sys.argv or "-d" in sys.argv)
+        else logging.INFO
+    )

     built_version_choices_list = ["none", "case-" + CURRENT_CASE_VERSION]

+    parser.add_argument("-d", "--debug", action="store_true")
     parser.add_argument(
-        "-d",
-        "--debug",
-        action="store_true"
+        "--built-version",
+        choices=tuple(built_version_choices_list),
+        default="case-" + CURRENT_CASE_VERSION,
+        help="Ontology version to use to supplement query, such as for subclass querying. Does not require networking to use. Default is most recent CASE release.",
     )
     parser.add_argument(
-        "--built-version",
-        choices=tuple(built_version_choices_list),
-        default="case-"+CURRENT_CASE_VERSION,
-        help="Ontology version to use to supplement query, such as for subclass querying. Does not require networking to use. Default is most recent CASE release."
+        "--disallow-empty-results",
+        action="store_true",
+        help="Raise error if no results are returned for query.",
     )
     parser.add_argument(
-        "--disallow-empty-results",
-        action="store_true",
-        help="Raise error if no results are returned for query."
+        "--output-format", help="Override extension-based format guesser."
     )
+    parser.add_argument("out_graph")
     parser.add_argument(
-        "--output-format",
-        help="Override extension-based format guesser."
+        "in_sparql",
+        help="File containing a SPARQL CONSTRUCT query. Note that prefixes not mapped with a PREFIX statement will be mapped according to their first occurrence among input graphs.",
     )
-    parser.add_argument("out_graph")
-    parser.add_argument("in_sparql", help="File containing a SPARQL CONSTRUCT query. Note that prefixes not mapped with a PREFIX statement will be mapped according to their first occurrence among input graphs.")
     parser.add_argument("in_graph", nargs="+")
     args = parser.parse_args()
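
The comment above explains the ordering: logging is configured before parse_args so that an error raised before or during parser construction still has a configured handler. The pattern in isolation, as a runnable sketch:

import argparse
import logging
import sys

# Peek at sys.argv directly; the parser does not exist yet.
logging.basicConfig(
    level=logging.DEBUG if ("--debug" in sys.argv or "-d" in sys.argv) else logging.INFO
)
_logger = logging.getLogger(__name__)

parser = argparse.ArgumentParser()
parser.add_argument("-d", "--debug", action="store_true")
args = parser.parse_args()
_logger.debug("Debug logging was active before argument parsing began.")
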

@@ -72,7 +75,7 @@ def main() -> None:
     out_graph = rdflib.Graph()

     # Inherit prefixes defined in input context dictionary.
-    nsdict = {k:v for (k,v) in in_graph.namespace_manager.namespaces()}
+    nsdict = {k: v for (k, v) in in_graph.namespace_manager.namespaces()}
     for prefix in sorted(nsdict.keys()):
         out_graph.bind(prefix, nsdict[prefix])
@@ -83,9 +86,13 @@
     assert not construct_query_text is None

     if "subClassOf" in construct_query_text:
-        case_utils.ontology.load_subclass_hierarchy(in_graph, built_version=args.built_version)
+        case_utils.ontology.load_subclass_hierarchy(
+            in_graph, built_version=args.built_version
+        )

-    construct_query_object = rdflib.plugins.sparql.prepareQuery(construct_query_text, initNs=nsdict)
+    construct_query_object = rdflib.plugins.sparql.prepareQuery(
+        construct_query_text, initNs=nsdict
+    )

     # https://rdfextras.readthedocs.io/en/latest/working_with.html
     construct_query_result = in_graph.query(construct_query_object)
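
For orientation, a sketch of the prepareQuery-then-query flow this tool wraps; the CONSTRUCT query text is hypothetical, while prepareQuery and Graph.query are rdflib's documented API:

import rdflib
import rdflib.plugins.sparql

construct_query_text = """\
PREFIX uco-core: <https://unifiedcyberontology.org/ontology/uco/core#>
CONSTRUCT { ?node a uco-core:UcoObject . }
WHERE { ?node uco-core:hasFacet ?facet . }
"""

construct_query_object = rdflib.plugins.sparql.prepareQuery(construct_query_text)

in_graph = rdflib.Graph()  # would be loaded from the in_graph arguments
out_graph = rdflib.Graph()
# Iterating a CONSTRUCT result yields the constructed triples.
for triple in in_graph.query(construct_query_object):
    out_graph.add(triple)
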
@@ -102,14 +109,15 @@
     else:
         output_format = args.output_format

-    serialize_kwargs : typing.Dict[str, typing.Any] = {
-        "format": output_format
-    }
+    serialize_kwargs: typing.Dict[str, typing.Any] = {"format": output_format}
     if output_format == "json-ld":
-        context_dictionary = {k:v for (k,v) in out_graph.namespace_manager.namespaces()}
+        context_dictionary = {
+            k: v for (k, v) in out_graph.namespace_manager.namespaces()
+        }
         serialize_kwargs["context"] = context_dictionary

     out_graph.serialize(args.out_graph, **serialize_kwargs)


 if __name__ == "__main__":
     main()
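
A sketch of the JSON-LD context inheritance shown above: every prefix bound on the output graph is handed to the serializer as its @context (the bound prefix and output file name are illustrative; json-ld serialization is built into rdflib 6+):

import typing

import rdflib

out_graph = rdflib.Graph()
out_graph.bind("uco-core", "https://unifiedcyberontology.org/ontology/uco/core#")

serialize_kwargs: typing.Dict[str, typing.Any] = {"format": "json-ld"}
# Reuse the graph's namespace bindings as JSON-LD @context entries.
context_dictionary = {k: v for (k, v) in out_graph.namespace_manager.namespaces()}
serialize_kwargs["context"] = context_dictionary

out_graph.serialize("out.jsonld", **serialize_kwargs)
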