From 1a10c21f1e694b32591338e31693505c99268401 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Wed, 18 Nov 2015 21:43:36 -0500 Subject: [PATCH 01/19] fix: update provenance creation to use released prov library --- nipype/external/provcopy.py | 1952 ------------------------- nipype/info.py | 4 +- nipype/pipeline/utils.py | 21 +- nipype/utils/provenance.py | 77 +- nipype/utils/tests/test_provenance.py | 18 + requirements.txt | 1 + setup.py | 1 + 7 files changed, 62 insertions(+), 2012 deletions(-) delete mode 100644 nipype/external/provcopy.py create mode 100644 nipype/utils/tests/test_provenance.py diff --git a/nipype/external/provcopy.py b/nipype/external/provcopy.py deleted file mode 100644 index 42e250f967..0000000000 --- a/nipype/external/provcopy.py +++ /dev/null @@ -1,1952 +0,0 @@ -'''Python implemetation of the W3C Provenance Data Model (PROV-DM) - -Support for PROV-JSON import/export - -References: - -PROV-DM: http://www.w3.org/TR/prov-dm/ - -@author: Trung Dong Huynh -@copyright: University of Southampton 2013 -''' -from __future__ import absolute_import -from builtins import zip -from builtins import object - -import logging -import datetime -import json -import dateutil.parser -import collections -from collections import defaultdict - -try: - from rdflib.term import URIRef, BNode - from rdflib.term import Literal as RDFLiteral - from rdflib.graph import ConjunctiveGraph, Graph - from rdflib.namespace import RDF, RDFS -except ImportError: - pass - -from copy import deepcopy, copy - -try: - from collections import OrderedDict -except ImportError: - from ordereddict import OrderedDict -logger = logging.getLogger(__name__) - -from ..external.six import string_types - -# # PROV record constants - PROV-DM LC -# C1. Entities/Activities -PROV_REC_ENTITY = 1 -PROV_REC_ACTIVITY = 2 -PROV_REC_GENERATION = 11 -PROV_REC_USAGE = 12 -PROV_REC_COMMUNICATION = 13 -PROV_REC_START = 14 -PROV_REC_END = 15 -PROV_REC_INVALIDATION = 16 - -# C2. Derivations -PROV_REC_DERIVATION = 21 - -# C3. Agents/Responsibility -PROV_REC_AGENT = 3 -PROV_REC_ATTRIBUTION = 31 -PROV_REC_ASSOCIATION = 32 -PROV_REC_DELEGATION = 33 -PROV_REC_INFLUENCE = 34 -# C4. Bundles -PROV_REC_BUNDLE = 4 # This is the lowest value, so bundle(s) in JSON will be decoded first -# C5. Alternate -PROV_REC_ALTERNATE = 51 -PROV_REC_SPECIALIZATION = 52 -PROV_REC_MENTION = 53 -# C6. Collections -PROV_REC_MEMBERSHIP = 61 - -PROV_RECORD_TYPES = ( - (PROV_REC_ENTITY, u'Entity'), - (PROV_REC_ACTIVITY, u'Activity'), - (PROV_REC_GENERATION, u'Generation'), - (PROV_REC_USAGE, u'Usage'), - (PROV_REC_COMMUNICATION, u'Communication'), - (PROV_REC_START, u'Start'), - (PROV_REC_END, u'End'), - (PROV_REC_INVALIDATION, u'Invalidation'), - (PROV_REC_DERIVATION, u'Derivation'), - (PROV_REC_AGENT, u'Agent'), - (PROV_REC_ATTRIBUTION, u'Attribution'), - (PROV_REC_ASSOCIATION, u'Association'), - (PROV_REC_DELEGATION, u'Delegation'), - (PROV_REC_INFLUENCE, u'Influence'), - (PROV_REC_BUNDLE, u'Bundle'), - (PROV_REC_ALTERNATE, u'Alternate'), - (PROV_REC_SPECIALIZATION, u'Specialization'), - (PROV_REC_MENTION, u'Mention'), - (PROV_REC_MEMBERSHIP, u'Membership'), -) - -PROV_N_MAP = { - PROV_REC_ENTITY: u'entity', - PROV_REC_ACTIVITY: u'activity', - PROV_REC_GENERATION: u'wasGeneratedBy', - PROV_REC_USAGE: u'used', - PROV_REC_COMMUNICATION: u'wasInformedBy', - PROV_REC_START: u'wasStartedBy', - PROV_REC_END: u'wasEndedBy', - PROV_REC_INVALIDATION: u'wasInvalidatedBy', - PROV_REC_DERIVATION: u'wasDerivedFrom', - PROV_REC_AGENT: u'agent', - PROV_REC_ATTRIBUTION: u'wasAttributedTo', - PROV_REC_ASSOCIATION: u'wasAssociatedWith', - PROV_REC_DELEGATION: u'actedOnBehalfOf', - PROV_REC_INFLUENCE: u'wasInfluencedBy', - PROV_REC_ALTERNATE: u'alternateOf', - PROV_REC_SPECIALIZATION: u'specializationOf', - PROV_REC_MENTION: u'mentionOf', - PROV_REC_MEMBERSHIP: u'hadMember', - PROV_REC_BUNDLE: u'bundle', -} - -# # Identifiers for PROV's attributes -PROV_ATTR_ENTITY = 1 -PROV_ATTR_ACTIVITY = 2 -PROV_ATTR_TRIGGER = 3 -PROV_ATTR_INFORMED = 4 -PROV_ATTR_INFORMANT = 5 -PROV_ATTR_STARTER = 6 -PROV_ATTR_ENDER = 7 -PROV_ATTR_AGENT = 8 -PROV_ATTR_PLAN = 9 -PROV_ATTR_DELEGATE = 10 -PROV_ATTR_RESPONSIBLE = 11 -PROV_ATTR_GENERATED_ENTITY = 12 -PROV_ATTR_USED_ENTITY = 13 -PROV_ATTR_GENERATION = 14 -PROV_ATTR_USAGE = 15 -PROV_ATTR_SPECIFIC_ENTITY = 16 -PROV_ATTR_GENERAL_ENTITY = 17 -PROV_ATTR_ALTERNATE1 = 18 -PROV_ATTR_ALTERNATE2 = 19 -PROV_ATTR_BUNDLE = 20 -PROV_ATTR_INFLUENCEE = 21 -PROV_ATTR_INFLUENCER = 22 -PROV_ATTR_COLLECTION = 23 - -# Literal properties -PROV_ATTR_TIME = 100 -PROV_ATTR_STARTTIME = 101 -PROV_ATTR_ENDTIME = 102 - -PROV_RECORD_ATTRIBUTES = ( - # Relations properties - (PROV_ATTR_ENTITY, u'prov:entity'), - (PROV_ATTR_ACTIVITY, u'prov:activity'), - (PROV_ATTR_TRIGGER, u'prov:trigger'), - (PROV_ATTR_INFORMED, u'prov:informed'), - (PROV_ATTR_INFORMANT, u'prov:informant'), - (PROV_ATTR_STARTER, u'prov:starter'), - (PROV_ATTR_ENDER, u'prov:ender'), - (PROV_ATTR_AGENT, u'prov:agent'), - (PROV_ATTR_PLAN, u'prov:plan'), - (PROV_ATTR_DELEGATE, u'prov:delegate'), - (PROV_ATTR_RESPONSIBLE, u'prov:responsible'), - (PROV_ATTR_GENERATED_ENTITY, u'prov:generatedEntity'), - (PROV_ATTR_USED_ENTITY, u'prov:usedEntity'), - (PROV_ATTR_GENERATION, u'prov:generation'), - (PROV_ATTR_USAGE, u'prov:usage'), - (PROV_ATTR_SPECIFIC_ENTITY, u'prov:specificEntity'), - (PROV_ATTR_GENERAL_ENTITY, u'prov:generalEntity'), - (PROV_ATTR_ALTERNATE1, u'prov:alternate1'), - (PROV_ATTR_ALTERNATE2, u'prov:alternate2'), - (PROV_ATTR_BUNDLE, u'prov:bundle'), - (PROV_ATTR_INFLUENCEE, u'prov:influencee'), - (PROV_ATTR_INFLUENCER, u'prov:influencer'), - (PROV_ATTR_COLLECTION, u'prov:collection'), - # Literal properties - (PROV_ATTR_TIME, u'prov:time'), - (PROV_ATTR_STARTTIME, u'prov:startTime'), - (PROV_ATTR_ENDTIME, u'prov:endTime'), -) - -PROV_ATTRIBUTE_LITERALS = set([PROV_ATTR_TIME, PROV_ATTR_STARTTIME, PROV_ATTR_ENDTIME]) - -PROV_RECORD_IDS_MAP = dict((PROV_N_MAP[rec_type_id], rec_type_id) for rec_type_id in PROV_N_MAP) -PROV_ID_ATTRIBUTES_MAP = dict((prov_id, attribute) for (prov_id, attribute) in PROV_RECORD_ATTRIBUTES) -PROV_ATTRIBUTES_ID_MAP = dict((attribute, prov_id) for (prov_id, attribute) in PROV_RECORD_ATTRIBUTES) - - -# Converting an attribute to the normal form for comparison purposes -_normalise_attributes = lambda attr: (str(attr[0]), str(attr[1])) - - -# Datatypes -attr2rdf = lambda attr: PROV[PROV_ID_ATTRIBUTES_MAP[attr].split('prov:')[1]].rdf_representation() - - -def _parse_xsd_dateTime(s): - return dateutil.parser.parse(s) - - -def _ensure_datetime(time): - if isinstance(time, string_types): - return _parse_xsd_dateTime(time) - else: - return time - - -def parse_xsd_dateTime(s): - try: - return _parse_xsd_dateTime(s) - except ValueError: - pass - return None - -DATATYPE_PARSERS = { - datetime.datetime: parse_xsd_dateTime, -} - - -def parse_datatype(value, datatype): - if datatype in DATATYPE_PARSERS: - # found the required parser - return DATATYPE_PARSERS[datatype](value) - else: - # No parser found for the given data type - raise Exception(u'No parser found for the data type <%s>' % str(datatype)) - - -# Mappings for XSD datatypes to Python standard types -XSD_DATATYPE_PARSERS = { - u"xsd:string": str, - u"xsd:double": float, - u"xsd:long": int, - u"xsd:int": int, - u"xsd:boolean": bool, - u"xsd:dateTime": parse_xsd_dateTime, -} - - -def parse_xsd_types(value, datatype): - # if the datatype is a QName, convert it to a Unicode string - datatype = str(datatype) - return XSD_DATATYPE_PARSERS[datatype](value) if datatype in XSD_DATATYPE_PARSERS else None - - -def _ensure_multiline_string_triple_quoted(s): - format_str = u'"""%s"""' if isinstance(s, string_types) and '\n' in s else u'"%s"' - return format_str % s - - -def encoding_PROV_N_value(value): - if isinstance(value, string_types): - return _ensure_multiline_string_triple_quoted(value) - elif isinstance(value, datetime.datetime): - return value.isoformat() - elif isinstance(value, float): - return u'"%f" %%%% xsd:float' % value - else: - return str(value) - - -class AnonymousIDGenerator(object): - def __init__(self): - self._cache = {} - self._count = 0 - - def get_anon_id(self, obj, local_prefix="id"): - if obj not in self._cache: - self._count += 1 - self._cache[obj] = Identifier('_:%s%d' % (local_prefix, self._count)) - return self._cache[obj] - - -class Literal(object): - def __init__(self, value, datatype=None, langtag=None): - self._value = value - if langtag: - if datatype is None: - logger.debug('Assuming prov:InternationalizedString as the type of "%s"@%s' % (value, langtag)) - datatype = PROV["InternationalizedString"] - elif datatype != PROV["InternationalizedString"]: - logger.warn('Invalid data type (%s) for "%s"@%s, overridden as prov:InternationalizedString.' % (value, langtag)) - datatype = PROV["InternationalizedString"] - self._datatype = datatype - self._langtag = langtag - - def __unicode__(self): - return self.provn_representation() - - def __str__(self): - return str(self).encode('utf-8') - - def __eq__(self, other): - return self._value == other._value and self._datatype == other._datatype and self._langtag == other._langtag if isinstance(other, Literal) else False - - def __hash__(self): - return hash((self._value, self._datatype, self._langtag)) - - def get_value(self): - return self._value - - def get_datatype(self): - return self._datatype - - def get_langtag(self): - return self._langtag - - def has_no_langtag(self): - return self._langtag is None - - def provn_representation(self): - if self._langtag: - # a language tag can only go with prov:InternationalizedString - return u'%s@%s' % (_ensure_multiline_string_triple_quoted(self._value), str(self._langtag)) - else: - return u'%s %%%% %s' % (_ensure_multiline_string_triple_quoted(self._value), str(self._datatype)) - - def json_representation(self): - if self._langtag: - # a language tag can only go with prov:InternationalizedString - return {'$': str(self._value), 'lang': self._langtag} - else: - if isinstance(self._datatype, QName): - return {'$': str(self._value), 'type': str(self._datatype)} - else: - # Assuming it is a valid identifier - return {'$': str(self._value), 'type': self._datatype.get_uri()} - - def rdf_representation(self): - if self._langtag: - # a langtag can only goes with string - return RDFLiteral(self._value, lang=str(self._langtag)) - else: - return RDFLiteral(self._value, datatype=self._datatype.get_uri()) - - -class Identifier(object): - def __init__(self, uri): - self._uri = str(uri) # Ensure this is a unicode string - - def get_uri(self): - return self._uri - - def __unicode__(self): - return self._uri - - def __str__(self): - return str(self).encode('utf-8') - - def __eq__(self, other): - return self.get_uri() == other.get_uri() if isinstance(other, Identifier) else False - - def __hash__(self): - return hash(self.get_uri()) - - def provn_representation(self): - return u'"%s" %%%% xsd:anyURI' % self._uri - - def json_representation(self): - return {'$': self._uri, 'type': u'xsd:anyURI'} - - def rdf_representation(self): - return URIRef(self.get_uri()) - - -class QName(Identifier): - def __init__(self, namespace, localpart): - self._namespace = namespace - self._localpart = localpart - self._str = u':'.join([namespace._prefix, localpart]) if namespace._prefix else localpart - - def get_namespace(self): - return self._namespace - - def get_localpart(self): - return self._localpart - - def get_uri(self): - return u''.join([self._namespace._uri, self._localpart]) - - def __unicode__(self): - return self._str - - def __str__(self): - return str(self).encode('utf-8') - - def provn_representation(self): - return u"'%s'" % self._str - - def json_representation(self): - return {'$': self._str, 'type': u'xsd:QName'} - - -class Namespace(object): - def __init__(self, prefix, uri): - self._prefix = prefix - self._uri = uri - self._cache = dict() - - def get_prefix(self): - return self._prefix - - def get_uri(self): - return self._uri - - def contains(self, identifier): - uri = identifier if isinstance(identifier, (str, string_types)) else (identifier.get_uri() if isinstance(identifier, Identifier) else None) - return uri.startswith(self._uri) if uri else False - - def qname(self, identifier): - uri = identifier if isinstance(identifier, (str, string_types)) else (identifier.get_uri() if isinstance(identifier, Identifier) else None) - if uri and uri.startswith(self._uri): - return QName(self, uri[len(self._uri):]) - else: - return None - - def __eq__(self, other): - return (self._uri == other._uri and self._prefix == other._prefix) if isinstance(other, Namespace) else False - - def __hash__(self): - return hash((self._uri, self._prefix)) - - def __getitem__(self, localpart): - if localpart in self._cache: - return self._cache[localpart] - else: - qname = QName(self, localpart) - self._cache[localpart] = qname - return qname - -XSD = Namespace("xsd", 'http://www.w3.org/2001/XMLSchema-datatypes#') -PROV = Namespace("prov", 'http://www.w3.org/ns/prov#') - - -# Exceptions -class ProvException(Exception): - """Base class for exceptions in this module.""" - pass - - -class ProvExceptionMissingRequiredAttribute(ProvException): - def __init__(self, record_type, attribute_id): - self.record_type = record_type - self.attribute_id = attribute_id - self.args += (PROV_N_MAP[record_type], attribute_id) - - def __str__(self): - return 'Missing the required attribute "%s" in %s' % (PROV_ID_ATTRIBUTES_MAP[self.attribute_id], PROV_N_MAP[self.record_type]) - - -class ProvExceptionNotValidAttribute(ProvException): - def __init__(self, record_type, attribute, attribute_types): - self.record_type = record_type - self.attribute = attribute - self.attribute_types = attribute_types - self.args += (PROV_N_MAP[record_type], str(attribute), attribute_types) - - def __str__(self): - return 'Invalid attribute value: %s. %s expected' % (self.attribute, self.attribute_types) - - -class ProvExceptionCannotUnifyAttribute(ProvException): - def __init__(self, identifier, record_type1, record_type2): - self.identifier = identifier - self.record_type1 = record_type1 - self.record_type2 = record_type2 - self.args += (identifier, PROV_N_MAP[record_type1], PROV_N_MAP[record_type2]) - - def __str__(self): - return 'Cannot unify two records of type %s and %s with same identifier (%s)' % (self.identifier, PROV_N_MAP[self.record_type1], PROV_N_MAP[self.record_type2]) - - -class ProvExceptionContraint(ProvException): - def __init__(self, record_type, attribute1, attribute2, msg): - self.record_type = record_type - self.attribute1 = attribute1 - self.attribute2 = attribute2 - self.args += (PROV_N_MAP[record_type], attribute1, attribute2, msg) - self.msg = msg - - -# PROV records -class ProvRecord(object): - """Base class for PROV _records.""" - def __init__(self, bundle, identifier, attributes=None, other_attributes=None, asserted=True, allowed_types=None, infered_for=None): - self._bundle = bundle - self._identifier = identifier - self._asserted = asserted - self._attributes = None - self._extra_attributes = None - if attributes or other_attributes: - self.add_attributes(attributes, other_attributes) - if not asserted: - self._allowed_types = allowed_types - self._infered_for = infered_for - - def get_type(self): - pass - - def get_allowed_types(self): - if self._asserted: - return [self.__class__] - else: - return [self.__class__] + list(self._allowed_types) - - def get_prov_type(self): - pass - - def get_asserted_types(self): - if self._extra_attributes: - prov_type = PROV['type'] - return set([value for attr, value in self._extra_attributes if attr == prov_type]) - return set() - - def add_asserted_type(self, type_identifier): - asserted_types = self.get_asserted_types() - if type_identifier not in asserted_types: - if self._extra_attributes is None: - self._extra_attributes = set() - self._extra_attributes.add((PROV['type'], type_identifier)) - - def get_attribute(self, attr_name): - attr_name = self._bundle.valid_identifier(attr_name) - if not self._extra_attributes: - return [] - results = [value for attr, value in self._extra_attributes if attr == attr_name] - return results - - def get_identifier(self): - return self._identifier - - def get_label(self): - label = None - if self._extra_attributes: - for attribute in self._extra_attributes: - if attribute[0]: - if attribute[0] == PROV['label']: - label = attribute[1] - # use the first label found - break - return label if label else self._identifier - - def get_value(self): - return self.get_attribute(PROV['value']) - - def _auto_literal_conversion(self, literal): - '''This method normalise datatype for literals - ''' - if isinstance(literal, URIRef): - return literal - - if isinstance(literal, string_types): - return str(literal) - - if isinstance(literal, Literal) and literal.has_no_langtag(): - # try convert generic Literal object to Python standard type if possible - # this is to match JSON decoding's literal conversion - value = parse_xsd_types(literal.get_value(), literal.get_datatype()) - if value is not None: - return value - - # No conversion here, return the original value - return literal - - def parse_extra_attributes(self, extra_attributes): - if isinstance(extra_attributes, dict): - # Converting the dictionary into a list of tuples (i.e. attribute-value pairs) - extra_attributes = list(extra_attributes.items()) - attr_set = set((self._bundle.valid_identifier(attribute), - self._auto_literal_conversion(value)) - for attribute, value in extra_attributes) - return attr_set - - def add_extra_attributes(self, extra_attributes): - if extra_attributes: - if self._extra_attributes is None: - self._extra_attributes = set() - # Check attributes for valid qualified names - attr_set = self.parse_extra_attributes(extra_attributes) - self._extra_attributes.update(attr_set) - - def add_attributes(self, attributes, extra_attributes): - if attributes: - if self._attributes is None: - self._attributes = attributes - else: - self._attributes.update(dict((k, v) for k, v in attributes.items() if v is not None)) - self.add_extra_attributes(extra_attributes) - - def get_attributes(self): - return (self._attributes, self._extra_attributes) - - def get_bundle(self): - return self._bundle - - def _parse_identifier(self, value): - try: - return value.get_identifier() - except: - return self._bundle.valid_identifier(value) - - def _parse_record(self, attribute, attribute_types): - # check to see if there is an existing record matching the attribute (as the record's identifier) - existing_record = self._bundle.get_record(attribute) - if existing_record is None: - # try to see if there is a bundle with the id - existing_record = self._bundle.get_bundle(attribute) - if existing_record and isinstance(existing_record, attribute_types): - return existing_record - else: - if hasattr(attribute_types, '__getitem__'): - # it is a list - klass = attribute_types[0] # get the first class - else: - klass = attribute_types # only one class provided - attribute_types = [attribute_types] - if issubclass(klass, ProvRecord): - # Create an inferred record for the id given: - return self._bundle.add_inferred_record(klass, attribute, self, attribute_types) - return None - - def _parse_attribute(self, attribute, attribute_types): - if attribute_types is Identifier: - if isinstance(attribute, ProvRecord): - # This is a record, return its identifier (if any) - return attribute.get_identifier() - # Otherwise, trying to parse the attribute as an identifier - return self._parse_identifier(attribute) - - # putting all the types in to a tuple: - if not isinstance(attribute_types, collections.Iterable): - attribute_types = (attribute_types,) - - # attempt to find an existing record having the same identifier - if any([issubclass(x, ProvRecord) for x in attribute_types]): - record = self._parse_record(attribute, attribute_types) - if record: - return record - # Try to parse it with known datatype parsers - for datatype in attribute_types: - data = parse_datatype(attribute, datatype) - if data is not None: - return data - return None - - def _validate_attribute(self, attribute, attribute_types): - if isinstance(attribute, attribute_types): - # The attribute is of a required type - # Return it - if isinstance(attribute, ProvRecord) and attribute._identifier in self._bundle._id_map: - return self._bundle._id_map[attribute._identifier] - else: - return attribute - else: - # The attribute is not of a valid type - if isinstance(attribute, ProvRecord): - # It is definitely not valid since no further parsing is possible - raise ProvExceptionNotValidAttribute(self.get_type(), attribute, attribute_types) - # Attempt to parse it - parsed_value = self._parse_attribute(attribute, attribute_types) - if parsed_value is None: - raise ProvExceptionNotValidAttribute(self.get_type(), attribute, attribute_types) - return parsed_value - - def required_attribute(self, attributes, attribute_id, attribute_types): - if attribute_id not in attributes: - # Raise an exception about the missing attribute - raise ProvExceptionMissingRequiredAttribute(self.get_type(), attribute_id) - # Found the required attribute - attribute = attributes.get(attribute_id) - return self._validate_attribute(attribute, attribute_types) - - def optional_attribute(self, attributes, attribute_id, attribute_types): - if not attributes or attribute_id not in attributes: - # Because this is optional, return nothing - return None - # Found the optional attribute - attribute = attributes.get(attribute_id) - if attribute is None: - return None - # Validate its type - return self._validate_attribute(attribute, attribute_types) - - def __eq__(self, other): - if self.get_prov_type() != other.get_prov_type(): - return False - if self._identifier and not (self._identifier == other._identifier): - return False - if self._asserted != other._asserted: - return False - if self._attributes and other._attributes: - if len(self._attributes) != len(other._attributes): - return False - for attr, value_a in list(self._attributes.items()): - value_b = other._attributes[attr] - if isinstance(value_a, ProvRecord) and value_a._identifier: - if not (value_a._identifier == value_b._identifier): - return False - elif not (value_a == value_b): - return False - elif other._attributes and not self._attributes: - other_attrs = [(key, value) for key, value in list(other._attributes.items()) if value is not None] - if other_attrs: - # the other's attributes set is not empty. - return False - elif self._attributes and not other._attributes: - my_attrs = [(key, value) for key, value in list(self._attributes.items()) if value is not None] - if my_attrs: - # my attributes set is not empty. - return False - sattr = sorted(self._extra_attributes, key=_normalise_attributes) if self._extra_attributes else None - oattr = sorted(other._extra_attributes, key=_normalise_attributes) if other._extra_attributes else None - if sattr != oattr: - if logger.isEnabledFor(logging.DEBUG): - for spair, opair in zip(sattr, oattr): - # Log the first unequal pair of attributes - if spair != opair: - logger.debug("Equality (ProvRecord): unequal attribute-value pairs - %s = %s - %s = %s", - spair[0], spair[1], opair[0], opair[1]) - break - return False - return True - - def __unicode__(self): - return self.get_provn() - - def __str__(self): - return str(self).encode('utf-8') - - def get_provn(self, _indent_level=0): - items = [] - if self._identifier: - items.append(str(self._identifier)) - if self._attributes: - for (attr, value) in list(self._attributes.items()): - if value is None: - items.append(u'-') - else: - if isinstance(value, ProvRecord): - record_id = value.get_identifier() - items.append(str(record_id)) - else: - # Assuming this is a datetime or QName value - items.append(value.isoformat() if isinstance(value, datetime.datetime) else str(value)) - - if self._extra_attributes: - extra = [] - for (attr, value) in self._extra_attributes: - try: - # try if there is a prov-n representation defined - provn_represenation = value.provn_representation() - except: - provn_represenation = encoding_PROV_N_value(value) - extra.append(u'%s=%s' % (str(attr), provn_represenation)) - if extra: - items.append(u'[%s]' % u', '.join(extra)) - prov_n = u'%s(%s)' % (PROV_N_MAP[self.get_type()], u', '.join(items)) - return prov_n if self._asserted else u'// ' + prov_n - - def rdf(self, graph=None, subj=None): - if graph is None: - graph = Graph() - if subj is None: - # this method need a subject as relations may not have identifiers - return graph - if self._attributes: - for (attr, value) in list(self._attributes.items()): - if value is None: - continue - pred = attr2rdf(attr) - try: - # try if there is a RDF representation defined - obj = value.rdf_representation() - except: - obj = RDFLiteral(value) - graph.add((subj, pred, obj)) - if self._extra_attributes: - for (attr, value) in self._extra_attributes: - try: - # try if there is a RDF representation defined - obj = value.rdf_representation() - except Exception as e: - obj = RDFLiteral(value) - if attr == PROV['location']: - pred = PROV['atLocation'].rdf_representation() - if isinstance(value, (URIRef, QName)): - if isinstance(value, QName): - value = URIRef(value.get_uri()) - graph.add((subj, pred, value)) - graph.add((value, RDF.type, - PROV['Location'].rdf_representation())) - else: - graph.add((subj, pred, obj)) - continue - if attr == PROV['type']: - pred = RDF.type - elif attr == PROV['label']: - pred = RDFS.label - else: - pred = attr.rdf_representation() - graph.add((subj, pred, obj)) - return graph - - def is_asserted(self): - return self._asserted - - def is_element(self): - return False - - def is_relation(self): - return False - - -# Abstract classes for elements and relations -class ProvElement(ProvRecord): - def is_element(self): - return True - - def rdf(self, graph=None): - if graph is None: - graph = Graph() - uri = self.get_identifier().rdf_representation() - type_uri = self.get_prov_type().rdf_representation() - graph.add((uri, RDF.type, type_uri)) - ProvRecord.rdf(self, graph, uri) - return graph - - -class ProvRelation(ProvRecord): - def is_relation(self): - return True - - def rdf(self, graph=None): - if graph is None: - graph = Graph() - pred = PROV[PROV_N_MAP[self.get_type()]].rdf_representation() - items = [] - subj = None - obj = None - for idx, (attr, value) in enumerate(self._attributes.items()): - if idx == 0: - subj = value.get_identifier().rdf_representation() - elif idx == 1: - if value: - obj = value.get_identifier().rdf_representation() - items.append((attr2rdf(attr), obj)) - elif value: - try: - # try if there is a RDF representation defined - otherobj = value.rdf_representation() - except: - otherobj = RDFLiteral(value) - items.append((attr2rdf(attr), otherobj)) - if subj and obj: - graph.add((subj, pred, obj)) - if self._extra_attributes: - for (attr, value) in self._extra_attributes: - if not value: - continue - if attr == PROV['type']: - pred = RDF.type - elif attr == PROV['label']: - pred = RDFS.label - else: - pred = attr.rdf_representation() - try: - # try if there is a RDF representation defined - otherobj = value.rdf_representation() - except: - otherobj = RDFLiteral(value) - items.append((pred, otherobj)) - if obj and len(items) == 1: - items = [] - if items: - QRole = PROV['qualified' + str(self.get_prov_type()).split('prov:')[1]].rdf_representation() - bnode = BNode() - graph.add((subj, QRole, bnode)) - graph.add((bnode, RDF.type, self.get_prov_type().rdf_representation())) - for attr, value in items: - graph.add((bnode, attr, value)) - return graph - -# ## Component 1: Entities and Activities - - -class ProvEntity(ProvElement): - def get_type(self): - return PROV_REC_ENTITY - - def get_prov_type(self): - return PROV['Entity'] - - -class ProvActivity(ProvElement): - def get_type(self): - return PROV_REC_ACTIVITY - - def get_prov_type(self): - return PROV['Activity'] - - def add_attributes(self, attributes, extra_attributes): - startTime = self.optional_attribute(attributes, PROV_ATTR_STARTTIME, datetime.datetime) - endTime = self.optional_attribute(attributes, PROV_ATTR_ENDTIME, datetime.datetime) - if startTime and endTime and startTime > endTime: - raise ValueError('StartTime %s > EndTime %s' % (startTime, endTime)) - attributes = OrderedDict() - attributes[PROV_ATTR_STARTTIME] = startTime - attributes[PROV_ATTR_ENDTIME] = endTime - - ProvElement.add_attributes(self, attributes, extra_attributes) - - # Convenient methods - def set_time(self, startTime=None, endTime=None): - # The _attributes dict should have been initialised - if startTime is not None: - self._attributes[PROV_ATTR_STARTTIME] = startTime - if endTime is not None: - self._attributes[PROV_ATTR_ENDTIME] = endTime - - def get_startTime(self): - return self._attributes[PROV_ATTR_STARTTIME] - - def get_endTime(self): - return self._attributes[PROV_ATTR_ENDTIME] - - -class ProvGeneration(ProvRelation): - def get_type(self): - return PROV_REC_GENERATION - - def get_prov_type(self): - return PROV['Generation'] - - def add_attributes(self, attributes, extra_attributes): - # Required attributes - entity = self.required_attribute(attributes, PROV_ATTR_ENTITY, (ProvEntity, ProvAgent)) - # Optional attributes - activity = self.optional_attribute(attributes, PROV_ATTR_ACTIVITY, ProvActivity) - time = self.optional_attribute(attributes, PROV_ATTR_TIME, datetime.datetime) - - attributes = OrderedDict() - attributes[PROV_ATTR_ENTITY] = entity - attributes[PROV_ATTR_ACTIVITY] = activity - attributes[PROV_ATTR_TIME] = time - - ProvRelation.add_attributes(self, attributes, extra_attributes) - - -class ProvUsage(ProvRelation): - def get_type(self): - return PROV_REC_USAGE - - def get_prov_type(self): - return PROV['Usage'] - - def add_attributes(self, attributes, extra_attributes): - # Required attributes - activity = self.required_attribute(attributes, PROV_ATTR_ACTIVITY, ProvActivity) - # Optional attributes - entity = self.optional_attribute(attributes, PROV_ATTR_ENTITY, (ProvEntity, ProvAgent)) - time = self.optional_attribute(attributes, PROV_ATTR_TIME, datetime.datetime) - - attributes = OrderedDict() - attributes[PROV_ATTR_ACTIVITY] = activity - attributes[PROV_ATTR_ENTITY] = entity - attributes[PROV_ATTR_TIME] = time - ProvRelation.add_attributes(self, attributes, extra_attributes) - - -class ProvCommunication(ProvRelation): - def get_type(self): - return PROV_REC_COMMUNICATION - - def get_prov_type(self): - return PROV['Communication'] - - def add_attributes(self, attributes, extra_attributes): - # Required attributes - informed = self.required_attribute(attributes, PROV_ATTR_INFORMED, ProvActivity) - informant = self.required_attribute(attributes, PROV_ATTR_INFORMANT, ProvActivity) - - attributes = OrderedDict() - attributes[PROV_ATTR_INFORMED] = informed - attributes[PROV_ATTR_INFORMANT] = informant - ProvRelation.add_attributes(self, attributes, extra_attributes) - - -class ProvStart(ProvRelation): - def get_type(self): - return PROV_REC_START - - def get_prov_type(self): - return PROV['Start'] - - def add_attributes(self, attributes, extra_attributes): - # Required attributes - activity = self.required_attribute(attributes, PROV_ATTR_ACTIVITY, ProvActivity) - # Optional attributes - trigger = self.optional_attribute(attributes, PROV_ATTR_TRIGGER, (ProvEntity, ProvAgent)) - starter = self.optional_attribute(attributes, PROV_ATTR_STARTER, ProvActivity) - time = self.optional_attribute(attributes, PROV_ATTR_TIME, datetime.datetime) - - attributes = OrderedDict() - attributes[PROV_ATTR_ACTIVITY] = activity - attributes[PROV_ATTR_TRIGGER] = trigger - attributes[PROV_ATTR_STARTER] = starter - attributes[PROV_ATTR_TIME] = time - ProvRelation.add_attributes(self, attributes, extra_attributes) - - -class ProvEnd(ProvRelation): - def get_type(self): - return PROV_REC_END - - def get_prov_type(self): - return PROV['End'] - - def add_attributes(self, attributes, extra_attributes): - # Required attributes - activity = self.required_attribute(attributes, PROV_ATTR_ACTIVITY, ProvActivity) - # Optional attributes - trigger = self.optional_attribute(attributes, PROV_ATTR_TRIGGER, (ProvEntity, ProvAgent)) - ender = self.optional_attribute(attributes, PROV_ATTR_ENDER, ProvActivity) - time = self.optional_attribute(attributes, PROV_ATTR_TIME, datetime.datetime) - - attributes = OrderedDict() - attributes[PROV_ATTR_ACTIVITY] = activity - attributes[PROV_ATTR_TRIGGER] = trigger - attributes[PROV_ATTR_ENDER] = ender - attributes[PROV_ATTR_TIME] = time - ProvRelation.add_attributes(self, attributes, extra_attributes) - - -class ProvInvalidation(ProvRelation): - def get_type(self): - return PROV_REC_INVALIDATION - - def get_prov_type(self): - return PROV['Invalidation'] - - def add_attributes(self, attributes, extra_attributes): - # Required attributes - entity = self.required_attribute(attributes, PROV_ATTR_ENTITY, (ProvEntity, ProvAgent)) - # Optional attributes - activity = self.optional_attribute(attributes, PROV_ATTR_ACTIVITY, ProvActivity) - time = self.optional_attribute(attributes, PROV_ATTR_TIME, datetime.datetime) - - attributes = OrderedDict() - attributes[PROV_ATTR_ENTITY] = entity - attributes[PROV_ATTR_ACTIVITY] = activity - attributes[PROV_ATTR_TIME] = time - ProvRelation.add_attributes(self, attributes, extra_attributes) - - -# ## Component 2: Derivations - -class ProvDerivation(ProvRelation): - def get_type(self): - return PROV_REC_DERIVATION - - def get_prov_type(self): - return PROV['Derivation'] - - def add_attributes(self, attributes, extra_attributes): - # Required attributes - generatedEntity = self.required_attribute(attributes, PROV_ATTR_GENERATED_ENTITY, (ProvEntity, ProvAgent)) - usedEntity = self.required_attribute(attributes, PROV_ATTR_USED_ENTITY, (ProvEntity, ProvAgent)) - # Optional attributes - activity = self.optional_attribute(attributes, PROV_ATTR_ACTIVITY, ProvActivity) - generation = self.optional_attribute(attributes, PROV_ATTR_GENERATION, ProvGeneration) - usage = self.optional_attribute(attributes, PROV_ATTR_USAGE, ProvUsage) - - attributes = OrderedDict() - attributes[PROV_ATTR_GENERATED_ENTITY] = generatedEntity - attributes[PROV_ATTR_USED_ENTITY] = usedEntity - attributes[PROV_ATTR_ACTIVITY] = activity - attributes[PROV_ATTR_GENERATION] = generation - attributes[PROV_ATTR_USAGE] = usage - ProvRelation.add_attributes(self, attributes, extra_attributes) - - -# ## Component 3: Agents, Responsibility, and Influence - -class ProvAgent(ProvElement): - def get_type(self): - return PROV_REC_AGENT - - def get_prov_type(self): - return PROV['Agent'] - - -class ProvAttribution(ProvRelation): - def get_type(self): - return PROV_REC_ATTRIBUTION - - def get_prov_type(self): - return PROV['Attribution'] - - def add_attributes(self, attributes, extra_attributes): - # Required attributes - entity = self.required_attribute(attributes, PROV_ATTR_ENTITY, (ProvEntity, ProvAgent)) - agent = self.required_attribute(attributes, PROV_ATTR_AGENT, (ProvAgent, ProvEntity)) - - attributes = OrderedDict() - attributes[PROV_ATTR_ENTITY] = entity - attributes[PROV_ATTR_AGENT] = agent - ProvRelation.add_attributes(self, attributes, extra_attributes) - - -class ProvAssociation(ProvRelation): - def get_type(self): - return PROV_REC_ASSOCIATION - - def get_prov_type(self): - return PROV['Association'] - - def add_attributes(self, attributes, extra_attributes): - # Required attributes - activity = self.required_attribute(attributes, PROV_ATTR_ACTIVITY, ProvActivity) - # Optional attributes - agent = self.optional_attribute(attributes, PROV_ATTR_AGENT, (ProvAgent, ProvEntity)) - plan = self.optional_attribute(attributes, PROV_ATTR_PLAN, (ProvEntity, ProvAgent)) - - attributes = OrderedDict() - attributes[PROV_ATTR_ACTIVITY] = activity - attributes[PROV_ATTR_AGENT] = agent - attributes[PROV_ATTR_PLAN] = plan - ProvRelation.add_attributes(self, attributes, extra_attributes) - - -class ProvDelegation(ProvRelation): - def get_type(self): - return PROV_REC_DELEGATION - - def get_prov_type(self): - return PROV['Delegation'] - - def add_attributes(self, attributes, extra_attributes): - # Required attributes - delegate = self.required_attribute(attributes, PROV_ATTR_DELEGATE, (ProvAgent, ProvEntity)) - responsible = self.required_attribute(attributes, PROV_ATTR_RESPONSIBLE, (ProvAgent, ProvEntity)) - # Optional attributes - activity = self.optional_attribute(attributes, PROV_ATTR_ACTIVITY, ProvActivity) - - attributes = OrderedDict() - attributes[PROV_ATTR_DELEGATE] = delegate - attributes[PROV_ATTR_RESPONSIBLE] = responsible - attributes[PROV_ATTR_ACTIVITY] = activity - ProvRelation.add_attributes(self, attributes, extra_attributes) - - -class ProvInfluence(ProvRelation): - def get_type(self): - return PROV_REC_INFLUENCE - - def get_prov_type(self): - return PROV['Influence'] - - def add_attributes(self, attributes, extra_attributes): - # Required attributes - influencee = self.required_attribute(attributes, PROV_ATTR_INFLUENCEE, (ProvEntity, ProvActivity, ProvAgent)) - influencer = self.required_attribute(attributes, PROV_ATTR_INFLUENCER, (ProvAgent, ProvEntity, ProvActivity)) - - attributes = OrderedDict() - attributes[PROV_ATTR_INFLUENCEE] = influencee - attributes[PROV_ATTR_INFLUENCER] = influencer - ProvRelation.add_attributes(self, attributes, extra_attributes) - - -# ## Component 4: Bundles - -# See below - -# ## Component 5: Alternate Entities - -class ProvSpecialization(ProvRelation): - def get_type(self): - return PROV_REC_SPECIALIZATION - - def get_prov_type(self): - return PROV['Specialization'] - - def add_attributes(self, attributes, extra_attributes): - # Required attributes - specificEntity = self.required_attribute(attributes, PROV_ATTR_SPECIFIC_ENTITY, (ProvEntity, ProvAgent)) - generalEntity = self.required_attribute(attributes, PROV_ATTR_GENERAL_ENTITY, (ProvEntity, ProvAgent)) - - attributes = OrderedDict() - attributes[PROV_ATTR_SPECIFIC_ENTITY] = specificEntity - attributes[PROV_ATTR_GENERAL_ENTITY] = generalEntity - ProvRelation.add_attributes(self, attributes, extra_attributes) - - -class ProvAlternate(ProvRelation): - def get_type(self): - return PROV_REC_ALTERNATE - - def get_prov_type(self): - return PROV['Alternate'] - - def add_attributes(self, attributes, extra_attributes): - # Required attributes - alternate1 = self.required_attribute(attributes, PROV_ATTR_ALTERNATE1, (ProvEntity, ProvAgent)) - alternate2 = self.required_attribute(attributes, PROV_ATTR_ALTERNATE2, (ProvEntity, ProvAgent)) - - attributes = OrderedDict() - attributes[PROV_ATTR_ALTERNATE1] = alternate1 - attributes[PROV_ATTR_ALTERNATE2] = alternate2 - ProvRelation.add_attributes(self, attributes, extra_attributes) - - -class ProvMention(ProvSpecialization): - def get_type(self): - return PROV_REC_MENTION - - def get_prov_type(self): - return PROV['Mention'] - - def add_attributes(self, attributes, extra_attributes): - # Required attributes - specificEntity = self.required_attribute(attributes, PROV_ATTR_SPECIFIC_ENTITY, (ProvEntity, ProvAgent)) - generalEntity = self.required_attribute(attributes, PROV_ATTR_GENERAL_ENTITY, Identifier) - bundle = self.required_attribute(attributes, PROV_ATTR_BUNDLE, Identifier) - # ======================================================================= - # # This is disabled so that mentionOf can refer to bundle that is not defined in the same place - # bundle = self.required_attribute(attributes, PROV_ATTR_BUNDLE, ProvBundle) - # # Check if generalEntity is in the bundle - # if generalEntity.get_bundle() is not bundle: - # raise ProvExceptionContraint(PROV_REC_MENTION, generalEntity, bundle, 'The generalEntity must belong to the bundle') - # ======================================================================= - - attributes = OrderedDict() - attributes[PROV_ATTR_SPECIFIC_ENTITY] = specificEntity - attributes[PROV_ATTR_GENERAL_ENTITY] = generalEntity - attributes[PROV_ATTR_BUNDLE] = bundle - ProvRelation.add_attributes(self, attributes, extra_attributes) - - -# ## Component 6: Collections - -class ProvMembership(ProvRelation): - def get_type(self): - return PROV_REC_MEMBERSHIP - - def get_prov_type(self): - return PROV['Membership'] - - def add_attributes(self, attributes, extra_attributes): - # Required attributes - collection = self.required_attribute(attributes, PROV_ATTR_COLLECTION, (ProvEntity, ProvAgent)) - entity = self.required_attribute(attributes, PROV_ATTR_ENTITY, (ProvEntity, ProvAgent)) - - attributes = OrderedDict() - attributes[PROV_ATTR_COLLECTION] = collection - attributes[PROV_ATTR_ENTITY] = entity - ProvRelation.add_attributes(self, attributes, extra_attributes) - -# Class mappings from PROV record type -PROV_REC_CLS = { - PROV_REC_ENTITY: ProvEntity, - PROV_REC_ACTIVITY: ProvActivity, - PROV_REC_GENERATION: ProvGeneration, - PROV_REC_USAGE: ProvUsage, - PROV_REC_COMMUNICATION: ProvCommunication, - PROV_REC_START: ProvStart, - PROV_REC_END: ProvEnd, - PROV_REC_INVALIDATION: ProvInvalidation, - PROV_REC_DERIVATION: ProvDerivation, - PROV_REC_AGENT: ProvAgent, - PROV_REC_ATTRIBUTION: ProvAttribution, - PROV_REC_ASSOCIATION: ProvAssociation, - PROV_REC_DELEGATION: ProvDelegation, - PROV_REC_INFLUENCE: ProvInfluence, - PROV_REC_SPECIALIZATION: ProvSpecialization, - PROV_REC_ALTERNATE: ProvAlternate, - PROV_REC_MENTION: ProvMention, - PROV_REC_MEMBERSHIP: ProvMembership, -} - - -# Bundle -class NamespaceManager(dict): - def __init__(self, namespaces={}, default_namespaces={PROV.get_prefix(): PROV, XSD.get_prefix(): XSD}, default=None, parent=None): - self._default_namespaces = {} - self._default_namespaces.update(default_namespaces) - self.update(self._default_namespaces) - self._namespaces = {} - - if default is not None: - self.set_default_namespace(default) - else: - self._default = None - self.parent = parent - # TODO check if default is in the default namespaces - self._anon_id_count = 0 - self._rename_map = {} - self.add_namespaces(namespaces) - - def get_namespace(self, uri): - for namespace in list(self.values()): - if uri == namespace._uri: - return namespace - return None - - def get_registered_namespaces(self): - return list(self._namespaces.values()) - - def set_default_namespace(self, uri): - self._default = Namespace('', uri) - self[''] = self._default - - def get_default_namespace(self): - return self._default - - def add_namespace(self, namespace): - if namespace in list(self.values()): - # no need to do anything - return - if namespace in self._rename_map: - # already renamed and added - return - - prefix = namespace.get_prefix() - if prefix in self: - # Conflicting prefix - new_prefix = self._get_unused_prefix(prefix) - new_namespace = Namespace(new_prefix, namespace.get_uri()) - self._rename_map[namespace] = new_namespace - prefix = new_prefix - namespace = new_namespace - self._namespaces[prefix] = namespace - self[prefix] = namespace - return namespace - - def add_namespaces(self, namespaces): - if namespaces: - for prefix, uri in list(namespaces.items()): - ns = Namespace(prefix, uri) - self.add_namespace(ns) - - def get_valid_identifier(self, identifier): - if not identifier: - return None - if isinstance(identifier, Identifier): - if isinstance(identifier, QName): - # Register the namespace if it has not been registered before - namespace = identifier._namespace - prefix = namespace.get_prefix() - if prefix in self and self[prefix] == namespace: - # No need to add the namespace - existing_ns = self[prefix] - if existing_ns is namespace: - return identifier - else: - return existing_ns[identifier._localpart] # reuse the existing namespace - else: - ns = self.add_namespace(deepcopy(namespace)) # Do not reuse the namespace object - return ns[identifier._localpart] - else: - # return the original identifier - return identifier - elif isinstance(identifier, (str, string_types)): - if identifier.startswith('_:'): - return None - elif ':' in identifier: - # check if the identifier contains a registered prefix - prefix, local_part = identifier.split(':', 1) - if prefix in self: - # return a new QName - return self[prefix][local_part] - else: - # treat as a URI (with the first part as its scheme) - # check if the URI can be compacted - for namespace in list(self.values()): - if identifier.startswith(namespace.get_uri()): - # create a QName with the namespace - return namespace[identifier.replace(namespace.get_uri(), '')] - if self.parent is not None: - # try the parent namespace manager - return self.parent.get_valid_identifier(identifier) - else: - # return an Identifier with the given URI - return Identifier(identifier) - elif self._default: - # create and return an identifier in the default namespace - return self._default[identifier] - else: - # This is not an identifier - return None - - def get_anonymous_identifier(self, local_prefix='id'): - self._anon_id_count += 1 - return Identifier('_:%s%d' % (local_prefix, self._anon_id_count)) - - def _get_unused_prefix(self, original_prefix): - if original_prefix not in self: - return original_prefix - count = 1 - while True: - new_prefix = '_'.join((original_prefix, str(count))) - if new_prefix in self: - count += 1 - else: - return new_prefix - - -class ProvBundle(ProvEntity): - def __init__(self, bundle=None, identifier=None, attributes=None, other_attributes=None, asserted=True, namespaces={}): - # Initializing bundle-specific attributes - self._records = list() - self._id_map = dict() - self._bundles = dict() - self._namespaces = NamespaceManager(namespaces, parent=(bundle._namespaces if bundle is not None else None)) - - # Initializing record-specific attributes - super(ProvBundle, self).__init__(bundle, identifier, attributes, other_attributes, asserted) - - # Bundle configurations - def set_default_namespace(self, uri): - self._namespaces.set_default_namespace(uri) - - def get_default_namespace(self): - return self._namespaces.get_default_namespace() - - def add_namespace(self, namespace_or_prefix, uri=None): - if uri is None: - self._namespaces.add_namespace(namespace_or_prefix) - else: - self._namespaces.add_namespace(Namespace(namespace_or_prefix, uri)) - - def get_registered_namespaces(self): - return self._namespaces.get_registered_namespaces() - - def valid_identifier(self, identifier): - return self._namespaces.get_valid_identifier(identifier) - - def get_anon_id(self, record): - # TODO Implement a dict of self-generated anon ids for records without identifier - return self._namespaces.get_anonymous_identifier() - - def get_records(self, class_or_type_or_tuple=None): - # Only returning asserted records - results = [rec for rec in self._records if rec.is_asserted()] - if class_or_type_or_tuple: - return [rec for rec in results if isinstance(rec, class_or_type_or_tuple)] - else: - return results - - def get_record(self, identifier): - if identifier is None: - return None - valid_id = self.valid_identifier(identifier) - try: - return self._id_map[valid_id] - except: - # looking up the parent bundle - if self._bundle is not None: - return self._bundle.get_record(valid_id) - else: - return None - - def get_bundle(self, identifier): - try: - valid_id = self.valid_identifier(identifier) - return self._bundles[valid_id] - except: - # looking up the parent bundle - if self._bundle is not None: - return self._bundle.get_bundle(valid_id) - else: - return None - - # PROV-JSON serialization/deserialization - class JSONEncoder(json.JSONEncoder): - def default(self, o): - if isinstance(o, ProvBundle): - return o._encode_JSON_container() - else: - # Use the default encoder instead - return json.JSONEncoder.default(self, o) - - class JSONDecoder(json.JSONDecoder): - def decode(self, s): - json_container = json.JSONDecoder.decode(self, s) - result = ProvBundle() - result._decode_JSON_container(json_container) - return result - - def _encode_json_representation(self, value): - try: - return value.json_representation() - except AttributeError: - if isinstance(value, datetime.datetime): - return {'$': value.isoformat(), 'type': u'xsd:dateTime'} - else: - return value - - def _decode_json_representation(self, literal): - if isinstance(literal, dict): - # complex type - value = literal['$'] - datatype = literal['type'] if 'type' in literal else None - langtag = literal['lang'] if 'lang' in literal else None - if datatype == u'xsd:anyURI': - return Identifier(value) - elif datatype == u'xsd:QName': - return self.valid_identifier(value) - else: - # The literal of standard Python types is not converted here - # It will be automatically converted when added to a record by _auto_literal_conversion() - return Literal(value, self.valid_identifier(datatype), langtag) - else: - # simple type, just return it - return literal - - def _encode_JSON_container(self): - container = defaultdict(dict) - - if self._bundle is None: # This is a document - prefixes = {} - for namespace in self._namespaces.get_registered_namespaces(): - prefixes[namespace.get_prefix()] = namespace.get_uri() - if self._namespaces._default: - prefixes['default'] = self._namespaces._default.get_uri() - if prefixes: - container[u'prefix'] = prefixes - - id_generator = AnonymousIDGenerator() - real_or_anon_id = lambda record: record._identifier if record._identifier else id_generator.get_anon_id(record) - - for record in self._records: - if not record.is_asserted(): - continue # skipping inferred records - - rec_type = record.get_type() - rec_label = PROV_N_MAP[rec_type] - identifier = str(real_or_anon_id(record)) - - if rec_type == PROV_REC_BUNDLE: - # encoding the sub-bundle - record_json = record._encode_JSON_container() - else: - record_json = {} - if record._attributes: - for (attr, value) in list(record._attributes.items()): - if isinstance(value, ProvRecord): - attr_record_id = real_or_anon_id(value) - record_json[PROV_ID_ATTRIBUTES_MAP[attr]] = str(attr_record_id) - elif value is not None: - # Assuming this is a datetime value - record_json[PROV_ID_ATTRIBUTES_MAP[attr]] = value.isoformat() if isinstance(value, datetime.datetime) else str(value) - if record._extra_attributes: - for (attr, value) in record._extra_attributes: - attr_id = str(attr) - value_json = self._encode_json_representation(value) - if attr_id in record_json: - # Multi-value attribute - existing_value = record_json[attr_id] - try: - # Add the value to the current list of values - existing_value.append(value_json) - except: - # But if the existing value is not a list, it'll fail - # create the list for the existing value and the second value - record_json[attr_id] = [existing_value, value_json] - else: - record_json[attr_id] = value_json - container[rec_label][identifier] = record_json - - return container - - def _decode_JSON_container(self, jc): - if u'prefix' in jc: - prefixes = jc[u'prefix'] - for prefix, uri in list(prefixes.items()): - if prefix != 'default': - self.add_namespace(Namespace(prefix, uri)) - else: - self.set_default_namespace(uri) - records = sorted([(PROV_RECORD_IDS_MAP[rec_type], rec_id, jc[rec_type][rec_id]) - for rec_type in jc if rec_type != u'prefix' - for rec_id in jc[rec_type]], - key=lambda tuple_rec: tuple_rec[0]) - - record_map = {} - _parse_attr_value = lambda value: record_map[value] if (isinstance(value, string_types) and value in record_map) else self._decode_json_representation(value) - # Create all the records before setting their attributes - for (record_type, identifier, content) in records: - if record_type == PROV_REC_BUNDLE: - bundle = self.bundle(identifier) - bundle._decode_JSON_container(content) - else: - record_map[identifier] = self.add_record(record_type, identifier, None, None) - for (record_type, identifier, attributes) in records: - if record_type != PROV_REC_BUNDLE: - record = record_map[identifier] - - if hasattr(attributes, 'items'): # it is a dict - # There is only one element, create a singleton list - elements = [attributes] - else: - # expect it to be a list of dictionaries - elements = attributes - - for element in elements: - prov_attributes = {} - extra_attributes = [] - # Splitting PROV attributes and the others - membership_extra_members = None # this is for the multiple-entity membership hack to come - for attr, value in list(element.items()): - if attr in PROV_ATTRIBUTES_ID_MAP: - attr_id = PROV_ATTRIBUTES_ID_MAP[attr] - if isinstance(value, list): - # Multiple values - if len(value) == 1: - # Only a single value in the list, unpack it - value = value[0] - else: - if record.get_type() == PROV_REC_MEMBERSHIP and attr_id == PROV_ATTR_ENTITY: - # This is a membership relation with multiple entities - # HACK: create multiple membership relations, one for each entity - membership_extra_members = value[1:] # Store all the extra entities - value = value[0] # Create the first membership relation as normal for the first entity - else: - error_msg = 'The prov package does not support PROV attributes having multiple values.' - logger.error(error_msg) - raise ProvException(error_msg) - prov_attributes[attr_id] = _parse_attr_value(value) - else: - attr_id = self.valid_identifier(attr) - if isinstance(value, list): - # Parsing multi-value attribute - extra_attributes.extend((attr_id, self._decode_json_representation(value_single)) for value_single in value) - else: - # add the single-value attribute - extra_attributes.append((attr_id, self._decode_json_representation(value))) - record.add_attributes(prov_attributes, extra_attributes) - # HACK: creating extra (unidentified) membership relations - if membership_extra_members: - collection = prov_attributes[PROV_ATTR_COLLECTION] - for member in membership_extra_members: - self.membership(collection, _parse_attr_value(member), None, extra_attributes) - - # Miscellaneous functions - def is_document(self): - return self._bundle is None - - def is_bundle(self): - return self._bundle is not None - - def get_type(self): - return PROV_REC_BUNDLE - - def get_provn(self, _indent_level=0, asserted_only=True): - indentation = '' + (' ' * _indent_level) - newline = '\n' + (' ' * (_indent_level + 1)) - - # if this is the document, start the document; otherwise, start the bundle - records = ['document'] if self._bundle is None else ['bundle %s' % self._identifier] - - if self._bundle is None: - # Only output the namespaces of a document - default_namespace = self._namespaces.get_default_namespace() - if default_namespace: - records.append('default <%s>' % default_namespace.get_uri()) - - registered_namespaces = self._namespaces.get_registered_namespaces() - if registered_namespaces: - records.extend(['prefix %s <%s>' % (namespace.get_prefix(), namespace.get_uri()) for namespace in registered_namespaces]) - - if default_namespace or registered_namespaces: - # a blank line between the prefixes and the assertions - records.append('') - - # adding all the records - records.extend([record.get_provn(_indent_level + 1) for record in self._records if record.is_asserted() or not asserted_only]) - provn_str = newline.join(records) + '\n' - # closing the structure - provn_str += indentation + ('endDocument' if self._bundle is None else 'endBundle') - return provn_str - - def rdf(self, graph=None): - if self._bundle is None: - # top bundle - if graph is None: - graph = ConjunctiveGraph() - else: - # graph should not None here - uri = self.get_identifier().rdf_representation() - graph = Graph(graph.store, uri) - - for prefix, namespace in list(self._namespaces.items()): - graph.bind(prefix, namespace.get_uri()) - - for record in self._records: - if record.is_asserted(): - record.rdf(graph) - return graph - - def get_provjson(self, **kw): - """Return the `PROV-JSON `_ representation for the bundle/document. - - Parameters for `json.dumps `_ like `indent=4` can be also passed as keyword arguments. - """ - # Prevent overwriting the encoder class - if 'cls' in kw: - del kw['cls'] - json_content = json.dumps(self, cls=ProvBundle.JSONEncoder, **kw) - return json_content - - @staticmethod - def from_provjson(json_content, **kw): - """Construct the bundle/document from the given `PROV-JSON `_ representation. - - Parameters for `json.loads `_ can be also passed as keyword arguments. - """ # Prevent overwriting the decoder class - if 'cls' in kw: - del kw['cls'] - return json.loads(json_content, cls=ProvBundle.JSONDecoder, **kw) - - def get_flattened(self): - namespaces = dict((ns.get_prefix(), ns.get_uri()) for ns in self.get_registered_namespaces()) - document = ProvBundle(namespaces=namespaces) - default_ns_uri = self.get_default_namespace() - if default_ns_uri is not None: - document.set_default_namespace(default_ns_uri) - # Enumerate records and bundles - bundles = [] - records = [] - for record in self.get_records(): - if isinstance(record, ProvBundle): - bundles.append(record) - else: - records.append(record) - records = deepcopy(records) - for record in records: - document._add_record(record) - for bundle in bundles: - for record in bundle._records: - document.add_record(record.get_type(), copy(record._identifier), - deepcopy(record._attributes), deepcopy(record._extra_attributes), - record._asserted) - return document - - def __eq__(self, other): - if not isinstance(other, ProvBundle): - return False - other_records = set(other.get_records()) - this_records = set(self.get_records()) - if len(this_records) != len(other_records): - return False - # check if all records for equality - for record_a in this_records: - if record_a._identifier: - if record_a.get_type() == PROV_REC_BUNDLE: - record_b = other.get_bundle(record_a._identifier) - else: - record_b = other.get_record(record_a._identifier) - if record_b: - if record_a == record_b: - other_records.remove(record_b) - continue - else: - logger.debug("Equality (ProvBundle): Unequal PROV records:") - logger.debug("%s", str(record_a)) - logger.debug("%s", str(record_b)) - return False - else: - logger.debug("Equality (ProvBundle): Could not find a record with this identifier: %s", str(record_a._identifier)) - return False - else: - # Manually look for the record - found = False - for record_b in other_records: - if record_a == record_b: - other_records.remove(record_b) - found = True - break - if not found: - logger.debug("Equality (ProvBundle): Could not find this record: %s", str(record_a)) - return False - return True - - # Provenance statements - def _add_record(self, record): - if record._identifier: - if record.get_type() == PROV_REC_BUNDLE: - # Don't mix bunle ids with normal record ids. - self._bundles[record._identifier] = record - self._records.append(record) - else: - if record._identifier in self._id_map: - merge_target = self._id_map[record._identifier] - - if not merge_target._asserted and record._asserted: - if record.__class__ in merge_target.get_allowed_types(): - for attribute_id, attribute in merge_target._infered_for._attributes.items(): - if attribute == merge_target: - merge_target._infered_for._attributes[attribute_id] = record - self._records.remove(merge_target) - self._id_map[record._identifier] = record - self._records.append(record) - else: - raise ProvExceptionCannotUnifyAttribute(record._identifier, merge_target.get_type(), record.get_type()) - else: - if record.get_type() != merge_target.get_type(): - raise ProvExceptionCannotUnifyAttribute(record._identifier, merge_target.get_type(), record.get_type()) - merge_target.add_attributes(record._attributes, record._extra_attributes) - else: - self._records.append(record) - self._id_map[record._identifier] = record - else: - self._records.append(record) - - def add_record(self, record_type, identifier, attributes=None, other_attributes=None, asserted=True): - new_record = PROV_REC_CLS[record_type](self, self.valid_identifier(identifier), attributes, other_attributes, asserted) - self._add_record(new_record) - return new_record - - def add_inferred_record(self, record_cls, identifier, infered_for, allowed_types): - record_id = self.valid_identifier(identifier) - record = record_cls(self, record_id, asserted=False, allowed_types=allowed_types, infered_for=infered_for) - self._add_record(record) - return record - - def add_bundle(self, bundle, identifier=None): - '''Add a bundle to the current document - ''' - - if identifier is None: - identifier = bundle.get_identifier() - - if not identifier: - raise ProvException(u"The added bundle has no identifier") - - valid_id = self.valid_identifier(identifier) - bundle._identifier = valid_id - - if valid_id in self._bundles: - raise ProvException(u"A bundle with that identifier already exists") - - if len(bundle._bundles) > 0: - raise ProvException(u"A bundle may not contain bundles") - - self._bundles[valid_id] = bundle - self._records.append(bundle) - - for namespace in bundle.get_registered_namespaces(): - self.add_namespace(namespace) - - bundle._bundle = self - - def add_element(self, record_type, identifier, attributes=None, other_attributes=None): - return self.add_record(record_type, identifier, attributes, other_attributes) - - def entity(self, identifier, other_attributes=None): - return self.add_element(PROV_REC_ENTITY, identifier, None, other_attributes) - - def activity(self, identifier, startTime=None, endTime=None, other_attributes=None): - return self.add_element(PROV_REC_ACTIVITY, identifier, {PROV_ATTR_STARTTIME: _ensure_datetime(startTime), PROV_ATTR_ENDTIME: _ensure_datetime(endTime)}, other_attributes) - - def generation(self, entity, activity=None, time=None, identifier=None, other_attributes=None): - return self.add_record(PROV_REC_GENERATION, identifier, {PROV_ATTR_ENTITY: entity, PROV_ATTR_ACTIVITY: activity, PROV_ATTR_TIME: _ensure_datetime(time)}, other_attributes) - - def usage(self, activity, entity=None, time=None, identifier=None, other_attributes=None): - return self.add_record(PROV_REC_USAGE, identifier, {PROV_ATTR_ACTIVITY: activity, PROV_ATTR_ENTITY: entity, PROV_ATTR_TIME: _ensure_datetime(time)}, other_attributes) - - def start(self, activity, trigger=None, starter=None, time=None, identifier=None, other_attributes=None): - return self.add_record(PROV_REC_START, identifier, {PROV_ATTR_ACTIVITY: activity, PROV_ATTR_TRIGGER: trigger, PROV_ATTR_STARTER: starter, PROV_ATTR_TIME: _ensure_datetime(time)}, other_attributes) - - def end(self, activity, trigger=None, ender=None, time=None, identifier=None, other_attributes=None): - return self.add_record(PROV_REC_END, identifier, {PROV_ATTR_ACTIVITY: activity, PROV_ATTR_TRIGGER: trigger, PROV_ATTR_ENDER: ender, PROV_ATTR_TIME: _ensure_datetime(time)}, other_attributes) - - def invalidation(self, entity, activity=None, time=None, identifier=None, other_attributes=None): - return self.add_record(PROV_REC_INVALIDATION, identifier, {PROV_ATTR_ENTITY: entity, PROV_ATTR_ACTIVITY: activity, PROV_ATTR_TIME: _ensure_datetime(time)}, other_attributes) - - def communication(self, informed, informant, identifier=None, other_attributes=None): - return self.add_record(PROV_REC_COMMUNICATION, identifier, {PROV_ATTR_INFORMED: informed, PROV_ATTR_INFORMANT: informant}, other_attributes) - - def agent(self, identifier, other_attributes=None): - return self.add_element(PROV_REC_AGENT, identifier, None, other_attributes) - - def attribution(self, entity, agent, identifier=None, other_attributes=None): - return self.add_record(PROV_REC_ATTRIBUTION, identifier, {PROV_ATTR_ENTITY: entity, PROV_ATTR_AGENT: agent}, other_attributes) - - def association(self, activity, agent=None, plan=None, identifier=None, other_attributes=None): - return self.add_record(PROV_REC_ASSOCIATION, identifier, {PROV_ATTR_ACTIVITY: activity, PROV_ATTR_AGENT: agent, PROV_ATTR_PLAN: plan}, other_attributes) - - def delegation(self, delegate, responsible, activity=None, identifier=None, other_attributes=None): - return self.add_record(PROV_REC_DELEGATION, identifier, {PROV_ATTR_DELEGATE: delegate, PROV_ATTR_RESPONSIBLE: responsible, PROV_ATTR_ACTIVITY: activity}, other_attributes) - - def influence(self, influencee, influencer, identifier=None, other_attributes=None): - return self.add_record(PROV_REC_INFLUENCE, identifier, {PROV_ATTR_INFLUENCEE: influencee, PROV_ATTR_INFLUENCER: influencer}, other_attributes) - - def derivation(self, generatedEntity, usedEntity, activity=None, generation=None, usage=None, time=None, identifier=None, other_attributes=None): - attributes = {PROV_ATTR_GENERATED_ENTITY: generatedEntity, - PROV_ATTR_USED_ENTITY: usedEntity, - PROV_ATTR_ACTIVITY: activity, - PROV_ATTR_GENERATION: generation, - PROV_ATTR_USAGE: usage} - return self.add_record(PROV_REC_DERIVATION, identifier, attributes, other_attributes) - - def revision(self, generatedEntity, usedEntity, activity=None, generation=None, usage=None, time=None, identifier=None, other_attributes=None): - record = self.derivation(generatedEntity, usedEntity, activity, generation, usage, time, identifier, other_attributes) - record.add_asserted_type(PROV['Revision']) - return record - - def quotation(self, generatedEntity, usedEntity, activity=None, generation=None, usage=None, time=None, identifier=None, other_attributes=None): - record = self.derivation(generatedEntity, usedEntity, activity, generation, usage, time, identifier, other_attributes) - record.add_asserted_type(PROV['Quotation']) - return record - - def primary_source(self, generatedEntity, usedEntity, activity=None, generation=None, usage=None, time=None, identifier=None, other_attributes=None): - record = self.derivation(generatedEntity, usedEntity, activity, generation, usage, time, identifier, other_attributes) - record.add_asserted_type(PROV['PrimarySource']) - return record - - def specialization(self, specificEntity, generalEntity, identifier=None, other_attributes=None): - return self.add_record(PROV_REC_SPECIALIZATION, identifier, {PROV_ATTR_SPECIFIC_ENTITY: specificEntity, PROV_ATTR_GENERAL_ENTITY: generalEntity}, other_attributes) - - def alternate(self, alternate1, alternate2, identifier=None, other_attributes=None): - return self.add_record(PROV_REC_ALTERNATE, identifier, {PROV_ATTR_ALTERNATE1: alternate1, PROV_ATTR_ALTERNATE2: alternate2}, other_attributes) - - def mention(self, specificEntity, generalEntity, bundle, identifier=None, other_attributes=None): - return self.add_record(PROV_REC_MENTION, identifier, {PROV_ATTR_SPECIFIC_ENTITY: specificEntity, PROV_ATTR_GENERAL_ENTITY: generalEntity, PROV_ATTR_BUNDLE: bundle}, other_attributes) - - def collection(self, identifier, other_attributes=None): - record = self.add_element(PROV_REC_ENTITY, identifier, None, other_attributes) - record.add_asserted_type(PROV['Collection']) - return record - - def membership(self, collection, entity, identifier=None, other_attributes=None): - return self.add_record(PROV_REC_MEMBERSHIP, identifier, {PROV_ATTR_COLLECTION: collection, PROV_ATTR_ENTITY: entity}, other_attributes) - - def bundle(self, identifier, other_attributes=None): - return self.add_element(PROV_REC_BUNDLE, identifier, None, other_attributes) - - # Aliases - wasGeneratedBy = generation - used = usage - wasStartedBy = start - wasEndedBy = end - wasInvalidatedBy = invalidation - wasInformedBy = communication - wasAttributedTo = attribution - wasAssociatedWith = association - actedOnBehalfOf = delegation - wasInfluencedBy = influence - wasDerivedFrom = derivation - wasRevisionOf = revision - wasQuotedFrom = quotation - hadPrimarySource = primary_source - alternateOf = alternate - specializationOf = specialization - mentionOf = mention - hadMember = membership - -# Add the newly defined ProvBundle into the PROV class dictionary -PROV_REC_CLS[PROV_REC_BUNDLE] = ProvBundle diff --git a/nipype/info.py b/nipype/info.py index 60ec97b827..14252c2fdf 100644 --- a/nipype/info.py +++ b/nipype/info.py @@ -111,6 +111,7 @@ def get_nipype_gitversion(): NOSE_MIN_VERSION = '1.2' FUTURE_MIN_VERSION = '0.15.2' SIMPLEJSON_MIN_VERSION = '3.8.0' +PROV_MIN_VERSION = '1.4.0' NAME = 'nipype' MAINTAINER = "nipype developers" @@ -138,5 +139,6 @@ def get_nipype_gitversion(): "traits>=%s" % TRAITS_MIN_VERSION, "nose>=%s" % NOSE_MIN_VERSION, "future>=%s" % FUTURE_MIN_VERSION, - "simplejson>=%s" % SIMPLEJSON_MIN_VERSION] + "simplejson>=%s" % SIMPLEJSON_MIN_VERSION, + "prov>=%s" % PROV_MIN_VERSION] STATUS = 'stable' diff --git a/nipype/pipeline/utils.py b/nipype/pipeline/utils.py index 19569d3332..4b246fe437 100644 --- a/nipype/pipeline/utils.py +++ b/nipype/pipeline/utils.py @@ -1136,7 +1136,7 @@ def merge_bundles(g1, g2): return g1 -def write_workflow_prov(graph, filename=None, format='turtle'): +def write_workflow_prov(graph, filename=None, format='all'): """Write W3C PROV Model JSON file """ if not filename: @@ -1155,7 +1155,7 @@ def write_workflow_prov(graph, filename=None, format='turtle'): nipype_ns['hashval']: hashval} process = ps.g.activity(get_id(), None, None, attrs) if isinstance(result.runtime, list): - process.add_extra_attributes({pm.PROV["type"]: nipype_ns["MapNode"]}) + process.add_attributes({pm.PROV["type"]: nipype_ns["MapNode"]}) # add info about sub processes for idx, runtime in enumerate(result.runtime): subresult = InterfaceResult(result.interface[idx], @@ -1185,18 +1185,11 @@ def write_workflow_prov(graph, filename=None, format='turtle'): starter=processes[nodes.index(edgeinfo[0])]) # write provenance - try: - if format in ['turtle', 'all']: - ps.g.rdf().serialize(filename + '.ttl', format='turtle') - except (ImportError, NameError): - format = 'all' - finally: - if format in ['provn', 'all']: - with open(filename + '.provn', 'wt') as fp: - fp.writelines(ps.g.get_provn()) - if format in ['json', 'all']: - with open(filename + '.json', 'wt') as fp: - pm.json.dump(ps.g, fp, cls=pm.ProvBundle.JSONEncoder) + if format in ['provn', 'all']: + with open(filename + '.provn', 'wt') as fp: + fp.writelines(ps.g.get_provn()) + if format in ['json', 'all']: + ps.g.serialize(filename + '.json', format='json') return ps.g diff --git a/nipype/utils/provenance.py b/nipype/utils/provenance.py index f6c6e1e6a5..39727668be 100644 --- a/nipype/utils/provenance.py +++ b/nipype/utils/provenance.py @@ -15,10 +15,7 @@ except ImportError: from ordereddict import OrderedDict -# try: -# import prov.model as pm -# except ImportError: -from ..external import provcopy as pm +import prov.model as pm from ..external.six import string_types from .. import get_info @@ -70,7 +67,7 @@ def get_hashval(inputdict, skip=None): for key in inputdict: if skip is not None and key in skip: continue - keys[key.get_uri()] = key + keys[key.uri] = key for key in sorted(keys): val = inputdict[keys[key]] outname = key @@ -79,10 +76,10 @@ def get_hashval(inputdict, skip=None): val = val.decode() except AttributeError: pass - if isinstance(val, pm.QName): - val = val.get_uri() + if isinstance(val, pm.QualifiedName): + val = val.uri if isinstance(val, pm.Literal): - val = val.get_value() + val = val.value dict_nofilename[outname] = _get_sorteddict(val) dict_withhash[outname] = _get_sorteddict(val, True) return (dict_withhash, md5(str(dict_nofilename)).hexdigest()) @@ -205,9 +202,9 @@ def prov_encode(graph, value, create_container=True): entities.append(item_entity) if isinstance(item, list): continue - if not isinstance(item_entity.get_value()[0], string_types): + if not isinstance(item_entity.value[0], string_types): raise ValueError('Not a string literal') - if 'file://' not in item_entity.get_value()[0]: + if 'file://' not in item_entity.value[0]: raise ValueError('No file found') id = get_id() entity = graph.collection(identifier=id) @@ -246,7 +243,7 @@ def write_provenance(results, filename='provenance', format='turtle'): class ProvStore(object): def __init__(self): - self.g = pm.ProvBundle(identifier=get_id()) + self.g = pm.ProvDocument() #ProvBundle(identifier=get_id()) self.g.add_namespace(foaf) self.g.add_namespace(dcterms) self.g.add_namespace(nipype_ns) @@ -277,11 +274,8 @@ def add_results(self, results): nipype_ns['platform']: safe_encode(runtime.platform), nipype_ns['version']: safe_encode(runtime.version), } - try: - a0_attrs[foaf["host"]] = pm.URIRef(runtime.hostname) - except AttributeError: - a0_attrs[foaf["host"]] = pm.Literal(runtime.hostname, - pm.XSD['anyURI']) + a0_attrs[foaf["host"]] = pm.Literal(runtime.hostname, + pm.XSD['anyURI']) try: a0_attrs.update({nipype_ns['command']: safe_encode(runtime.cmdline)}) @@ -296,9 +290,9 @@ def add_results(self, results): # environment id = get_id() env_collection = self.g.collection(id) - env_collection.add_extra_attributes({pm.PROV['type']: - nipype_ns['Environment'], - pm.PROV['label']: "Environment"}) + env_collection.add_attributes({pm.PROV['type']: + nipype_ns['Environment'], + pm.PROV['label']: "Environment"}) self.g.used(a0, id) # write environment entities for idx, (key, val) in enumerate(sorted(runtime.environ.items())): @@ -319,12 +313,12 @@ def add_results(self, results): if inputs: id = get_id() input_collection = self.g.collection(id) - input_collection.add_extra_attributes({pm.PROV['type']: - nipype_ns['Inputs'], - pm.PROV['label']: "Inputs"}) + input_collection.add_attributes({pm.PROV['type']: + nipype_ns['Inputs'], + pm.PROV['label']: "Inputs"}) # write input entities for idx, (key, val) in enumerate(sorted(inputs.items())): - in_entity = prov_encode(self.g, val).get_identifier() + in_entity = prov_encode(self.g, val).identifier self.g.hadMember(input_collection, in_entity) used_attr = {pm.PROV["label"]: key, nipype_ns["inPort"]: key} @@ -336,14 +330,14 @@ def add_results(self, results): output_collection = self.g.collection(id) if not isinstance(outputs, dict): outputs = outputs.get_traitsfree() - output_collection.add_extra_attributes({pm.PROV['type']: - nipype_ns['Outputs'], - pm.PROV['label']: - "Outputs"}) + output_collection.add_attributes({pm.PROV['type']: + nipype_ns['Outputs'], + pm.PROV['label']: + "Outputs"}) self.g.wasGeneratedBy(output_collection, a0) # write output entities for idx, (key, val) in enumerate(sorted(outputs.items())): - out_entity = prov_encode(self.g, val).get_identifier() + out_entity = prov_encode(self.g, val).identifier self.g.hadMember(output_collection, out_entity) gen_attr = {pm.PROV["label"]: key, nipype_ns["outPort"]: key} @@ -352,10 +346,10 @@ def add_results(self, results): # write runtime entities id = get_id() runtime_collection = self.g.collection(id) - runtime_collection.add_extra_attributes({pm.PROV['type']: - nipype_ns['Runtime'], - pm.PROV['label']: - "RuntimeInfo"}) + runtime_collection.add_attributes({pm.PROV['type']: + nipype_ns['Runtime'], + pm.PROV['label']: + "RuntimeInfo"}) self.g.wasGeneratedBy(runtime_collection, a0) for key, value in sorted(runtime.items()): if not value: @@ -384,17 +378,10 @@ def add_results(self, results): self.g.wasAssociatedWith(a0, software_agent) return self.g - def write_provenance(self, filename='provenance', format='turtle'): - try: - if format in ['turtle', 'all']: - self.g.rdf().serialize(filename + '.ttl', format='turtle') - except (ImportError, NameError): - format = 'all' - finally: - if format in ['provn', 'all']: - with open(filename + '.provn', 'wt') as fp: - fp.writelines(self.g.get_provn()) - if format in ['json', 'all']: - with open(filename + '.json', 'wt') as fp: - pm.simplejson.dump(self.g, fp, cls=pm.ProvBundle.JSONEncoder) + def write_provenance(self, filename='provenance', format='all'): + if format in ['provn', 'all']: + with open(filename + '.provn', 'wt') as fp: + fp.writelines(self.g.get_provn()) + if format in ['json', 'all']: + g.serialize(filename + '.json', format='json') return self.g diff --git a/nipype/utils/tests/test_provenance.py b/nipype/utils/tests/test_provenance.py new file mode 100644 index 0000000000..0dc963aeca --- /dev/null +++ b/nipype/utils/tests/test_provenance.py @@ -0,0 +1,18 @@ +# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- +# vi: set ft=python sts=4 ts=4 sw=4 et: +from future import standard_library +standard_library.install_aliases() + +from nipype.testing import assert_equal, assert_true, assert_false + +from ..provenance import ProvStore + + +def test_provenance(): + ps = ProvStore() + from ...interfaces.base import CommandLine + results = CommandLine('echo hello').run() + ps.add_results(results) + provn = ps.g.get_provn() + prov_json = ps.g.serialize(format='json') + yield assert_true, 'echo hello' in provn diff --git a/requirements.txt b/requirements.txt index d3289968c5..a236c59846 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ nibabel>=2.0.1 nose>=1.2 future==0.15.2 simplejson +prov>=1.4.0 \ No newline at end of file diff --git a/setup.py b/setup.py index 3d81cfe495..5a5159d166 100755 --- a/setup.py +++ b/setup.py @@ -274,6 +274,7 @@ def _package_status(pkg_name, version, version_getter, checker): pkg_chk('nose', NOSE_MIN_VERSION) pkg_chk('future', FUTURE_MIN_VERSION) pkg_chk('simplejson', SIMPLEJSON_MIN_VERSION) +pkg_chk('prov', PROV_MIN_VERSION) custom_dateutil_messages = {'missing opt': ('Missing optional package "%s"' ' provided by package ' '"python-dateutil"')} From 8cacff0eaf1d6f6fe5bb9d4fb6a1e062b3ca9696 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 19 Nov 2015 10:00:03 -0500 Subject: [PATCH 02/19] fix: account for sets --- nipype/utils/provenance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nipype/utils/provenance.py b/nipype/utils/provenance.py index 39727668be..161a57b2b5 100644 --- a/nipype/utils/provenance.py +++ b/nipype/utils/provenance.py @@ -202,9 +202,9 @@ def prov_encode(graph, value, create_container=True): entities.append(item_entity) if isinstance(item, list): continue - if not isinstance(item_entity.value[0], string_types): + if not isinstance(list(item_entity.value)[0], string_types): raise ValueError('Not a string literal') - if 'file://' not in item_entity.value[0]: + if 'file://' not in list(item_entity.value)[0]: raise ValueError('No file found') id = get_id() entity = graph.collection(identifier=id) From 1e205a78d33d3b0412afc131a6609d7fc50a3f17 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 19 Nov 2015 13:09:57 -0500 Subject: [PATCH 03/19] fix: remove prov/rdf lib and add make check fixes --- .travis.yml | 2 - nipype/algorithms/tests/test_auto_ErrorMap.py | 35 -------------- nipype/algorithms/tests/test_auto_Overlap.py | 47 ------------------- nipype/interfaces/freesurfer/utils.py | 2 +- requirements.txt | 4 +- 5 files changed, 3 insertions(+), 87 deletions(-) delete mode 100644 nipype/algorithms/tests/test_auto_ErrorMap.py delete mode 100644 nipype/algorithms/tests/test_auto_Overlap.py diff --git a/.travis.yml b/.travis.yml index ea0c8e6a1f..fe9bf96a93 100644 --- a/.travis.yml +++ b/.travis.yml @@ -33,8 +33,6 @@ install: - pip install python-coveralls - pip install nose-cov - pip install -r requirements.txt # finish remaining requirements -- pip install https://github.com/RDFLib/rdflib/archive/master.zip -- pip install https://github.com/trungdong/prov/archive/rdf.zip - python setup.py install script: - python -W once:FSL:UserWarning:nipype `which nosetests` --with-doctest --with-cov --cover-package nipype --cov-config .coveragerc --logging-level=INFO diff --git a/nipype/algorithms/tests/test_auto_ErrorMap.py b/nipype/algorithms/tests/test_auto_ErrorMap.py deleted file mode 100644 index 69484529dd..0000000000 --- a/nipype/algorithms/tests/test_auto_ErrorMap.py +++ /dev/null @@ -1,35 +0,0 @@ -# AUTO-GENERATED by tools/checkspecs.py - DO NOT EDIT -from ...testing import assert_equal -from ..metrics import ErrorMap - - -def test_ErrorMap_inputs(): - input_map = dict(ignore_exception=dict(nohash=True, - usedefault=True, - ), - in_ref=dict(mandatory=True, - ), - in_tst=dict(mandatory=True, - ), - mask=dict(), - metric=dict(mandatory=True, - usedefault=True, - ), - out_map=dict(), - ) - inputs = ErrorMap.input_spec() - - for key, metadata in list(input_map.items()): - for metakey, value in list(metadata.items()): - yield assert_equal, getattr(inputs.traits()[key], metakey), value - - -def test_ErrorMap_outputs(): - output_map = dict(distance=dict(), - out_map=dict(), - ) - outputs = ErrorMap.output_spec() - - for key, metadata in list(output_map.items()): - for metakey, value in list(metadata.items()): - yield assert_equal, getattr(outputs.traits()[key], metakey), value diff --git a/nipype/algorithms/tests/test_auto_Overlap.py b/nipype/algorithms/tests/test_auto_Overlap.py deleted file mode 100644 index a5a3874bd1..0000000000 --- a/nipype/algorithms/tests/test_auto_Overlap.py +++ /dev/null @@ -1,47 +0,0 @@ -# AUTO-GENERATED by tools/checkspecs.py - DO NOT EDIT -from ...testing import assert_equal -from ..misc import Overlap - - -def test_Overlap_inputs(): - input_map = dict(bg_overlap=dict(mandatory=True, - usedefault=True, - ), - ignore_exception=dict(nohash=True, - usedefault=True, - ), - mask_volume=dict(), - out_file=dict(usedefault=True, - ), - vol_units=dict(mandatory=True, - usedefault=True, - ), - volume1=dict(mandatory=True, - ), - volume2=dict(mandatory=True, - ), - weighting=dict(usedefault=True, - ), - ) - inputs = Overlap.input_spec() - - for key, metadata in list(input_map.items()): - for metakey, value in list(metadata.items()): - yield assert_equal, getattr(inputs.traits()[key], metakey), value - - -def test_Overlap_outputs(): - output_map = dict(dice=dict(), - diff_file=dict(), - jaccard=dict(), - labels=dict(), - roi_di=dict(), - roi_ji=dict(), - roi_voldiff=dict(), - volume_difference=dict(), - ) - outputs = Overlap.output_spec() - - for key, metadata in list(output_map.items()): - for metakey, value in list(metadata.items()): - yield assert_equal, getattr(outputs.traits()[key], metakey), value diff --git a/nipype/interfaces/freesurfer/utils.py b/nipype/interfaces/freesurfer/utils.py index 73bef76ed3..0213dc30d4 100644 --- a/nipype/interfaces/freesurfer/utils.py +++ b/nipype/interfaces/freesurfer/utils.py @@ -360,7 +360,7 @@ def _list_outputs(self): source = self.inputs.source_file else: source = self.inputs.source_annot_file - + # Some recon-all files don't have a proper extension (e.g. "lh.thickness") # so we have to account for that here bad_extensions = [".%s" % e for e in ["area", "mid", "pial", "avg_curv", "curv", "inflated", diff --git a/requirements.txt b/requirements.txt index a236c59846..7fa14bd8f5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,5 +6,5 @@ python-dateutil>=1.5 nibabel>=2.0.1 nose>=1.2 future==0.15.2 -simplejson -prov>=1.4.0 \ No newline at end of file +simplejson>=3.8.0 +prov>=1.4.0 From 48b7b6b6f62782c2202aaf685b03660415e0a169 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 19 Nov 2015 14:26:18 -0500 Subject: [PATCH 04/19] fix: ensure encoding happens before hashing --- nipype/utils/provenance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nipype/utils/provenance.py b/nipype/utils/provenance.py index 161a57b2b5..2eb5bd101f 100644 --- a/nipype/utils/provenance.py +++ b/nipype/utils/provenance.py @@ -82,7 +82,7 @@ def get_hashval(inputdict, skip=None): val = val.value dict_nofilename[outname] = _get_sorteddict(val) dict_withhash[outname] = _get_sorteddict(val, True) - return (dict_withhash, md5(str(dict_nofilename)).hexdigest()) + return (dict_withhash, md5(dict_nofilename.encode()).hexdigest()) def _get_sorteddict(object, dictwithhash=False): From 601b8a399ef53f1bf009259de50e7309ab786d8e Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 19 Nov 2015 15:29:45 -0500 Subject: [PATCH 05/19] fix: changing extra_attributes call --- nipype/pipeline/utils.py | 2 +- nipype/utils/provenance.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/nipype/pipeline/utils.py b/nipype/pipeline/utils.py index 4b246fe437..aa41c3cac4 100644 --- a/nipype/pipeline/utils.py +++ b/nipype/pipeline/utils.py @@ -1172,7 +1172,7 @@ def write_workflow_prov(graph, filename=None, format='all'): ps.g = merge_bundles(ps.g, sub_bundle) ps.g.wasGeneratedBy(sub_bundle, process) else: - process.add_extra_attributes({pm.PROV["type"]: nipype_ns["Node"]}) + process.add_attributes({pm.PROV["type"]: nipype_ns["Node"]}) result_bundle = ProvStore().add_results(result) ps.g = merge_bundles(ps.g, result_bundle) ps.g.wasGeneratedBy(result_bundle, process) diff --git a/nipype/utils/provenance.py b/nipype/utils/provenance.py index 2eb5bd101f..653d988c0e 100644 --- a/nipype/utils/provenance.py +++ b/nipype/utils/provenance.py @@ -82,7 +82,8 @@ def get_hashval(inputdict, skip=None): val = val.value dict_nofilename[outname] = _get_sorteddict(val) dict_withhash[outname] = _get_sorteddict(val, True) - return (dict_withhash, md5(dict_nofilename.encode()).hexdigest()) + sorted_dict = str(sorted(dict_nofilename.items())) + return (dict_withhash, md5(str(dict_nofilename).encode()).hexdigest()) def _get_sorteddict(object, dictwithhash=False): From 1c382327355acf96b34a007a71ba54645804a8c9 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 19 Nov 2015 15:30:33 -0500 Subject: [PATCH 06/19] fix: ordered dict to str --- nipype/utils/provenance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nipype/utils/provenance.py b/nipype/utils/provenance.py index 653d988c0e..71890188b1 100644 --- a/nipype/utils/provenance.py +++ b/nipype/utils/provenance.py @@ -83,7 +83,7 @@ def get_hashval(inputdict, skip=None): dict_nofilename[outname] = _get_sorteddict(val) dict_withhash[outname] = _get_sorteddict(val, True) sorted_dict = str(sorted(dict_nofilename.items())) - return (dict_withhash, md5(str(dict_nofilename).encode()).hexdigest()) + return (dict_withhash, md5(sorted_dict.encode()).hexdigest()) def _get_sorteddict(object, dictwithhash=False): From db878138b1916ed5d8b38d31671f3c3810ec5ebc Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Mon, 23 Nov 2015 17:55:22 -0500 Subject: [PATCH 07/19] fix: changing commit format to support py3 --- nipype/COMMIT_INFO.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nipype/COMMIT_INFO.txt b/nipype/COMMIT_INFO.txt index dcaee0b8ed..7a32053bd4 100644 --- a/nipype/COMMIT_INFO.txt +++ b/nipype/COMMIT_INFO.txt @@ -1,6 +1,6 @@ # This is an ini file that may contain information about the code state [commit hash] # The line below may contain a valid hash if it has been substituted during 'git archive' -archive_subst_hash=$Format:%h$ +archive_subst_hash=$Format:%%h$ # This line may be modified by the install process install_hash= From 6f86bd9d91c9fc6e2ca91f76d7766d06bdbdec9e Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 3 Dec 2015 15:48:27 -0500 Subject: [PATCH 08/19] fix: string decoding for prov literal and few pipeline job submission info statements --- nipype/pipeline/plugins/base.py | 13 ++++++------- nipype/utils/provenance.py | 12 ++++++------ 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/nipype/pipeline/plugins/base.py b/nipype/pipeline/plugins/base.py index 7d9ae77c2b..9b7adad343 100644 --- a/nipype/pipeline/plugins/base.py +++ b/nipype/pipeline/plugins/base.py @@ -342,7 +342,10 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): (self.depidx.sum(axis=0) == 0).__array__()) if len(jobids) > 0: # send all available jobs - logger.info('Submitting %d jobs' % len(jobids[:slots])) + if slots: + logger.info('Pending[%d] Submitting[%d] jobs Slots[%d]' % (num_jobs, len(jobids[:slots]), slots)) + else: + logger.info('Pending[%d] Submitting[%d] jobs Slots[inf]' % (num_jobs, len(jobids))) for jobid in jobids[:slots]: if isinstance(self.procs[jobid], MapNode): try: @@ -401,6 +404,8 @@ def _send_procs_to_workers(self, updatehash=False, graph=None): self.proc_pending[jobid] = False else: self.pending_tasks.insert(0, (tid, jobid)) + logger.info('Finished executing: %s ID: %d' % + (self.procs[jobid]._id, jobid)) else: break @@ -507,9 +512,6 @@ def _get_result(self, taskid): timed_out = True while (time() - t) < timeout: try: - logger.debug(os.listdir(os.path.realpath(os.path.join(node_dir, - '..')))) - logger.debug(os.listdir(node_dir)) glob(os.path.join(node_dir, 'result_*.pklz')).pop() timed_out = False break @@ -633,9 +635,6 @@ def _get_result(self, taskid): return None node_dir = self._pending[taskid] - logger.debug(os.listdir(os.path.realpath(os.path.join(node_dir, - '..')))) - logger.debug(os.listdir(node_dir)) glob(os.path.join(node_dir, 'result_*.pklz')).pop() results_file = glob(os.path.join(node_dir, 'result_*.pklz'))[0] diff --git a/nipype/utils/provenance.py b/nipype/utils/provenance.py index 71890188b1..79468fbc54 100644 --- a/nipype/utils/provenance.py +++ b/nipype/utils/provenance.py @@ -119,7 +119,7 @@ def safe_encode(x, as_literal=True): if x is None: value = "Unknown" if as_literal: - return pm.Literal(value, pm.XSD['string']) + return pm.Literal(value.decode('utf-8'), pm.XSD['string']) else: return value try: @@ -131,7 +131,7 @@ def safe_encode(x, as_literal=True): try: return pm.URIRef(value) except AttributeError: - return pm.Literal(value, pm.XSD['anyURI']) + return pm.Literal(value.decode('utf-8'), pm.XSD['anyURI']) else: if len(x) > max_text_len: value = x[:max_text_len - 13] + ['...Clipped...'] @@ -139,7 +139,7 @@ def safe_encode(x, as_literal=True): value = x if not as_literal: return value - return pm.Literal(value, pm.XSD['string']) + return pm.Literal(value.decode('utf-8'), pm.XSD['string']) if isinstance(x, int): if not as_literal: return x @@ -158,7 +158,7 @@ def safe_encode(x, as_literal=True): outdict[key] = encoded_value if not as_literal: return simplejson.dumps(outdict) - return pm.Literal(simplejson.dumps(outdict), pm.XSD['string']) + return pm.Literal(simplejson.dumps(outdict).decode('utf-8'), pm.XSD['string']) if isinstance(x, list): try: nptype = np.array(x).dtype @@ -176,7 +176,7 @@ def safe_encode(x, as_literal=True): outlist = x if not as_literal: return simplejson.dumps(outlist) - return pm.Literal(simplejson.dumps(outlist), pm.XSD['string']) + return pm.Literal(simplejson.dumps(outlist).decode('utf-8'), pm.XSD['string']) if not as_literal: return dumps(x) return pm.Literal(dumps(x), nipype_ns['pickle']) @@ -185,7 +185,7 @@ def safe_encode(x, as_literal=True): value = "Could not encode: " + str(e) if not as_literal: return value - return pm.Literal(value, pm.XSD['string']) + return pm.Literal(value.decode('utf-8'), pm.XSD['string']) def prov_encode(graph, value, create_container=True): From ca2be1f23e6dba4ae92d0460a6fc093b145a8e32 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 3 Dec 2015 15:49:23 -0500 Subject: [PATCH 09/19] tst: add prov test --- nipype/pipeline/tests/test_utils.py | 14 +++++++++++++- nipype/pipeline/utils.py | 2 +- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/nipype/pipeline/tests/test_utils.py b/nipype/pipeline/tests/test_utils.py index 3e559acb53..6342893076 100644 --- a/nipype/pipeline/tests/test_utils.py +++ b/nipype/pipeline/tests/test_utils.py @@ -14,7 +14,7 @@ import nipype.interfaces.base as nib import nipype.interfaces.utility as niu from ... import config -from ..utils import merge_dict, clean_working_directory +from ..utils import merge_dict, clean_working_directory, write_workflow_prov def test_identitynode_removal(): @@ -330,3 +330,15 @@ def test_multi_disconnected_iterable(): eg = metawf.run(plugin='Linear') yield assert_equal, len(eg.nodes()), 60 rmtree(out_dir) + +def test_provenance(): + out_dir = mkdtemp() + metawf = pe.Workflow(name='meta') + metawf.base_dir = out_dir + metawf.add_nodes([create_wf('wf%d' % i) for i in range(30)]) + eg = metawf.run(plugin='Linear') + prov_base = os.path.join(out_dir, + 'workflow_provenance_test') + psg = write_workflow_prov(eg, prov_base, format='all') + yield assert_equal, psg, 1 + rmtree(out_dir) diff --git a/nipype/pipeline/utils.py b/nipype/pipeline/utils.py index aa41c3cac4..e692d8e3d2 100644 --- a/nipype/pipeline/utils.py +++ b/nipype/pipeline/utils.py @@ -1173,7 +1173,7 @@ def write_workflow_prov(graph, filename=None, format='all'): ps.g.wasGeneratedBy(sub_bundle, process) else: process.add_attributes({pm.PROV["type"]: nipype_ns["Node"]}) - result_bundle = ProvStore().add_results(result) + result_bundle = ProvStore().add_results(result).bundle() ps.g = merge_bundles(ps.g, result_bundle) ps.g.wasGeneratedBy(result_bundle, process) processes.append(process) From 728585c3d3270036a8b8c926ba8a0283e538422a Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 3 Dec 2015 20:35:08 -0500 Subject: [PATCH 10/19] tst: added missing auto tests --- .../tests/test_auto_antsBrainExtraction.py | 65 +++++++++++++ .../tests/test_auto_antsCorticalThickness.py | 96 +++++++++++++++++++ 2 files changed, 161 insertions(+) create mode 100644 nipype/interfaces/ants/tests/test_auto_antsBrainExtraction.py create mode 100644 nipype/interfaces/ants/tests/test_auto_antsCorticalThickness.py diff --git a/nipype/interfaces/ants/tests/test_auto_antsBrainExtraction.py b/nipype/interfaces/ants/tests/test_auto_antsBrainExtraction.py new file mode 100644 index 0000000000..5661eddd02 --- /dev/null +++ b/nipype/interfaces/ants/tests/test_auto_antsBrainExtraction.py @@ -0,0 +1,65 @@ +# AUTO-GENERATED by tools/checkspecs.py - DO NOT EDIT +from ....testing import assert_equal +from ..segmentation import antsBrainExtraction + + +def test_antsBrainExtraction_inputs(): + input_map = dict(anatomical_image=dict(argstr='-a %s', + mandatory=True, + ), + args=dict(argstr='%s', + ), + brain_probability_mask=dict(argstr='-m %s', + copyfile=False, + mandatory=True, + ), + brain_template=dict(argstr='-e %s', + mandatory=True, + ), + debug=dict(argstr='-z 1', + ), + dimension=dict(argstr='-d %d', + usedefault=True, + ), + environ=dict(nohash=True, + usedefault=True, + ), + extraction_registration_mask=dict(argstr='-f %s', + ), + ignore_exception=dict(nohash=True, + usedefault=True, + ), + image_suffix=dict(argstr='-s %s', + usedefault=True, + ), + keep_temporary_files=dict(argstr='-k %d', + ), + num_threads=dict(nohash=True, + usedefault=True, + ), + out_prefix=dict(argstr='-o %s', + usedefault=True, + ), + terminal_output=dict(nohash=True, + ), + use_floatingpoint_precision=dict(argstr='-q %d', + ), + use_random_seeding=dict(argstr='-u %d', + ), + ) + inputs = antsBrainExtraction.input_spec() + + for key, metadata in list(input_map.items()): + for metakey, value in list(metadata.items()): + yield assert_equal, getattr(inputs.traits()[key], metakey), value + + +def test_antsBrainExtraction_outputs(): + output_map = dict(BrainExtractionBrain=dict(), + BrainExtractionMask=dict(), + ) + outputs = antsBrainExtraction.output_spec() + + for key, metadata in list(output_map.items()): + for metakey, value in list(metadata.items()): + yield assert_equal, getattr(outputs.traits()[key], metakey), value diff --git a/nipype/interfaces/ants/tests/test_auto_antsCorticalThickness.py b/nipype/interfaces/ants/tests/test_auto_antsCorticalThickness.py new file mode 100644 index 0000000000..0944ebf1b7 --- /dev/null +++ b/nipype/interfaces/ants/tests/test_auto_antsCorticalThickness.py @@ -0,0 +1,96 @@ +# AUTO-GENERATED by tools/checkspecs.py - DO NOT EDIT +from ....testing import assert_equal +from ..segmentation import antsCorticalThickness + + +def test_antsCorticalThickness_inputs(): + input_map = dict(anatomical_image=dict(argstr='-a %s', + mandatory=True, + ), + args=dict(argstr='%s', + ), + b_spline_smoothing=dict(argstr='-v', + ), + brain_probability_mask=dict(argstr='-m %s', + copyfile=False, + mandatory=True, + ), + brain_template=dict(argstr='-e %s', + mandatory=True, + ), + cortical_label_image=dict(), + debug=dict(argstr='-z 1', + ), + dimension=dict(argstr='-d %d', + usedefault=True, + ), + environ=dict(nohash=True, + usedefault=True, + ), + extraction_registration_mask=dict(argstr='-f %s', + ), + ignore_exception=dict(nohash=True, + usedefault=True, + ), + image_suffix=dict(argstr='-s %s', + usedefault=True, + ), + keep_temporary_files=dict(argstr='-k %d', + ), + label_propagation=dict(argstr='-l %s', + ), + max_iterations=dict(argstr='-i %d', + ), + num_threads=dict(nohash=True, + usedefault=True, + ), + out_prefix=dict(argstr='-o %s', + usedefault=True, + ), + posterior_formulation=dict(argstr='-b %s', + ), + prior_segmentation_weight=dict(argstr='-w %f', + ), + quick_registration=dict(argstr='-q 1', + ), + segmentation_iterations=dict(argstr='-n %d', + ), + segmentation_priors=dict(argstr='-p %s', + mandatory=True, + ), + t1_registration_template=dict(argstr='-t %s', + mandatory=True, + ), + terminal_output=dict(nohash=True, + ), + use_floatingpoint_precision=dict(argstr='-j %d', + ), + use_random_seeding=dict(argstr='-u %d', + ), + ) + inputs = antsCorticalThickness.input_spec() + + for key, metadata in list(input_map.items()): + for metakey, value in list(metadata.items()): + yield assert_equal, getattr(inputs.traits()[key], metakey), value + + +def test_antsCorticalThickness_outputs(): + output_map = dict(BrainExtractionMask=dict(), + BrainSegmentation=dict(), + BrainSegmentationN4=dict(), + BrainSegmentationPosteriors=dict(), + BrainVolumes=dict(), + CorticalThickness=dict(), + CorticalThicknessNormedToTemplate=dict(), + SubjectToTemplate0GenericAffine=dict(), + SubjectToTemplate1Warp=dict(), + SubjectToTemplateLogJacobian=dict(), + TemplateToSubject0Warp=dict(), + TemplateToSubject1GenericAffine=dict(), + ) + outputs = antsCorticalThickness.output_spec() + + for key, metadata in list(output_map.items()): + for metakey, value in list(metadata.items()): + yield assert_equal, getattr(outputs.traits()[key], metakey), value From db51634c36a34065a6a35a8413eecb7675f7d4d5 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 3 Dec 2015 20:35:40 -0500 Subject: [PATCH 11/19] fix: updated provenance support to use pypi prov package --- nipype/pipeline/tests/test_utils.py | 5 +++-- nipype/pipeline/utils.py | 25 +++++++++++++++++++------ nipype/utils/provenance.py | 12 +++++------- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/nipype/pipeline/tests/test_utils.py b/nipype/pipeline/tests/test_utils.py index 6342893076..50d44b78a0 100644 --- a/nipype/pipeline/tests/test_utils.py +++ b/nipype/pipeline/tests/test_utils.py @@ -335,10 +335,11 @@ def test_provenance(): out_dir = mkdtemp() metawf = pe.Workflow(name='meta') metawf.base_dir = out_dir - metawf.add_nodes([create_wf('wf%d' % i) for i in range(30)]) + metawf.add_nodes([create_wf('wf%d' % i) for i in range(1)]) eg = metawf.run(plugin='Linear') prov_base = os.path.join(out_dir, 'workflow_provenance_test') psg = write_workflow_prov(eg, prov_base, format='all') - yield assert_equal, psg, 1 + yield assert_equal, len(psg.bundles), 2 + yield assert_equal, len(psg.get_records()), 7 rmtree(out_dir) diff --git a/nipype/pipeline/utils.py b/nipype/pipeline/utils.py index e692d8e3d2..64caa482eb 100644 --- a/nipype/pipeline/utils.py +++ b/nipype/pipeline/utils.py @@ -1168,14 +1168,27 @@ def write_workflow_prov(graph, filename=None, format='all'): values = getattr(result.outputs, key) if isdefined(values) and idx < len(values): subresult.outputs[key] = values[idx] - sub_bundle = ProvStore().add_results(subresult) - ps.g = merge_bundles(ps.g, sub_bundle) - ps.g.wasGeneratedBy(sub_bundle, process) + sub_doc = ProvStore().add_results(subresult) + sub_bundle = pm.ProvBundle(sub_doc.get_records(), + identifier=get_id()) + ps.g.add_bundle(sub_bundle) + bundle_entity = ps.g.entity(sub_bundle.identifier, + other_attributes={'prov:type': + pm.PROV_BUNDLE}) + ps.g.wasGeneratedBy(bundle_entity, process) else: process.add_attributes({pm.PROV["type"]: nipype_ns["Node"]}) - result_bundle = ProvStore().add_results(result).bundle() - ps.g = merge_bundles(ps.g, result_bundle) - ps.g.wasGeneratedBy(result_bundle, process) + if result.provenance: + prov_doc = result.provenance + else: + prov_doc = ProvStore().add_results(result) + result_bundle = pm.ProvBundle(prov_doc.get_records(), + identifier=get_id()) + ps.g.add_bundle(result_bundle) + bundle_entity = ps.g.entity(result_bundle.identifier, + other_attributes={'prov:type': + pm.PROV_BUNDLE}) + ps.g.wasGeneratedBy(bundle_entity, process) processes.append(process) # add dependencies (edges) diff --git a/nipype/utils/provenance.py b/nipype/utils/provenance.py index 79468fbc54..6325bb18d3 100644 --- a/nipype/utils/provenance.py +++ b/nipype/utils/provenance.py @@ -2,6 +2,7 @@ standard_library.install_aliases() from builtins import object +from copy import deepcopy from pickle import dumps import simplejson import os @@ -244,18 +245,15 @@ def write_provenance(results, filename='provenance', format='turtle'): class ProvStore(object): def __init__(self): - self.g = pm.ProvDocument() #ProvBundle(identifier=get_id()) + self.g = pm.ProvDocument() self.g.add_namespace(foaf) self.g.add_namespace(dcterms) self.g.add_namespace(nipype_ns) self.g.add_namespace(niiri) - def add_results(self, results): - if results.provenance: - try: - self.g.add_bundle(results.provenance) - except pm.ProvException: - self.g.add_bundle(results.provenance, get_id()) + def add_results(self, results, keep_provenance=False): + if keep_provenance and results.provenance: + self.g = deepcopy(results.provenance) return self.g runtime = results.runtime interface = results.interface From 7fe6251928319c8df206b87c9192dae193024f95 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 3 Dec 2015 21:10:53 -0500 Subject: [PATCH 12/19] fix: removed decode call --- nipype/utils/provenance.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/nipype/utils/provenance.py b/nipype/utils/provenance.py index 6325bb18d3..44737d13eb 100644 --- a/nipype/utils/provenance.py +++ b/nipype/utils/provenance.py @@ -120,11 +120,12 @@ def safe_encode(x, as_literal=True): if x is None: value = "Unknown" if as_literal: - return pm.Literal(value.decode('utf-8'), pm.XSD['string']) + return pm.Literal(value, pm.XSD['string']) else: return value try: if isinstance(x, (str, string_types)): + iflogger.info(type(x)) if os.path.exists(x): value = 'file://%s%s' % (getfqdn(), x) if not as_literal: @@ -132,7 +133,7 @@ def safe_encode(x, as_literal=True): try: return pm.URIRef(value) except AttributeError: - return pm.Literal(value.decode('utf-8'), pm.XSD['anyURI']) + return pm.Literal(value, pm.XSD['anyURI']) else: if len(x) > max_text_len: value = x[:max_text_len - 13] + ['...Clipped...'] @@ -140,7 +141,7 @@ def safe_encode(x, as_literal=True): value = x if not as_literal: return value - return pm.Literal(value.decode('utf-8'), pm.XSD['string']) + return pm.Literal(value, pm.XSD['string']) if isinstance(x, int): if not as_literal: return x @@ -159,7 +160,7 @@ def safe_encode(x, as_literal=True): outdict[key] = encoded_value if not as_literal: return simplejson.dumps(outdict) - return pm.Literal(simplejson.dumps(outdict).decode('utf-8'), pm.XSD['string']) + return pm.Literal(simplejson.dumps(outdict), pm.XSD['string']) if isinstance(x, list): try: nptype = np.array(x).dtype @@ -177,7 +178,7 @@ def safe_encode(x, as_literal=True): outlist = x if not as_literal: return simplejson.dumps(outlist) - return pm.Literal(simplejson.dumps(outlist).decode('utf-8'), pm.XSD['string']) + return pm.Literal(simplejson.dumps(outlist), pm.XSD['string']) if not as_literal: return dumps(x) return pm.Literal(dumps(x), nipype_ns['pickle']) @@ -186,7 +187,7 @@ def safe_encode(x, as_literal=True): value = "Could not encode: " + str(e) if not as_literal: return value - return pm.Literal(value.decode('utf-8'), pm.XSD['string']) + return pm.Literal(value, pm.XSD['string']) def prov_encode(graph, value, create_container=True): From 2eaecfa53d551ddf9b1670664a0f30c79b3ebf33 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Thu, 3 Dec 2015 23:07:26 -0500 Subject: [PATCH 13/19] fix: removed iflogger statement --- nipype/utils/provenance.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nipype/utils/provenance.py b/nipype/utils/provenance.py index 44737d13eb..0eee295a40 100644 --- a/nipype/utils/provenance.py +++ b/nipype/utils/provenance.py @@ -125,7 +125,6 @@ def safe_encode(x, as_literal=True): return value try: if isinstance(x, (str, string_types)): - iflogger.info(type(x)) if os.path.exists(x): value = 'file://%s%s' % (getfqdn(), x) if not as_literal: From 74cd3bdfa1a05c037e531b50e0d75c30861c05da Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Sat, 12 Dec 2015 13:44:44 -0500 Subject: [PATCH 14/19] fix: use text_type to control unicode output --- nipype/utils/provenance.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nipype/utils/provenance.py b/nipype/utils/provenance.py index 0eee295a40..712e2dcff4 100644 --- a/nipype/utils/provenance.py +++ b/nipype/utils/provenance.py @@ -17,7 +17,7 @@ from ordereddict import OrderedDict import prov.model as pm -from ..external.six import string_types +from ..external.six import string_types, text_type from .. import get_info from .filemanip import (md5, hashlib, hash_infile) @@ -140,7 +140,7 @@ def safe_encode(x, as_literal=True): value = x if not as_literal: return value - return pm.Literal(value, pm.XSD['string']) + return pm.Literal(text_type(value, 'utf-8'), pm.XSD['string']) if isinstance(x, int): if not as_literal: return x @@ -183,7 +183,7 @@ def safe_encode(x, as_literal=True): return pm.Literal(dumps(x), nipype_ns['pickle']) except TypeError as e: iflogger.info(e) - value = "Could not encode: " + str(e) + value = "Could not encode: " + text_type(e, 'utf-8') if not as_literal: return value return pm.Literal(value, pm.XSD['string']) From 1b83253b03760832b4255be8475935ce7bb99485 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Sat, 12 Dec 2015 15:51:16 -0500 Subject: [PATCH 15/19] fix: use text_type on str for exceptions --- nipype/utils/provenance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nipype/utils/provenance.py b/nipype/utils/provenance.py index 712e2dcff4..c3430fc026 100644 --- a/nipype/utils/provenance.py +++ b/nipype/utils/provenance.py @@ -183,7 +183,7 @@ def safe_encode(x, as_literal=True): return pm.Literal(dumps(x), nipype_ns['pickle']) except TypeError as e: iflogger.info(e) - value = "Could not encode: " + text_type(e, 'utf-8') + value = "Could not encode: " + text_type(str(e), 'utf-8') if not as_literal: return value return pm.Literal(value, pm.XSD['string']) From 191937f70c611c175380cebd4a8c55d7558de664 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Sat, 12 Dec 2015 16:07:19 -0500 Subject: [PATCH 16/19] tst: add a test that fails with unicode strings --- nipype/utils/provenance.py | 2 +- nipype/utils/tests/test_provenance.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/nipype/utils/provenance.py b/nipype/utils/provenance.py index 712e2dcff4..c3430fc026 100644 --- a/nipype/utils/provenance.py +++ b/nipype/utils/provenance.py @@ -183,7 +183,7 @@ def safe_encode(x, as_literal=True): return pm.Literal(dumps(x), nipype_ns['pickle']) except TypeError as e: iflogger.info(e) - value = "Could not encode: " + text_type(e, 'utf-8') + value = "Could not encode: " + text_type(str(e), 'utf-8') if not as_literal: return value return pm.Literal(value, pm.XSD['string']) diff --git a/nipype/utils/tests/test_provenance.py b/nipype/utils/tests/test_provenance.py index 0dc963aeca..e0dbcc659c 100644 --- a/nipype/utils/tests/test_provenance.py +++ b/nipype/utils/tests/test_provenance.py @@ -3,9 +3,9 @@ from future import standard_library standard_library.install_aliases() -from nipype.testing import assert_equal, assert_true, assert_false +from ...testing import assert_equal, assert_true, assert_false -from ..provenance import ProvStore +from ..provenance import ProvStore, safe_encode, text_type def test_provenance(): @@ -16,3 +16,8 @@ def test_provenance(): provn = ps.g.get_provn() prov_json = ps.g.serialize(format='json') yield assert_true, 'echo hello' in provn + +def test_safe_encode(): + a = '\xc3\xa9lg' + out = safe_encode(a) + yield assert_equal, out.value, text_type(a, 'utf-8') \ No newline at end of file From b66c8500e813ec40598e0992721c4103ead15c03 Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Sat, 12 Dec 2015 21:33:19 -0500 Subject: [PATCH 17/19] fix: unicode/str accounting on py3/2 --- nipype/utils/provenance.py | 9 ++++++--- nipype/utils/tests/test_provenance.py | 7 ++++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/nipype/utils/provenance.py b/nipype/utils/provenance.py index c3430fc026..028200573e 100644 --- a/nipype/utils/provenance.py +++ b/nipype/utils/provenance.py @@ -1,6 +1,6 @@ from future import standard_library standard_library.install_aliases() -from builtins import object +from builtins import object, str from copy import deepcopy from pickle import dumps @@ -140,7 +140,10 @@ def safe_encode(x, as_literal=True): value = x if not as_literal: return value - return pm.Literal(text_type(value, 'utf-8'), pm.XSD['string']) + if isinstance(value, str): + return pm.Literal(value, pm.XSD['string']) + else: + return pm.Literal(text_type(value, 'utf-8'), pm.XSD['string']) if isinstance(x, int): if not as_literal: return x @@ -183,7 +186,7 @@ def safe_encode(x, as_literal=True): return pm.Literal(dumps(x), nipype_ns['pickle']) except TypeError as e: iflogger.info(e) - value = "Could not encode: " + text_type(str(e), 'utf-8') + value = "Could not encode: " + str(e) if not as_literal: return value return pm.Literal(value, pm.XSD['string']) diff --git a/nipype/utils/tests/test_provenance.py b/nipype/utils/tests/test_provenance.py index e0dbcc659c..3e80ef5c07 100644 --- a/nipype/utils/tests/test_provenance.py +++ b/nipype/utils/tests/test_provenance.py @@ -2,6 +2,9 @@ # vi: set ft=python sts=4 ts=4 sw=4 et: from future import standard_library standard_library.install_aliases() +from builtins import str + +import sys from ...testing import assert_equal, assert_true, assert_false @@ -20,4 +23,6 @@ def test_provenance(): def test_safe_encode(): a = '\xc3\xa9lg' out = safe_encode(a) - yield assert_equal, out.value, text_type(a, 'utf-8') \ No newline at end of file + if not isinstance(a, str): + a = text_type(a, 'utf-8') + yield assert_equal, out.value, a \ No newline at end of file From e33e99c3abd28237f08338b76828569996dd0baa Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Sat, 12 Dec 2015 21:57:45 -0500 Subject: [PATCH 18/19] fix: updated changes, fixed circle feeds --- CHANGES | 2 ++ circle.yml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 0b0e52d61c..96711749e0 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,8 @@ Next release ============ +* FIX: Use released Prov python library (https://github.com/nipy/nipype/pull/1279) +* ENH: Support for Python 3 (https://github.com/nipy/nipype/pull/1221) * FIX: VTK version check missing when using tvtk (https://github.com/nipy/nipype/pull/1219) * ENH: Added an OAR scheduler plugin (https://github.com/nipy/nipype/pull/1259) * ENH: New ANTs interface: antsBrainExtraction (https://github.com/nipy/nipype/pull/1231) diff --git a/circle.yml b/circle.yml index baac46534a..f203415c02 100644 --- a/circle.yml +++ b/circle.yml @@ -15,7 +15,7 @@ dependencies: - if [[ ! -d ~/fsl ]]; then wget "http://fsl.fmrib.ox.ac.uk/fsldownloads/fsl-5.0.9-centos6_64.tar.gz"; tar zxvf fsl-5.0.9-centos6_64.tar.gz; mv fsl ~/fsl; fi - if [[ ! -d ~/examples/data ]]; then wget "http://tcpdiag.dl.sourceforge.net/project/nipy/nipype/nipype-0.2/nipype-tutorial.tar.bz2"; tar jxvf nipype-tutorial.tar.bz2; mkdir ~/examples; mv nipype-tutorial/* ~/examples/; fi # we download this manually because CircleCI does not cache apt - - if [[ ! -d ~/examples/feeds ]]; then wget "http://fsl.fmrib.ox.ac.uk/fsldownloads/fsl-5.0.9-feeds.tar.gz"; tar zxvf fsl-5.0.8-feeds.tar.gz; mv feeds ~/examples/; fi + - if [[ ! -d ~/examples/feeds ]]; then wget "http://fsl.fmrib.ox.ac.uk/fsldownloads/fsl-5.0.9-feeds.tar.gz"; tar zxvf fsl-5.0.9-feeds.tar.gz; mv feeds ~/examples/; fi - if [[ ! -d ~/examples/fsl_course_data ]]; then wget -c "http://fsl.fmrib.ox.ac.uk/fslcourse/fdt1.tar.gz" ; wget -c "http://fsl.fmrib.ox.ac.uk/fslcourse/fdt2.tar.gz"; wget -c "http://fsl.fmrib.ox.ac.uk/fslcourse/tbss.tar.gz"; mkdir ~/examples/fsl_course_data; tar zxvf fdt1.tar.gz -C ~/examples/fsl_course_data; tar zxvf fdt2.tar.gz -C ~/examples/fsl_course_data; tar zxvf tbss.tar.gz -C ~/examples/fsl_course_data; fi - bash ~/nipype/tools/install_spm_mcr.sh - mkdir -p ~/.nipype && echo "[logging]" > ~/.nipype/nipype.cfg && echo "workflow_level = DEBUG" >> ~/.nipype/nipype.cfg && echo "interface_level = DEBUG" >> ~/.nipype/nipype.cfg && echo "filemanip_level = DEBUG" >> ~/.nipype/nipype.cfg From 8cbf91d416aa2e2a7304ca42c04c8d9793d73fae Mon Sep 17 00:00:00 2001 From: Satrajit Ghosh Date: Sun, 13 Dec 2015 18:06:35 -0500 Subject: [PATCH 19/19] tst: enable provenance for testing on circle --- tools/run_examples.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/run_examples.py b/tools/run_examples.py index a7903c9276..ef8445bdb2 100644 --- a/tools/run_examples.py +++ b/tools/run_examples.py @@ -8,6 +8,7 @@ def run_examples(example, pipelines, plugin): print('running example: %s with plugin: %s' % (example, plugin)) from nipype import config config.enable_debug_mode() + config.enable_provenance() from nipype.interfaces.base import CommandLine CommandLine.set_default_terminal_output("stream") @@ -17,7 +18,9 @@ def run_examples(example, pipelines, plugin): wf.base_dir = os.path.join(os.getcwd(), 'output', example, plugin) if os.path.exists(wf.base_dir): rmtree(wf.base_dir) - wf.config = {'execution': {'hash_method': 'timestamp', 'stop_on_first_rerun': 'true'}} + wf.config = {'execution': {'hash_method': 'timestamp', + 'stop_on_first_rerun': 'true', + 'write_provenance': 'true'}} wf.run(plugin=plugin, plugin_args={'n_procs': 4}) # run twice to check if nothing is rerunning wf.run(plugin=plugin)