Skip to content

Commit b7be2e4

Browse files
committed
add config options for narrow deep paths mutation
wip: narrow deep paths mutation now creates a chain of var edges (test included); fixed logging config bug on Windows systems; added to_find_edge_var_for_narrow_path_query
1 parent 79e2c54 commit b7be2e4

File tree

6 files changed

+169
-13
lines changed

6 files changed

+169
-13
lines changed

config/defaults.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@
7878
MUTPB_FV_SAMPLE_MAXN = 32 # max n of instantiations to sample from top k
7979
MUTPB_FV_QUERY_LIMIT = 256 # SPARQL query limit for the top k instantiations
8080
MUTPB_SP = 0.05 # prob to simplify pattern (warning: can restrict exploration)
81-
81+
MUTPB_DN = 0.05 # prob to try a deep and narrow paths mutation
82+
MUTPB_DN_MIN_LEN = 2 # minimum length of the deep and narrow paths
83+
MUTPB_DN_MAX_LEN = 10 # absolute max of path length if not stopped by term_pb
84+
MUTPB_DN_TERM_PB = 0.3 # prob to terminate node expansion each step > min_len
8285
# for import in helpers and __init__
8386
__all__ = [_v for _v in globals().keys() if _v.isupper()]

gp_learner.py

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -413,19 +413,53 @@ def mutate_del_triple(child):
413413
return new_child
414414

415415

416-
def mutate_expand_node(child, pb_en_out_link):
417-
# TODO: can maybe be improved by sparqling
418-
nodes = list(child.nodes)
419-
node = random.choice(nodes)
def _mutate_expand_node_helper(node, pb_en_out_link=config.MUTPB_EN_OUT_LINK):
    """Create one new triple attaching a fresh variable node to ``node``.

    Two fresh variables are generated (edge first, then node). With
    probability ``pb_en_out_link`` the triple points away from ``node``
    (``node -> new var``), otherwise it points towards it.

    :param node: Node the new triple is attached to.
    :param pb_en_out_link: Probability that the new edge is outgoing.
    :return: Tuple ``(new_triple, var_node)`` where ``var_node`` is the
        freshly generated node variable at the other end of the triple.
    """
    # Keep the draw order (edge var, node var, direction) stable so seeded
    # runs stay reproducible.
    fresh_edge = gen_random_var()
    fresh_node = gen_random_var()
    outgoing = random.random() < pb_en_out_link
    triple = (
        (node, fresh_edge, fresh_node) if outgoing
        else (fresh_node, fresh_edge, node)
    )
    return triple, fresh_node
424+
425+
def mutate_expand_node(child, node=None,
                       pb_en_out_link=config.MUTPB_EN_OUT_LINK):
    """Expand the pattern by one triple attached to a node.

    :param child: Graph pattern to expand; must expose ``nodes`` and support
        ``child + (triple,)``.
    :param node: Node to attach the new triple to. When ``None`` a node is
        picked uniformly at random from ``child.nodes``.
    :param pb_en_out_link: Probability that the new edge points away from
        the chosen node (forwarded to ``_mutate_expand_node_helper``).
    :return: New pattern consisting of ``child`` plus the new triple.
    """
    # TODO: can maybe be improved by sparqling
    # Compare against None explicitly: a caller-supplied node must never be
    # silently replaced just because it happens to be falsy.
    if node is None:
        node = random.choice(list(child.nodes))
    new_triple, _ = _mutate_expand_node_helper(node, pb_en_out_link)
    return child + (new_triple,)
427433

428434

def mutate_deep_narrow_path(
        child,
        min_len=config.MUTPB_DN_MIN_LEN,
        max_len=config.MUTPB_DN_MAX_LEN,
        term_pb=config.MUTPB_DN_TERM_PB,
        pb_en_out_link=config.MUTPB_EN_OUT_LINK,
):
    """Append a chain of fresh variable triples starting at a random node.

    Starting from a randomly chosen node of ``child``, the pattern is
    repeatedly expanded by one triple, each time continuing from the variable
    node that was just created, so the added triples form a path.

    :param child: GraphPattern to mutate (not modified in place).
    :param min_len: Number of hops that are always added before random
        termination is considered.
    :param max_len: Hard upper bound on the number of hops.
    :param term_pb: Probability to stop before each further hop once
        ``min_len`` hops have been added.
    :param pb_en_out_link: Probability that each new edge points away from
        the current end of the path.
    :return: Pattern consisting of ``child`` plus the added path triples.
    """
    assert isinstance(child, GraphPattern)
    current = random.choice(list(child.nodes))
    # target_nodes = set(nodes) - {start_node}
    gp = child
    hop = 0
    while hop < max_len:
        if hop >= min_len and random.random() < term_pb:
            break
        hop += 1
        # Forward pb_en_out_link: previously the parameter was accepted but
        # ignored, so the helper always fell back to the config default.
        new_triple, current = _mutate_expand_node_helper(
            current, pb_en_out_link)
        # Same tuple concatenation as mutate_expand_node uses.
        gp = gp + (new_triple,)

    # TODO: insert connection to a target node
    # TODO: fix edge or node ( to_count_var_over_values_query)
    return gp
461+
462+
429463
def mutate_add_edge(child):
430464
# TODO: can maybe be improved by sparqling
431465
nodes = list(child.nodes)
@@ -647,7 +681,6 @@ def mutate(
647681
pb_ae=config.MUTPB_AE,
648682
pb_dt=config.MUTPB_DT,
649683
pb_en=config.MUTPB_EN,
650-
pb_en_out_link=config.MUTPB_EN_OUT_LINK,
651684
pb_fv=config.MUTPB_FV,
652685
pb_id=config.MUTPB_ID,
653686
pb_iv=config.MUTPB_IV,
@@ -678,7 +711,7 @@ def mutate(
678711
child = mutate_del_triple(child)
679712

680713
if random.random() < pb_en:
681-
child = mutate_expand_node(child, pb_en_out_link)
714+
child = mutate_expand_node(child)
682715
if random.random() < pb_ae:
683716
child = mutate_add_edge(child)
684717

@@ -694,7 +727,6 @@ def mutate(
694727
else:
695728
children = [child]
696729

697-
698730
# TODO: deep & narrow paths mutation
699731

700732
children = {

graph_pattern.py

Lines changed: 88 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,10 @@
4141
TARGET_VAR = Variable('target')
4242
ASK_VAR = Variable('ask')
4343
COUNT_VAR = Variable('count')
44-
45-
# Result variables of to_find_edge_var_for_narrow_path_query.
EDGE_VAR_COUNT = Variable('edge_count_var')
NODE_VAR_COUNT = Variable('node_count_var')
# SPARQL variable names must not contain whitespace, otherwise the rendered
# "?maximum node count" makes the generated query unparsable.
MAX_NODE_COUNT = Variable('max_node_count')
PRIO_VAR = Variable('priority')
4648
def gen_random_var():
4749
return Variable(RANDOM_VAR_PREFIX + ''.join(
4850
random.choice(string.ascii_letters + string.digits)
@@ -709,6 +711,90 @@ def to_count_var_over_values_query(self, var, vars_, values, limit):
709711
res += 'LIMIT %d\n' % limit
710712
return self._sparql_prefix(res)
711713

def to_find_edge_var_for_narrow_path_query(self, edge_var, node_var,
                                           vars_, filter_node_count,
                                           filter_edge_count, limit_res,
                                           values=None):
    """Build a query ranking substitutions for edge_var for a narrow path.

    Meant to perform a query like this:
    SELECT *
    {
      {
        SELECT
          ?edge_var
          (COUNT(*) AS ?edge_var_count)
          (MAX(?node_var_count) AS ?max_node_count)
          (COUNT(*)/AVG(?node_var_count) AS ?prio_var)
        {
          SELECT DISTINCT
            ?source ?target ?edge_var (COUNT(?node_var) AS ?node_var_count)
          {
            VALUES (?source ?target) {
              (dbr:Adolescence dbr:Youth)
              (dbr:Adult dbr:Child)
              (dbr:Angel dbr:Heaven)
              (dbr:Arithmetic dbr:Mathematics)
            }
            ?node_var ?edge_var ?source .
            ?source dbo:wikiPageWikiLink ?target .
          }
          GROUP BY ?source ?target ?edge_var
        }
        GROUP BY ?edge_var
        ORDER BY DESC(?edge_var_count)
      }
      FILTER(?max_node_count < 10 && ?edge_var_count > 1)
    }
    ORDER BY DESC(?prio_var)
    LIMIT 32

    :param edge_var: Edge variable to find substitution for.
    :param node_var: Node variable to count.
    :param vars_: Ordered sequence of vars to fix values for (e.g. ?source,
        ?target). The order defines the column order of the VALUES rows, so
        do not pass an unordered collection such as a set.
    :param filter_node_count: Exclusive upper bound on the maximum node
        count of an edge variable substitution.
    :param filter_edge_count: Exclusive lower bound on the edge count.
    :param limit_res: Limit of the result size.
    :param values: Optional iterable of rows, each an iterable of RDF terms
        (objects with ``n3()``) aligned with ``vars_``. When omitted, the
        built-in sample (source, target) rows of this work-in-progress
        method are used, which only make sense for two vars.
    :return: Query String.
    """
    # Materialise once so SELECT and VALUES see the very same order.
    vars_ = list(vars_)
    vars_n3 = ' '.join(v.n3() for v in vars_)
    edge_n3 = edge_var.n3()
    edge_count_n3 = EDGE_VAR_COUNT.n3()
    node_count_n3 = NODE_VAR_COUNT.n3()

    if values is None:
        # WIP fallback: fixed sample pairs. Prefixed names must not contain
        # a space after the colon ("dbr: Youth" is not valid SPARQL).
        value_rows = [
            '(dbr:Adolescence dbr:Youth)',
            '(dbr:Adult dbr:Child)',
            '(dbr:Angel dbr:Heaven)',
            '(dbr:Arithmetic dbr:Mathematics)',
        ]
    else:
        value_rows = [
            '(%s)' % ' '.join(term.n3() for term in row) for row in values
        ]

    triples = [
        '%s %s %s .' % (s.n3(), p.n3(), o.n3()) for s, p, o in self
    ]

    outer_select = (
        '  SELECT %s (COUNT(*) AS %s) (MAX(%s) AS %s)'
        ' (COUNT(*)/AVG(%s) AS %s) WHERE {'
    ) % (
        edge_n3, edge_count_n3,
        node_count_n3, MAX_NODE_COUNT.n3(),
        node_count_n3, PRIO_VAR.n3(),
    )
    inner_select = '   SELECT DISTINCT %s %s (COUNT(%s) AS %s) WHERE {' % (
        vars_n3, edge_n3, node_var.n3(), node_count_n3)

    indent = ' ' * 4
    lines = ['SELECT * WHERE {', ' {', outer_select, inner_select]
    lines.append(indent + 'VALUES (%s) {' % vars_n3)
    lines.extend(indent + ' ' + row for row in value_rows)
    lines.append(indent + '}')
    lines.extend(indent + t for t in triples)
    lines.append('   }')
    # Plain variables projected next to an aggregate have to be grouped,
    # otherwise the sub-select is not valid SPARQL 1.1.
    lines.append('   GROUP BY %s %s' % (vars_n3, edge_n3))
    lines.append('  }')
    lines.append('  GROUP BY %s' % edge_n3)
    lines.append('  ORDER BY DESC(%s)' % edge_count_n3)
    lines.append(' }')
    lines.append(' FILTER(%s < %d && %s > %d)' % (
        MAX_NODE_COUNT.n3(), filter_node_count,
        edge_count_n3, filter_edge_count))
    lines.append('}')
    lines.append('ORDER BY DESC(%s)' % PRIO_VAR.n3())
    lines.append('LIMIT %d' % limit_res)
    return self._sparql_prefix('\n'.join(lines))
797+
712798
def to_dict(self):
713799
return {
714800
'fitness': self.fitness.values if self.fitness.valid else (),

logging_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ def doRollover(self):
191191
# 'loggers': {} # all other loggers except for root
192192
}
193193

194-
if os.uname()[0] == 'Darwin':
194+
if os.name == 'posix' and os.uname()[0] == 'Darwin':
195195
logging_config['formatters']['tts_formatter'] = {
196196
'format': format_str_tts
197197
}

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@ matplotlib==1.5.1
55
networkx==1.11
66
nose==1.3.7
77
numpy==1.11.0
8-
pygraphviz==1.3.1
8+
# pygraphviz==1.3.1
99
requests==2.9.1
1010
rdflib==4.2.1
1111
scikit-learn==0.17.1
12-
scipy==0.17.0
12+
scipy
1313
scoop==0.7.1.1
1414
six==1.10.0
1515
SPARQLWrapper==1.7.6

tests/test_gp_learner_offline.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from gp_learner import mutate_increase_dist
1414
from gp_learner import mutate_merge_var
1515
from gp_learner import mutate_simplify_pattern
16+
from gp_learner import mutate_deep_narrow_path
1617
from graph_pattern import GraphPattern
1718
from graph_pattern import SOURCE_VAR
1819
from graph_pattern import TARGET_VAR
@@ -108,6 +109,35 @@ def test_mutate_merge_var():
108109
assert False, "merge never reached one of the cases: %s" % cases
109110

110111

def test_mutate_deep_narrow_path():
    """The mutation must add between min_len and max_len triples."""
    p = Variable('p')
    gp = GraphPattern([
        (SOURCE_VAR, p, TARGET_VAR)
    ])
    # Pin the bounds explicitly so the test does not depend on the config.
    min_len, max_len = 2, 5
    child = mutate_deep_narrow_path(gp, min_len=min_len, max_len=max_len)
    added = len(child) - len(gp)
    # The former check also accepted an unchanged pattern and could
    # therefore never detect a mutation that does nothing.
    assert min_len <= added <= max_len, \
        "expected %d to %d added triples, got %d:\n%s\n%s" % (
            min_len, max_len, added, gp, child)
121+
122+
def test_to_find_edge_var_for_narrow_path_query():
    """The generated query must contain the requested vars and limits."""
    node_var = Variable('node_variable')
    edge_var = Variable('edge_variable')
    gp = GraphPattern([
        (node_var, edge_var, SOURCE_VAR),
        (SOURCE_VAR, wikilink, TARGET_VAR)
    ])
    filter_node_count = 10
    filter_edge_count = 1
    limit_res = 32
    # Ordered on purpose: the VALUES rows are (source, target) pairs, a set
    # would make the column order arbitrary.
    vars_ = (SOURCE_VAR, TARGET_VAR)
    res = gp.to_find_edge_var_for_narrow_path_query(
        edge_var, node_var, vars_,
        filter_node_count, filter_edge_count, limit_res)
    # NOTE(review): assumes _sparql_prefix only adds prefix declarations and
    # keeps the query body verbatim - confirm.
    assert '%s %s' % (SOURCE_VAR.n3(), TARGET_VAR.n3()) in res, res
    assert 'GROUP BY %s' % edge_var.n3() in res, res
    assert 'FILTER(' in res, res
    assert 'LIMIT %d' % limit_res in res, res
139+
140+
111141
def test_simplify_pattern():
112142
gp = GraphPattern([(SOURCE_VAR, wikilink, TARGET_VAR)])
113143
res = mutate_simplify_pattern(gp)
@@ -271,3 +301,8 @@ def test_remaining_gain_sample_gtps():
271301

272302
def test_gtp_scores():
273303
assert gtp_scores - gtp_scores == 0
304+
305+
306+
if __name__ == '__main__':
307+
# test_mutate_deep_narrow_path()
308+
test_to_find_edge_var_for_narrow_path_query()

0 commit comments

Comments
 (0)