Skip to content

Commit ce3a6a4

Browse files
committed
Introduce explicit endpoints for Wcc
1 parent f65074f commit ce3a6a4

File tree

2 files changed

+242
-0
lines changed

2 files changed

+242
-0
lines changed

graphdatascience/procedure_surface/api/__init__.py

Whitespace-only changes.
Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
from abc import ABC, abstractmethod
2+
from typing import Any, List, Optional
3+
4+
from pandas import DataFrame, Series
5+
6+
from ...graph.graph_object import Graph
7+
8+
9+
class WccEndpoints(ABC):
    """
    Abstract base class defining the API for the Weakly Connected Components (WCC) algorithm.

    The interface declares one abstract method per execution mode:
    ``mutate``, ``stats``, ``stream`` and ``write``. Concrete subclasses must
    implement all four.
    """

    # NOTE: the ``Series[Any]`` return annotations below are written as strings
    # on purpose. Annotations are evaluated when the ``def`` statement runs, and
    # subscripting ``pandas.Series`` at runtime is not supported by every pandas
    # release, so an unquoted ``Series[Any]`` can raise ``TypeError`` on import.

    @abstractmethod
    def mutate(
        self,
        G: Graph,
        mutate_property: str,
        threshold: Optional[float] = None,
        relationship_types: Optional[List[str]] = None,
        node_labels: Optional[List[str]] = None,
        sudo: Optional[bool] = None,
        log_progress: Optional[bool] = None,
        username: Optional[str] = None,
        concurrency: Optional[Any] = None,
        job_id: Optional[Any] = None,
        seed_property: Optional[str] = None,
        consecutive_ids: Optional[bool] = None,
        relationship_weight_property: Optional[str] = None,
    ) -> "Series[Any]":
        """
        Executes the WCC algorithm and writes the results to the in-memory graph as node properties.

        Parameters
        ----------
        G : Graph
            The graph to run the algorithm on
        mutate_property : str
            The property name to store the component ID for each node
        threshold : Optional[float], default=None
            The minimum required weight to consider a relationship during traversal
        relationship_types : Optional[List[str]], default=None
            The relationship types to project
        node_labels : Optional[List[str]], default=None
            The node labels to project
        sudo : Optional[bool], default=None
            Run analysis with admin permission
        log_progress : Optional[bool], default=None
            Whether to log progress
        username : Optional[str], default=None
            The username to attribute the procedure run to
        concurrency : Optional[Any], default=None
            The number of concurrent threads
        job_id : Optional[Any], default=None
            An identifier for the job
        seed_property : Optional[str], default=None
            Defines node properties that are used as initial component identifiers
        consecutive_ids : Optional[bool], default=None
            Flag to decide whether component identifiers are mapped into a consecutive id space
        relationship_weight_property : Optional[str], default=None
            The property name that contains weight

        Returns
        -------
        Series
            Algorithm metrics and statistics
        """

    @abstractmethod
    def stats(
        self,
        G: Graph,
        threshold: Optional[float] = None,
        relationship_types: Optional[List[str]] = None,
        node_labels: Optional[List[str]] = None,
        sudo: Optional[bool] = None,
        log_progress: Optional[bool] = None,
        username: Optional[str] = None,
        concurrency: Optional[Any] = None,
        job_id: Optional[Any] = None,
        seed_property: Optional[str] = None,
        consecutive_ids: Optional[bool] = None,
        relationship_weight_property: Optional[str] = None,
    ) -> "Series[Any]":
        """
        Executes the WCC algorithm and returns statistics.

        Parameters
        ----------
        G : Graph
            The graph to run the algorithm on
        threshold : Optional[float], default=None
            The minimum required weight to consider a relationship during traversal
        relationship_types : Optional[List[str]], default=None
            The relationship types to project
        node_labels : Optional[List[str]], default=None
            The node labels to project
        sudo : Optional[bool], default=None
            Run analysis with admin permission
        log_progress : Optional[bool], default=None
            Whether to log progress
        username : Optional[str], default=None
            The username to attribute the procedure run to
        concurrency : Optional[Any], default=None
            The number of concurrent threads
        job_id : Optional[Any], default=None
            An identifier for the job
        seed_property : Optional[str], default=None
            Defines node properties that are used as initial component identifiers
        consecutive_ids : Optional[bool], default=None
            Flag to decide whether component identifiers are mapped into a consecutive id space
        relationship_weight_property : Optional[str], default=None
            The property name that contains weight

        Returns
        -------
        Series
            Algorithm metrics and statistics
        """

    @abstractmethod
    def stream(
        self,
        G: Graph,
        min_component_size: Optional[int] = None,
        threshold: Optional[float] = None,
        relationship_types: Optional[List[str]] = None,
        node_labels: Optional[List[str]] = None,
        sudo: Optional[bool] = None,
        log_progress: Optional[bool] = None,
        username: Optional[str] = None,
        concurrency: Optional[Any] = None,
        job_id: Optional[Any] = None,
        seed_property: Optional[str] = None,
        consecutive_ids: Optional[bool] = None,
        relationship_weight_property: Optional[str] = None,
    ) -> DataFrame:
        """
        Executes the WCC algorithm and returns a stream of results.

        Parameters
        ----------
        G : Graph
            The graph to run the algorithm on
        min_component_size : Optional[int], default=None
            Don't stream components with fewer nodes than this
        threshold : Optional[float], default=None
            The minimum required weight to consider a relationship during traversal
        relationship_types : Optional[List[str]], default=None
            The relationship types to project
        node_labels : Optional[List[str]], default=None
            The node labels to project
        sudo : Optional[bool], default=None
            Run analysis with admin permission
        log_progress : Optional[bool], default=None
            Whether to log progress
        username : Optional[str], default=None
            The username to attribute the procedure run to
        concurrency : Optional[Any], default=None
            The number of concurrent threads
        job_id : Optional[Any], default=None
            An identifier for the job
        seed_property : Optional[str], default=None
            Defines node properties that are used as initial component identifiers
        consecutive_ids : Optional[bool], default=None
            Flag to decide whether component identifiers are mapped into a consecutive id space
        relationship_weight_property : Optional[str], default=None
            The property name that contains weight

        Returns
        -------
        DataFrame
            DataFrame with the algorithm results
        """

    @abstractmethod
    def write(
        self,
        G: Graph,
        write_property: str,
        min_component_size: Optional[int] = None,
        threshold: Optional[float] = None,
        relationship_types: Optional[List[str]] = None,
        node_labels: Optional[List[str]] = None,
        sudo: Optional[bool] = None,
        log_progress: Optional[bool] = None,
        username: Optional[str] = None,
        concurrency: Optional[Any] = None,
        job_id: Optional[Any] = None,
        seed_property: Optional[str] = None,
        consecutive_ids: Optional[bool] = None,
        relationship_weight_property: Optional[str] = None,
        write_concurrency: Optional[Any] = None,
        write_to_result_store: Optional[bool] = None,
    ) -> "Series[Any]":
        """
        Executes the WCC algorithm and writes the results to the Neo4j database.

        Parameters
        ----------
        G : Graph
            The graph to run the algorithm on
        write_property : str
            The property name to write component IDs to
        min_component_size : Optional[int], default=None
            Don't write components with fewer nodes than this
        threshold : Optional[float], default=None
            The minimum required weight to consider a relationship during traversal
        relationship_types : Optional[List[str]], default=None
            The relationship types to project
        node_labels : Optional[List[str]], default=None
            The node labels to project
        sudo : Optional[bool], default=None
            Run analysis with admin permission
        log_progress : Optional[bool], default=None
            Whether to log progress
        username : Optional[str], default=None
            The username to attribute the procedure run to
        concurrency : Optional[Any], default=None
            The number of concurrent threads
        job_id : Optional[Any], default=None
            An identifier for the job
        seed_property : Optional[str], default=None
            Defines node properties that are used as initial component identifiers
        consecutive_ids : Optional[bool], default=None
            Flag to decide whether component identifiers are mapped into a consecutive id space
        relationship_weight_property : Optional[str], default=None
            The property name that contains weight
        write_concurrency : Optional[Any], default=None
            The number of concurrent threads during the write phase
        write_to_result_store : Optional[bool], default=None
            Whether to write the results to the result store

        Returns
        -------
        Series
            Algorithm metrics and statistics
        """

0 commit comments

Comments
 (0)