
Commit 0bdd631

Author: Ubuntu
Commit message: Review feedback
1 parent c61e60f commit 0bdd631

File tree

8 files changed (+64, -18 lines)

nucleus/annotation.py

Lines changed: 9 additions & 2 deletions
@@ -71,7 +71,13 @@ def to_json(self) -> str:
         """Serializes annotation object to schematized JSON string."""
         return json.dumps(self.to_payload(), allow_nan=False)
 
-    def has_local_files(self) -> bool:
+    def has_local_files_to_upload(self) -> bool:
+        """Returns True if the annotation has local files that need to be uploaded.
+
+        Nearly all subclasses have no local files, so we default to returning
+        False. A subclass with local files should override this method (though
+        that alone is not enough to make local file upload work).
+        """
         return False
 
 

@@ -582,7 +588,8 @@ def to_payload(self) -> dict:
 
         return payload
 
-    def has_local_files(self) -> bool:
+    def has_local_files_to_upload(self) -> bool:
+        """Check if the mask url is local and needs to be uploaded."""
         if is_local_path(self.mask_url):
            if not os.path.isfile(self.mask_url):
                 raise Exception(f"Mask file {self.mask_url} does not exist.")
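
Note: the new docstring says a subclass with local files should override has_local_files_to_upload. As a hedged illustration of that pattern (the subclass and its mask_url field below are invented for this note, not part of the commit; the sketch is written as if it lived inside nucleus/annotation.py, where Annotation and is_local_path are already in scope):

# Hypothetical subclass, for illustration only.
class LocalMaskAnnotation(Annotation):
    def has_local_files_to_upload(self) -> bool:
        # Report a local file so the uploader routes this annotation
        # through the local-file upload path.
        return is_local_path(self.mask_url)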

nucleus/annotation_uploader.py

Lines changed: 7 additions & 1 deletion
@@ -32,6 +32,11 @@ def accumulate_dict_values(dicts: Iterable[dict]):
 
 
 class AnnotationUploader:
+    """Helper class not intended for direct use; please use dataset.annotate.
+
+    This class exists purely to implement dataset.annotate.
+    """
+
     def __init__(self, dataset_id: str, client: "NucleusClient"):  # noqa: F821
         self.dataset_id = dataset_id
         self._client = client
@@ -45,14 +50,15 @@ def upload(
         local_files_per_upload_request: int = 10,
         local_file_upload_concurrency: int = 30,
     ):
+        """For more details on parameters and functionality, see dataset.annotate."""
         if local_files_per_upload_request > 10:
             raise ValueError("local_files_per_upload_request must be <= 10")
         annotations_without_files: List[Annotation] = []
         segmentations_with_local_files: List[SegmentationAnnotation] = []
         segmentations_with_remote_files: List[SegmentationAnnotation] = []
 
         for annotation in annotations:
-            if annotation.has_local_files():
+            if annotation.has_local_files_to_upload():
                 # Only segmentations have local files currently, and probably
                 # for a long time to come.
                 assert isinstance(annotation, SegmentationAnnotation)
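
For context, upload() initializes three buckets before the loop shown above. A plausible reading of the full dispatch, sketched under the assumption that the two arms not visible in this diff simply append to the remaining lists:

# Assumed continuation of the loop above; only the first branch appears
# in this diff, the other two arms are illustrative guesses.
for annotation in annotations:
    if annotation.has_local_files_to_upload():
        # Only segmentations have local files currently.
        assert isinstance(annotation, SegmentationAnnotation)
        segmentations_with_local_files.append(annotation)
    elif isinstance(annotation, SegmentationAnnotation):
        segmentations_with_remote_files.append(annotation)
    else:
        annotations_without_files.append(annotation)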

nucleus/async_utils.py

Lines changed: 5 additions & 1 deletion
@@ -29,6 +29,7 @@ class FileFormField:
 
 
 async def gather_with_concurrency(n, *tasks):
+    """Helper method to limit the concurrency when gathering the results from multiple tasks."""
     semaphore = asyncio.Semaphore(n)
 
     async def sem_task(task):
@@ -107,6 +108,9 @@ def make_many_form_data_requests_concurrently(
         requests: Each request should be a FormDataContextHandler object which will
             handle generating form data, and opening/closing files for each request.
         route: route for the request.
+        progressbar: A tqdm progress bar to use for showing progress to the user.
+        concurrency: How many concurrent requests to run at once. Should be exposed
+            to the user.
     """
     loop = get_event_loop()
     return loop.run_until_complete(
@@ -168,7 +172,7 @@ async def _post_form_data(
 
     logger.info("Posting to %s", endpoint)
 
-    for sleep_time in RetryStrategy.sleep_times + [-1]:
+    for sleep_time in RetryStrategy.sleep_times() + [-1]:
         with request as form:
             async with session.post(
                 endpoint,
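
The context lines above show most of gather_with_concurrency already. A self-contained sketch with a usage example, assuming the body completes with a plain asyncio.gather over the wrapped tasks (the final gather call is not visible in this diff):

import asyncio

async def gather_with_concurrency(n, *tasks):
    """Limit concurrency while gathering the results from multiple tasks."""
    semaphore = asyncio.Semaphore(n)

    async def sem_task(task):
        async with semaphore:  # at most n coroutines run at once
            return await task

    # Assumed completion: gather the wrapped tasks, preserving input order.
    return await asyncio.gather(*(sem_task(task) for task in tasks))

async def main():
    async def job(i):
        await asyncio.sleep(0.01)  # stand-in for a network request
        return i

    results = await gather_with_concurrency(5, *(job(i) for i in range(20)))
    assert results == list(range(20))

asyncio.run(main())

Separately, the retry loop in _post_form_data now calls sleep_times() and appends -1; that reads as a sentinel granting one final attempt after the last wait, though the diff does not show how the -1 is consumed.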

nucleus/connection.py

Lines changed: 1 addition & 1 deletion
@@ -55,7 +55,7 @@ def make_request(
 
     logger.info("Make request to %s", endpoint)
 
-    for retry_wait_time in RetryStrategy.sleep_times:
+    for retry_wait_time in RetryStrategy.sleep_times():
         response = requests_command(
             endpoint,
             json=payload,
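
Both call sites now invoke sleep_times() as a method so each request gets a fresh jittered schedule. A hedged sketch of the retry pattern this loop implements (make_request's real response handling is not in this diff; requests.post and the break condition below are assumptions for illustration):

import time

import requests

from nucleus.retry_strategy import RetryStrategy

def post_with_retries(endpoint: str, payload: dict) -> requests.Response:
    # Illustrative only: retry on the status codes RetryStrategy lists,
    # sleeping the jittered wait between attempts, then give up.
    response = None
    for retry_wait_time in RetryStrategy.sleep_times():
        response = requests.post(endpoint, json=payload)
        if response.status_code not in RetryStrategy.statuses:
            break  # success or a non-retryable failure
        time.sleep(retry_wait_time)
    return response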

nucleus/retry_strategy.py

Lines changed: 9 additions & 1 deletion
@@ -1,4 +1,12 @@
 # TODO: use retry library instead of custom code. Tenacity is one option.
+import random
+
+
 class RetryStrategy:
     statuses = {503, 524, 520, 504}
-    sleep_times = [1, 3, 9, 27]  # These are in seconds
+
+    @staticmethod
+    def sleep_times():
+        sleep_times = [1, 3, 9, 27]  # These are in seconds
+
+        return [2 * random.random() * t for t in sleep_times]
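
This swaps the fixed backoff schedule for a jittered one: each wait is drawn uniformly from [0, 2t], so its expected value is still the old t, but concurrent clients no longer retry in lockstep. A quick demonstration (the seed is only to make the example reproducible):

import random

from nucleus.retry_strategy import RetryStrategy

random.seed(0)  # reproducible output for this example only
print(RetryStrategy.sleep_times())
# -> roughly [1.69, 4.55, 7.57, 13.98] with this seed; a fresh random
#    schedule per call, averaging the old fixed [1, 3, 9, 27] seconds.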

tests/helpers.py

Lines changed: 2 additions & 0 deletions
@@ -304,6 +304,8 @@ def reference_id_from_url(url):
 this_dir = os.path.dirname(os.path.realpath(__file__))
 TEST_LOCAL_MASK_URL = os.path.join(this_dir, "testdata/000000000285.png")
 
+
+NUM_VALID_SEGMENTATIONS_IN_MAIN_DATASET = len(TEST_DATASET_ITEMS)
 TEST_SEGMENTATION_ANNOTATIONS = [
     {
         "reference_id": reference_id_from_url(TEST_IMG_URLS[i]),

tests/test_dataset.py

Lines changed: 3 additions & 2 deletions
@@ -262,6 +262,7 @@ def test_dataset_append_local(CLIENT, dataset):
             reference_id="bad",
         )
     ]
+    num_local_items_to_test = 10
     with pytest.raises(ValueError) as e:
         dataset.append(ds_items_local_error)
     assert "Out of range float values are not JSON compliant" in str(
@@ -273,15 +274,15 @@ def test_dataset_append_local(CLIENT, dataset):
             metadata={"test": 0},
             reference_id=LOCAL_FILENAME.split("/")[-1] + str(i),
         )
-        for i in range(1000)
+        for i in range(num_local_items_to_test)
     ]
 
     response = dataset.append(ds_items_local)
 
     assert isinstance(response, UploadResponse)
     resp_json = response.json()
     assert resp_json[DATASET_ID_KEY] == dataset.id
-    assert resp_json[NEW_ITEMS] == 1000
+    assert resp_json[NEW_ITEMS] == num_local_items_to_test
     assert resp_json[UPDATED_ITEMS] == 0
     assert resp_json[IGNORED_ITEMS] == 0
     assert resp_json[ERROR_ITEMS] == 0

tests/test_segmentation.py

Lines changed: 28 additions & 10 deletions
@@ -1,6 +1,7 @@
 from nucleus.annotation import SegmentationAnnotation
 from nucleus.dataset import Dataset
 from tests.helpers import (
+    NUM_VALID_SEGMENTATIONS_IN_MAIN_DATASET,
     TEST_LOCAL_MASK_URL,
     TEST_SEGMENTATION_ANNOTATIONS,
     assert_segmentation_annotation_matches_dict,
@@ -38,15 +39,17 @@ def test_batch_local_semseg_gt_upload(dataset: Dataset):
         request_annotation.mask_url = TEST_LOCAL_MASK_URL
     response = dataset.annotate(annotations=request_annotations)
 
-    print(request_annotations)
-    print(response)
-
     assert response["dataset_id"] == dataset.id
-    assert response["annotations_processed"] == 4
+    assert (
+        response["annotations_processed"]
+        == NUM_VALID_SEGMENTATIONS_IN_MAIN_DATASET
+    )
     assert response["annotations_ignored"] == 0
     assert bad_reference_id in response["errors"][0]
 
-    for request_annotation in request_annotations[:4]:
+    for request_annotation in request_annotations[
+        :NUM_VALID_SEGMENTATIONS_IN_MAIN_DATASET
+    ]:
         response_annotation = dataset.refloc(request_annotation.reference_id)[
             "annotations"
         ]["segmentation"][0]
@@ -78,7 +81,10 @@ def test_batch_semseg_gt_upload(dataset):
     ]
     response = dataset.annotate(annotations=annotations)
     assert response["dataset_id"] == dataset.id
-    assert response["annotations_processed"] == 4
+    assert (
+        response["annotations_processed"]
+        == NUM_VALID_SEGMENTATIONS_IN_MAIN_DATASET
+    )
     assert response["annotations_ignored"] == 0
 
 

@@ -90,14 +96,20 @@ def test_batch_semseg_gt_upload_ignore(dataset):
     ]
     response = dataset.annotate(annotations=annotations)
     assert response["dataset_id"] == dataset.id
-    assert response["annotations_processed"] == 4
+    assert (
+        response["annotations_processed"]
+        == NUM_VALID_SEGMENTATIONS_IN_MAIN_DATASET
+    )
     assert response["annotations_ignored"] == 0
 
     # When we re-upload, expect them to be ignored
     response = dataset.annotate(annotations=annotations)
     assert response["dataset_id"] == dataset.id
     assert response["annotations_processed"] == 0
-    assert response["annotations_ignored"] == 4
+    assert (
+        response["annotations_ignored"]
+        == NUM_VALID_SEGMENTATIONS_IN_MAIN_DATASET
+    )
 
 
 def test_batch_semseg_gt_upload_update(dataset):
@@ -108,11 +120,17 @@ def test_batch_semseg_gt_upload_update(dataset):
     ]
     response = dataset.annotate(annotations=annotations)
     assert response["dataset_id"] == dataset.id
-    assert response["annotations_processed"] == 4
+    assert (
+        response["annotations_processed"]
+        == NUM_VALID_SEGMENTATIONS_IN_MAIN_DATASET
+    )
     assert response["annotations_ignored"] == 0
 
     # When we re-upload, expect uploads to be processed
     response = dataset.annotate(annotations=annotations, update=True)
     assert response["dataset_id"] == dataset.id
-    assert response["annotations_processed"] == 4
+    assert (
+        response["annotations_processed"]
+        == NUM_VALID_SEGMENTATIONS_IN_MAIN_DATASET
+    )
     assert response["annotations_ignored"] == 0
