-
Notifications
You must be signed in to change notification settings - Fork 49
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Verify .map can run parallel classes (#802)
* Add integration test confirming it works. * Add documentation explaining how. * Add support for viewing the readthedocs documentation before committing.
- Loading branch information
1 parent
baedef8
commit 241bba5
Showing
6 changed files
with
86 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,3 +39,4 @@ luna_traces/ | |
traces/ | ||
apps/jupyter/bind_dir/poetry_cache | ||
apps/query-ui/cache_dir | ||
docs/build |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
52 changes: 52 additions & 0 deletions
52
lib/sycamore/sycamore/tests/integration/transforms/test_map.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import sycamore | ||
import logging | ||
import math | ||
import time | ||
import uuid | ||
from sycamore.data import Document | ||
from ray.data import ActorPoolStrategy | ||
|
||
|
||
def make_docs(num):
    """Return a list of ``num`` Documents with doc_id "doc_0" .. "doc_{num-1}"."""
    return [Document({"doc_id": f"doc_{idx}"}) for idx in range(num)]
|
||
|
||
# Module-level Sycamore context, created once at import time and used by the test below.
ctx = sycamore.init() | ||
|
||
|
||
def test_map_class_parallelism():
    """Check that ``.map`` with a class spreads documents over an actor pool.

    ``AgentMark`` stamps every document it processes with a UUID generated in
    its constructor. After mapping ``num_docs`` documents with
    ``ActorPoolStrategy(size=num_actors)``, the test expects exactly
    ``num_actors`` distinct UUIDs and a roughly even number of documents
    per UUID.
    """

    class AgentMark:
        """Callable that tags each document with this instance's unique id."""

        def __init__(self):
            self.id = uuid.uuid4()
            logging.error(f"Start AgentMark {self.id}")

        def __call__(self, d):
            logging.error(f"Call AgentMark {self.id} on {d.doc_id}")
            # The sleep keeps each instance busy so that the work has to be
            # handed to the other instances as well.
            time.sleep(1)
            d.properties["agent"] = self.id
            return d

    num_actors = 4
    num_docs = 20
    docs = ctx.read.document(make_docs(num_docs)).map(AgentMark, compute=ActorPoolStrategy(size=num_actors)).take()

    # Number of documents handled by each AgentMark instance, keyed by its id.
    count = {}
    for d in docs:
        a = d.properties["agent"]
        count[a] = count.get(a, 0) + 1

    assert len(count) == num_actors
    # Allow +-1 slop around a perfectly even distribution; given the sleep we
    # will probably get a perfect distribution anyway.
    min_count = math.floor(num_docs / num_actors - 1)
    max_count = math.ceil(num_docs / num_actors + 1)
    print(f"Expecting count to be between {min_count} and {max_count}")
    for a in count:
        print(f"Actor {a} got {count[a]} items")
        assert count[a] >= min_count
        assert count[a] <= max_count
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters