Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/jkobject/scDataLoader into main
Browse files: browse the repository at this point in the history
  • Loading branch information
jkobject committed Sep 9, 2024
2 parents d2008c9 + 0bd5d40 commit ff4bfd9
Showing 1 changed file with 26 additions and 24 deletions.
50 changes: 26 additions & 24 deletions tests/test_base.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import lamindb as ln
# import lamindb as ln
import scanpy as sc
import numpy as np

# import numpy as np
import os
import time

from scdataloader.base import NAME
from scdataloader import utils

from scdataloader import DataModule
# from scdataloader import DataModule
from scdataloader import SimpleAnnDataset
from scdataloader import Collator
from scdataloader import Preprocessor
Expand All @@ -18,7 +19,8 @@

def test_base():
assert NAME == "scdataloader"
adata = sc.read_h5ad(os.path.join(os.path.dirname(__file__), "test.h5ad"))
adata = sc.read_h5ad(os.path.join(os.path.dirname(__file__), "test.h5ad"))

try:
print("populating ontology...")
start_time = time.time()
Expand All @@ -34,26 +36,26 @@ def test_base():
)
end_time = time.time()
print(f"ontology populated in {end_time - start_time:.2f} seconds")
# cx_dataset = (
# ln.Collection.using(instance="laminlabs/cellxgene")
# .filter(name="cellxgene-census", version="2023-12-15")
# .one()
# )
# datamodule = DataModule(
# collection_name="preprocessed dataset",
# organisms=["NCBITaxon:9606"], # organism that we will work on
# how="most expr", # for the collator (most expr genes only will be selected)
# max_len=1000, # only the 1000 most expressed
# batch_size=64,
# num_workers=1,
# validation_split=0.1,
# test_split=0,
# )
# for i in datamodule.train_dataloader():
# # pass #or do pass
# print(i)
# break
# assert True, "Datamodule test passed"
# cx_dataset = (
# ln.Collection.using(instance="laminlabs/cellxgene")
# .filter(name="cellxgene-census", version="2023-12-15")
# .one()
# )
# datamodule = DataModule(
# collection_name="preprocessed dataset",
# organisms=["NCBITaxon:9606"], # organism that we will work on
# how="most expr", # for the collator (most expr genes only will be selected)
# max_len=1000, # only the 1000 most expressed
# batch_size=64,
# num_workers=1,
# validation_split=0.1,
# test_split=0,
# )
# for i in datamodule.train_dataloader():
# # pass #or do pass
# print(i)
# break
# assert True, "Datamodule test passed"
preprocessor = Preprocessor(do_postp=False)
adata = preprocessor(adata)
adataset = SimpleAnnDataset(adata, obs_to_output=["organism_ontology_term_id"])
Expand Down

0 comments on commit ff4bfd9

Please sign in to comment.