diff --git a/Makefile b/Makefile
index 943f354..232dc58 100644
--- a/Makefile
+++ b/Makefile
@@ -85,7 +85,9 @@ release: ## Create a new tag for release.
 	@echo "creating git tag : $${TAG}"
 	@git tag $${TAG}
 	@git push -u origin HEAD --tags
 	@echo "Github Actions will detect the new tag and release the new version."
+	@mkdocs gh-deploy
+	@echo "Documentation published to GitHub Pages."
 
 .PHONY: docs
 docs: ## Build the documentation.
diff --git a/docs/dataloader.md b/docs/dataloader.md
new file mode 100644
index 0000000..05501de
--- /dev/null
+++ b/docs/dataloader.md
@@ -0,0 +1,4 @@
+# Documentation for `DataLoader`
+
+::: scdataloader.dataloader.DataLoader
+    handler: python
\ No newline at end of file
diff --git a/docs/notebooks/1_download_and_preprocess.ipynb b/docs/notebooks/1_download_and_preprocess.ipynb
index 0f638ec..3bc4633 100644
--- a/docs/notebooks/1_download_and_preprocess.ipynb
+++ b/docs/notebooks/1_download_and_preprocess.ipynb
@@ -63,7 +63,6 @@
     "\n",
     "import lamindb as ln\n",
     "import lnschema_bionty as lb\n",
-    "import pandas as pd\n",
     "\n",
     "lb.settings.organism = \"human\"\n",
     "\n",
diff --git a/docs/notebooks/2_create_dataloader.ipynb b/docs/notebooks/2_create_dataloader.ipynb
index 7246d9f..08a3ad4 100644
--- a/docs/notebooks/2_create_dataloader.ipynb
+++ b/docs/notebooks/2_create_dataloader.ipynb
@@ -14,7 +14,11 @@
    "outputs": [],
    "source": [
     "from scdataloader import Dataset\n",
-    "from scdataloader import dataLoader\n",
+    "from scdataloader import DataLoader\n",
+    "import pandas as pd\n",
+    "import lamindb as ln\n",
+    "import lnschema_bionty as lb\n",
+    "\n",
     "\n",
     "%load_ext autoreload\n",
     "%autoreload 2"
@@ -128,6 +132,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "# see scprint for this or contact me (@jkobject)\n",
     "embeddings = embed(genedf=genedf,\n",
     "    organism=\"homo_sapiens\",\n",
     "    cache=True,\n",
@@ -189,7 +194,7 @@
    "source": [
     "# the dataloader can weight some rare samples more: \n",
     "# one need to provide the labels on which to weight the samples:\n",
-    "labels_weighted_sampling = hierarchical_labels+[\n",
+    "labels_weighted_sampling = [\n",
     "    'sex_ontology_term_id',\n",
     "    \"cell_type_ontology_term_id\",\n",
     "    #\"tissue_ontology_term_id\",\n",
@@ -246,7 +251,7 @@
    "source": [
     "#we then create a mapped dataset. This transforms a bunch of anndata from possibly various species, into a combined object that acts roughly as a single anndata dataset \n",
     "# (WIP to get all the features of an anndata object) \n",
-    "mdataset = Dataset(dataset, genedf, gene_embedding=embeddings, organisms=['\"NCBITaxon:9606\"'], obs=all_labels, encode_obs=labels_weighted_sampling, map_hierarchy=hierarchical_labels, )\n",
+    "mdataset = Dataset(dataset, genedf, gene_embedding=embeddings, organisms=[\"NCBITaxon:9606\"], obs=all_labels, encode_obs=labels_weighted_sampling)\n",
     "mdataset"
    ]
   },
@@ -257,7 +262,7 @@
    "outputs": [],
    "source": [
     "# now we make the dataloader\n",
-    "dataloader = BaseDataLoader(mdataset, label_to_weight=labels_weighted_sampling, batch_size=4, num_workers=1)\n",
+    "dataloader = DataLoader(mdataset, label_to_weight=labels_weighted_sampling, batch_size=4, num_workers=1)\n",
     "len(dataloader)"
    ]
   },
diff --git a/mkdocs.yml b/mkdocs.yml
index 1bcca38..f10c941 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -13,6 +13,7 @@ nav:
   - dataset: dataset.md
   - preprocess: preprocess.md
   - utils: utils.md
+  - dataloader: dataloader.md
 plugins:
   - search
   - mkdocstrings:
diff --git a/scdataloader/__init__.py b/scdataloader/__init__.py
index e69de29..96584b3 100644
--- a/scdataloader/__init__.py
+++ b/scdataloader/__init__.py
@@ -0,0 +1,3 @@
+from .data import Dataset
+from .dataloader import DataLoader
+from .preprocess import Preprocessor
diff --git a/scdataloader/dataloader.py b/scdataloader/dataloader.py
new file mode 100644
index 0000000..d2c6c9a
--- /dev/null
+++ b/scdataloader/dataloader.py
@@ -0,0 +1,213 @@
+import numpy as np
+from torch.utils.data import DataLoader as TorchLoader
+from torch.utils.data.dataloader import default_collate
+from torch.utils.data.sampler import WeightedRandomSampler
+from scdataloader.mapped import MappedDataset
+from typing import Union
+import torch
+
+# TODO: put in config
+COARSE_TISSUE = {
+    "adipose tissue": "",
+    "bladder organ": "",
+    "blood": "",
+    "bone marrow": "",
+    "brain": "",
+    "breast": "",
+    "esophagus": "",
+    "eye": "",
+    "embryo": "",
+    "fallopian tube": "",
+    "gall bladder": "",
+    "heart": "",
+    "intestine": "",
+    "kidney": "",
+    "liver": "",
+    "lung": "",
+    "lymph node": "",
+    "musculature of body": "",
+    "nose": "",
+    "ovary": "",
+    "pancreas": "",
+    "placenta": "",
+    "skin of body": "",
+    "spinal cord": "",
+    "spleen": "",
+    "stomach": "",
+    "thymus": "",
+    "thyroid gland": "",
+    "tongue": "",
+    "uterus": "",
+}
+
+COARSE_ANCESTRY = {
+    "African": "",
+    "Chinese": "",
+    "East Asian": "",
+    "Eskimo": "",
+    "European": "",
+    "Greater Middle Eastern (Middle Eastern, North African or Persian)": "",
+    "Hispanic or Latin American": "",
+    "Native American": "",
+    "Oceanian": "",
+    "South Asian": "",
+}
+
+COARSE_DEVELOPMENT_STAGE = {
+    "Embryonic human": "",
+    "Fetal": "",
+    "Immature": "",
+    "Mature": "",
+}
+
+COARSE_ASSAY = {
+    "10x 3'": "",
+    "10x 5'": "",
+    "10x multiome": "",
+    "CEL-seq2": "",
+    "Drop-seq": "",
+    "GEXSCOPE technology": "",
+    "inDrop": "",
+    "microwell-seq": "",
+    "sci-Plex": "",
+    "sci-RNA-seq": "",
+    "Seq-Well": "",
+    "Slide-seq": "",
+    "Smart-seq": "",
+    "SPLiT-seq": "",
+    "TruDrop": "",
+    "Visium Spatial Gene Expression": "",
+}
+
+
+class DataLoader(TorchLoader):
+    """
+    DataLoader over a MappedDataset, with weighted random sampling
+    and an optional train/validation split.
+    """
+
+    def __init__(
+        self,
+        mapped_dataset: MappedDataset,
+        batch_size: int = 32,
+        weight_scaler: int = 30,
+        label_to_weight: list = [],
+        validation_split: float = 0.2,
+        num_workers: int = 4,
+        collate_fn=default_collate,
+        sampler=None,
+        **kwargs,
+    ):
+        self.validation_split = validation_split
+        self.dataset = mapped_dataset
+
+        self.batch_idx = 0
+        self.batch_size = batch_size
+        self.n_samples = len(self.dataset)
+        if sampler is None:
+            self.sampler, self.valid_sampler = self._split_sampler(
+                self.validation_split,
+                weight_scaler=weight_scaler,
+                label_to_weight=label_to_weight,
+            )
+        else:
+            self.sampler = sampler
+            self.valid_sampler = None
+
+        self.init_kwargs = {
+            "dataset": self.dataset,
+            "batch_size": batch_size,
+            "collate_fn": collate_fn,
+            "num_workers": num_workers,
+        }
+        super().__init__(sampler=self.sampler, **self.init_kwargs, **kwargs)
+
+    def _split_sampler(self, split, label_to_weight=[], weight_scaler: int = 30):
+        idx_full = np.arange(self.n_samples)
+        np.random.shuffle(idx_full)
+        if len(label_to_weight) > 0:
+            weights = self.dataset.get_label_weights(
+                label_to_weight, scaler=weight_scaler
+            )
+        else:
+            weights = np.ones(self.n_samples)
+        if isinstance(split, int):
+            assert (
+                split < self.n_samples
+            ), "validation set size is configured to be larger than the entire dataset."
+            len_valid = split
+        else:
+            len_valid = int(self.n_samples * split)
+        if len_valid == 0:
+            self.train_idx = idx_full
+            self.valid_idx = np.array([], dtype=int)
+            valid_sampler = None
+        else:
+            self.valid_idx = idx_full[0:len_valid]
+            self.train_idx = np.delete(idx_full, np.arange(0, len_valid))
+            valid_weights = weights.copy()
+            valid_weights[self.train_idx] = 0
+            # TODO: should we do weighted random sampling for the validation set?
+            valid_sampler = WeightedRandomSampler(
+                valid_weights, len_valid, replacement=True
+            )
+        train_weights = weights.copy()
+        train_weights[self.valid_idx] = 0
+        train_sampler = WeightedRandomSampler(
+            train_weights, len(self.train_idx), replacement=True
+        )
+        # no shuffle option: it is mutually exclusive with a sampler
+        return train_sampler, valid_sampler
+
+    def get_valid_dataloader(self):
+        if self.valid_sampler is None:
+            raise ValueError("No validation set is configured.")
+        return DataLoader(
+            self.dataset, batch_size=self.batch_size, sampler=self.valid_sampler
+        )
+
+
+def weighted_random_mask_value(
+    values: Union[torch.Tensor, np.ndarray],
+    mask_ratio: float = 0.15,
+    mask_value: int = -1,
+    important_elements: Union[torch.Tensor, np.ndarray] = np.array([]),
+    important_weight: int = 0,
+    pad_value: int = 0,
+) -> torch.Tensor:
+    """
+    Randomly mask a batch of data.
+
+    Args:
+        values (array-like):
+            A batch of tokenized data, with shape (batch_size, n_features).
+        mask_ratio (float): The ratio of genes to mask, defaults to 0.15.
+        mask_value (int): The value to mask with, defaults to -1.
+        important_elements (array-like): Indices whose sampling weight is
+            important_weight instead of 1; with the default weight of 0
+            they are never masked.
+        important_weight (int): Sampling weight of the important elements,
+            relative to 1 for all other elements. Defaults to 0.
+        pad_value (int): The value of padding in the values, will be kept unchanged.
+
+    Returns:
+        torch.Tensor: A tensor of masked data.
+    """
+    if isinstance(values, torch.Tensor):
+        # it is crucial to clone the tensor, otherwise it changes the original tensor
+        values = values.clone().detach().numpy()
+    else:
+        values = values.copy()
+
+    for i in range(len(values)):
+        row = values[i]
+        non_padding_idx = np.nonzero(row - pad_value)[0]
+        # important elements get important_weight instead of 1;
+        # a weight of 0 means they are never masked
+        probs = np.ones(len(non_padding_idx))
+        probs[np.isin(non_padding_idx, important_elements)] = important_weight
+        n_mask = int((probs > 0).sum() * mask_ratio)
+        probs = probs / probs.sum()
+        mask_idx = np.random.choice(non_padding_idx, n_mask, replace=False, p=probs)
+        row[mask_idx] = mask_value
+    return torch.from_numpy(values).float()
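
Beyond the diff itself, a rough usage sketch of the new API may help review. It mirrors `docs/notebooks/2_create_dataloader.ipynb` and is illustrative only: `dataset` (a LaminDB dataset of AnnData files) and `genedf` (a gene dataframe) are assumed to come from the earlier notebook steps and are not defined here, and the keyword values are the notebook's examples, not required defaults.

```python
from scdataloader import Dataset, DataLoader

# labels used both to encode obs columns and to weight rare samples
labels_weighted_sampling = [
    "sex_ontology_term_id",
    "cell_type_ontology_term_id",
]

# combine many AnnData files into one mapped dataset
# (`dataset` and `genedf` are assumed to exist, as in the notebook)
mdataset = Dataset(
    dataset,
    genedf,
    organisms=["NCBITaxon:9606"],
    obs=labels_weighted_sampling,
    encode_obs=labels_weighted_sampling,
)

# weighted random sampling over rare labels, with a 20% validation split
dataloader = DataLoader(
    mdataset,
    label_to_weight=labels_weighted_sampling,
    validation_split=0.2,
    batch_size=4,
    num_workers=1,
)
valid_dataloader = dataloader.get_valid_dataloader()

for batch in dataloader:
    ...  # training step
```

Since `DataLoader` subclasses `torch.utils.data.DataLoader`, any extra keyword arguments in `**kwargs` (for example `pin_memory=True`) pass straight through to the standard loader.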
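
Likewise, a small self-contained sanity check of `weighted_random_mask_value` as fixed above (the undefined `do_not_pad_index` is replaced by a weighted draw over `important_elements`); the toy tensor and the choice of important columns are made up for illustration.

```python
import numpy as np
import torch

from scdataloader.dataloader import weighted_random_mask_value

# toy batch: 2 cells x 10 gene tokens, where 0 is padding
values = torch.tensor([
    [1, 2, 3, 4, 5, 0, 0, 0, 0, 0],
    [5, 4, 3, 2, 1, 6, 7, 0, 0, 0],
])

masked = weighted_random_mask_value(
    values,
    mask_ratio=0.5,
    mask_value=-1,
    important_elements=np.array([0, 1]),  # columns 0 and 1 get weight 0,
    important_weight=0,                   # i.e. they are never masked
)
# half of the maskable (non-padding, non-important) positions per row are now -1
print(masked)
```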