Skip to content

Commit

Permalink
Merge branch 'master' into mk/ann_fork
Browse files Browse the repository at this point in the history
  • Loading branch information
karasikov committed Dec 3, 2021
2 parents 2ac20d1 + 1cf7a6e commit f01dab1
Show file tree
Hide file tree
Showing 108 changed files with 9,388 additions and 2,740 deletions.
78 changes: 78 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ name: MetaGraph CI

on: [push]

env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}


jobs:

Linux:
Expand Down Expand Up @@ -173,6 +178,79 @@ jobs:
- name: run integration tests
run: cd metagraph/build && make check

Build-and-Push-Docker:
  # Builds the Docker image from the repository root and pushes it to the
  # registry configured in the workflow-level `env` (REGISTRY / IMAGE_NAME).
  # Adapted from https://docs.github.com/en/actions/guides/publishing-docker-images#publishing-images-to-github-packages
  #
  # Only runs for pushes to the master branch, and only after the Linux job
  # has finished successfully.
  if: github.ref == 'refs/heads/master'
  needs: [Linux]
  runs-on: ubuntu-20.04
  # Scope of the GITHUB_TOKEN used for the registry login below.
  permissions:
    contents: read
    packages: write

  steps:
    - name: Checkout repository
      uses: actions/checkout@v2

    # The checkout step above is not configured to fetch submodules,
    # so they are initialised explicitly here.
    - name: checkout submodules
      run: git submodule update --init --recursive

    # Action pinned to a full commit SHA rather than a version tag.
    - name: Log in to the Container registry
      uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    # Produces the `tags` and `labels` outputs consumed by the build step.
    - name: Extract metadata (tags, labels) for Docker
      id: meta
      uses: docker/metadata-action@v3
      with:
        images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

    - name: Build and push Docker image
      uses: docker/build-push-action@v2
      with:
        context: .
        push: true
        tags: ${{ steps.meta.outputs.tags }}
        labels: ${{ steps.meta.outputs.labels }}


Metagraph-Workflows:
  # Runs the pytest suite in metagraph/workflows against a pre-built
  # metagraph binary downloaded from this run's artifacts
  # (presumably uploaded by the Linux job this job depends on - verify there).
  name: Test metagraph workflows
  runs-on: ubuntu-20.04
  needs: [Linux]

  steps:
    - uses: actions/checkout@v2

    - name: Set up Python 3.8
      uses: actions/setup-python@v1
      with:
        # Quoted so the version stays a string and is never read as a YAML
        # float (e.g. 3.10 would silently become 3.1).
        python-version: '3.8'

    - name: fetch static binary
      uses: actions/download-artifact@v2
      with:
        # No artifact `name` is given; the next step expects the binary in
        # the `metagraph_DNA_linux_x86` subdirectory of this path.
        path: artifacts

    - name: setup metagraph binary
      run: |
        # "$(pwd)" is quoted so a workspace path containing spaces cannot be word-split
        sudo ln -s "$(pwd)/artifacts/metagraph_DNA_linux_x86/metagraph_DNA" /usr/local/bin/metagraph
        sudo chmod +rx /usr/local/bin/metagraph
        # check the binary both via its absolute path and via PATH lookup
        /usr/local/bin/metagraph --help
        metagraph --help

    - name: Install python dependencies
      run: |
        python -m pip install --upgrade pip
        pip install pytest
        pip install -r metagraph/workflows/requirements.txt

    - name: Test metagraph-workflows pytest
      run: |
        cd metagraph/workflows
        pytest
Release:
name: Create Release
if: contains(github.ref, 'tags/v')
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ FROM ubuntu:20.04
ARG CODE_BASE

# the image used in production. It contains a basic runtime environment for metagraph without build tools along with
# the metagraph binary and python API code. This image is published on dockerhub (`ratschlab/metagraph`).
# the metagraph binary and python API code. This image is published on github's container registry (`ghcr.io/ratschlab/metagraph`).

RUN apt-get update && apt-get install -y \
libatomic1 \
Expand Down
26 changes: 25 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,31 @@ At the same time, the provided workflows and their careful implementation, combi

## Install

See [docs online](https://metagraph.ethz.ch/static/docs/index.html).
### Conda

Install the [latest release](https://github.com/ratschlab/metagraph/releases/latest) on Linux or Mac OS X with Anaconda:

```
conda install -c bioconda -c conda-forge metagraph
```

### Docker

If docker is available on the system, you can immediately get started with:

```
docker run -v ${HOME}:/mnt ghcr.io/ratschlab/metagraph:master build -v -k 10 \
-o /mnt/transcripts_1000 \
/mnt/transcripts_1000.fa
```

(Replace `${HOME}` with a directory on the host system to map it under `/mnt` in the container.)

All different versions of the container are listed [here](https://github.com/ratschlab/metagraph/pkgs/container/metagraph).

### Install From Sources

To compile from source, see [documentation online](https://metagraph.ethz.ch/static/docs/installation.html#install-from-source) (e.g., for builds with custom configurations).


## Typical workflow
Expand Down
1 change: 1 addition & 0 deletions metagraph/api/python/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ Usage
For more examples, see `notebooks
<./notebooks>`_.

4 changes: 0 additions & 4 deletions metagraph/api/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@
'Programming Language :: Python :: 3.6',
],
description="Metagraph Toolkit",
entry_points={
'console_scripts': [
],
},
install_requires=requirements,
license="MIT license",
long_description=readme,
Expand Down
5 changes: 1 addition & 4 deletions metagraph/api/python/tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def _load_json_data(filename):

@pytest.mark.parametrize("file_name,align,expected_shape", [
('search_response.json', False, (4, 15)),
('search_with_align_response.json', True, (354, 18))
('search_with_align_response.json', True, (354, 15))
])
def test_df_from_search_result(file_name, align, expected_shape):
json_obj = _load_json_data(file_name)
Expand All @@ -27,9 +27,6 @@ def test_df_from_search_result(file_name, align, expected_shape):
'metasub_name', 'num_reads', 'sample_type', 'station',
'surface_material', 'seq_description']

if align:
expected_cols = expected_cols + ['sequence', 'score', 'cigar']

assert list(df.columns) == expected_cols


Expand Down
3 changes: 1 addition & 2 deletions metagraph/docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,8 @@ framework, a software platform for indexing and analysis of very large sequence

installation.rst
quick_start.rst
workflows.rst
api.rst
sequence_search.rst
sequence_assembly.rst
resources.rst


6 changes: 4 additions & 2 deletions metagraph/docs/source/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@ Docker container

If docker is available on your system, you can immediately get started with::

docker run -v ${DATA_DIR_HOST}:/mnt ratschlab/metagraph \
docker run -v ${DATA_DIR_HOST}:/mnt ghcr.io/ratschlab/metagraph:master \
build -v -k 10 -o /mnt/transcripts_1000 /mnt/transcripts_1000.fa


where you'd need to replace ``${DATA_DIR_HOST}`` with a directory on the host system to map it
under ``/mnt`` in the container. This docker container uses the latest version of MetaGraph from
the source `GitHub repository <https://github.com/ratschlab/metagraph>`_ (branch ``master``).
See also the `image overview <https://github.com/ratschlab/metagraph/pkgs/container/metagraph>`_ for
other versions of the metagraph image.


Install from source
Expand Down Expand Up @@ -131,7 +133,7 @@ To compile MetaGraph, please follow these steps.
git clone --recursive https://github.com/ratschlab/metagraph.git

#. Change into the ``metagraph`` directory::

cd metagraph

#. Make sure all submodules have been downloaded::
Expand Down
105 changes: 105 additions & 0 deletions metagraph/docs/source/workflows.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
=========
Workflows
=========

This package provides workflows for the `metagraph framework
<https://metagraph.ethz.ch>`_


Workflows for Creating Graphs and Annotations
---------------------------------------------

Since the creation of graphs and indices comprises several steps, this package provides
some support to simplify these tasks, in particular for standard cases.

Given some raw sequence data and a few options like the k-mer size (`k`), graphs and annotations
are automatically built:

.. code-block:: bash
metagraph-workflows build -k 5 transcript_paths.txt /tmp/mygraph
If you prefer invoking the workflow from within a python script, the following is equivalent:

.. code-block:: python
from metagraph_workflows import workflows
workflows.run_build_workflow('/tmp/mygraph', seqs_file_list_path='transcript_paths.txt', k=5)
The workflow logic itself is expressed as a `Snakemake workflow
<https://snakemake.readthedocs.io/>`_ . You can also directly invoke the workflows
using the `snakemake` command line tool (see below).


Installation and Set up
~~~~~~~~~~~~~~~~~~~~~~~


Set up a conda environment and install the necessary packages using:

.. code-block:: bash
conda create -n metagraph-workflows python=3.8
conda activate metagraph-workflows
conda install -c bioconda -c conda-forge metagraph
pip install -U "git+https://github.com/ratschlab/metagraph.git#subdirectory=metagraph/workflows"
Usage Example
~~~~~~~~~~~~~

Typically, the following steps would be performed:

1. sequence file preparation: add your sequence files of interest into a directory.
2. running workflow: you can invoke the workflow using ``metagraph-workflows build``. Important parameters you may consider tuning are:

* k
* primary vs non primary graph creation
* annotation label source: ``sequence_headers`` or ``sequence_file_names``

An example invocation:

.. code-block:: bash
metagraph-workflows build -k 31 \
--seqs-dir-path [PATH_TO_SEQUENCES] \
--annotation-labels-source sequence_headers \
--build-primary-graph \
[OUTPUT_DIR]
see ``metagraph-workflows build --help`` for more help
3. do queries: once you have created the indices, you can query them either by using the command line
query tool or by starting the metagraph server on your laptop or another suitable machine and
doing queries using, e.g., the python :ref:`API` client.


There is also a `jupyter notebook <https://github.com/ratschlab/metagraph/blob/master/metagraph/workflows/notebooks/workflow_end_to_end_example.ipynb>`_ walking you through an example from indexing to api querying.



Workflow Management
~~~~~~~~~~~~~~~~~~~

The following snakemake options are exposed in the ``build`` subcommand

* ``--dryrun``: see what workflow steps would be done
* ``--force`` (corresponds to ``--forceall`` in snakemake): force run all steps


Directly Invoking Snakemake Workflow
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The above command is only a wrapper around a snakemake workflow. You can also
directly invoke the snakemake workflow (assuming you checked out the `metagraph git repository <https://github.com/ratschlab/metagraph>`_):

.. code-block:: bash
cd metagraph/workflows
snakemake --forceall --configfile default.yml \
--config k=5 seqs_file_list_path='transcript_paths.txt' output_directory=/tmp/mygraph \
annotation_labels_source=sequence_headers --cores 2
20 changes: 17 additions & 3 deletions metagraph/integration_tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,16 @@ def _clean(graph, output, extra_params=''):

@staticmethod
def _annotate_graph(input, graph_path, output, anno_repr,
separate=False, no_fork_opt=False, no_anchor_opt=False):
separate=False, no_fork_opt=False, no_anchor_opt=False,
anno_type='header'):
target_anno = anno_repr

noswap = anno_repr.endswith('_noswap')
if noswap:
anno_repr = anno_repr[:-len('_noswap')]

if (anno_repr in {'row_sparse', 'column_coord'} or
anno_repr.endswith('_coord') or
anno_repr.endswith('brwt') or
anno_repr.startswith('row_diff')):
target_anno = anno_repr
Expand All @@ -115,7 +122,7 @@ def _annotate_graph(input, graph_path, output, anno_repr,
target_anno = anno_repr
anno_repr = 'row'

command = f'{METAGRAPH} annotate -p {NUM_THREADS} --anno-header \
command = f'{METAGRAPH} annotate -p {NUM_THREADS} --anno-{anno_type}\
-i {graph_path} --anno-type {anno_repr} \
-o {output} {input}'

Expand All @@ -141,6 +148,8 @@ def _annotate_graph(input, graph_path, output, anno_repr,
{output + anno_file_extension[anno_repr]}'

other_args = ' --count-kmers' if with_counts else ''
other_args += ' --coordinates' if final_anno.endswith('_coord') else ''
other_args += ' --disk-swap \"\"' if noswap else ''

if target_anno == 'row_diff':
command += ' -i ' + graph_path
Expand Down Expand Up @@ -170,11 +179,16 @@ def _annotate_graph(input, graph_path, output, anno_repr,
assert(res.returncode == 0)

if final_anno != target_anno:
rd_type = 'column' if with_counts else 'row_diff'
rd_type = 'column' if with_counts or final_anno.endswith('_coord') else 'row_diff'
command = f'{METAGRAPH} transform_anno --anno-type {final_anno} --greedy -o {output} ' \
f'-i {graph_path} -p {NUM_THREADS} {output}.{rd_type}.annodbg'
res = subprocess.run([command], shell=True)
assert (res.returncode == 0)
os.remove(output + anno_file_extension[rd_type])
else:
os.remove(output + anno_file_extension[anno_repr])

if final_anno.endswith('brwt') or final_anno.endswith('brwt_coord'):
command = f'{METAGRAPH} relax_brwt -o {output} -p {NUM_THREADS} {output}.{final_anno}.annodbg'
res = subprocess.run([command], shell=True)
assert (res.returncode == 0)
Loading

0 comments on commit f01dab1

Please sign in to comment.