diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index aebc1832e5..c7536c1f44 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -215,6 +215,42 @@ jobs: tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} + + Metagraph-Workflows: + name: Test metagraph workflows + runs-on: ubuntu-20.04 + needs: [Linux] + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python 3.8 + uses: actions/setup-python@v1 + with: + python-version: 3.8 + + - name: fetch static binary + uses: actions/download-artifact@v2 + with: + path: artifacts + + - name: setup metagraph binary + run: | + sudo ln -s $(pwd)/artifacts/metagraph_DNA_linux_x86/metagraph_DNA /usr/local/bin/metagraph + sudo chmod +rx /usr/local/bin/metagraph + /usr/local/bin/metagraph --help + metagraph --help + + - name: Install python dependencies + run: | + python -m pip install --upgrade pip + pip install pytest + pip install -r metagraph/workflows/requirements.txt + - name: Test metagraph-workflows pytest + run: | + cd metagraph/workflows + pytest + Release: name: Create Release if: contains(github.ref, 'tags/v') diff --git a/metagraph/api/python/README.rst b/metagraph/api/python/README.rst index 529637dc9f..c8d55debbb 100644 --- a/metagraph/api/python/README.rst +++ b/metagraph/api/python/README.rst @@ -31,3 +31,4 @@ Usage For more examples, see `notebooks <./notebooks>`_. 
+ diff --git a/metagraph/api/python/setup.py b/metagraph/api/python/setup.py index 594a8d1c25..875c425988 100644 --- a/metagraph/api/python/setup.py +++ b/metagraph/api/python/setup.py @@ -31,10 +31,6 @@ 'Programming Language :: Python :: 3.6', ], description="Metagraph Toolkit", - entry_points={ - 'console_scripts': [ - ], - }, install_requires=requirements, license="MIT license", long_description=readme, diff --git a/metagraph/api/python/tests/test_helpers.py b/metagraph/api/python/tests/test_helpers.py index da51b6d636..770c53d00c 100644 --- a/metagraph/api/python/tests/test_helpers.py +++ b/metagraph/api/python/tests/test_helpers.py @@ -14,7 +14,7 @@ def _load_json_data(filename): @pytest.mark.parametrize("file_name,align,expected_shape", [ ('search_response.json', False, (4, 15)), - ('search_with_align_response.json', True, (354, 18)) + ('search_with_align_response.json', True, (354, 15)) ]) def test_df_from_search_result(file_name, align, expected_shape): json_obj = _load_json_data(file_name) @@ -27,9 +27,6 @@ def test_df_from_search_result(file_name, align, expected_shape): 'metasub_name', 'num_reads', 'sample_type', 'station', 'surface_material', 'seq_description'] - if align: - expected_cols = expected_cols + ['sequence', 'score', 'cigar'] - assert list(df.columns) == expected_cols diff --git a/metagraph/docs/source/index.rst b/metagraph/docs/source/index.rst index 1e25093e0b..6f33ad4e3e 100644 --- a/metagraph/docs/source/index.rst +++ b/metagraph/docs/source/index.rst @@ -12,9 +12,8 @@ framework, a software platform for indexing and analysis of very large sequence installation.rst quick_start.rst + workflows.rst api.rst sequence_search.rst sequence_assembly.rst resources.rst - - diff --git a/metagraph/docs/source/workflows.rst b/metagraph/docs/source/workflows.rst new file mode 100644 index 0000000000..2a7b0564a8 --- /dev/null +++ b/metagraph/docs/source/workflows.rst @@ -0,0 +1,105 @@ +========= +Workflows +========= + +This package provides workflows 
for the `metagraph framework +`_ + + +Workflows for Creating Graphs and Annotations +--------------------------------------------- + +Since the creation of graphs and indices comprises several steps, this package provides +some support to simplify these tasks - in particular for standard cases. + +Given some raw sequence data and a few options like the kmer size (`k`), graphs and annotations +are automatically built: + +.. code-block:: bash + + metagraph-workflows build -k 5 --seqs-file-list-path transcript_paths.txt /tmp/mygraph + + +If you prefer invoking the workflow from within a python script, the following is equivalent: + +.. code-block:: python + + from metagraph_workflows import workflows + workflows.run_build_workflow('/tmp/mygraph', seqs_file_list_path='transcript_paths.txt', k=5) + + + +The workflow logic itself is expressed as a `Snakemake workflow +`_ . You can also directly invoke the workflows +using the `snakemake` command line tool (see below). + + +Installation and Set up +~~~~~~~~~~~~~~~~~~~~~~~ + + +Set up a conda environment and install the necessary packages using: + +.. code-block:: bash + + conda create -n metagraph-workflows python=3.8 + conda activate metagraph-workflows + conda install -c bioconda -c conda-forge metagraph + pip install -U "git+https://github.com/ratschlab/metagraph.git#subdirectory=metagraph/workflows" + + + + +Usage Example +~~~~~~~~~~~~~ + +Typically, the following steps would be performed: + +1. sequence file preparation: add your sequence files of interest into a directory. +2. running workflow: you can invoke the workflow using ``metagraph-workflows build``. Important parameters you may consider tuning are: + + * k + * primary vs non primary graph creation + * annotation label source: ``sequence_headers`` or ``sequence_file_names`` + + An example invocation: + + .. 
code-block:: bash + + metagraph-workflows build -k 31 \ + --seqs-dir-path [PATH_TO_SEQUENCES] \ + --annotation-labels-source sequence_headers \ + --build-primary-graph \ + [OUTPUT_DIR] + + see ``metagraph-workflows build --help`` for more help +3. do queries: once you have created the indices, you can query them either by using the command line + query tool or by starting the metagraph server on your laptop or another suitable machine and + running queries using e.g. the python :ref:`API` client. + + +There is also a `jupyter notebook `_ walking you through an example from indexing to api querying. + + + +Workflow Management +~~~~~~~~~~~~~~~~~~~ + +The following snakemake options are exposed in the ``build`` subcommand: + + * ``--dryrun``: see what workflow steps would be done + * ``--force`` (corresponds to ``--forceall`` in snakemake): force run all steps + + +Directly Invoking Snakemake Workflow +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The above command is only a wrapper around a snakemake workflow. You can also +directly invoke the snakemake workflow (assuming you checked out the `metagraph git repository `_): + +.. 
code-block:: bash + + cd metagraph/workflows + snakemake --forceall --configfile default.yml \ + --config k=5 seqs_file_list_path='transcript_paths.txt' output_directory=/tmp/mygraph \ + annotation_labels_source=sequence_headers --cores 2 diff --git a/metagraph/workflows/.editorconfig b/metagraph/workflows/.editorconfig new file mode 100644 index 0000000000..d4a2c4405e --- /dev/null +++ b/metagraph/workflows/.editorconfig @@ -0,0 +1,21 @@ +# http://editorconfig.org + +root = true + +[*] +indent_style = space +indent_size = 4 +trim_trailing_whitespace = true +insert_final_newline = true +charset = utf-8 +end_of_line = lf + +[*.bat] +indent_style = tab +end_of_line = crlf + +[LICENSE] +insert_final_newline = false + +[Makefile] +indent_style = tab diff --git a/metagraph/workflows/.gitignore b/metagraph/workflows/.gitignore new file mode 100644 index 0000000000..0b799d6268 --- /dev/null +++ b/metagraph/workflows/.gitignore @@ -0,0 +1,108 @@ +.snakemake +metagraph_workflows/snakemake/output_dir_example + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# Pycharm +.idea diff --git a/metagraph/workflows/LICENSE b/metagraph/workflows/LICENSE new file mode 100644 index 0000000000..b5447c1e77 --- /dev/null +++ b/metagraph/workflows/LICENSE @@ -0,0 +1,24 @@ + + +MIT License + +Copyright (c) 2021, ETH Zurich, Biomedical Informatics Group; Marc Zimmermann + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/metagraph/workflows/MANIFEST.in b/metagraph/workflows/MANIFEST.in new file mode 100644 index 0000000000..1b52b5db53 --- /dev/null +++ b/metagraph/workflows/MANIFEST.in @@ -0,0 +1,12 @@ +include LICENSE +include requirements.txt + +recursive-include tests * +recursive-exclude * __pycache__ +recursive-exclude * *.py[co] + +recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif + +recursive-include metagraph_workflows/snakemake *.smk Snakefile default.yml +recursive-include metagraph_workflows/snakemake/test_data *.fa +recursive-exclude **/.snakemake * diff --git a/metagraph/workflows/README.rst b/metagraph/workflows/README.rst new file mode 100644 index 0000000000..ab118e2e60 --- /dev/null +++ b/metagraph/workflows/README.rst @@ -0,0 +1,8 @@ +=================== +metagraph_workflows +=================== + +This package provides workflows for the `metagraph framework +`_ + +See the `corresponding section `_ in the metagraph documentation. 
# TODO: use custom config object? fluent config?
def run_build_workflow(
        output_dir: Path,
        seqs_file_list_path: Optional[Path] = None,
        seqs_dir_path: Optional[Path] = None,
        k: Optional[int] = None,
        base_name: Optional[str] = None,
        build_primary_graph: bool = False,
        annotation_formats: Iterable[AnnotationFormats] = (),
        annotation_labels_source: Optional[AnnotationLabelsSource] = None,
        metagraph_cmd: Optional[str] = None,
        threads: Optional[int] = None,
        force: bool = False,
        verbose: bool = False,
        dryrun: bool = False,
        additional_snakemake_args: Optional[Dict[str, Any]] = None
) -> None:
    """Run the graph/annotation build workflow through the snakemake API.

    The defaults from ``default.yml`` are loaded first; every argument that
    is set (truthy) then overrides the corresponding config entry before the
    ``Snakefile`` next to this module is executed.

    :param output_dir: stored (as a string) in ``config['output_directory']``.
    :param seqs_file_list_path: path of a text file listing sequence files.
    :param seqs_dir_path: path of a directory containing sequence files.
        At least one of ``seqs_file_list_path``/``seqs_dir_path`` is required.
    :param k: overrides ``config['k']`` when set.
    :param base_name: overrides ``config['base_name']`` when set.
    :param build_primary_graph: always written to
        ``config['build_primary_graph']``.
    :param annotation_formats: formats whose ``.value`` replaces
        ``config['annotation_formats']``; an empty iterable keeps the default.
    :param annotation_labels_source: its ``.value`` overrides
        ``config['annotation_labels_source']`` when set.
    :param metagraph_cmd: overrides ``config['metagraph_cmd']`` when set.
    :param threads: value for ``config['max_threads']``; falls back to
        ``snakemake.available_cpu_count()``.
    :param force: passed to snakemake as ``forceall``.
    :param verbose: log at INFO level and dump the effective config.
    :param dryrun: passed to snakemake as ``dryrun``.
    :param additional_snakemake_args: extra keyword arguments forwarded
        verbatim to ``snakemake.snakemake``.
    :raises ValueError: if neither sequence input path is given.
    :raises RuntimeError: if snakemake reports an unsuccessful run.
    """
    # TODO: support str argumt?

    # Fail fast on invalid input, before touching any file.
    if not seqs_file_list_path and not seqs_dir_path:
        raise ValueError("seqs_file_list_path and seqs_dir_path cannot both be None")

    snakefile_path = WORKFLOW_ROOT / 'Snakefile'

    config = snakemake.load_configfile(default_path)

    if seqs_file_list_path:
        config[SEQS_FILE_LIST_PATH] = str(seqs_file_list_path)
    if seqs_dir_path:
        config[SEQS_DIR_PATH] = str(seqs_dir_path)

    config['output_directory'] = str(output_dir)

    config['k'] = k if k else config['k']

    if annotation_labels_source:
        config['annotation_labels_source'] = annotation_labels_source.value

    config['base_name'] = base_name if base_name else config['base_name']
    config['build_primary_graph'] = build_primary_graph

    # Materialise first: a generator is always truthy, so testing the
    # iterable itself could silently replace the default with an empty list.
    requested_formats = [af.value for af in annotation_formats]
    config['annotation_formats'] = (requested_formats if requested_formats
                                    else config['annotation_formats'])

    config['metagraph_cmd'] = metagraph_cmd if metagraph_cmd else config['metagraph_cmd']
    config['max_threads'] = threads if threads else snakemake.available_cpu_count()

    if verbose:
        # Raise the verbosity of the existing root logger instead of reloading
        # the logging module (which would replace the root logger and detach
        # loggers that were created earlier). basicConfig only installs a
        # handler if the root logger has none yet.
        logging.basicConfig(format=LOGGING_FORMAT, level=logging.INFO)
        logging.getLogger().setLevel(logging.INFO)
        logging.info("Dumping config:")
        # distinct loop names: do not shadow the `k` parameter
        for key, value in sorted(config.items(), key=lambda item: item[0]):
            logging.info(f"\t{key}: {value}")

    additional_args = additional_snakemake_args if additional_snakemake_args else {}

    was_successful = snakemake.snakemake(str(snakefile_path), config=config,
                                         scheduler='greedy',
                                         forceall=force,
                                         dryrun=dryrun,
                                         **additional_args
                                         )

    if not was_successful:
        raise RuntimeError("The snakemake workflow did not terminate correctly. "
                           "See output or log files in the output directory for more details.")
def _convert_type(v: str) -> Any:
    """Convert a command line string into a bool, int or float if possible.

    ``'true'``/``'false'`` (any case) and ``'1'``/``'0'`` become booleans.
    Other integer literals become ``int`` (so that e.g. ``cores=2`` is not
    turned into ``2.0``), remaining numeric literals become ``float``, and
    everything else is returned unchanged.
    """
    lowered = v.lower()
    if lowered == 'true' or v == '1':
        return True
    if lowered == 'false' or v == '0':
        return False

    # int before float: float() also accepts integer literals
    for cast in (int, float):
        try:
            return cast(v)
        except ValueError:
            continue

    return v


def _parse_additional_snakemake_args(arg: str) -> Dict[str, Any]:
    """Parse a shell-like string of ``key=value`` tokens into a dict.

    Tokens are split with :func:`shlex.split`, so values may be quoted. Only
    the first ``=`` separates key and value, which allows values that
    contain ``=`` themselves.

    :param arg: e.g. ``'arg1=val1 arg2=val2'``; an empty string yields ``{}``.
    :return: mapping of argument names to values converted by
        :func:`_convert_type`.
    :raises ValueError: if a token has no ``=`` or an empty key.
    """
    ret = {}
    for token in shlex.split(arg):
        key, separator, value = token.partition('=')
        if not separator or not key:
            raise ValueError(
                f"Invalid additional snakemake argument '{token}': "
                f"expected the form key=value")

        ret[key] = _convert_type(value)

    return ret
def columns_size_mb(columns_file):
    """Return the combined size (in MiB) of the files listed in *columns_file*.

    :param columns_file: path of a text file with one file path per line.
        Surrounding whitespace is stripped; blank lines are ignored so that a
        trailing newline does not trigger a lookup of the empty path.
    :return: total size of all listed files, in MiB, as a float.
    :raises OSError: if the list file or one of the listed files is missing.
    """
    with open(columns_file) as f:
        listed_paths = [line.strip() for line in f]

    col_file_size_bytes = sum(os.stat(path).st_size
                              for path in listed_paths if path)
    return col_file_size_bytes / 1024 ** 2
class SupportsMemBufferSize(ResourceConfig):
    """Resource config for rules whose command takes ``--mem-cap-gb``."""

    MEM_OVERHEAD = BASE_MEM

    # share of the available memory handed out as buffer by the default estimate
    CAP_MEM_FRACTION = 0.85

    def get_mem_buffer_gib(self):
        """
        Build the callable yielding the `--mem-cap-gb` value (in GiB).

        A rule specific buffer setting wins; otherwise the estimate of
        `_mem_buf_estimate` is used, limited by the configured maximal
        buffer size. `TBD_VALUE` is passed through unchanged.
        """
        # NOTE(review): the parameter names of the inner function are kept
        # exactly as they were - presumably snakemake selects the arguments
        # to pass by name; confirm before renaming.
        def _get_mem_buffer(wildcards, input, threads, resources):
            configured_mb = get_rule_specific_config(self.rule_name,
                                                     MEM_BUFFER_MB_KEY,
                                                     self.config)
            if configured_mb:
                buffer_mb = configured_mb
            else:
                estimate_mb = self._mem_buf_estimate(wildcards, resources, input, threads)
                upper_bound_mb = self.config[workflow_configs.MAX_BUFFER_SIZE_MB]
                buffer_mb = min(estimate_mb, upper_bound_mb)

            if buffer_mb == TBD_VALUE:
                return TBD_VALUE

            # MB -> GiB, rounded up
            return int(math.ceil(buffer_mb / 1024.0))

        return _get_mem_buffer

    def _mem_buf_estimate(self, wildcards, resources, input, threads):
        """
        Default buffer estimate: `CAP_MEM_FRACTION` of the memory available
        to the rule, but never less than 1024 MB.
        """
        rule_mem_mb = get_rule_specific_config(self.rule_name, MEM_MB_KEY,
                                               self.config)

        if rule_mem_mb:
            avail_mem_mb = rule_mem_mb
        else:
            avail_mem_mb = resources.get('mem_mb', _get_max_memory(self.config))

        fraction_mb = int(self.CAP_MEM_FRACTION * avail_mem_mb)
        return max(fraction_mb, 1024)  # TODO: parametrize constant?
class BuildGraphResourcesWithKmerEstimates(SupportsMemBufferSizeWithEstimation, SupportsDiskCap):
    """Graph build resources with a buffer estimate based on KMC k-mer statistics."""

    # keys used to look up the unique k-mer count in the KMC json output
    KMC_STATS_KEY = "Stats"
    KMC_UNIQUE_KMER_CNT = "#Unique_counted_k-mers"

    def _mem_buf_estimate(self, wildcards, resources, input, threads) -> int:
        """
        Estimate the buffer size (MB) from the number of unique k-mers in the
        json file given as ``input['kmer']``.

        Returns `TBD_VALUE` while that file does not exist; the estimate is
        never below 1024.
        """
        stats_path = Path(input['kmer'])
        if not stats_path.exists():
            return TBD_VALUE

        with open(stats_path, 'r') as stats_file:
            stats = json.load(stats_file)

        unique_kmers = stats[self.KMC_STATS_KEY][self.KMC_UNIQUE_KMER_CNT]

        bytes_per_kmer = 2.6
        # 2x canonical+non-canonical + ~30% for dummy kmers (typically it's 10%)
        kmer_count = 2.6 * unique_kmers

        estimated_mb = int(math.ceil(kmer_count * bytes_per_kmer / 1024**2))
        return max(estimated_mb, 1024)
class TransformRdStage0Resources(SupportsMemBufferSizeWithEstimation):
    """Resources of the ``transform_rd_stage0`` rule."""

    def __init__(self, config):
        super().__init__('transform_rd_stage0', config)

    def _mem_buf_estimate(self, wildcards, resources, input, threads):
        """
        Buffer estimate: size of all files listed in ``input.columns_file``
        (in MiB) plus `BASE_MEM`; `TBD_VALUE` while the list file is missing.
        """
        columns_list = input.columns_file
        if not Path(columns_list).exists():
            return TBD_VALUE

        return int(columns_size_mb(columns_list) + BASE_MEM)
TransformRdStage2Resources, ResourceConfig + +wdir=utils.get_wdir(config) + +graph=config['base_name'] +build_primary=take_value_or_default('build_primary_graph', False, config) + +annotation_formats = config['annotation_formats'] if isinstance(config['annotation_formats'], list) else [config['annotation_formats']] + +# validate values +for af in annotation_formats: + AnnotationFormats(af) + # TODO: make a nicer error + +annotation_labels_opt = AnnotationLabelsSource(config['annotation_labels_source']).to_annotation_cmd_option() + + +metagraph_cmd=config['metagraph_cmd'] +time_cmd=utils.get_gnu_time_command(config) + +max_threads=take_value_or_default(workflow_configs.MAX_THREADS, workflow.cores, config) + +# TODO +max_memory_mb=take_value_or_default('max_memory_mb', 4000, config) + +verbose_opt=' -v ' + +DONE="DONE" + +## Paths +graph_path=wdir/f'{graph}.dbg' +annotation_cols_path=wdir/'columns' +annotation_path_done=annotation_cols_path/DONE + +columns_file=wdir/'columns.txt' + +seqs_file_list_path=utils.get_seqs_file_list_path(wdir, config) + +contigs_dir=wdir/'contigs' + +seq_ids_dict = {} +if not config[workflow_configs.SAMPLE_IDS_PATH]: + seq_ids_dict = utils.derive_sample_dictionary(seqs_file_list_path) + +localrules: generate_column_list + +rule all: + input: + graph_path, + [wdir/f'{graph}.{anno_type}.annodbg' for anno_type in annotation_formats] + + +include: 'build.smk' + +ANNOTATE_RULE="annotate" +rule annotate: + input: + seqs=utils.get_build_joint_input(config, contigs_dir, seq_ids_dict, seqs_file_list_path), + dbg_graph=graph_path, + output: + done=touch(annotation_path_done), + column_anno_files=utils.generate_col_paths(annotation_cols_path, seqs_file_list_path, config) + threads: max_threads + resources: + mem_mb=ResourceConfig(ANNOTATE_RULE, config).get_mem(), + params: + separate_build=str(bool(config[workflow_configs.PRIMARIZE_SAMPLES_SEPARATELY])).lower(), + tempdir_opt=utils.temp_dir_config(config), + log: utils.get_log_path(ANNOTATE_RULE, 
config) + shell: + """ + if {params.separate_build}; then + SEQ_PATHS={wdir}/seqs_paths.txt + echo "{input}" | tr ' ' '\n' > $SEQ_PATHS + else + SEQ_PATHS="{input.seqs}" + fi + + OUT_DIR=$(dirname {output.done}) + mkdir -p $OUT_DIR + cat $SEQ_PATHS | {time_cmd} {metagraph_cmd} annotate \ + {verbose_opt} \ + --parallel {threads} \ + -i {input.dbg_graph} \ + {annotation_labels_opt} \ + --anno-type column \ + --separately \ + -o $OUT_DIR {params.tempdir_opt} > {log} 2>&1 + """ + +GENERATE_COLUMN_LIST_RULE="generate_column_list" +rule generate_column_list: + input: rules.annotate.output.column_anno_files + output: columns_file + run: + with open(output[0], 'w') as f: + f.write('\n'.join([str(l) for l in input])) + + +max_path_length=None + +GENERATE_BRWT_LINKAGE_RULE="generate_brwt_linkage" +rule generate_brwt_linkage: + input: + columns_file=columns_file, + output: + linkage=wdir/f"{graph}.linkage.txt" + threads: max_threads + resources: + mem_mb=ResourceConfig(GENERATE_BRWT_LINKAGE_RULE, config).get_mem(), + params: + subsample=config[workflow_configs.BRWT_LINKAGE_SUBSAMPLE], + log: utils.get_log_path(GENERATE_BRWT_LINKAGE_RULE, config) + shell: + """ + cat {input.columns_file} | {time_cmd} {metagraph_cmd} transform_anno {verbose_opt} \ + --anno-type brwt \ + --linkage \ + --greedy \ + --subsample {params.subsample} \ + --parallel {threads} \ + -o {output.linkage} > {log} 2>&1 + """ + +ruleorder: relax_brwt > transform_annotation # more specific rule has priority + +TRANSFORM_ANNOTATION_RULE="transform_annotation" +rule transform_annotation: + input: + columns_file=columns_file, + output: + annotations=wdir/f'{graph}.{{anno_type}}.annodbg', + threads: max_threads + resources: + mem_mb=ResourceConfig(TRANSFORM_ANNOTATION_RULE, config).get_mem(), + params: + tempdir_opt=utils.temp_dir_config(config), + log: utils.get_log_path(TRANSFORM_ANNOTATION_RULE, config, ['anno_type']) + shell: + """ + cat {input.columns_file} | {time_cmd} {metagraph_cmd} transform_anno 
{verbose_opt} \ + --anno-type {wildcards.anno_type} \ + --parallel {threads} \ + -o {output.annotations} {params.tempdir_opt} > {log} 2>&1 + """ + + +ANNOTATE_BRWT_RULE="annotate_brwt" +rule annotate_brwt: + input: + linkage=wdir/f"{graph}.linkage.txt", + columns_file=columns_file, + output: + annotations=wdir/f'{graph}.brwt.annodbg', + threads: max_threads + resources: + mem_mb=ResourceConfig(ANNOTATE_BRWT_RULE, config).get_mem(), + params: + parallel_nodes=config[workflow_configs.BRWT_PARALLEL_NODES], + tempdir_opt=utils.temp_dir_config(config), + log: utils.get_log_path(ANNOTATE_BRWT_RULE, config) + shell: + """ + cat {input.columns_file} | {time_cmd} {metagraph_cmd} transform_anno {verbose_opt} \ + --anno-type brwt \ + --parallel-nodes {params.parallel_nodes} \ + --greedy \ + --parallel {threads} \ + -o {output.annotations} {params.tempdir_opt} > {log} 2>&1 + """ + + +RELAX_BRWT_RULE="relax_brwt" +rule relax_brwt: + input: + brwt_annots=wdir/f'{graph}.{{brwt_fmt}}.annodbg', + output: + annotations=wdir/f'{graph}.relax.{{brwt_fmt}}.annodbg', + threads: max_threads + resources: + mem_mb=ResourceConfig(RELAX_BRWT_RULE, config).get_mem(), + params: + relax_arity=config[workflow_configs.BRWT_RELAX_ARITY], + tempdir_opt=utils.temp_dir_config(config), + log: utils.get_log_path(RELAX_BRWT_RULE, config, ['brwt_fmt']) + shell: + """ + {time_cmd} {metagraph_cmd} relax_brwt \ + -o {output.annotations} \ + {verbose_opt} \ + --relax-arity {params.relax_arity} \ + --parallel {threads} \ + {input.brwt_annots} {params.tempdir_opt} > {log} 2>&1 + """ + +rd_cols_dir = wdir/'rd_cols' + +from metagraph_workflows.resource_management import TransformRdStage0Resources + +TRANSFORM_RD_STAGE0_RULE="transform_rd_stage0" +rule transform_rd_stage0: + input: + dbg_graph=graph_path, + columns_file=columns_file, + output: + columns_rd_row_count=rd_cols_dir/'vector.row_count' + threads: max_threads + resources: + mem_mb=TransformRdStage0Resources(config).get_mem() + params: + 
mem_buffer=TransformRdStage0Resources(config).get_mem_buffer_gib(), + tempdir_opt=utils.temp_dir_config(config), + log: utils.get_log_path(TRANSFORM_RD_STAGE0_RULE,config) + shell: + """ + COLS_DIR=$(dirname {output.columns_rd_row_count}) + mkdir -p $COLS_DIR + + cat {input.columns_file} | {time_cmd} {metagraph_cmd} transform_anno {verbose_opt} \ + --anno-type row_diff \ + --row-diff-stage 0 \ + -i {input.dbg_graph} \ + --parallel {threads} \ + --mem-cap-gb {params.mem_buffer} \ + -o {output.columns_rd_row_count} {params.tempdir_opt} > {log} 2>&1 + """ + +TRANSFORM_RD_STAGE1_RULE="transform_rd_stage1" +rule transform_rd_stage1: + input: + dbg_graph=graph_path, + columns_file=columns_file, + columns_rd_row_count=rd_cols_dir/'vector.row_count' + output: + pred=wdir / f'{graph}.dbg.pred', + pred_boundary=wdir / f'{graph}.dbg.pred_boundary', + rd_succ=wdir / f'{graph}.dbg.rd_succ', + succ=wdir / f'{graph}.dbg.succ', + succ_boundary=wdir / f'{graph}.dbg.succ_boundary', + cols_rd_vectors=rd_cols_dir / 'vectors.row_reduction' + threads: max_threads + resources: + mem_mb=TransformRdStage1Resources(config).get_mem() + params: + mem_buffer=TransformRdStage1Resources(config).get_mem_buffer_gib(), + tempdir_opt=utils.temp_dir_config(config), + log: utils.get_log_path(TRANSFORM_RD_STAGE1_RULE, config) + shell: + """ + cat {input.columns_file} | {time_cmd} {metagraph_cmd} transform_anno {verbose_opt} \ + --anno-type row_diff \ + --row-diff-stage 1 \ + -i {input.dbg_graph} \ + --parallel {threads} \ + --mem-cap-gb {params.mem_buffer} \ + -o {output.cols_rd_vectors} {params.tempdir_opt} > {log} 2>&1 + """ + + +TRANSFORM_RD_STAGE2_RULE="transform_rd_stage2" +rule transform_rd_stage2: + input: + dbg_graph=graph_path, + columns_file=columns_file, + cols_rd_vectors=rd_cols_dir / 'vectors.row_reduction' + output: + anchors=wdir/f'{graph}.dbg.anchors', + rd_cols_done=touch(rd_cols_dir/DONE) + threads: max_threads + resources: + mem_mb=TransformRdStage2Resources(config).get_mem(), + 
params: + mem_buffer=TransformRdStage2Resources(config).get_mem_buffer_gib(), + tempdir_opt=utils.temp_dir_config(config), + log: utils.get_log_path(TRANSFORM_RD_STAGE2_RULE, config) + shell: + """ + cat {input.columns_file} | {time_cmd} {metagraph_cmd} transform_anno {verbose_opt} \ + --anno-type row_diff \ + --row-diff-stage 2 \ + -i {input.dbg_graph} \ + --parallel {threads} \ + --mem-cap-gb {params.mem_buffer} \ + -o {output.rd_cols_done} {params.tempdir_opt} > {log} 2>&1 + """ + + +ANNOTATE_ROW_DIFF_BRWT_RULE="annotate_row_diff_brwt" +rule annotate_row_diff_brwt: + input: + anchors=rules.transform_rd_stage2.output.anchors, + rd_cols_done=rules.transform_rd_stage2.output.rd_cols_done, + dbg_graph=graph_path + output: + annotations=wdir/f'{graph}.row_diff_brwt.annodbg', + linkage=wdir / f'{graph}.row_diff_brwt.annodbg.linkage', + threads: max_threads + resources: + mem_mb=ResourceConfig(ANNOTATE_ROW_DIFF_BRWT_RULE, config).get_mem(), + params: + parallel_nodes=config[workflow_configs.BRWT_PARALLEL_NODES], + tempdir_opt=utils.temp_dir_config(config), + log: utils.get_log_path(ANNOTATE_ROW_DIFF_BRWT_RULE, config) + shell: + """ + echo $(find $(dirname {input.rd_cols_done}) -name \*.annodbg) + find $(dirname {input.rd_cols_done}) -name \*.annodbg | {time_cmd} {metagraph_cmd} transform_anno {verbose_opt} \ + --anno-type row_diff_brwt \ + --greedy \ + --fast \ + --parallel-nodes {params.parallel_nodes} \ + --parallel {threads} \ + -i {input.dbg_graph} \ + -o {output.annotations} {params.tempdir_opt} > {log} 2>&1 + """ + + +RELAX_ROW_DIFF_BRWT_RULE="relax_row_diff_brwt" +rule relax_row_diff_brwt: + input: + brwt_annots=rules.annotate_row_diff_brwt.output.annotations, + output: + annotations_relaxed=wdir/f'{graph}.relax.row_diff_brwt.annodbg', + threads: max_threads + resources: + mem_mb=ResourceConfig(RELAX_ROW_DIFF_BRWT_RULE, config).get_mem(), + params: + relax_arity = config[workflow_configs.BRWT_RELAX_ARITY], + tempdir_opt=utils.temp_dir_config(config), + log: 
utils.get_log_path(RELAX_ROW_DIFF_BRWT_RULE, config) + shell: + """ + {time_cmd} {metagraph_cmd} relax_brwt \ + -o {output.annotations_relaxed} \ + {verbose_opt} \ + --relax-arity {params.relax_arity} \ + --parallel {threads} \ + {input.brwt_annots} {params.tempdir_opt} > {log} 2>&1 + """ diff --git a/metagraph/workflows/metagraph_workflows/snakemake/build.smk b/metagraph/workflows/metagraph_workflows/snakemake/build.smk new file mode 100644 index 0000000000..d45a051e19 --- /dev/null +++ b/metagraph/workflows/metagraph_workflows/snakemake/build.smk @@ -0,0 +1,255 @@ + +from metagraph_workflows.resource_management import BuildGraphResources, ResourceConfig, BuildGraphResourcesWithKmerEstimates, PrimarizeCanonicalGraphSingleSampleResources +from metagraph_workflows import workflow_configs, utils + +if build_primary: + ruleorder: build_joint_primary > build +else: + ruleorder: build > build_joint_primary + + +BUILD_RULE="build" +rule build: + input: seqs_file_list_path + output: graph_path + threads: max_threads + resources: + mem_mb=BuildGraphResources(BUILD_RULE, config).get_mem(), + disk_mb=BuildGraphResources(BUILD_RULE, config).get_disk(), + params: + k=config['k'], + tempdir_opt=utils.temp_dir_config(config), + mem_buffer=BuildGraphResources(BUILD_RULE, config).get_mem_buffer_gib(), + disk_cap=BuildGraphResources(BUILD_RULE, config).get_disk_cap(), + log: utils.get_log_path(BUILD_RULE, config) + shell: + """ + cat {input} | {time_cmd} {metagraph_cmd} build {verbose_opt} \ + --parallel {threads} \ + -k {params.k} \ + -o {output} \ + --mem-cap-gb {params.mem_buffer} \ + --disk-cap-gb {params.disk_cap} \ + {params.tempdir_opt} > {log} 2>&1 + """ + + +### Build Primary + +kmc_dir=wdir / "kmc" + +canonical_graphs_dir=wdir/f'canonical_graphs' +canonical_graph_path=wdir/f'{graph}_canonical.dbg' + +joint_contigs_path=wdir/f'{graph}_primary.fasta.gz' + + +sample_ids_spec = False +orig_samples_path=wdir/'orig_samples' + + + +STAGE_SAMPLES_RULE="stage_samples" +rule 
stage_samples: + output: temp(orig_samples_path /f"{{sample_id}}{config[workflow_configs.SAMPLE_STAGING_FILE_ENDING]}") + resources: + parallel_staging=1 + params: + staging_script_path=config[workflow_configs.SAMPLE_STAGING_SCRIPT_PATH], + additional_options=config[workflow_configs.SAMPLE_STAGING_SCRIPT_ADDITIONAL_OPTIONS], + log: utils.get_log_path(STAGE_SAMPLES_RULE, config, ['sample_id']) + shell: + """ + bash {params.staging_script_path} {wildcards.sample_id} {output} {params.additional_options} > {log} 2>&1 + """ + +EXTRACT_KMER_COUNTS_RULE="extract_kmer_counts" +rule extract_kmer_counts: + input: utils.get_build_single_sample_input(config,orig_samples_path,seq_ids_dict) + output: + summary = kmc_dir / "{sample_id}.json", + kmc_pre=temp(kmc_dir/"{sample_id}.kmc_pre"), + kmc_suf=temp(kmc_dir/ "{sample_id}.kmc_suf"), + temp_dir=temp(directory(kmc_dir/"temp_{sample_id}.kmc")), + threads: ResourceConfig(EXTRACT_KMER_COUNTS_RULE, config).get_threads(max_threads) + resources: + mem_mb=lambda wildcards, threads: int((threads * config[workflow_configs.KMC_MEM_MB_PER_THREAD]) * config[workflow_configs.KMC_MEM_OVERHEAD_FACTOR]) + priority: 10 + params: + k=config['k'], + max_bins=config[workflow_configs.KMC_MAX_BINS], + mem_buffer=lambda wildcards, resources: max(int((resources.mem_mb * (1.0 / config[workflow_configs.KMC_MEM_OVERHEAD_FACTOR])) / 1024), 1), + base=lambda wildcards: kmc_dir/wildcards['sample_id'], + log: utils.get_log_path(EXTRACT_KMER_COUNTS_RULE, config, ['sample_id']) + shell: + """ + KMC_BINS=$(( $(ulimit -n) - 10)) + KMC_BINS=$(( KMC_BINS > {params.max_bins} ? 
{params.max_bins} : KMC_BINS)) + + mkdir -p {output.temp_dir} + + INPUT="{input}" + SOME_INPUT_FILE="{input}" + if [ -d {input} ]; then + # in case sample is split up in several files + SAMPLE_FILE={output.temp_dir}/samples.lst + ls {input}/* > $SAMPLE_FILE + INPUT="$SAMPLE_FILE" + INPUT="@$INPUT" + + # pick arbitrary file, assuming all file in the directory are of the same type + SOME_INPUT_FILE=$(cat $SAMPLE_FILE | head -n 1) + fi + + FORMAT_FLAG="-fq" + if [[ "$SOME_INPUT_FILE" =~ .*(.fa|.fa.gz|.fasta|.fasta.gz|.fna|.fna.gz)$ ]]; then + FORMAT_FLAG="-fm" + fi + + {time_cmd} kmc -v -k{params.k} -m{params.mem_buffer} -sm -t{threads} -ci1 -cs65535 -n$KMC_BINS -j{output.summary} $FORMAT_FLAG $INPUT {params.base} {output.temp_dir} > {log} 2>&1 + """ + +kmer_estimates=True + +BUILD_CANONICAL_GRAPH_SINGLE_SAMPLE_RULE="build_canonical_graph_single_sample" +rule build_canonical_graph_single_sample: + input: + seq=utils.get_build_single_sample_input(config, orig_samples_path, seq_ids_dict), + kmer=kmc_dir/"{sample_id}.json" if kmer_estimates else [] + output: + graph=temp(canonical_graphs_dir/"{sample_id}.dbg"), + temp_dir=temp(directory(wdir / "temp_build_canonical_{sample_id}")), + threads: BuildGraphResourcesWithKmerEstimates(BUILD_CANONICAL_GRAPH_SINGLE_SAMPLE_RULE, config).get_threads(max_threads) + resources: + mem_mb=BuildGraphResourcesWithKmerEstimates(BUILD_CANONICAL_GRAPH_SINGLE_SAMPLE_RULE, config).get_mem(), + disk_mb=BuildGraphResourcesWithKmerEstimates(BUILD_CANONICAL_GRAPH_SINGLE_SAMPLE_RULE, config).get_disk(), + priority: 50 + params: + k=config['k'], + tempdir_opt=utils.temp_dir_config(config), + temp_file=wdir, + mem_buffer=BuildGraphResourcesWithKmerEstimates(BUILD_CANONICAL_GRAPH_SINGLE_SAMPLE_RULE, config).get_mem_buffer_gib(), + disk_cap=BuildGraphResourcesWithKmerEstimates(BUILD_CANONICAL_GRAPH_SINGLE_SAMPLE_RULE, config).get_disk_cap(), + log: utils.get_log_path(BUILD_CANONICAL_GRAPH_SINGLE_SAMPLE_RULE, config, ['sample_id']) + shell: + """ + + 
INPUT_CMD="echo {input.seq}" + + mkdir -p {output.temp_dir} + + SAMPLE_FILE={output.temp_dir}/samples.lst + if [ -d {input.seq} ]; then + ls {input.seq}/* > $SAMPLE_FILE + INPUT_CMD="cat $SAMPLE_FILE" + fi + + $INPUT_CMD | {time_cmd} {metagraph_cmd} build {verbose_opt} \ + --parallel {threads} \ + --mode canonical \ + -k {params.k} \ + -o {output.graph} \ + --mem-cap-gb {params.mem_buffer} \ + --disk-cap-gb {params.disk_cap} \ + {params.tempdir_opt} > {log} 2>&1 + """ + + +PRIMARIZE_CANONICAL_GRAPH_SINGLE_SAMPLE_RULE="primarize_canonical_graph_single_sample" +rule primarize_canonical_graph_single_sample: + input: canonical_graphs_dir/"{sample_id}.dbg" + output: temp(contigs_dir/"{sample_id}_primary.fasta.gz") + threads: PrimarizeCanonicalGraphSingleSampleResources(config).get_threads(max_threads) + resources: + mem_mb=PrimarizeCanonicalGraphSingleSampleResources(config).get_mem(), + priority: 100 + log: utils.get_log_path(PRIMARIZE_CANONICAL_GRAPH_SINGLE_SAMPLE_RULE, config, ['sample_id']) + shell: + """ + echo "{input}" | {time_cmd} {metagraph_cmd} transform {verbose_opt} \ + --to-fasta \ + --primary-kmers \ + --parallel {threads} \ + -o {output} > {log} 2>&1 + """ + + +BUILD_JOINT_GRAPH_RULE="build_joint_graph" +rule build_joint_graph: + input: utils.get_build_joint_input(config, contigs_dir, seq_ids_dict, seqs_file_list_path) + output: temp(canonical_graph_path) + threads: max_threads + resources: + mem_mb=BuildGraphResources(BUILD_JOINT_GRAPH_RULE, config).get_mem(), + disk_mb=BuildGraphResources(BUILD_JOINT_GRAPH_RULE, config).get_disk(), + params: + k=config['k'], + separate_build=str(bool(config[workflow_configs.PRIMARIZE_SAMPLES_SEPARATELY])).lower(), + tempdir_opt=utils.temp_dir_config(config), + mem_buffer=BuildGraphResources(BUILD_JOINT_GRAPH_RULE, config).get_mem_buffer_gib(), + disk_cap=BuildGraphResources(BUILD_JOINT_GRAPH_RULE, config).get_disk_cap(), + log: utils.get_log_path(BUILD_JOINT_GRAPH_RULE, config) + shell: + """ + if 
{params.separate_build}; then + SEQ_PATHS={wdir}/seqs_paths.txt + echo "{input}" | tr ' ' '\n' > $SEQ_PATHS + else + SEQ_PATHS="{input}" + fi + + cat $SEQ_PATHS | {time_cmd} {metagraph_cmd} build {verbose_opt} \ + --parallel {threads} \ + --mode canonical \ + -k {params.k} \ + -o {output} \ + --mem-cap-gb {params.mem_buffer} \ + --disk-cap-gb {params.disk_cap} \ + {params.tempdir_opt} > {log} 2>&1 + + """ + +PRIMARIZE_JOINT_GRAPH_RULE="primarize_joint_graph" +rule primarize_joint_graph: + input: canonical_graph_path + output: temp(joint_contigs_path) + threads: max_threads + resources: + mem_mb=ResourceConfig(PRIMARIZE_JOINT_GRAPH_RULE, config).get_mem(), + log: utils.get_log_path(PRIMARIZE_JOINT_GRAPH_RULE, config) + shell: + """ + echo "{input}" | {time_cmd} {metagraph_cmd} transform {verbose_opt} \ + --to-fasta \ + --primary-kmers \ + --parallel {threads} \ + -o {output} > {log} 2>&1 + """ + + +BUILD_JOINT_PRIMARY_RULE="build_joint_primary" +rule build_joint_primary: + input: joint_contigs_path + output: graph_path + threads: max_threads + resources: + mem_mb=BuildGraphResources(BUILD_JOINT_PRIMARY_RULE, config).get_mem(), + disk_mb=BuildGraphResources(BUILD_JOINT_PRIMARY_RULE, config).get_disk(), + params: + k=config['k'], + tempdir_opt=utils.temp_dir_config(config), + mem_buffer=BuildGraphResources(BUILD_JOINT_PRIMARY_RULE, config).get_mem_buffer_gib(), + disk_cap=BuildGraphResources(BUILD_JOINT_PRIMARY_RULE, config).get_disk_cap() + log: utils.get_log_path(BUILD_JOINT_PRIMARY_RULE, config) + shell: + """ + {time_cmd} {metagraph_cmd} build {verbose_opt} \ + --parallel {threads} \ + --mode primary \ + -k {params.k} \ + -o {output} \ + --mem-cap-gb {params.mem_buffer} \ + --disk-cap-gb {params.disk_cap} \ + {input} \ + {params.tempdir_opt} > {log} 2>&1 + """ diff --git a/metagraph/workflows/metagraph_workflows/snakemake/default.yml b/metagraph/workflows/metagraph_workflows/snakemake/default.yml new file mode 100644 index 0000000000..3dae43d756 --- /dev/null +++ 
b/metagraph/workflows/metagraph_workflows/snakemake/default.yml @@ -0,0 +1,30 @@ +# graph options +k: 31 +base_name: 'graph' +build_primary_graph: False + +primarize_samples_separately: False + +kmc_mem_mb_per_thread: 2048 +kmc_max_bins: 2000 +kmc_mem_overhead_factor: 1.1 + +# annotation options +annotation_formats: ['relax.row_diff_brwt'] +annotation_labels_source: 'sequence_headers' + +brwt_relax_arity: 32 +brwt_parallel_nodes: 5 +brwt_linkage_subsample: 100000 + +metagraph_cmd: 'metagraph' +gnu_time_cmd: '/usr/bin/time' + +default_disk_mb: 10000 +max_memory_mb: 4048 +max_buffer_size_mb: 51200 + +sample_ids_path: '' +sample_staging_script_path: '' +sample_staging_script_additional_options: '' +sample_staging_file_ending: '.fastq' diff --git a/metagraph/workflows/metagraph_workflows/snakemake/run_snakemake_test.sh b/metagraph/workflows/metagraph_workflows/snakemake/run_snakemake_test.sh new file mode 100755 index 0000000000..a3c6776f16 --- /dev/null +++ b/metagraph/workflows/metagraph_workflows/snakemake/run_snakemake_test.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd ${SCRIPT_DIR} + +CORES=2 +snakemake --configfile default.yml test_workflow/test.yml -p --cores ${CORES} "$@" diff --git a/metagraph/workflows/metagraph_workflows/snakemake/test_data/metasub_fake_data.fa b/metagraph/workflows/metagraph_workflows/snakemake/test_data/metasub_fake_data.fa new file mode 100644 index 0000000000..5ba43a6650 --- /dev/null +++ b/metagraph/workflows/metagraph_workflows/snakemake/test_data/metasub_fake_data.fa @@ -0,0 +1,18 @@ +>kl_sample;metasub_name=nan;city=kuala_lumpur;latitude=3.11318;longitude=101.68186;surface_material=metal +CTTGGATCACACTCTTCTCAGAGCCCAGGCCAGGGGCCCCCAAGAAAGGCTCTGGTGGAG +AACCTGTGCATGAAGGCTGTCAACCAGTCCATAGGCAGGGCCATCAGGCACCAAAGGGAT +TCTGCCAGCATAGTGCTCCTGGACCAGTGATACACCCGGCACCCTGTCCTGGACATGCTG +TTGGCCTGGATCTGAGCCCTCGTGGAGGTCAAAGCCACCTTTGGTTCTGCCATTGCTGCT 
+GTGTGGAAGTTCACTCAAGTAGGCCTCTTCCTGACAGGCAGCTGCACCACTGCCTGGCGC +TGTGCCCTTCCTTTGCTCTGCCCGCTGGAGACGGTGTTTGTCATGGGCCTGGTCTGCAGG +>zh_sample;metasub_name=nan;city=zurich;latitude=nan;longitude=nan;surface_material=nan;station=nan;num_reads=1303795.0 +CTTGGATCACACTCTTCTCAGAGCCCAGGCCAGGGGCCCCCAAGAAAGGCTCTGGTGGAG +AACCTGTGCATGAAGGCTGTCAACCAGTCCATAGGCAGGGCCATCAGGCACCAAAGGGAT +TCTGCCAGCATAGTGCTCCTGGACCAGTGATACACCCGGCACCCTGTCCTGGACATGCTG +TTGGCCTGGATCTGAGCCCTCGTGGAGGTCAAAGCCACCTTTGGTTCTGCCATTGCTGCT +GTGTGGAAGTTCACTCAAGTAGGCCTCTTCCTG +>tk_sample;metasub_name=nan;city=tokyo;latitude=35.6973;longitude=139.6339;surface_material=plastic +TCTGCCAGCATAGTGCTCCTGGACCAGTGATACACCCGGCACCCTGTCCTGGACATGCTG +TTGGCCTGGATCTGAGCCCTCGTGGAGGTCAAAGCCACCTTTGGTTCTGCCATTGCTGCT +GTGTGGAAGTTCACTCAAGTAGGCCTCTTCCTGACAGGCAGCTGCACCACTGCCTGGCGC +TGTGCCCTTCCTTTGCTCTGCCCGCTGGAGACGGTGTTTGTCATGGGCCTGGTCTGC diff --git a/metagraph/workflows/metagraph_workflows/snakemake/test_data/transcripts_100.fa b/metagraph/workflows/metagraph_workflows/snakemake/test_data/transcripts_100.fa new file mode 100644 index 0000000000..00313cb29f --- /dev/null +++ b/metagraph/workflows/metagraph_workflows/snakemake/test_data/transcripts_100.fa @@ -0,0 +1,1649 @@ +>ENST00000456328.2|ENSG00000223972.5|OTTHUMG00000000961.2|OTTHUMT00000362751.1|DDX11L1-202|DDX11L1|1657|processed_transcript| +GTTAACTTGCCGTCAGCCTTTTCTTTGACCTCTTCTTTCTGTTCATGTGTATTTGCTGTC +TCTTAGCCCAGACTTCCCGTGTCCTTTCCACCGGGCCTTTGAGAGGTCACAGGGTCTTGA +TGCTGTGGTCTTCATCTGCAGGTGTCTGACTTCCAGCAACTGCTGGCCTGTGCCAGGGTG +CAAGCTGAGCACTGGAGTGGAGTTTTCCTGTGGAGAGGAGCCATGCCTAGAGTGGGATGG +GCCATTGTTCATCTTCTGGCCCCTGTTGTCTGCATGTAACTTAATACCACAACCAGGCAT +AGGGGAAAGATTGGAGGAAAGATGAGTGAGAGCATCAACTTCTCTCACAACCTAGGCCAG +TGTGTGGTGATGCCAGGCATGCCCTTCCCCAGCATCAGGTCTCCAGAGCTGCAGAAGACG +ACGGCCGACTTGGATCACACTCTTGTGAGTGTCCCCAGTGTTGCAGAGGCAGGGCCATCA +GGCACCAAAGGGATTCTGCCAGCATAGTGCTCCTGGACCAGTGATACACCCGGCACCCTG +TCCTGGACACGCTGTTGGCCTGGATCTGAGCCCTGGTGGAGGTCAAAGCCACCTTTGGTT 
+CTGCCATTGCTGCTGTGTGGAAGTTCACTCCTGCCTTTTCCTTTCCCTAGAGCCTCCACC +ACCCCGAGATCACATTTCTCACTGCCTTTTGTCTGCCCAGTTTCACCAGAAGTAGGCCTC +TTCCTGACAGGCAGCTGCACCACTGCCTGGCGCTGTGCCCTTCCTTTGCTCTGCCCGCTG +GAGACGGTGTTTGTCATGGGCCTGGTCTGCAGGGATCCTGCTACAAAGGTGAAACCCAGG +AGAGTGTGGAGTCCAGAGTGTTGCCAGGACCCAGGCACAGGCATTAGTGCCCGTTGGAGA +AAACAGGGGAATCCCGAAGAAATGGTGGGTCCTGGCCATCCGTGAGATCTTCCCAGGGCA +GCTCCCCTCTGTGGAATCCAATCTGTCTTCCATCCTGCGTGGCCGAGGGCCAGGCTTCTC +ACTGGGCCTCTGCAGGAGGCTGCCATTTGTCCTGCCCACCTTCTTAGAAGCGAGACGGAG +CAGACCCATCTGCTACTGCCCTTTCTATAATAACTAAAGTTAGCTGCCCTGGACTATTCA +CCCCCTAGTCTCAATTTAAGAAGATCCCCATGGCCACAGGGCCCCTGCCTGGGGGCTTGT +CACCTCCCCCACCTTCTTCCTGAGTCATTCCTGCAGCCTTGCTCCCTAACCTGCCCCACA +GCCTTGCCTGGATTTCTATCTCCCTGGCTTGGTGCCAGTTCCTCCAAGTCGATGGCACCT +CCCTCCCTCTCAACCACTTGAGCAAACTCCAAGACATCTTCTACCCCAACACCAGCAATT +GTGCCAAGGGCCATTAGGCTCTCAGCATGACTATTTTTAGAGACCCCGTGTCTGTCACTG +AAACCTTTTTTGTGGGAGACTATTCCTCCCATCTGCAACAGCTGCCCCTGCTGACTGCCC +TTCTCTCCTCCCTCTCATCCCAGAGAAACAGGTCAGCTGGGAGCTTCTGCCCCCACTGCC +TAGGGACCAACAGGGGCAGGAGGCAGTCACTGACCCCGAGACGTTTGCATCCTGCACAGC +TAGAGATCCTTTATTAAAAGCACACTGTTGGTTTCTG +>ENST00000450305.2|ENSG00000223972.5|OTTHUMG00000000961.2|OTTHUMT00000002844.2|DDX11L1-201|DDX11L1|632|transcribed_unprocessed_pseudogene| +GTGTCTGACTTCCAGCAACTGCTGGCCTGTGCCAGGGTGCAAGCTGAGTTGGAGGAAAGA +TGAGTGAGAGCATCAACTTCTCTCACAACCTAGGCCAGTGTGTGGTGATGCCAGGCATGC +CCTTCCCCAGCATCAGGTCTCCAGAGCTGCAGAAGACGACGGCCGACTTGGATCACACTC +TTCTCAGAGCCCAGGCCAGGGGCCCCCAAGAAAGGCTCTGGTGGAGAACCTGTGCATGAA +GGCTGTCAACCAGTCCATAGGCAGGGCCATCAGGCACCAAAGGGATTCTGCCAGCATAGT +GCTCCTGGACCAGTGATACACCCGGCACCCTGTCCTGGACACGCTGTTGGCCTGGATCTG +AGCCCTGGTGGAGGTCAAAGCCACCTTTGGTTCTGCCATTGCTGCTGTGTGGAATTTCAC +CAGAAGTAGGCCTCTTCCTGACAGGCAGCTGCACCACTGCCTGGCGCTGTGCCCTTCCTT +TGCTCTGCCCGCTGGAGACGGTGTTTGTCATGGGCCTGGTCTGCAGGGATCCTGCTACAA +AGGTGAAACCCAGGAGAGTGTGGAGTCCAGAGTGTTGCCAGGACCCAGGCACAGGCATTA +GTGCCCGTTGGAGAAAACAGGGGAATCCCGAA 
+>ENST00000488147.1|ENSG00000227232.5|OTTHUMG00000000958.1|OTTHUMT00000002839.1|WASH7P-201|WASH7P|1351|unprocessed_pseudogene| +ATGGGAGCCGTGTGCACGTCGGGAGCTCGGAGTGAGCGCACCATGACTCCTGTGAGGATG +CAGCACTCCCTGGCAGGTCAGACCTATGCCGTGCCCTTCATCCAGCCAGACCTGCGGCGA +GAGGAGGCCGTCCAGCAGATGGCGGATGCCCTGCAGTACCTGCAGAAGGTCTCTGGAGAC +ATCTTCAGCAGGTAGAGCAGAGCCGGAGCCAGGTGCAGGCCATTGGAGAGAAGGTCTCCT +TGGCCCAGGCCAAGATTGAGAAGATCAAGGGCAGCAAGAAGGCCATCAAGGTGTTCTCCA +GTGCCAAGTACCCTGCTCCAGGGCGCCTGCAGGAATATGGCTCCATCTTCACGGGCGCCC +AGGACCCTGGCCTGCAGAGACGCCCCCGCCACAGGATCCAGAGCAAGCACCGCCCCCTGG +ACGAGCGGGCCCTGCAGGAGAAGCTGAAGGACTTTCCTGTGTGCGTGAGCACCAAGCCGG +AGCCCGAGGACGATGCAGAAGAGGGACTTGGGGGTCTTCCCAGCAACATCAGCTCTGTCA +GCTCCTTGCTGCTCTTCAACACCACCGAGAACCTGTAGAAGAAGTATGTCTTCCTGGACC +CCCTGGCTGGTGCTGTAACAAAGACCCATGTGATGCTGGGGGCAGAGACAGAGGAGAAGC +TGTTTGATGCCCCCTTGTCCATCAGCAAGAGAGAGCAGCTGGAACAGCAGGTCCCAGAGA +ACTACTTCTATGTGCCAGACCTGGGCCAGGTGCCTGAGATTGATGTTCCATCCTACCTGC +CTGACCTGCCCGGCATTGCCAACGACCTCATGTACATTGCCGACCTGGGCCCCGGCATTG +CCCCCTCTGCCCCTGGCACCATTCCAGAACTGCCCACCTTCCACACTGAGGTAGCCGAGC +CTCTCAAGACCTACAAGATGGGGTACTAACACCACCCCCACCGCCCCCACCACCACCCCC +AGCTCCTGAGGTGCTGGCCAGTGCACCCCCACTCCCACCCTCAACCGCGGCCCCTGTAGG +CCAAGGCGCCAGGCAGGACGACAGCAGCAGCAGCGCGTCTCCTTCAGTCCAGGGAGCTCC +CAGGGAAGTGGTTGACCCCTCCGGTGGCTGGCCACTCTGCTAGAGTCCATCCGCCAAGCT +GGGGGCATCGGCAAGGCCAAGCTGCGCAGCATGAAGGAGCGAAAGCTGGAGAAGCAGCAG +CAGAAGGAGCAGGAGCAAGTGAGAGCCACGAGCCAAGGTGGGCACTTGATGTCGCTCCAT +GGGGGGACGGCTCCACCCAGCCTGCGCCACTGTGTTCTTAAGAGGCTTCCAGAGAAAACG +GCACACCAATCAATAAAGAACTGAGCAGAAA +>ENST00000619216.1|ENSG00000278267.1|-|-|MIR6859-1-201|MIR6859-1|68|miRNA| +TGTGGGAGAGGAACATGGGCTCAGGACAGCGGGTGTCAGCTTGCCTGACCCCCATGTCGC +CTCTGTAG +>ENST00000473358.1|ENSG00000243485.5|OTTHUMG00000000959.2|OTTHUMT00000002840.1|MIR1302-2HG-202|MIR1302-2HG|712|lincRNA| +GTGCACACGGCTCCCATGCGTTGTCTTCCGAGCGTCAGGCCGCCCCTACCCGTGCTTTCT +GCTCTGCAGACCCTCTTCCTAGACCTCCGTCCTTTGTCCCATCGCTGCCTTCCCCTCAAG +CTCAGGGCCAAGCTGTCCGCCAACCTCGGCTCCTCCGGGCAGCCCTCGCCCGGGGTGCGC 
+CCCGGGGCAGGACCCCCAGCCCACGCCCAGGGCCCGCCCCTGCCCTCCAGCCCTACGCCT +TGACCCGCTTTCCTGCGTCTCTCAGCCTACCTGACCTTGTCTTTACCTCTGTGGGCAGCT +CCCTTGTGATCTGCTTAGTTCCCACCCCCCTTTAAGAATTCAATAGAGAAGCCAGACGCA +AAACTACAGATATCGTATGAGTCCAGTTTTGTGAAGTGCCTAGAATAGTCAAAATTCACA +GAGACAGAAGCAGTGGTCGCCAGGAATGGGGAAGCAAGGCGGAGTTGGGCAGCTCGTGTT +CAATGGTTTTGTCCGCCTTCCCTGCCTCCTCTTCTGGGGGAGTTAGATCGAGTTGTAACA +AGAACATGCCACTGTCTCGCTGGCTGCAGCGTGTGGTCCCCTTACCAGAGTGAGGATGCG +AAGAGAAGGTGGCTGTCTGCAAACCAGGAAGAGAGCCCTCACCGGGAACCCGTCCAGCTG +CCACCTTGAACTTGGACTTCCAAGCCTCCAGAACTGTGAGGGATAAATGTAT +>ENST00000469289.1|ENSG00000243485.5|OTTHUMG00000000959.2|OTTHUMT00000002841.2|MIR1302-2HG-201|MIR1302-2HG|535|lincRNA| +TCATCAGTCCAAAGTCCAGCAGTTGTCCCTCCTGGAATCCGTTGGCTTGCCTCCGGCATT +TTTGGCCCTTGCCTTTTAGGGTTGCCAGATTAAAAGACAGGATGCCCAGCTAGTTTGAAT +TTTAGATAAACAACGAATAATTTCGTAGCATAAATATGTCCCAAGCTTAGTTTGGGACAT +ACTTATGCTAAAAAACATTATTGGTTGTTTATCTGAGATTCAGAATTAAGCATTTTATAT +TTTATTTGCTGCCTCTGGCCACCCTACTCTCTTCCTAACACTCTCTCCCTCTCCCAGTTT +TGTCCGCCTTCCCTGCCTCCTCTTCTGGGGGAGTTAGATCGAGTTGTAACAAGAACATGC +CACTGTCTCGCTGGCTGCAGCGTGTGGTCCCCTTACCAGAGTGAGGATGCGAAGAGAAGG +TGGCTGTCTGCAAACCAGGAAGAGAGCCCTCACCGGGAACCCGTCCAGCTGCCACCTTGA +ACTTGGACTTCCAAGCCTCCAGAACTGTGAGGGATAAATGTATGATTTTAAAGTC +>ENST00000607096.1|ENSG00000284332.1|-|-|MIR1302-2-201|MIR1302-2|138|miRNA| +GGATGCCCAGCTAGTTTGAATTTTAGATAAACAACGAATAATTTCGTAGCATAAATATGT +CCCAAGCTTAGTTTGGGACATACTTATGCTAAAAAACATTATTGGTTGTTTATCTGAGAT +TCAGAATTAAGCATTTTA +>ENST00000417324.1|ENSG00000237613.2|OTTHUMG00000000960.1|OTTHUMT00000002842.1|FAM138A-201|FAM138A|1187|lincRNA| +CACACAACGGGGTTTCGGGGCTGTGGACCCTGTGCCAGGAAAGGAAGGGCGCAGCTCCTG +CAATGCGGAGCAGCCAGGGCAGTGGGCACCAGGCTTTAGCCTCCCTTTCTCACCCTACAG +AGGGCAGGCCCTTCAGCTCCATTCTCCTCCAAGGCTGCAGAGGGGGCAGGAATTGGGGGT +GACAGGAGAGCTGTAAGGTCTCCAGTGGGTCATTCTGGGCCCAGAGATGGGTGCTGAAGC +TCCCACGCCTGCCTGTGAAAATGGAGTCCTCTCTCACCTGGGAGAGCCAGGTGCTGCCCC +GAGAAGGATGCATTTATGGCTTCGTGAAGTCTTTCCTGACCCCCGATGCTGCTGACTATA +GAGACAAAGTCTCACTATGTTGCTCAGGCTGGTCTTGAACTCCTGGCCTCAAGCGATCCT 
+CCCACCTCAGCCTCCCAAAGTGTTGGGATTATAGACATGAGCCACTGCACCTGGCCGACC +TTGGGCAAGTTCTTAAACCCTTCAAAGCCTCATTTTTCTCCAATCACAAAAGGGAAAGAT +GGTAATATTTTCCCCACCAAATTCTTGTCGGATGCCCTCACAGAATTGAGATTATGTACG +TAAAACACCAGGTGCCTAACCCGGCACAGAGCAGGAGGGCTAAGCGTGACATCCAGCACG +TGGTCAGTGGAATCCAGTATTCCTACCCACCTCTCTAGTCTCCCCTCCACCCCTCTCCCT +TTCAGAGGCACCAAGCTGCTTGTGGTCTTGTCTATTCCCACTCCCTGCCTGACTGAACAT +TTTCTCCACCTCCTGATCATCAGCAGCAGAAACTGGCTGCTCTTCCTCCTGGGTAGACAG +CCAGACTGTATTTCCCAGCTGCCCCTGCAGTGAGATGTGGCCATCGGAGCCAGCATTGGC +CAATGGACTCTGCATGGGAGTGACGCATGCTGCCTCCAGGCTTGTCCCTAAAACCTCCCA +CGTGTCCTCCGCCTGCTCTTCCCACCTCCAAGGAGCACGGCAATTGTGGAAGACCCAGAT +TAGTGATGGCAGAACCATAGATGGGAGGAACCTGGGTCCCTGACTTAAAGTATCATGGAT +TTGGATGTTCCCTTAGTGAGAAATAAACTTCCATTGTGTTTAAGCCTTTATTTGTTTATA +GTTGGTTACAGCAACTGCCTTCTTTTAATTAAAACACTCCTGCTGCT +>ENST00000461467.1|ENSG00000237613.2|OTTHUMG00000000960.1|OTTHUMT00000002843.1|FAM138A-202|FAM138A|590|lincRNA| +GGGGTTTCGGGGCTGTGGACCCTGTGCCAGGAAAGGAAGGGCGCAGCTCCTGCAATGCGG +AGCAGCCAGGGCAGTGGGCACCAGGCTTTAGCCTCCCTTTCTCACCCTACAGAGGGCAGG +CCCTTCAGCTCCATTCTCCTCCAAGGCTGCAGAGGGGGCAGGAATTGGGGGTGACAGGAG +AGCTGTAAGGTCTCCAGTGGGTCATTCTGGGCCCAGAGATGGGTGCTGAAGCTCCCACGC +CTGCCTGTGAAAATGGAGTCCTCTCTCACCTGGGAGAGCCAGGTGCTGCCCCGAGAAGGA +TGCATTTATGGCTTCGTGAAGTCTTTCCTGACCCCCGATGCTGCTGACTATAGAGACAAA +GTCTCACTATGTTGCTCAGGCTGGTCTTGAACTCCTGGCCTCAAGCGATCCTCCCACCTC +AGCCTCCCAAAGTGTTGGGATTATAGACATGAGCCACTGCACCTGGCCGACCTTGGGCAA +GTTCTTAAACCCTTCAAAGCCTCATTTTTCTCCAATCACAAAAGGGAAAGATGGTAATAT +TTTCCCCACCAAATTCTTGTAAGTATTAAACATTGTATATGTATTTTGAA +>ENST00000606857.1|ENSG00000268020.3|OTTHUMG00000185779.1|OTTHUMT00000471235.1|AL627309.6-201|AL627309.6|840|unprocessed_pseudogene| +GCGGTATCTAAATTTGTATTGATTGGACTTTCAAGCTCTTGGGAGATGCATCTTTTTCTT +TTTTGGTTCTTCTCTGTGTTCTACATGGGAATTATCCTGGAAAATCTCTTCATTGTGTTC +ACAGTAATTATTGACTCTCATTTAAATTCCCCAGGTACTGCCTACTGGCCAACATTTATC +TTCTTGATCTGGGTCTTCTCCTACAGTTCTGACTTTTTCACTAACTGCAGCATCATTTCT +TTTCCAAGATGCATCATACAGATATTTTTCATTTGTGTCATGCGTAAAAATTGAGATGGT 
+GCTGCTCATAACCATGGCATAGAGCAGGTACACTGCCAATCTGTAAGCCTCCCCATTACC +TGACCACAATGAACCCCAAAATGTGTGTTTCCTTTGTTGGAGGCATCCTGGATAGTCAGG +ATAATCCATGCTGTATCTCAGTTTGTTTTTGCCATAAACTTGCCTTTTTGTGGCCCTAAT +AGAGTAGGTAGTTTTCACTGTGATTTTCCTTATGTCATGAAACTTGCTTGTGTAGACACT +TACAAACTAGAGGTTGTAGTCACTGCTAACAGTGGGCTTATATCCATAGCTACCTGTTTC +TTATTAATAATATCCTATATTTTCATTTCGGTAACCGTCTAGAATCCTTCTTCAGGAGAC +TTATCTAAAGCATTTGTGTCATGTTAGATCACATCACAGTAGGGATTTTGTTTTTTATGC +CATGTATATTTCTGTATGTGTAGCCTTTGCCTAAAACAACACATGATTAATATTTGTTCA +TTGTTCCTTTTGCTATCACCCCTGTCTAGGATCTACACATTAAGAAACAAAGACATGAAC +>ENST00000642116.1|ENSG00000240361.2|OTTHUMG00000001095.3|OTTHUMT00000492680.1|OR4G11P-202|OR4G11P|1414|processed_transcript| +AGCTATCTGAATTTCTCCTTCTCCTAAAAATGCACATCCTATGACTGAAAAGACAGGAAA +GATGACAAACTCTATGGAAACTTGGCTTCTGAAGAACTCCTAGAAGCTTTCCAAAGTCAT +CAGTGTTTCCTAAGAAGGCAGAGAAATCAAACACATGGTCTTTTCCTCCAGACAAGCTCC +TTTGGGTCATCAGGATTTCTTCAACAATAAAATGTTGCTTCAGAGTCTTCCCTTCTATCT +GATTCAGTGGACCAAGTAAATGACTCTCTGGTAACAGAATTTGTATTACTTGGACTTGCA +CAATCCTTGGAAATGCAGTTTTTCCTTTTTCTCTTCTTCTCTTTATTCTATGTGGGAATT +ATCCTGGGAAAACTCTTCATTGTGTTCACAGTGATCTTTGATCCTCACTTACACTCCCCC +ATGTATATTCTGCTGGCCAACCTATCGCTCATTGACTTGAGCCTTTCATCTACCACAGTT +CCTAGGTTGATCTACGATCTTTTTACTGATTGTAAAGTTATTTCCTTCCATAATTGCATG +ATACAAAAGTTCTTTATCCATGTTATGGGAGGAGTTGAAATGGTGCTGCTGATAGTCATG +GCATATGATAGGTACACTGCGATCTGCAAGCCTCTCCACTATCCAACTATTATGAATCCC +AAAATGTGCATGTTTTTGGTAGCAGCAGCTTGGGTCATTGGGGTGATTCATGCTATGTCT +CAGTTTGTTTTTGTCATAAATTTACCCTTCTGTGGCCCTAATAATGTGGGGAGCTTTTAT +TGTGATTTTCCTCGGGTTATTAAACTTGCATGCATGGACACTTATGGGCTAGAATTTGTG +GTCACTGCCAACAGTGGATTCATATCGATGGGCACCTTCTTTTTCTTAATTGTATCATAC +ATTTTTATTCTGGTCACTGTCCAACGACATTCCTCAAATGATTTATCCAAAGCATTCTTC +ACTTCGTCGGCTCACATCACCGTAGTGGTTTTGTTTTTTGCTCCATGCATGTTTCTCTAC +GTGTGGCCTTTCCCTACTAAGTCATTGGATAAATTTTTTGCCATCATGAACTTTGTTGTC +ACCCCTGTCGTAAATCCTGCCATCTATACTTTAAGGAACAAAGATATGAAGTTTGCAATG +AGAAGGCTGAATCAACATATTTTAAATTCTATGGAGACGACATAACACATTTGGTTGATG +AGAGCACAGGATAAATGCCATGGACCATCAAGACTCCTGTGATCACCATGATCACTATGG 
+AACGCGCACATTTTTAGTATTGCCTGAAAAAACTGAAAAATCTGCAAAAAGGATGCATTA +AATCTAAGAATTGTATTTCAGATAAAGTTGCAACATTTTTTGTTAATCATAAAAAGTATA +TATTTCTATCTAATGTGTGTATCTAATTAACAGC +>ENST00000492842.2|ENSG00000240361.2|OTTHUMG00000001095.3|OTTHUMT00000003224.3|OR4G11P-201|OR4G11P|939|transcribed_unprocessed_pseudogene| +GTGGACCAAGTAAATGACTCTCTGGTAACAGAATTTGTATTACTTGGACTTGCACAATCC +TTGGAAATGCAGTTTTTCCTTTTTCTCTTCTTCTCTTTATTCTATGTGGGAATTATCCTG +GGAAAACTCTTCATTGTGTTCACAGTGATCTTTGATCCTCACTTACACTCCCCCATGTAT +ATTCTGCTGGCCAACCTATCGCTCATTGACTTGAGCCTTTCATCTACCACAGTTCCTAGG +TTGATCTACGATCTTTTTACTGATTGTAAAGTTATTTCCTTCCATAATTGCATGATACAA +AAGTTCTTTATCCATGTTATGGGAGGAGTTGAAATGGTGCTGCTGATAGTCATGGCATAT +GATAGGTACACTGCGATCTGCAAGCCTCTCCACTATCCAACTATTATGAATCCCAAAATG +TGCATGTTTTTGGTAGCAGCAGCTTGGGTCATTGGGGTGATTCATGCTATGTCTCAGTTT +GTTTTTGTCATAAATTTACCCTTCTGTGGCCCTAATAATGTGGGGAGCTTTTATTGTGAT +TTTCCTCGGGTTATTAAACTTGCATGCATGGACACTTATGGGCTAGAATTTGTGGTCACT +GCCAACAGTGGATTCATATCGATGGGCACCTTCTTTTTCTTAATTGTATCATACATTTTT +ATTCTGGTCACTGTCCAACGACATTCCTCAAATGATTTATCCAAAGCATTCTTCACTTCG +TCGGCTCACATCACCGTAGTGGTTTTGTTTTTTGCTCCATGCATGTTTCTCTACGTGTGG +CCTTTCCCTACTAAGTCATTGGATAAATTTTTTGCCATCATGAACTTTGTTGTCACCCCT +GTCGTAAATCCTGCCATCTATACTTTAAGGAACAAAGATATGAAGTTTGCAATGAGAAGG +CTGAATCAACATATTTTAAATTCTATGGAGACGACATAA +>ENST00000641515.1|ENSG00000186092.5|OTTHUMG00000001094.3|OTTHUMT00000003223.3|OR4F5-202|OR4F5|2618|protein_coding| +CCCAGATCTCTTCAGTTTTTATGCCTCATTCTGTGAAAATTGCTGTAGTCTCTTCCAGTT +ATGAAGAAGGTAACTGCAGAGGCTATTTCCTGGAATGAATCAACGAGTGAAACGAATAAC +TCTATGGTGACTGAATTCATTTTTCTGGGTCTCTCTGATTCTCAGGAACTCCAGACCTTC +CTATTTATGTTGTTTTTTGTATTCTATGGAGGAATCGTGTTTGGAAACCTTCTTATTGTC +ATAACAGTGGTATCTGACTCCCACCTTCACTCTCCCATGTACTTCCTGCTAGCCAACCTC +TCACTCATTGATCTGTCTCTGTCTTCAGTCACAGCCCCCAAGATGATTACTGACTTTTTC +AGCCAGCGCAAAGTCATCTCTTTCAAGGGCTGCCTTGTTCAGATATTTCTCCTTCACTTC +TTTGGTGGGAGTGAGATGGTGATCCTCATAGCCATGGGCTTTGACAGATATATAGCAATA +TGCAAGCCCCTACACTACACTACAATTATGTGTGGCAACGCATGTGTCGGCATTATGGCT 
+GTCACATGGGGAATTGGCTTTCTCCATTCGGTGAGCCAGTTGGCGTTTGCCGTGCACTTA +CTCTTCTGTGGTCCCAATGAGGTCGATAGTTTTTATTGTGACCTTCCTAGGGTAATCAAA +CTTGCCTGTACAGATACCTACAGGCTAGATATTATGGTCATTGCTAACAGTGGTGTGCTC +ACTGTGTGTTCTTTTGTTCTTCTAATCATCTCATACACTATCATCCTAATGACCATCCAG +CATCGCCCTTTAGATAAGTCGTCCAAAGCTCTGTCCACTTTGACTGCTCACATTACAGTA +GTTCTTTTGTTCTTTGGACCATGTGTCTTTATTTATGCCTGGCCATTCCCCATCAAGTCA +TTAGATAAATTCCTTGCTGTATTTTATTCTGTGATCACCCCTCTCTTGAACCCAATTATA +TACACACTGAGGAACAAAGACATGAAGACGGCAATAAGACAGCTGAGAAAATGGGATGCA +CATTCTAGTGTAAAGTTTTAGATCTTATATAACTGTGAGATTAATCTCAGATAATGACAC +AAAATATAGTGAAGTTGGTAAGTTATTTAGTAAAGCTCATGAAAATTGTGCCCTCCATTC +CCATATAATTTAGTAATTGTCTAGGAACTTCCACATACATTGCCTCAATTTATCTTTCAA +CAACTTGTGTGTTATATTTTGGAATACAGATACAAAGTTATTATGCTTTCAAAATATTCT +TTTGCTAATTCTTAGAACAAAGAAAGGCATAAATATATTAGTATTTGTGTACACCTGTTC +CTTCCTGTGTGACCCTAAGTTTAGTAGAAGAAAGGAGAGAAAATATAGCCTAGCTTATAA +ATTTAAAAAAAAATTTATTTGGTCCATTTTGTGAAAAACATAAAAAAAGAACTGTCACAT +CTTAATTTAAAAAATATATGCTTAGTGGTAAGGAGATATATGTCAACTTTTAAGAGGTTG +AAAAACAAACGCCTCCCATTATAAGTTTATACTTCACCTCCCACCACTATAACAACCCAG +AATCCATGAGGGCATTATCAGGAGTGAGTGGAAGAGTAAGTTTGCCAATGTGAAATGTGC +CTTCTAGGTCCTAGACGTCTGTGGTATAACTGCTCATAAGCAGTAGAAAGAATTTAGAGG +GATCCAGGCTCTCATCACGTTGGCACAAAGTATATTACTTGGATCCATCTATGTCATTTT +CCATGGTTAATGTTTAAAAGCACAGGCTTTAAAGTAAAAAACAAAGAGCTGGATTCAACT +CTACTGACTCTTATTAATCATGATTTTGGGCACATTACGTAGCTTTCATGAGCTTTAGTT +TCTACATTTATAAACAGGAGATTATACCTATTATGCATGGTTATTATGAAGGAAAATGAC +AAAATAGATATAAATCAAATAGCCCACTTCGAGACATATTAAGCATGAATAAACATTAGA +TACTATTAAAATCCTATATATTAACAAAGCCAAAAGTTTCAAACTTTACTTTTTCCCAAC +ATTCTTGTGAAATATGACACATCCCAATCTTAACAGATGCTCATTTGGGATACTGTACTT +GTGAGTGGAAGTGTGTATATTTGTGTGCAAGTGTGTACTCATATACTTCCACCTTACCAC +CCTAGAAAGGCATGATGAAAATTTAAGATAGAAGGAAAATATAAATTGAAAAAAAAAAAC +CTTAACAAATGATTCTGACAAATATCTTCTCTTTCCAGGGAGAATCACTGAGCCAGAATA +AAATTGAACACTAAATATTCTAAGAAAAAAGGAATCTAGTTTGTCAAAATGTGACTTGAA +TTAATAGATAAGGAGAGTCAGATGATAAGAGGGTCAAAATTATGTTTATCTTAGGAAAAG +TAGAATAGAAAATTTATAAGCAGATTAAAAACACATAATAAAAGTAGTAAATAATAATGA 
+CAGTATCTCAAATCAGTGCAGGGGGGAAAGGCCTACTAATGTGATGGTGGGATAATTGGA +TAGCAATATGGGAAAAGATATATTTAATTTATTTGCTACACCAAATGCCAGGACAATCTC +TAAGTGAATTCAAGACATAACTCTTTTTTCAAAAAAAC +>ENST00000335137.4|ENSG00000186092.5|OTTHUMG00000001094.3|-|OR4F5-201|OR4F5|1054|protein_coding| +TCCTGGAATGAATCAACGAGTGAAACGAATAACTCTATGGTGACTGAATTCATTTTTCTG +GGTCTCTCTGATTCTCAGGAACTCCAGACCTTCCTATTTATGTTGTTTTTTGTATTCTAT +GGAGGAATCGTGTTTGGAAACCTTCTTATTGTCATAACAGTGGTATCTGACTCCCACCTT +CACTCTCCCATGTACTTCCTGCTAGCCAACCTCTCACTCATTGATCTGTCTCTGTCTTCA +GTCACAGCCCCCAAGATGATTACTGACTTTTTCAGCCAGCGCAAAGTCATCTCTTTCAAG +GGCTGCCTTGTTCAGATATTTCTCCTTCACTTCTTTGGTGGGAGTGAGATGGTGATCCTC +ATAGCCATGGGCTTTGACAGATATATAGCAATATGCAAGCCCCTACACTACACTACAATT +ATGTGTGGCAACGCATGTGTCGGCATTATGGCTGTCACATGGGGAATTGGCTTTCTCCAT +TCGGTGAGCCAGTTGGCGTTTGCCGTGCACTTACTCTTCTGTGGTCCCAATGAGGTCGAT +AGTTTTTATTGTGACCTTCCTAGGGTAATCAAACTTGCCTGTACAGATACCTACAGGCTA +GATATTATGGTCATTGCTAACAGTGGTGTGCTCACTGTGTGTTCTTTTGTTCTTCTAATC +ATCTCATACACTATCATCCTAATGACCATCCAGCATCGCCCTTTAGATAAGTCGTCCAAA +GCTCTGTCCACTTTGACTGCTCACATTACAGTAGTTCTTTTGTTCTTTGGACCATGTGTC +TTTATTTATGCCTGGCCATTCCCCATCAAGTCATTAGATAAATTCCTTGCTGTATTTTAT +TCTGTGATCACCCCTCTCTTGAACCCAATTATATACACACTGAGGAACAAAGACATGAAG +ACGGCAATAAGACAGCTGAGAAAATGGGATGCACATTCTAGTGTAAAGTTTTAGATCTTA +TATAACTGTGAGATTAATCTCAGATAATGACACAAAATATAGTGAAGTTGGTAAGTTATT +TAGTAAAGCTCATGAAAATTGTGCCCTCCATTCC +>ENST00000466430.5|ENSG00000238009.6|OTTHUMG00000001096.2|OTTHUMT00000003225.1|AL627309.1-201|AL627309.1|2748|lincRNA| +CTGATCCATATGAATTCCTCTTATTAAGAAAAATAAAGCATCCAGGATTCAATGAAGAAC +TGACTATCACCTTGTTAATCATTCAGAAACATGTTGCAGGCTTAAGCCATTTTTGATATA +GATACTGAAACAATTACTTGCTAAGAGCAAACTTGAAGTAACAATTTGGACAAGACAGCA +AATGCTATTGTCCAAGTTTTCTAAAGAAGAATCTGAAGTGAAATGACATCAAGAGACCTA +TCAAGACCTGTATCCAGGAAAAGACCAAACCAATGCAGACCAAACCAATGCAGAACTCCT +ATGTGCTGATGGTGGTCTTACATTTCCCTAAGTTTCTGCCGACTAAACTGTGCACACGTT +CTCAGGACCTCCTGAAGCTGCGTCACAGGCACTAATCAAAGAACACAACCAAGAGTTTGG +CCTTTTCTTCAGCACTGGGAATTGTGATCCAAAGCTTTTCCCGATGAAGCACAAAGTTGG 
+AGAAAAAAAAACGCAAACTAAACAACCACAATGAAACAGAACAGAGTTAATCTGCTGTAG +CTCAAGAGAGGATGTACCTGCCCCCACCCCGCATCCCTGGGCTCGGGTTTGCCTTGCTGA +CCTCTGCTGCCACCTGGTGCCACACAGAGAAACTGAGGAGAAACCACATCAGTCTCCTTC +AGCCTCAGCTTCACATCTGTGGGTCAAGCAACCCTTTCAGAAGCTGTATAATGTGGGAAA +GCTTTCCTCTCAGGAAAATGCACACATCCAACTTTGAGAAGATGCCCTTGGGGGCGCTTC +AAGGATTCTAGATAATAACCCCCTTTCCCGAACATCCAAGAACCTAAGATTTTTTTTTTT +TTGAGAAAGTCTCGCTCTCTCTCCCAGTCTGGAGTGCAGTGGCGTGATCTTGGCTCACTG +CCAGCTCCACCTCCCAGGTTCACGCCATTCTCCTGCTTCAGCCTCCCAAGTAGCTGGGGC +TACAGGCACCTGCCACCAAACCCGGCTAATTTTTTTGTATTTTTAGTAGAGACGGGGTTT +CACCGTGTTAGCCAGGATCGTCTTGATCTCCTGACCTTGTGATCCACCCGCCTCGGCCTC +CCAAATTGCTGGGATTACAGATGTGAGCCACCGCACCTGGTCCAAGAACCCAAGTTTTAG +ATCTAGAGTGATGTCAGCATGACATTGATTTCCTGAGGCCCAGGGGCGAAGGAGCTGAGG +ACAGCAGAGGGGTGAAGGAACTCAGCTACAGACAGCAGCAGCTGATGCACAGGCCTCCCA +GCGCCTGAAGTCACCCGGAATTGGGAAGTGCTCAGAAGCTTACAAAGCTGCCTCGAGGTG +GGAACATGACATAAATCCAAGAGCAGATCCCTGATCCTATAAAAATGTACTAGATGCAGT +GGGGGCATTTTAAATGAGCAGAGAAGGACAGACAGATAAACAGAAGGACAAACAGTATTG +GGATTGGGATAAATGCTCAGCTTTTGCCCAAATCTTAGTGACTTAAGCATCACTTATTTG +CTCACGATTCTGTGGCTGGACCATTTGGTTTGGCTCACAGGGCAGGGACTGTGCTGGTCT +TACCTGAGCAGACCTGCATGTCTGCGGTCAACTGGGTTGGCAGAGACAGAGTGACTGTCT +TCCTCCAGGAAGCAGCAGGTTAACTGGTTGGCAGAGACAGAGGGACAGAGGGACTGTCTT +CCTCCAGGAAGCAGCAGGTTAACTGGTTGGCAGAGACAGAGGGACAGAGGGACTGTCTTC +CTCCAGGAAGCAGCAGGTTAACTGGTTGGCAGAGACAGAGGGACTGAGGGACTGTCTTCC +TCCATGAAGCAGCAGGTTAACTGGTTGGCAGAGACAGAGGGACTGAGGGACTGTCTTCCT +CCAGGAAGCAGCAGGTTAACTGGTTGGCAGAGACAGAGGGACTGAGGGACTGTCTTCCTC +CAGGAAGCAGCAGGTTAACTGATTGGCAGAGACAGAGGGACTGAGGGACTGTCTCCCCCC +AGGAAGCAGCAGGTTGGCTCTGTTTCCTTCGTGGGGCAGCTGGTCTCCAGGGCAGCAAGA +GAGACCAAGCCCCAGTGCACATTCTACAGCCTCTGTGCACATCAGACTTGTTAATATCCC +ATTGGCCAGTGTAAGTCACTTGGCCAAGCCCAGATTAAGGAGTGGAAAGATGGAGGCTAT +CTCCTCCTGGGAGAGGAGGCAAAGGAGGTGGGAGTATTATGTGGCCACTTATGTTTGCAA +TCTACCATACTTAGCACTTTGAGAAAAGAATTAACTGAGAAACTTGCTTCAAATAGGGCC +AGTAAAATGAAGCCCCAATTGAAGTAAAATGCATATATAAAAAATGAAACTGTGACCGAT +TTTAAGGACAGTATTGGCAAATATTTCTGTGCTCTTGGAGGAGAAGACCCTTATTGGCAT 
+GACATGTCAGAAACCACAATGAAAGAATTATTTTAACTTGCATTCATAAAAATTAAAATT +ATTCATTAAAAACATCGTGAATGAAATTAAAAGTCAAAATGTAAGCCAGAAAATTATTTA +CAACGTATGTGTCAGGAAAAGACAATACCCTTCAAACTTTGAGAGTTTACATCAGAAAGA +AAACAGCAAATGACATGATCCAAACTTGATAAAGGACATGAAAAAGAGCCAGCACTTAGT +ATGTTTTCTGAATGAATAAGTAGCCAACAGCACATGAAAATGCGTGTAATCCATTTGTAA +GCAGAGAAATGCAAACTAAAACAGTAAAGTGTCATTTTCACTTCCTGG +>ENST00000477740.5|ENSG00000238009.6|OTTHUMG00000001096.2|OTTHUMT00000003688.1|AL627309.1-202|AL627309.1|491|lincRNA| +GACAAGTTCGAGCATCTTAAAATGATTCAACAGGAGGAGATAAGGAAGCTCGAGGAAGAG +AAAAAAAAACTGGAAGGAGAAATCATAGATTTTTATAAAATGAAAGCTGCCTCTGAAGCA +CTGCAGACTCAGCTGAGCACCGATACAAAGAAAGACAAACATCCTGATCCATATGAATTC +CTCTTATTAAGAAAAATAAAGCATCCAGGATTCAATGAAGAACTGACTATCACCTTGTTA +ATCATTCAGAAACATGTTGCAGGCTTAAGCCATTTTTGATATAGATACTGAAACAATTAC +TTGCTAAGAGCAAACTTGAAGGTATGGATAAGGCCCTGAGTCATCTTCCTGAGCTGAATG +ATAGTTAAGCTGAATTAACAATTTGGACAAGACAGCAAATGCTATTGTCCAAGTTTTCTA +AAGAAGAATCTGAAGTGAAATGACATCAAGAGACCTATCAAGACCTGTATCCAGGAAAAG +ACCAAACCAAT +>ENST00000471248.1|ENSG00000238009.6|OTTHUMG00000001096.2|OTTHUMT00000003687.1|AL627309.1-203|AL627309.1|629|lincRNA| +GAAGCTCGAGGAAGAGAAAAAAAAACTGGAAGGAGAAATCATAGATTTTTATAAAATGAA +AGCTGCCTCTGAAGCACTGCAGACTCAGCTGAGCACCGATACAAAGAAAGACAAACATCT +AACAATTTGGACAAGACAGCAAATGCTATTGTCCAAGTTTTCTAAAGAAGAATCTGAAGT +GAAATGACATCAAGAGACCTATCAAGACCTGTATCCAGGAAAAGAGTCTTGCTCTTGTTG +CCCAGGCTGGAGTGCAATGGCATGATTTTGGCTCACTGTAAACTCCACCTCCTGGGTTCA +AGCGATTCTCCTGCCTCAGCCTCCCTAGTAGCTGGTATTACAGGTGCCTGCCACCACACC +CAGCTAATTTTTGTATTTTTAGTAGAGATGGGGGTTTCACCATGTTGGCCAGCCTGGTCC +CGAACTCCTGACCTTAGGTGATCCACCAGCATCGGCCTCCCAAAGTGCTGGGATTACAGG +TGTGAGCCACTGTGTCCAGCCAGTGGTGGGTCTCATATCTCAATGTGGACTTTTACTAAC +TCCCAATGCCTCAGTTTCCTCATCAGTTGAAAGGAATGAATGAAAGATATGTGTTTTTCA +TATTACCAGGTAGATGATAAGGAGATTTT +>ENST00000610542.1|ENSG00000238009.6|OTTHUMG00000001096.2|-|AL627309.1-205|AL627309.1|723|lincRNA| +TGCACACATCTTCTTCTCCAAGGTTTGTGTGCAGAACATCCTGCCCATGCTGCCCCAGCA +GCTTCAGTTGGCACCTGCCTCAGTCCAGCCTCTGGGAACCATGCAGCAGCTCCCAGCGGC 
+CCTGCACCCACCACCAGCATCCGTTTCACCTGCAGTTGAAGATCCGTGAGGTGCCCAGAA +GATCATGCAGTCATCAGTCCCACGGAGCAGCCTGCGAGGCTGAGGCTCCTCCCACTGGAC +CGCCCCCCAACTGGCACCACTGCTGCCCCTGCCCCTACTCTCAGCCTCACGTGACTCTCG +GGCAGAAGCACTGGTGGGGCAGCCAGGGCAGCGTCAAGAGTCTGAGCCAGCTGCAGGACA +AGTTCGAGCATCTTAAAATGATTCAACAGGAGGAGATAAGGAAGCTCGAGGAAGAGAAAA +AAAAACTGGAAGGAGAAATCATAGATTTTTATAAAATGAAAGCTGCCTCTGAAGCACTGC +AGACTCAGCTGAGCACCGATACAAAGAAAGACAAACATCCTGATCCATATGAATTCCTCT +TATTAAGAAAAATAAAGCATCCAGGATTCAATGAAGAACTATCACCTTGTTAATCATTCA +GAAACATGTTGCAGGCTTAAGCCATTTTTGATATAGATACTGAAACAATTACTTGCTAAG +AGCAAACTTGAAGGTATGGATAAGGCCCTGAGTCATCTTCCTGAGCTGAATGATAGTTAA +GCT +>ENST00000453576.2|ENSG00000238009.6|OTTHUMG00000001096.2|OTTHUMT00000003689.1|AL627309.1-204|AL627309.1|336|lincRNA| +GAAGATCCGTGAGGTGCCCAGAAGATCATGCAGTCATCAGTCCCACGGAGCAGCCTGCGA +GGCTGAGGCTCCTCCCACTGGACCGCCCCCCAACTGGCACCACTGCTGCCCCTGCCCCTA +CTCTCAGCCTCACGTGACTCTCGGGCAGAAGCACTGGTGGGGCAGCCAGGGCAGCGTCAA +GAGTCTGAGCCAGCTGCAGGACAAGTTCGAGCATCTTAAAATGATTCAACAGGAGGAGAT +AAGGAAGCTCGAGGAAGAGAAAAAAAAACTGGAAGGAGAAATCATAGATTTTTATAAAAT +GAAAGCTGCCTCTGAAGCACTGCAGACTCAGCTGAG +>ENST00000495576.1|ENSG00000239945.1|OTTHUMG00000001097.2|OTTHUMT00000003226.2|AL627309.3-201|AL627309.3|1319|lincRNA| +TCAGCCTCCCAAGTAGCTGGGGCTACAGGCACCTGCCACCAAACCCGGCTAATTTTTTTG +TATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGGATCGTCTTGATCTCCTGACCT +TGTGATCCACCCGCCTCGGCCTCCCAAATTGCTGGGATTACAGATGTGAGCCACCGCACC +TGGTCCAAGAACCCAAGTTTTAGATCTAGAGTGATGTCAGCATGACATTGATTTCCTGAG +GCCCAGGGGCGAAGGAGCTGAGGACAGCAGAGGGGTGAAGGAACTCAGCTACAGACAGCA +GCAGCTGATGCACAGGCCTCCCAGCGCCTGAAGTCACCCGGAATTGGGAAGTGCTCAGAA +GCTTACAAAGCTGCCTCGAGGTGGGAACATGACATAAATCCAAGAGCAGATCCCTGATCC +TATAAAAATGTACTAGATGCAGTGGGGGCATTTTAAATGAGCAGAGAAGGACAGACAGAT +AAACAGAAGGACAAACAGTATTGGGATTGGGATAAATGCTCAGCTTTTGCCCAAATCTTA +GTGACTTAAGCATCACTTATTTGCTCACGATTCTGTGGCTGGACCATTTGGTTTGGCTCA +CAGGGCAGGGACTGTGCTGGTCTTACCTGAGCAGACCTGCATGTCTGCGGTCAACTGGGT +TGGCAGAGACAGAGTGACTGTCTTCCTCCAGGAAGCAGCAGGTTAACTGGTTGGCAGAGA 
+CAGAGGGACAGAGGGACTGTCTTCCTCCAGGAAGCAGCAGGTTAACTGGTTGGCAGAGAC +AGAGGGACAGAGGGACTGTCTTCCTCCAGGAAGCAGCAGGTTGGCTCTGTTTCCTTCGTG +GGGCAGCTGGTCTCCAGGGCAGCAAGAGAGACCAAGCCCCAGTGCACATTCTACAGCCTC +TGTGCACATCAGACTTGTTAATATCCCATTGGCCAGTGTAAGTCACTTGGCCAAGCCCAG +ATTAAGGAGTGGAAAGATGGAGGCTATCTCCTCCTGGGAGAGGAGGCAAAGGAGGTGGGA +GTATTATGTGGCCACTTATGTTTGCAATCTACCATACTTAGCACTTTGAGAAAAGAATTA +ACTGAGAAACTTGCTTCAAATAGGGCCAGTAAAATGAAGCCCCAATTGAAGTAAAATGCA +TATATAAAAAATGAAACTGTGACCGATTTTAAGGACAGTATTGGCAAATATTTCTGTGCT +CTTGGAGGAGAAGACCCTTATTGGCATGACATGTCAGAAACCACAATGAAAGAATTATTT +TAACTTGCATTCATAAAAATTAAAATTATTCATTAAAAACATCGTGAATGAAATTAAAA +>ENST00000442987.3|ENSG00000233750.3|OTTHUMG00000001257.3|OTTHUMT00000003691.3|CICP27-201|CICP27|3812|processed_pseudogene| +CCCCTTCTCTGGGCCCAAGCCACCTTGGCTGAGGAGGGGGCGAGGAGGTGTGAGCCCCTG +CCAGGAACCCCCTGCCCGGACCAAGTGCTCGGCCCCCAGGCCTGCGTTCAGTGAGGCCTC +CCGTGGCGTCAGCATGTTCGTGTGGAGGAATGTGGAAGGTCACTCTGCGGCCGTGTTCTC +CTGGTACTCCATCCCCTTCCTGACCCCTCCCTGCAGCCACACGAGGCCCAGCAACCTGCC +AGTCACTCAGTGGCCTCCAACCAGAGAAAACAACCTGCCAAGTTGGCAGCTGTTGCTCAT +GAGCGTCCACCAGGTGGGACAGGGAGTGTTGACCCTGGGCGGCCCCCTGGAGCCACCTGC +CCTGAAAGCCCAGGGCCCGCAACCCCACACACTTTGGGGTTGGTGGAACCTGGTAAAAGC +TCACCTCCCACCATGGAGGAGGAGCCCTGGGCCCCTCAGGGGAGTCCCTGCTGGACAGTG +AGACAGAGAATGACCATGATGATGCTTTCCTCTCCATCATGTCTCCTGACACCCAGTTGC +CTCTACCACTCAGATGATGTCAGGCCCAGTCCCTCAGTGCCCTGCGCAAGGAACAGGACT +CATCTTCTGAGAAGGATGGACGCAGCCCCAACAAATCAGACAAGGACCACATCCGGTGGC +CCATGAGTGGCGCTCATGATCTTCAGCAGGCGGCACCAGGCCCTGGCGGGGCGCACCAGG +GTCACCCCAACCAGGATAACCGGACCGTCAGCCAGATGCTGAGCGAGCGGTGGTACACCC +TGGGGCCCAATGAGATGCAGAAATACAACCTGGCCTTCCAGGTGAAGGTGGCCCACTTGC +AACAAGGACCGAAAGAAGTCCAGCTCAGAGGCCAAGCCCACAAGCCAGGGGCTAGCAGGA +GTGTAACAAGGGCTCGTGGGAGCGGAGCATATCAGAGACGGGCACTGCCACTGCCCCTGG +GGTGTCCTCTGAACTCCTGTCAGTTGCAGCCCAAACACTCCAGAGCTCGGATACCAAGGA +GCAGCTTCTGTGGGGCAGAACGGCTGCACACAGTCAGGGAACCTGGCTCAGCCTGGCCCA +AGCCTTCTCCCACAGCGGGGTACACAGCCTGGACGGCAGGGAAATAGACCGTCAGGCACT +ACGGGAACTGACACAGGTGGTGTCTGGCACTGCATCATACTCTGGCCCAAAGCCTTCTAC 
+TCAGCATGGAGCTCCAGGCCACTTTGCAGCCCCTGGTGAGGGAGGTGACCCGTGGGCAGC +CCTGCTGCCGCCCACGTGAGCTGCTCATTCCCAGCACATGGCCAGCGAGGTCATAGCGAG +TGACGAAGAGCACACGGTCATCCATGAGGAGGAGGGGGTGATGATGTCATTGCTGATGAT +GGCTTTAGCACCACCGACACCGATCTCAAGTTCAAGGAGTGGGTGACCGACTGAGAGTGG +GGACAACTCTGGGGAGGAGCCAGAGGGCAACAAGGGCTTTGGTGGGAAGGTATTTGCACC +TGTCATTCCTTCCTCCTTTACTCCTGCCGCCCCTTGCTGGATCCTGAGCCCCCAGGGTCC +CCCGATCCACCTGCAGCTTTTGGCAGTCTATGGTCACACCCTGTCCTCCTCCTACACATA +CTCGGATGCTTCCTCCTCAACCTTGGCACCCACCTCCTTCTTACTGGGCCCAGGAGCCTT +CAAAGCCCAGGAGTCTGGTCAACGCAGCAGAGCGGGCCCCCTACGGCCCCAACCCCTGGG +GATGGGGGCCCAGGGACGCCTTCCAAGGTGGCCTGTTTCCTCCCAATGGATCCTGCCACC +TTCTGGTGCAAGAGACCTGAAAGTGTGGGCGACCTGGAGCTACCAGGCTCCTCAGTCATC +AGGGTCCCTCCCAACACTAAGGCTTTCCTAGGCAGGAGCTGGGCTGAGCCACCCGGGGGG +CAGAGCCTGAAGAGAAACTGACTGGGCTTTCGGGGTCGGGGCAGAGGGAACCCCACGGAC +ATGGACCCCACACTGGAGGACCCCACCGCGCCCAAATGCAAGATGAGAAGATGCTCCAGC +TGCAGTCCAAAGCCCAACACCCCCAAGTGTGCCATGTGTGATGGGGACAGCTTCCCCTTT +GCCTGTACAGGTGGAGAAGCCGAGGACAGGCTCAGGGAACCGGAGACCAAGAAGGCGCTG +TCCTCTTCACTGCATGTACCCTGGACCAGTGCCGGCCCTGATCATGCAGCTCTTCCAGGC +CCACTGCTTCTTCCTGTCCACTAGGCCACAGCCGCCCTCCAGGCCCACTATGCACACATC +TTCCCCTCCAAGGTTTGTTCTGCCCCTGCCCTGACTCCCAGCCCTGTGGGGGTCCTGACC +GCACCTCACCTGGCTCAGACTCTTGACGCTGCCCTGGCTGCCCCACCAGTGCTTCTGCCC +GAGAGTCACGTGAGGCTGAGAGTAGGGGCAGGGGCAGCAGTGGTGCCAGTTGGGGGGCGG +TCCAGTGGGAGGAGCCTCAGCCTCGCAGGCTGCTCCGTGGGACTGATGACTGCATGATCT +TCTGGGCACCTCACGGATCTTCAACTGCAGGTGAAACGGATGCTGGTGGTGGGTGCAGGG +CCGCTGGGAGCTGCTGCATGGTTCCCAGAGGCTGGACTGAGGCAGGTGCCAACTGAAGCT +GCTGGGGCAGCATGGGCAGGATGTTCTGCACACAAACCTTGGAGAAGAAGATGTGTGCAT +AGCAGGTCCACTGCTGCTGCCCCTGCCCTGACTCCCAGCCCTGCCTGACCCCACCTCAAC +CTGCTCAGGCTCTGGCACAACCCTGGCTGCCCTGCCACTGCCTCTGCCCCAGAGTTGGTG +CCTTGACAGCCTGGTTGGAAGGGGACACCCCAGCCCTGCCTCAACACCTGGGGGTCTCCA +TAACTAGCACAGGCAGGTGGGCAACCCCAAAGATCCCAGGACTCACAGTACCCCCTGAGA +ACATGGACAGTATGTGGGGGTAGCAATGGAGGGCAGGATGGTTATCTTCTCCCAGGTGAA +GCCATTTAATCCTTTCAGTTTGGGACGGAGTAAGGCCTTCCTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTGAGACCGAGTCTTGCTCTGTCGCCCAGGCTGGAGTGCAGTGGTGCGATCT 
+TGGCTCACTGCAACCTCTTCCCGCTGGGTTCACGCCATTCTCCTGCCTCAGCCTTCCGGG +TAGCTAGGATTACAGGTGGACGCTACCACGTCCGGCTAATTTTTGTATTTTTAGTACAGA +CGGGGCTTCATCATCTTGGCCAGGCTGATTTCGATCTCCTGACATCGTGATCTGCCTGCC +TCCCCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCTGGCCAAGGCCTGCT +CCTCTTATCTATACCCCCTACCCCTGCAGCTGTGCCGGGGGAAAGCTGGGCAGTTTCCCT +CCTCCGAGCCCCTGTACATACCATGAATTGTGGGACCTTCAGAGCTTTTCACTTTTCGGA +AAATAGCTCCTGCTGGGGCTACAAGATGGAGTGTGAAGAGGGCCTTGGGCCACAGGGAGG +CGCCTGTGGACTAGGGGGAGTTCATGCACCCCTTCTTTCCCCAGAGGGGCTGGACTCAGG +TGAGTATGGGGGTGGGGGCTCCTGCACTTCGACACAGGCAGCAGGAGGGTTTTCTCCCCA +TTCCCTCTGCACTCCCAACTTGAGCTATACTTTTTAAGAAAGTGATTCACCCTGCCTTTG +CCCCCTTCCCCAGAACAGAACACGTTGATCGTGGGCGATATTTTTCATTGTGCCAAAAAG +TTGCCATGACCGTCATTAAACCTGTTTAACAC +>ENST00000494149.2|ENSG00000268903.1|OTTHUMG00000182518.2|OTTHUMT00000461982.2|AL627309.7-201|AL627309.7|755|processed_pseudogene| +ATGCGGGAGGCAGAGGCCAGGCCTCCTCAAGTTGGCCTCTCAGACCCACTTGCAGCCTCC +CGGCGCCCCCTCCGGGCCCAGCTCTTCCTCCCGGCTGCATCTCCAGGCCGGACTCTGGCC +CGACTCCAGGTCCCAACAACGTCTTTGGACTCAGCTCCTGCCCAGCTCCCAGCGGCCCTG +GTAGGCCCACAACTTCCCTAAGCCAAGCTCCCCAGGCCCAGCTCAGGCCTCGCGGTGGCC +TCTCCAGGCTCAGCTCCTGGCCCTCCGATGACATCTGCAGGCCCCAAATGGCCTCCGGTC +GGTGGGCTCCTCTAGGCCCAGCTTGGGCCTCCCGGCGGCCTCCGCAGGCCCAAATCGTCC +CGAAGTCAGTCTCTCCAGGCTTAGCTCCAGCCTCCCGGCGGCCTCTGCAGGCCCAAGTCG +TCCTCAAGTCGGCCTGGAAGTGGGCCTGGAAGAGCAGCAAGTCGGCCTCCCTGGGCCCAG +CTCCGTCCTCTCGACGGCCTCTCCAGGTGCAAAACTTCCTCGAGTCAGCCTCTCCAGGCC +CAGCTCCTCCTGCCTCCCAGTGGCCTCTTTCGGCCCAGCCCAGCTCATGGCTCTCGGCGG +CCTTCCCAGGCCCCGCTTTTGACTTTTGGCAGCCTCTTCAGGCGCAGAACTTGATCTCCA +GTCGGCCTTTGCAGGCCCGGCCTCCTGCGTCTCGAAGGCCTGCACGGGCCCAGCCTCGGC +CTCGGCCTCACAGCGGACTCTCCACGCCCAGCTAG +>ENST00000595919.1|ENSG00000269981.1|OTTHUMG00000182738.2|OTTHUMT00000463398.2|AL627309.8-201|AL627309.8|284|processed_pseudogene| +TCCTCAAGTCGGCCTCTCCAGACCCACTTGCACCCTCCGGGCGTTCTCTCCGGGCCCAGC +TCTTCTTCCTGGTTGGGTCTCCAGGCCCGATTCCTGCCTCTCAACAACCTCTTTGGACTC +AGTGCCTACCCATCTCCTGGCGGCCTTGGTCGGTCCACAGCTTCCTCAAGCCAAGCTCCC 
+CAGGCCCAGGTCAGGCCTCACGGTGGCCTCTCCAGGATGAGCTCCTGCCCTCCGATGGCA +TCTCCAGGCCCCAAATGGTCTCCGGTCGGTGGGCTCCTCCACGC +>ENST00000493797.1|ENSG00000239906.1|OTTHUMG00000002481.1|OTTHUMT00000007038.1|AL627309.2-201|AL627309.2|323|antisense_RNA| +TTGGTTTCCCAGGGGGCAGTGCTGAGCTCTTTGCCAGTGGGAAATGGGATGCTGGTGATT +TCCAGTAGGTGACCTCACAGTGACTCAAGCTACCACTTACTGTTGATTGTGACGAAATGC +CAGCTGAGGCACATGCCTTGGGAGCTAAGTGGTTGCTGCCCTTGACCACTGTGAAGACTG +GTGTGGGAAGGGTCGTTTTGGATGCACTTGAGCAGGGGTCCCCAACCCCTGAGCCATGGA +GCCGCAAGGAGCCACACAGCAGGAGGTGGGAACATCCAGTTGCAGGAAAACAAGCTTAAC +ACGCCCACTGATTCTACATTATG +>ENST00000484859.1|ENSG00000241860.6|OTTHUMG00000002480.3|OTTHUMT00000007035.1|AL627309.5-201|AL627309.5|4860|antisense_RNA| +GCCATTTCTTTTTTTTCTTTTTTTTTTTAAGATAAGAGTCTTTCTCTTGTCACCCAGGCT +GGAGTGCAATGGTGCGATTTTGGCTCACTGCAACCTTCACCTTCTGGGTTCAAGCAATTC +TCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGTGCCCGCCACCATACCCAGCTAAT +TTTTGTATTTTTAGTAGAGATGGGATTTCACCATGTTGGCCAGGCTCCTCTGGAGCTCCT +GACCTCAGGCAATCTGCCTGCCTCAGCCTCCCAAAATTCAAGGAGTACAGATGTGAACAA +CCACGCCCGGCCTCCATTTCTTTTTTGTAGTCTTTAATAAACAGCTGCTATCATTGCAGA +CTTGCTGTTTAGGCACTTAGGAATTTTTCACTAGAAGGCATGTAAATAAAGACCATGGGC +AATTGTAATGAATTTCGCCTTCATTCTTTGACTACATGACTGTCCCCAGAGCTGTAACTT +TATTGAATTTTTTAGAAGCCATTTAGCTAGCAACTGAGCCTAACCAGCCACTCACTGTCA +TTATTCAGTGCTCTTTTATTATTGTCTATTTCTCCTCCAACTTGGCTACACTCACAAAGT +GATAAAAACTTGCATTTGTTTTCTTTCCTTTTCAGAGACAGCGTCTTGCTCTGTTGCTTA +GGCTACAGTACAGTGACATGATCATGGTTCACTGTAGCCTCAAACTCCTGGGCTCAAGTG +GTTCTCTCACTTCAGTCTCCCAAGTAGCTGGGACTACAGACATGTGCCACCATGTCCAGC +TAATTTTTTATCATAGAGACGGGATCTTGCCATGTTGCTCCGACTGGGCTCAAAACTCCT +GACCTCAAGTGATCCTCCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCAGGCATGA +CCACCTGTGCCCAGCCCCCTATTATTATTATTTTAAATAATAGCTTTATTAAAATATTCA +CATACCATTCACTTTATTTATTGAAATCTGCAATTCAGTAGGTTTTAGAATATTCACAGA +GCTGTGCATCGATCACCACAGTCACTTTTAGAACCTTTCATTACCCTATAGAGAAATCCA +TACCCCTTAGCCACTACCTCCTACTCTCCCCACCTACCTTTGCCCCCAGCCTTAGGCAAC +CATTGATTAATTTTTTTGTCACTATAGATTTGCCTAATCTGGACAAATAGAATTGTACAA +TATGTGATCTTTTGTGGCTTTTTTTCCCTCTTAGCACAGTGTTTTCAAAGTTCCTTTATG 
+TCATAGTGTGTATCAATATTTCATTCCTTCTATGGCAGTATTCCATGGTAGAGACACACT +GCATTTTGTTTATCTGTTCATCAGTTGGTGGATATTTGGGTTGTTTCCATGTATTCCATG +TATTGGTCATTATGAATAATGCTGCTATGAAGATTGTTGTACAAGTTTTTGTGTGGACAT +ATATTTTTATTTTTCTGGGATATATGCCTAGGAGTGAAATTGTTGCATTATAGGATGACT +GTACATTTAGCCTTTTGAGAAACTGCCAGAATGTTTTCTAACGTGGCTATACCAGTTGGG +TGCAATGGCTCACACCTGTAATCCCAGCTACTCAGGAGGCTCAGCTAGGAGGATGGCTTG +AGCCCGTGAATTCAAGACCAGCCTGGGCAAGATAGTGAAACCCCGTCTTGATTTTTTAAA +AATCCAATTAAAATGACAAGAAAAGAAATACCCAAACAAAATGGTTACACAATTTTATGT +TCCCACCAGTAATGTTTGTGGGTTCCAATTCCTCCACATCTTCACTGACATTTTTTTTTT +CTAGATAGGGGCTTGCTCTGTCTCTCAGGCCGCAGTGCAATGATGCCATCACAGTTCACT +GCAGCCGTGACCTCCCAGGCACAAGTGATTCTCTCATCTCAGCCTCCTGGGTAGCTGAAA +ATTACAGGTGTACGCCACCATGCCTGGCTAATTTTTAGATTTTTCTGTAGTGGTGGGATT +TTACCATGTTGCCCAGGCTGGTCTCATACTCCTGGCCTCAAGTGATCTGCCCACCTCAGC +CTCCCTAAGTTCTGGAATTACAGGCTGCCACCATGCCCGGCCTTCACCAACATTTGCCAT +TATCTGTTTTTTTTTTCTTCCTTTATACCTTAAAGCAGTATAAGAACAAGTGTCTTCAAT +TATAGGAAACAGTATAATCCCAGGGCTTTGGGAGGCTAAGACAGGAAGATGTCTTGATGC +CAGGAGTTTTTTTTGTTGTTGTTGTTTTTGTTTTTGTTGTTGTTGTTGTTGTTGACAGTC +TCGCTCTGTCACCCAGGGTGGAGTGCAGTGATGGGGTCCACTGCAACCTCCACCTCCCAG +GTTCAAGTGATTCTCCTGCCTCAGCCTCCCGAGTAGGTGAGACTACAGGCACACGCCACT +ACTGCCCAGCTGATTTTTGTATTTTTGATAGAGTCAGAGTTTCACCGTGTTGGCCAGGCT +GGTCTCGAACTCCAGACTTCAGGTGATTTGCCTGCCTTAGCTTCCCAAAGTGCTGCGATT +ACAAGCATGAGCCACCATGCCCAGCCTGATGCCAGGAGTTTTAGACTAGCCTGGGCAACC +TAGCAAGACCTTGTCTCTACAGAATATTTAAAAATTAGCCAAATGTGGTGGTGCCTGTGT +ATAGTCTCTCTCCCTCTCTCTTTTTTTTTTCTAACTTTTTGTGACATGGTCTGGCTCTGT +CACCCAGGCTGAAGTGCAGTGGTGTGATCATGGCTCACTGCAGCCTGAAACTCCTGGGAT +CAAGTGATCAATCCTCCCACCTCATCCTACCAAGTAGTAGGGACCACAGGTGTGTGCCAC +CCAGGTCTTGCTATGTTGTCCAGGCTGGTCTTGAGCTCCTGGCCTCAAGCAATCCTCTCA +CCTTGGCCCCCCACAGTGCAAGGATTACAGGTATGAGCCACCATGCCTGGCCCCTACCCT +GCCTACTGAGAACCAAAGGAAGGATCCAAATTCTCCTTAGCTCAACTCGAGCCATTTCCT +GATTGCTTCATCAGCGAGGAGCTGGTTATTGGGCTGTCCAGGCCTCCCAAGCAGCACAGA +AATGAGGTGAAGGAGTTTTCCTGTTGCTCCACTCTGTAAGGAGTTGGAGGGTGATGTTTA +CTCGTTTGCAGAGAGAGATGCCTTGTAGGCACCTCAGGATGGAGAGGGCCCTGATTCCAA 
+TGTCCTTTTTTTCTTCAGAAACAGGACCTTGCCCTGTCACTCAGGATGGAGTTCAGTGGT +CCTATCATGGCTCATTATAGCCTCAAACTCCCAGGCTCAAGCAATCCTACCATGTCAGCC +TTCCCAGTAGCTGGGACTACAGCTGGATCTACCATGAAAGACTTGTGAATCCAGGAAGAG +AGACTGACTGGGCAACATGTTATTCAGGTACAAAAAGATTTGGACTGTAACTTAAAAATG +ATCAAATTATGTTTCCCATGCATCAGGTGCAATGGGAAGCTCTTCTGGAGAGTGAGAGAA +GCTTCCAGTTAAGGTGACATTGAAGCCAAGTCCTGAAAGATGAGGAAGAGTTGTATGAGA +GTGGGGAGGGAAGGGGGAGGTGGAGGGATGGGGAATGGGCCGGGATGGGATAGCGCAAAC +TGTCCGGGAAGGGAAACCAGCACTGTACAGACCTGAACAACGAAGATGGCATATTCTGTT +CAGGGAATGGTGAATTAAGTGTGGCAGGAATGCTTTGTAGACACAGTAATTTGCTTGTAT +GGAATTTTGCCTGAGAGACCTCATTGCAGTTTCTGATTTTTTGATGTCTTCATCCATCAC +TGTCCTTGTCAAATAGTTTGGAACAGGTATAATGATCACAATAACCCCAAGCATAATATT +TCGTTAATTCTCACAGAATCACATGTAGGTGCCACAGTTATCCCCATTTTATGAATGGAG +TGATGAAAACCTTAGGAATAATGAATGATTTGCGCAGGCTCACCTGGATATTAAGACTGA +GTCAAATGTTGGGTCTGGTCTGACTTTAATGTTTGCTTTGTTCATGAGCACCACATATTG +CCTCTCCTATGCAGTTAAGCAGGTAGGTGACAGAAAAGCCCATGTTTGTCTCTACTCACA +CACTTCCGACTGAATGTACGTATGGAGTTTCTACACCAGATTCTTCAGTGCTCTGGATAT +TAACTGGGTATCCCATGACTTTATTCTGACACTACCTGGAGTTAGCACAGACCCCACAAG +TTAGGGGCTCAGTCCCACGAGGCCATCCTCACTTCAGATGACAATGGCAAGTCCTAAGTT +GTCACCATACTTTTGACCAACCTGTTACCAATCGGGGGTTCCCGTAACTGTCTTCTTGGG +TTTAATAATTTGCTAGAACAGTTTACGGAACTCAGAAAAACAGTTTATTTTCTTTTTTTC +TGAGAGAGAGGGTCTTATTTTGTTGCCCAGGCTGGTGTGCAATGGTGCAGTCATAGCTCA +TTGCAGCCTTGATTGTCTGGGTTCCAGTGGTTCTCCCACCTCAGCCTCCCTAGTAGCTGA +GACTACATGCCTGCACCACCACATCTGGCTAGTTTCTTTTATTTTTTGTATAGATGGGGT +CTTGTTGTGTTGGCCAGGCTGGCCACAAATTCCTGGTCTCAAGTGATCCTCCCACCTCAG +CCTCTGAAAGTGCTGGGATTACAGATGTGAGCCACCACATCTGGCCAGTTCATTTCCTAT +TACTGGTTCATTGTGAAGGATACATCTCAGAAACAGTCAATGAAAGAGACGTGCATGCTG +GATGCAGTGGCTCATGCCTGTAATCTCAGCACTTTGGGAGGCCAAGGTGGGAGGATCGCT +TAAACTCAGGAGTTTGAGACCAGCCTGGGCAACATGGTGAAAACCTGTCTCTATAAAAAA +>ENST00000490997.5|ENSG00000241860.6|OTTHUMG00000002480.3|OTTHUMT00000007036.1|AL627309.5-202|AL627309.5|518|antisense_RNA| +CTCACCTTGGCCCCCCACAGTGCAAGGATTACAGGTATGAGCCACCATGCCTGGCCCCTA +CCCTGCCTACTGAGAACCAAAGGAAGGATCCAAATTCTCCTTAGCTCAACTCGAGCCATT 
+TCCTGATTGCTTCATCAGCGAGGAGCTGGTTATTGGGCTGTCCAGGCCTCCCAAGCAGCA +CAGAAATGAGAAACAGGACCTTGCCCTGTCACTCAGGATGGAGTTCAGTGGTCCTATCAT +GGCTCATTATAGCCTCAAACTCCCAGGCTCAAGCAATCCTACCATGTCAGCCTTCCCAGT +AGCTGGGACTACAGCTGGATCTACCATGAAAGACTTGTGAATCCAGGAAGAGAGACTGAC +TGGGCAACATGTTATTCAGGTACAAAAAGATTTGGACTGTAACTTAAAAATGATCAAATT +ATGTTTCCCATGCATCAGGTGCAATGGGAAGCTCTTCTGGAGAGTGAGAGAAGCTTCCAG +TTAAGGTGACATTGAAGCCAAGTCCTGAAAGATGAGGA +>ENST00000466557.6|ENSG00000241860.6|OTTHUMG00000002480.3|OTTHUMT00000007037.2|AL627309.5-203|AL627309.5|1301|lincRNA| +ATGATGATTATTCCCCACCTTCTAAGAGACAAAGACCAACGAGCCACCACAGCCACCAGT +CCCAGAACCTGCCAATGCTGGGGAACGGAAAATGAGGGAGTTCAACTCTGGCCCTCACAA +TCCAGTGGAGGAGACGAAACTCATCTGCCTCTGTCCCTCTGGGCACGCCTCATGCCAGGT +GCATCTGTGGACAGGGGCCATGCCCCTGGGCTTCCAAAGTTGGAGAGAGCTGCCAGGCTC +AGGTCTGAAGGCCAGAATTCTACAGTAAGTCCTACTGAGTCAAGGTGGGAGCAGGGTCGG +TAGCTTCCGAGGCTCTGCGGGAGAATCCGTTTCCTGGCCGTAGAGGTGGCCTGCACTCCG +CAGCTTGTGCTGCCCGTCTCGAATGACTGGAGTTTCCTGCTTCTGTCACTACACCTCCCA +CCCTCTCCATCACCTGCTCTGCTCTTACAAGGATCCGAAGAAATGGAATCATCGTATCGC +TGATCTACGTAAACAAACTGAAGAATTGTCTGAAAGAAAATATGACATGAACTTATGAAT +TCAACAGGTGAAGATTTACAACTTGATAAATCAACTTTGTCAGCTCGAGCTGTAAAAGCC +AAAGGTCCGGTGATGATCCCATACCCTTTTTTCCAGTCTCATGTTGAAGATTTTTATGTA +GAAGGCCTTCCCAAAGGAATTTTTTTTTTTTTTTTTTTTTTGAGATGGAGTTTTCACTCT +TATCGCCCAGGCTGGGGTGCAATGGCGCAACCTTGCTGGTCACTGCAACCTCTGCCTCCT +GGGTTCAAGAAATTCTCCTGCCTTAGCCTCCCAAGTCACTGGGATTACAGGTGCCCACCA +CCATACCAGGCTAATTTTTGTATTTTTAGTGGAGATGCGGTTTCACCATGTTGGCCGGGC +CAGTCTCGAACTCCTGACGTCAAGTGATCTTCCCGCCTCGACTCCTGATATCAAGTGATC +TTCCCGCCTCGGCCTCCCAGAGTGCTGAGATTACAGACGTGAACCCATGCCTGGCCAGGA +ATTTTGTTTTTTAGGAAGGCTTTCTACTAATGGAATTCCTGGCCTTGAGAGGATGTTACT +TTAGAAGGAAAGGATTTTTTTGTTATTAAAAGCTGGACCTACCATGAAAGACTTCTGAAT +CCAGGAAGAGAAACTGACTGGGCAACATGTTATTCAGAAACAGGACCTTGCCCTGTCACT +CAGGATGGAGTTCAGTGGTCCTATCATGGCTCATTATAGCCTCAAACTCCCAGGCTCAAG +CAATCCTACCATGTCAGCCTTCCCAGTAGCTGGGACTACAG +>ENST00000491962.1|ENSG00000241860.6|OTTHUMG00000002480.3|OTTHUMT00000007168.1|AL627309.5-204|AL627309.5|278|lincRNA| 
+TGAGGTGGAGTCTTGCTCTGTCATCCAGGCTGGCAGTGGCGTGATCTTGGCTCACTGCAA +TGTCTGCCTCCTGGGTTCAGGTGATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGACTAC +AGGCGCCCGCCACCATGCCCTGCTGTTTTGTATTTTTGAGAAATGGAATCATCGTATCGC +TGATCTACGTAAACAAACTGAAGAATTGTCTGAAAGAAAATATGACATGAACTTATGAAT +TCAACAGGTGAAGATTTACAACTTGATAAATCAACTTT +>ENST00000410691.1|ENSG00000222623.1|-|-|RNU6-1100P-201|RNU6-1100P|104|snRNA| +ATGCTTGCCTCAGTAGCACACATACTTAAGTTGGAACAATAGAGAGATTGGCACGGCCTC +TGTGAAAGAATGACATGCAAATTTGTGAAGCATTCCATATTTTT +>ENST00000496488.1|ENSG00000241599.1|OTTHUMG00000002525.1|OTTHUMT00000007169.1|AL627309.4-201|AL627309.4|457|lincRNA| +ATTCAAATGGGCCCTGCTGCCAAGCCTTTTTTTTTTTTTTTTAACAATGCCATCTCTTCA +TATTGTTCCATTTAACAAAACTGCAGCCCTTCATCTATCCTTAAGTCCCTTGGCCAGTGG +TACAGAGCCAGAGTATGCTACTCCCTAGCAGGAAATCAACAGGATGACCTACTAAACACC +ATTCAGAAGATGCTAAGACCCATGAATTGCAACAGGAAAGAAAAGACAGAGAATTAGTCA +GACAGGAATCAAGGTTCCCTAGAGAAATGGCTGACTCCATGTATGGTGCAGTATATTGAT +CCTGGAACATCTGTTTTGCCAGAAAGCAAGGAAGCCATCAAAGTCCAACAGGATCACTTC +AAAAAGACATGAAAGTCAACTTGAAGAGATAATTATTAACCTAGATGAGACAATCTAAGC +ATCCAAAACAATAAAGACTGCAATGGCCTGAAATACA +>ENST00000624431.2|ENSG00000279928.2|OTTHUMG00000191962.1|OTTHUMT00000491420.1|FO538757.2-201|FO538757.2|570|unprocessed_pseudogene| +GATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCACAACCTAGGCCAGTGTGTGGT +GATGCCAGGCATGCCCTTCCCCAGCATCAGGTCTCCAGAGCTGCAGAAGACGACGGCCGA +CTTGGATCACACTCTTCTCAGAGCCCAGGCCAGGGGCCCCCAAGAAAGGCTCTGGTGGAG +AACCTGTGCATGAAGGCTGTCAACCAGTCCATAGGCAGGGCCATCAGGCACCAAAGGGAT +TCTGCCAGCATAGTGCTCCTGGACCAGTGATACACCCGGCACCCTGTCCTGGACATGCTG +TTGGCCTGGATCTGAGCCCTCGTGGAGGTCAAAGCCACCTTTGGTTCTGCCATTGCTGCT +GTGTGGAAGTTCACTCAAGTAGGCCTCTTCCTGACAGGCAGCTGCACCACTGCCTGGCGC +TGTGCCCTTCCTTTGCTCTGCCCGCTGGAGACGGTGTTTGTCATGGGCCTGGTCTGCAGG +GATCCTGCTACAAAGGTGAAACCCAGGAGAGTGTGGAGTCCAGAGTGTTGCCAGGACCCA +GGCACAGGCATTAGTGCCCGTTGGAGAAAA +>ENST00000623083.4|ENSG00000279457.4|OTTHUMG00000191963.1|OTTHUMT00000491421.1|FO538757.1-201|FO538757.1|1397|unprocessed_pseudogene| +ATGACTCCTGTGAGGATGCAGCACTCCCTGGCAGGTCAGACCTATGCCGTGCCCCTCATC 
+CAGCCAGACCTGCGGCGAGAGGAGGCCGTCCAGCAGATGGCGGATGCCCTGCAGTACCTG +CAGAAGGTCTCTGGAGACATCTTCAGCAGGATCTCCCAGCAGGTAGAGCAGAGCCGGAGC +CAGGTGCAGGCCATTGGAGAGAAGGTCTCCTTGGCCCAGGCCAAGATTGAGAAGATCAAG +GGCAGCAAGAAGGCCATCAAGGTGTTCTCCAGTGCCAAGTACCCTGCTCCAGGGCGCCTG +CAGGAATATGGCTCCATCTTCACGGGCGCCCAGGACCCTGGCCTGCAGAGACGCCCCCGC +ACAGGATCCAGAGCAAGCACCGCCCCCTGGACGAGCGGGCCCTGCAGGAGAAGCTGAAGG +ACTTTCCTGTGTGCGTGAGCACCAAGCCGGAGCCCGAGGACGATGCAGAAGAGGGACTTG +GGGGTCTTCCCAGCAACATCAGCTCTGTCAGCTCCTTGCTGCTCTTCAACACCACCGAGA +ACCTGTAGAAGAAGTATGTCTTCCTGGACCCCCTGGCTGGTGCTGTAACAAAGACCCATG +TGATGCTGGGGGCAGAGACAGAGGAGAAGCTGTTTGATGCCCCCTTGTCCATCAGCAAGA +GAGAGCAGCTGGAACAGCAGGTCCCAGAGAACTACTTCTATGTGCCAGACCTGGGCCAGG +TGCCTGAGATTGATGTTCCATCCTACCTGCCTGACCTGTCCGGCATTGCCAACGACCTCA +TGTACATTGCCGACCTGGGCCCCGGCATTGCCCCCTCTGCCCCTGGCACCATTCCAGAAC +TGCCCACCTTCCACACTGAGGTAGCCGAGCCTCTCAAGGTAGACCTACAAGATGGGGTAC +TAACACCACCCCCACCGCCCCCACCACCACCCCCAGCTCCTGAGGTGCTGGCCAGTGCAC +CCCCACTCCCACCCTCAACCGCGGCCCCTGTAGGCCAAGGCGCCAGGCAGGACGACAGCA +GCAGCAGCGCGTCTCCTTCAGTCCAGGGAGCTCCCAGGGAAGTGGTCGACCCCTCCGGTG +GCCGGGCCACTCTGCTAGAGTCCATCCGCCAAGCTGGGGGCATCGGCAAGGCCAAGCTGC +GCAGCATGAAGGAGCGAAAGCTGGAGAAGAAGAAGCAGAAGGAGCAGGAGCAAGTGAGAG +CCACGAGCCAAGGTGGGCACTTGATGTCGGATCTCTTCAACAAGCTGGTCATGAGGCGCA +AGGGCATCTCTGGGAAAGGACCTGGGGCTGGTGAGGGGCCCGGAGGAGCCTTTGCCCGCG +TGTCAGACTCCATCCCTCCTCTGCCGCCACCGCAGCAGCCACAGGCAGAGGAGGACGAGG +ACGACTGGGAATCCTAG +>ENST00000612080.1|ENSG00000273874.1|-|-|MIR6859-2-201|MIR6859-2|68|miRNA| +TGTGGGAGAGGAACATGGGCTCAGGACAGCGGGTGTCAGCTTGCCTGACCCCCATGTCGC +CTCTGTAG +>ENST00000442116.1|ENSG00000228463.10|OTTHUMG00000002552.2|OTTHUMT00000007244.1|AP006222.1-201|AP006222.1|1292|processed_transcript| +AGGACCCACCCGAGGGTGGGTCACCATGGCTTTGGAGCGCCTGGTAGTGTGGTGTGTCCA +CAGTGAAGACCAGAGTTTCATTGTCCTTAAGACTGACCTGGGGAGATGTGGCTGTAGGCC +ATTGAGGAAGACGGCACCGAAAGCGAAGGAAGCTCCTGCTCCTCCTAAAGCCGAAGCCAA +AGCGAAGGCCTTAAAGGCCAAGAAGGCAGTGTTGAAAGGTGTCCGCAGCCACACGCAAAA +AAGAAGATCCGCATGTCACTCACCTTCAGGCGGCCCAAGACACTGCGACTCCGGAGGCAG 
+CCCAGATATCCTCGGAAGAGCACCCCCAGGAGAAACAAGCTTGGCCACTATGCTATCATC +AAGTTTCCGCTGACCACTGAGTCGGCCGTGAAGAAGATAGAAGAAAACAACACGCTTGTG +TTCACTGTGGATGTTAAAGCCAACAAGCACCAGATCAGACAGGCTGTGAAGAAGCTCTAT +GACAGTGATGTGGCCAAGGTCACCACCCTGATTTGTCCTGATAAAGAGAAGGCATATGTT +CGACTTGCTCCTGATTATGATGCTTTCGATGTTGTAACAAAATTGGGATCACCTAAACTG +AGTCCAGCTGGCTAACTCTAAATATATGTGTATCTTTTCAGCATAAAAAAAATAATGTTT +TTCATAAGAATGACAACTTAATTAGAATCAAATCTATAAGCTTTAAGATTTTACGTTTCT +AGTAAGTATAATATTAGCTTATTTGACTAGAACTCAAGCAGAATAGGAATTTATGCTTGT +TTTATATTCAATAATGATAATTTTGAAGATATAGTTGTTTTATTACACCAAAAATACTAT +ATTAATCTTATTTAACTAAGTTTTATCCAAATCATGTTAACTTAAGAAACATTTGATCAG +TTCCTATATTTCTAGGAGTTTGGTGAATATTTATTTATAAATGCTTATTTTTTTCCAAGC +CAAGTTAGAATAGAGCACTTTTAGAGGTTTTCATAAATGAATTTTGCAATGCTCTCTGGA +GTTAAGAAAATATCACATATACATAACATACATTAATAGATACACAAACACAAATAGAGA +TTTCATAGCTTTCATCCTGAAATTTCAGCCATGAATCAGGCATAAATATTCTGATGGTTA +ATTTCAGACATCTACTTGATCGGATTGAGAGACACACATAGCTGGTCAAACACGATTTCA +GCCATGAATCAGGCATAAATATTCTGATGGTTAATTTTAGACATCTACTTGACTGGATTA +AGAGACACACATAGCTGGTCAAACGTGATTTC +>ENST00000448958.2|ENSG00000228463.10|OTTHUMG00000002552.2|OTTHUMT00000488654.2|AP006222.1-202|AP006222.1|2250|processed_transcript| +ACCAAACCAATGCAGACCAAACCAATGCAGAACTCCTATGTGCTGATGGTGGTCTTACAT +TTCCCTAAGTTTCTGCCGACTAAACTGTGCACACGTTCTCAGGACCTCCTGAAGCTGCGT +CACAGGCACTAATCAAAGAACACAACCAAGAGTTTGGCCTTTTCTTCAGCACTGGGAATT +GTGATCCAAAGCTTTTCCTGATGAGGCACAAAGTTGGAGAAACACAACGCAAACTAAACA +ACAATGAAACAGAACAGAGTGAATCTGCTGTAGCTCAAGAGAGGACGTAGCTGCCCCCAC +TCCGCATCCCCGGGCTCGGGTTTGCCTTGCTGACCTCTGCTGCCACCTGGTGCTGCACAG +AGAAACTGAGGAGAAACCACATCAGTCTCCTTCAGCCTCAGCTTCACATCTGTGGGTCAA +GCAACCCTTTCAGAAGCTGTATAATGTGGGAAAGCTTTCCTCTCAGGAAAATGCACACAT +CCAACTTTGAGAAGATGCCCTTGGGGGTGCTTCAAGGATCCTAGATAATAACCCCCTTTC +CCGAACATCCAAGAACCTAAGTTTTTTTTTTTTTTTTGAGAAAGTCTCGCTCTCTCTCCC +ATTCTGGAGTGCAGTGGCGTGATCTTGGCTCACTGCAAGCTCCACCTCCCAGGTTCAAGC +CATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGGCTACAGGCACCTGCCACCACACCCGG +CTAATTTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGAATCGTCTTG 
+ATCTCCTGACCTTGTGATCCACCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGTGTG +AGCCACCACGCCTGGTCCAAGAACCCAACTTTTAGATCTAGAGTGATGTCAGCATGACAT +TGATTTCCTGAGGCCCAGGGGTGAAGGAGCTGAGGACAGCAGAGGGGTGAAGGAAGTCAG +CTACAGACAGCAGCAGCTGATGCACAGGCCTCCCAGTGCCTGAAGTCACCCGGAATTGGG +AAGTGCTCAGAAGCTTACAAAGCTGCCTCGAGGAATCATTCTAGCCTTCTCGTCTTGCTA +ATTTGTAACCTCCCACTTCAACAGTGAGAAAGCTGGTTCCCACCATCTGCGACTTATACG +GCACCGAAAGCGAAGGAAGCTCCTGCTCCTCCTAAAGCCGAAGCCAAAGCGAAGGCCTTA +AAGGCCAAGAAGGCAGTGTTGAAAGGTGTCCGCAGCCACACGCAAAAAAGAAGATCCGCA +TGTCACTCACCTTCAGGCGGCCCAAGACACTGCGACTCCGGAGGCAGCCCAGATATCCTC +GGAAGAGCACCCCCAGGAGAAACAAGCTTGGCCACTATGCTATCATCAAGTTTCCGCTGA +CCACTGAGTCGGCCGTGAAGAAGATAGAAGAAAACAACACGCTTGTGTTCACTGTGGATG +TTAAAGCCAACAAGCACCAGATCAGACAGGCTGTGAAGAAGCTCTATGACAGTGATGTGG +CCAAGGTCACCACCCTGATTTGTCCTGATAAAGAGAAGGCATATGTTCGACTTGCTCCTG +ATTATGATGCTTTCGATGTTGTAACAAAATTGGGATCACCTAAACTGAGTCCAGCTGGCT +AACTCTAAATATATGTGTATCTTTTCAGCATAAAAAAAATAATGTTTTTCATAAGAATGA +CAACTTAATTAGAATCAAATCTATAAGCTTTAAGATTTTACGTTTCTAGTAAGTATAATA +TTAGCTTATTTGACTAGAACTCAAGCAGAATAGGAATTTATGCTTGTTTTATATTCAATA +ATGATAATTTTGAAGATATAGTTGTTTTATTACACCAAAAATACTATATTAATCTTATTT +AACTAAGTTTTATCCAAATCATGTTAACTTAAGAAACATTTGATCAGTTCCTATATTTCT +AGGAGTTTGGTGAATATTTATTTATAAATGCTTATTTTTTTCCAAGCCAAGTTAGAATAG +AGCACTTTTAGAGGTTTTCATAAATGAATTTTGCAATGCTCTCTGGAGTTAAGAAAATAT +CACATATACATAACATACATTAATAGATACACAAACACAAATAGAGATTTCATAGCTTTC +ATCCTGAAATTTCAGCCATGAATCAGGCATAAATATTCTGATGGTTAATTTCAGACATCT +ACTTGATCGGATTGAGAGACACACATAGCTGGTCAAACACGATTTCAGCCATGAATCAGG +CATAAATATTCTGATGGTTAATTTTAGACA +>ENST00000441866.2|ENSG00000228463.10|OTTHUMG00000002552.2|OTTHUMT00000346905.2|AP006222.1-203|AP006222.1|2256|processed_transcript| +AATATCTCGTCATGGACTGTGCCCCGCTCGAGCCTCTCCACATGCAGCAGGAAGGAAAGT +GGAGGGAGCTGCTCCTTTCCGTAGCCGGGGTGCCCACCCCAACCAGGCTGCCTCTGCCAC +CCAAGACAGAGGTTCTCTGATAATAATTTGTGGGGCTTGTTTCCAGAGACCACACCTGAA +GCTGCCAACTCCCCGGAGGGAAGGTCCTGATTAATGGCCGATGAATTTCTCCTTAAGGCC +CTGAAACTGCCTACTCAGAACCAAGCCAGTTTTTCCTGCCTGTCCTGTTTGGGCAGGCAG 
+AGGAGGCAGCTAGAAACCCATTATGCAGGGGATGGGGACCAAACCAATGCACAACTCCTA +CGTACTGATGGTGGTCTTACGTTTCCCTAAGTTTCTGCCGACTAAACTGTGCACACGTTC +TCAGGACCTCCTGAAGCTGCGTCACAGGCGCTGATCAAAGAACACAACCAAGAGTTTGGC +CTTTTCTTCAGCACTGGGAATTGTGATCCAAAGCTTTTCCTGATGAGGCACAAAGTTGGA +GAAACAAAACGCAAACTAAGCAACAATGAAACAGAACAGAGTGAATCTGCTGTAGCTCAA +GAGAGGACGTAGCTGCCCCCACCCCGCATCCCTGGGCTCGGGTTTGCCTTGCTGACCTCT +GCTGCCACCTGGTGCCGCACAGAGAAACTGAGGAGAAACCACATCAGTCTCCTTCAGCCT +CAGCTTCACATCTGTGGGTCAAGCAACCCTTTCAGAAGCTGTATAATGTGGGAAAGCTTT +CCTCTCAGGAAAATGCACACATCCAACTTTGAGAAGATGCCCTTGGGGGCGCTTCAAGGA +TCCTAGATAATAACCCCCTTTCCCGAACATCCAAGAACCTAAGTTTTTTTTTTTTTTTTG +AGAAAGTCTCGCTCTCTCTCCCATTCTGGAGTGCAGTGGCGTGATCTTGGCTCACTGCAA +GCTCCACCTCCCAGGTTCAAGCCATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGGCTAC +AGGCACCTGCCACCACACCCGGCTAATTTTTTTGTATTTTTAGTAGAGACGGGGTTTCAC +CGTGTTAGCCAGAATCGTCTTGATCTCCTGACCTTGTGATCCACCCGCCTCGGCCTCCCA +AAGTGCTGGGATTACAGGTGTGAGCCACCACACCTGGTCCAAGAACCCAACTTTTAGATC +TAGAGTGATGTCAGCATGACATTGATTTCCTGAGGCCCAGGGGTGAAGGAGCTGAGGACA +GCAGAGGGGTGAAGGAAGTCAGCTACAGACAGCAGCAGCTGATGCACAGGCCTCCCAGCG +CCTGAAGTCACCCGGAATTGGGAAGTGCTCAGAAGCTTACAAAGCTGCCTCGAGACGGCA +CCGAAAGCGAAGGAAGCTCCTGCTCCTCCTAAAGCCGAAGCCAAAGCGAAGGCCTTAAAG +GCCAAGAAGGCAGTGTTGAAAGGTGTCCGCAGCCACACGCAAAAAAGAAGATCCGCATGT +CACTCACCTTCAGGCGGCCCAAGACACTGCGACTCCGGAGGCAGCCCAGATATCCTCGGA +AGAGCACCCCCAGGAGAAACAAGCTTGGCCACTATGCTATCATCAAGTTTCCGCTGACCA +CTGAGTCGGCCGTGAAGAAGATAGAAGAAAACAACACGCTTGTGTTCACTGTGGATGTTA +AAGCCAACAAGCACCAGATCAGACAGGCTGTGAAGAAGCTCTATGACAGTGATGTGGCCA +AGGTCACCACCCTGATTTGTCCTGATAAAGAGAAGGCATATGTTCGACTTGCTCCTGATT +ATGATGCTTTCGATGTTGTAACAAAATTGGGATCACCTAAACTGAGTCCAGCTGGCTAAC +TCTAAATATATGTGTATCTTTTCAGCATAAAAAAAATAATGTTTTTCATAAGAATGACAA +CTTAATTAGAATCAAATCTATAAGCTTTAAGATTTTACGTTTCTAGTAAGTATAATATTA +GCTTATTTGACTAGAACTCAAGCAGAATAGGAATTTATGCTTGTTTTATATTCAATAATG +ATAATTTTGAAGATATAGTTGTTTTATTACACCAAAAATACTATATTAATCTTATTTAAC +TAAGTTTTATCCAAATCATGTTAACTTAAGAAACATTTGATCAGTTCCTATATTTCTAGG +AGTTTGGTGAATATTTATTTATAAATGCTTATTTTTTTCCAAGCCAAGTTAGAATAGAGC 
+ACTTTTAGAGGTTTTCATAAATGAATTTTGCAATGC +>ENST00000634344.2|ENSG00000228463.10|OTTHUMG00000002552.2|OTTHUMT00000007243.3|AP006222.1-204|AP006222.1|1554|processed_transcript| +ACCAAACCAATGCAGACCAAACCAATGCAGAACTCCTATGTGCTGATGGTGGTCTTACAT +TTCCCTAAGTTTCTGCCGACTAAACTGTGCACACGTTCTCAGGACCTCCTGAAGCTGCGT +CACAGGCACTAATCAAAGAACACAACCAAGAGTTTGGCCTTTTCTTCAGCACTGGGAATT +GTGATCCAAAGCTTTTCCTGATGAGGCACAAAGTTGGAGAAACACAACGCAAACTAAACA +ACAATGAAACAGAACAGAGTGAATCTGCTGTAGCTCAAGAGAGGACGTAGCTGCCCCCAC +TCCGCATCCCCGGGCTCGGGTTTGCCTTGCTGACCTCTGCTGCCACCTGGTGCTGCACAG +AGAAACTGAGGAGAAACCACATCAGTCTCCTTCAGCCTCAGCTTCACATCTGTGGGTCAA +GCAACCCTTTCAGAAGCTGTATAATGTGGGAAAGCTTTCCTCTCAGGAAAATGCACACAT +CCAACTTTGAGAAGATGCCCTTGGGGGTGCTTCAAGGATCCTAGATAATAACCCCCTTTC +CCGAACATCCAAGAACCTAAGTTTTTTTTTTTTTTTTGAGAAAGTCTCGCTCTCTCTCCC +ATTCTGGAGTGCAGTGGCGTGATCTTGGCTCACTGCAAGCTCCACCTCCCAGGTTCAAGC +CATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGGCTACAGGCACCTGCCACCACACCCGG +CTAATTTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGAATCGTCTTG +ATCTCCTGACCTTGTGATCCACCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGTGTG +AGCCACCACGCCTGGTCCAAGAACCCAACTTTTAGATCTAGAGTGATGTCAGCATGACAT +TGATTTCCTGAGGCCCAGGGGTGAAGGAGCTGAGGACAGCAGAGGGGTGAAGGAAGTCAG +CTACAGACAGCAGCAGCTGATGCACAGGCCTCCCAGTGCCTGAAGTCACCCGGAATTGGG +AAGTGCTCAGAAGCTTACAAAGCTGCCTCGAGACGGCACCGAAAGCGAAGGAAGCTCCTG +CTCCTCCTAAAGCCGAAGCCAAAGCGAAGGCCTTAAAGGCCAAGAAGGCAGTGTTGAAAG +GTGTCCGCAGCCACACGCAAAAAAGAAGATCCGCATGTCACTCACCTTCAGGCGGCCCAA +GACACTGCGACTCCGGAGGCAGCCCAGATATCCTCGGAAGAGCACCCCCAGGAGAAACAA +GCTTGGCCACTATGCTATCATCAAGTTTCCGCTGACCACTGAGTCGGCCGTGAAGAAGAT +AGAAGAAAACAACACGCTTGTGTTCACTGTGGATGTTAAAGCCAACAAGCACCAGATCAG +ACAGGCTGTGAAGAAGCTCTATGACAGTGATGTGGCCAAGGTCACCACCCTGATTTGTCC +TGATAAAGAGAAGGCATATGTTCGACTTGCTCCTGATTATGATGCTTTCGATGTTGTAAC +AAAATTGGGATCACCTAAACTGAGTCCAGCTGGCTAACTCTAAATATATGTGTA +>ENST00000450734.1|ENSG00000228463.10|OTTHUMG00000002552.2|OTTHUMT00000007241.1|AP006222.1-205|AP006222.1|457|transcribed_processed_pseudogene| +CGGCACCGAAAGCGAAGGAAGCTCCTGCTCCTCCTAAAGCCGAAGCCAAAGCGAAGGCCT 
+TAAAGGCCAAGAAGGCAGTGTTGAAAGGTGTCCGCAGCCACACGCAAAAAAGAAGATCCG +CATGTCACTCACCTTCAGGCGGCCCAAGACACTGCGACTCCGGAGGCAGCCCAGATATCC +TCGGAAGAGCACCCCCAGGAGAAACAAGCTTGGCCACTATGCTATCATCAAGTTTCCGCT +GACCACTGAGTCGGCCGTGAAGAAGATAGAAGAAAACAACACGCTTGTGTTCACTGTGGA +TGTTAAAGCCAACAAGCACCAGATCAGACAGGCTGTGAAGAAGCTCTATGACAGTGATGT +GGCCAAGGTCACCACCCTGATTTGTCCTGATAAAGAGAAGGCATATGTTCGACTTGCTCC +TGATTATGATGCTTTCGATGTTGTAACAAAATTGGGA +>ENST00000424587.7|ENSG00000228463.10|OTTHUMG00000002552.2|OTTHUMT00000007242.3|AP006222.1-206|AP006222.1|5603|processed_transcript| +CTGATCCATATGAATTCCTCTTATTAAGAAAAATAAAGCATCCAGGATTCAATGAAGAAC +TGACTATCACCTTGTTAATCATTCAGAAACATGTTGCAGGCTTAAGCCATTTTTGATATA +GATACTGAAACAATTACTTGCTAAGAGCAAACTTGAAGTAACAATTTGGACAAGACAGCA +AATGCTATTGTCCAAGTTTTCTAAAGAAGAATCTGAAGTGAAATGACATCAAGAGACCTA +TCAAGACCTGTATCCAGGAAAAGACCAAACCAATGCAGACCAAACCAATGCAGAACTCCT +ATGTGCTGATGGTGGTCTTACATTTCCCTAAGTTTCTGCCGACTAAACTGTGCACACGTT +CTCAGGACCTCCTGAAGCTGCGTCACAGGCACTAATCAAAGAACACAACCAAGAGTTTGG +CCTTTTCTTCAGCACTGGGAATTGTGATCCAAAGCTTTTCCTGATGAGGCACAAAGTTGG +AGAAACACAACGCAAACTAAACAACAATGAAACAGAACAGAGTGAATCTGCTGTAGCTCA +AGAGAGGACGTAGCTGCCCCCACTCCGCATCCCCGGGCTCGGGTTTGCCTTGCTGACCTC +TGCTGCCACCTGGTGCTGCACAGAGAAACTGAGGAGAAACCACATCAGTCTCCTTCAGCC +TCAGCTTCACATCTGTGGGTCAAGCAACCCTTTCAGAAGCTGTATAATGTGGGAAAGCTT +TCCTCTCAGGAAAATGCACACATCCAACTTTGAGAAGATGCCCTTGGGGGTGCTTCAAGG +ATCCTAGATAATAACCCCCTTTCCCGAACATCCAAGAACCTAAGTTTTTTTTTTTTTTTT +GAGAAAGTCTCGCTCTCTCTCCCATTCTGGAGTGCAGTGGCGTGATCTTGGCTCACTGCA +AGCTCCACCTCCCAGGTTCAAGCCATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGGCTA +CAGGCACCTGCCACCACACCCGGCTAATTTTTTTGTATTTTTAGTAGAGACGGGGTTTCA +CCGTGTTAGCCAGAATCGTCTTGATCTCCTGACCTTGTGATCCACCCGCCTCGGCCTCCC +AAAGTGCTGGGATTACAGGTGTGAGCCACCACGCCTGGTCCAAGAACCCAACTTTTAGAT +CTAGAGTGATGTCAGCATGACATTGATTTCCTGAGGCCCAGGGGTGAAGGAGCTGAGGAC +AGCAGAGGGGTGAAGGAAGTCAGCTACAGACAGCAGCAGCTGATGCACAGGCCTCCCAGT +GCCTGAAGTCACCCGGAATTGGGAAGTGCTCAGAAGCTTACAAAGCTGCCTCGAGGTGGG +AACACAACATTAATCCAAGAGCAGATCCCTGATCCTATAAAAATGTACTAGATGCAGTGG 
+GGGCATTTTAAATGAGCAGGGAAGGACAGACAGATAAACAGAAGGACAAACAGTATTGGG +ATTGGGATAAATGCTCAGCTTTTGCCCAAATCTTAGTGACTTAAGCATCACTTATTTGCT +CACGATTCTGTGGCTGGACCATTTGGTTTGGCTCACAGGGCAGGGACTGTGCTGGTCTTA +CCTGAGCAGACCTGCATGTCTGCGGTCAACTGGGTTGGCAGAGACAGAGTGACTGTCTTC +CTCCAGGAAGCAGCAGGTTAACTGGTTGGCAGAGACAGAGGGACTGAGGGACTGTCTCCC +TCCAGGAAGCAGCAGGTTAACTGGTTGGCAGAGACAGAGGGACAGAGGGACTGTCTTCCT +CCAGGAAGCAGCAGGTTGGCTCTGGTTCCTTCGTGGGGCAGCTGGTCTCCAGGGCAGCAA +GAGAGACCAAGCCCCCGTGCACATTCTACAGCCTCTGTGCACATCAGACTTGTTAATATC +CCATTGGCCAGTGCAAGTCACACGGCCAAGCCCAGATTAAGGAGTGGAAAGATGGACGCT +ATCTCCTCCTGGGAGAGGAGGCAAAGGAGGTGAGAGCATTATGTGGCCACTTATGTTTGC +AATCTACCATACTTAGCCCTTTGAGAAAAGAATTAACTGAGAAACTTGCTTCAAATAGGG +CATTCAGTAAAATGAAGCCCCAATTGAAGTAAAATGCATATATAAAAAATGAAACTGTGA +CCGATTTTAAGGACAGTATTGGCAAATATTTCTGTGCTCTTGGAGGAGAAGACCCTTATT +GGCATGACATGTCAGAGACCACAATGAAAGAATTATTTTAACTTGCATTCATAAAAATTA +AAATTATTCATTAAAAACATCGTGAATGAAATTAAAAGTCAAAATGTAAGCCAGAAAATT +ATTTACAACATATGTGTCAGGAAAAGACAATACACTTCAGACTTTGAGAGTTTACATCAG +AAAGAAAATAGCAAATGACATGATCCAAACTTGATAAAGGACATGAAAAAGAGCCAGCAC +TTAGTATGTTTTCTGAATGAATAAGTAGCCAACAGCACACGAAAATGTGTGTAATCCACT +TGTAAGCAGAGAAATGCAAATTAAAACAGTAAAGTGTCATTTTCACTTCCTGGATTGGCA +AAGGGTTTTATGTATTTTACTGACAGTGCTCAACATTAGCAGTAAACAACAAATGGTGAG +TAAATATGAGCTTCGGAACCTCAGGGAAATGATCTCCTTATTTCAACCTGCAGATTCCTT +CCTACAACCAGTGTAGAGCAGAGTACCAGGACGGGCCATTGAGCACCCTGGTGTTGAGAT +CAAGTGGCCTCTAGTCAGAGTTGGGTCAGGGCCACTGTGAGTGGGCTGCCCCCAACATGA +GTCAGCTGTCTAGGACTAGTTTATCTCTGCTTCTCACTTTACTGGTATTATGGGGCAGCT +CCTGCTGTCTTCCAATTTGGTGTCTTCCAAATCGGCACCGTCTTTTAAAGTTGAGTTTCT +TGTTATTCTCACCTGATATACCTTATTTATCCCACACCCACCCCAATAACATATCGTGCT +CAGTGTTATCTTTGAGACAACACTTGAATTTTACTCAGCCTGGAGCGCTCTTCACATGTC +TTGTCCAGATCCAGTTCGGACTCATTCTTCAGCCGTGCATCAGTAAATGGGGGCTAGGTT +AAACTGTGGTGACAAACAACCTCCAAATTTCAGTGGCTCAAAAATCTTCTTCCTCATTTA +TTTACATTTCATCATGGGTCAGGTGAGAGGTAGCTCTGTGCTGTGTCATCCTAACACAGG +AATCCAGACGGAAGGAGGGACAATCAATAAGATCCCCATTGCTATAGAAAAGAGAAAAAA +GTATGCGGAATAGCACTCTGTTTCTTGGAGATTTCTCCTGAAAAAGTCACATGTTATTTC 
+TTCTCACCTCCATTGGCAAAAAAAAAGTCATGTGGCCATGTGAAAATGTAAGTAGGCGGG +ATGGAACAGTCAGAATGCATTCATAAAATATGAACTGAAAATATCTGGAGAACAGCACCT +ATGACTACCACGAATGCCAACATGCATCCCTAACAACCCAGTGCTGTCACCCTCCAAACT +TTTTATGTCTTGCAAAGTATTAGAACTTCTTATCTGAAGCCATACCACTCAGAGGGAATG +CAAAATACATATTGACATCTCCTTTAGGATGTCCTTAGAGAATTCAAGGAAAAGAAGTTA +AATAATTTTAAAGTGCTTTTGGGTACAGCTATTTAGCACTAGAGGGTAAGATTAGACATA +GATTGTAAAGATAATAATAGGGTTAGGGATAGGATTAGGATCTGGGTCAGAGTCAGGGCC +AGAAGTATGGTTAGAGGTGGGGTCATGGTCAGGGTCGAGATCAAAGTCAGGGTCAAAGTA +AGGGTCAGAATTAGGGACCAGGATAGGGATCAGGATTTAGGTTCAGTGTCAAAGTCTTGG +GACAAGGTTAGGGTTAGAATTAGAACCAGAGCTTTGTTCTCCTCAGGACCCACCCGAGGG +TGGGTCACCATGGCTTTGGAGCGCCTGGTAGTGTGGTGTGTCCACAGTGAAGACCAGAGT +TTCATTGTCCTTAAGACTGACCTGGGGAGATGTGGCTGTAGGCCATTGAGGAAGGTGAGG +CAACAGCTTCCTGTCTGCTCCCCGTGTGCTGAGGAGGGAGTTCTGCCATGGGCTTTACTT +TCACATGTTATATTCCACAAGTCTTGTTTTACAAAAGCATCCCTTCCTTGAGGCTTCGGC +TGCTCATCGCTGCTCATCATCATAGCGTGCCATAACATATAGTAAGATTTGGGTTTGTTT +CTGGGGAGATATCTTGGTATAGAGAAAGGAGAAATGCTTAGAGCCACCATCAGGACAGTT +GGGATGAAAGTTGGGTATAGGCAGAGGCTGGAGGAAACATGTGCATCCCCTGTAAACACT +TTTATTCATGTTTTAATTACTCATTTTTCTTACAGTGTTAAATTAGTAAAGATAGTATTG +AAAAATTGAAAAGTAGGCATATTAAAACTTGCAACACTATTTAAGCCTAGATATATTATT +TGTACCTCATCAACATTTTTTATTGTGTTGAGAAAGTTTAAGGTTAATTGACAGCATATT +TCTAATAGTAGATAGAATAACATCCCTTTTATAAACATTGACATCCTACATTACATGTGT +GAACCCTGAAAATCTGAGACAGCTCTCAGATTTTTTAGAAAGTTTATTTTGCCAATCTTG +AGGATGTGCGCCTGTGATGCCTCCTCAGGAGATCCTGACAACATGGGCCCAAGGTGGTCG +GGGCACAGCTTGGTTTTATACACTTTAGGGAGACACGAGAGATCAATCAATACGTGTAAG +ATGTACATTGGTTCAGTCCAGAAAGGTGAGAAGGCCAGACAGGGGGCTTCCAGGTCACAG +GTAGGTAAGAGACAAATGGTTTCATTCTTTTGCATTGCTGATTACCCTCTCCATGTGAGG +CAATCAGGTATGCATTTATCTAGGTGATCAGACGGGTGTTTTGGATAGAATGGGAGGCGG +GTTTGCCCTAAGCAGTTCCCAGCTTGACTTTTCCCTTTAGCTTAGTGATTTTGAGTCCCC +AAGATTTATTTTCCCTTCGTAAGTGTTCCTATGAGTATTAATTATTCATTGTGTCTTTTA +TTACACAAATAAGGCACAGATTTTTAAGAAATCATCAACTTCATGGCTACCTATATAGAC +ATAATTACACAGAAGCTCAACTAAATTTGCAAACATTCCAGAGTTTGGGTTTCCAATAAT +TCTTTGTGATTCTTTAAAAGGTAAAGTATTTTTTCCCATAAAACATAGCAACATTTAAAA 
+TCACCCGTAGAATGTCCCGCCATTTTTGTTTTTCTAGTTTCCTCATTTTCTGCAAATCCT +CGCTGAGGAAATTGACTTTGAATATCCTTTTAGACTCTTTTGTTTTAGAAAGCATTGTGG +TAAAACATTGAATCATCATGGTCATAAGTTCTGTTCACATTCTTTCTTTCTTTGAATATT +TTTTCCCAGTGGCCAATATTTGATTCTGTTGTATTATGGCTAAAAGGTAGGCATGGGAAC +AAAATAAAGACAAGAAGTCTTTGGAATAATTGATCCCATCACAATGAATCAATTTGCCAT +TGGAACATGTTTTTACAAAGTCACTCTTTTGAAAATATTCAGCTATGACTTGAAACAGAG +TCTGTATGGTTAATATTTTTCCT +>ENST00000458203.2|ENSG00000236679.2|OTTHUMG00000002854.4|OTTHUMT00000346877.2|RPL23AP24-201|RPL23AP24|385|processed_pseudogene| +AGGCCAAGAAGGCAGTGTTGAAAGGTGTCCGCAGCCACACGCAAAAAAGAAGATCCGCAT +GTCACCCACCTTCAGGCGGCCCAAGACACTGCGACTCCGGAGGCAGCCCAGATATCCTCG +GAAGAGCACCCCCAGGAGAAACAAGCTTGGCCACTATGCTATCATCAAGTTTCCGCTGAC +CACTGAGTCGGCCGGAAGAAGATAGAAGAAAACAACACGCTTGTGTTCACTGTGGATGTT +AAAGCCAACAAGCACCAGATCAGACAGGCTGTGAAGAAGCTCTATGACAGTGATGTGGCC +AAGGTCACCACCCTGATTTGTCCTGATAAAGAGAACAAGGCATATGTTCGACTTGCTCCT +GATTATGATGCTTTCGATGTTGTAA +>ENST00000450983.1|ENSG00000236601.2|OTTHUMG00000002855.3|OTTHUMT00000007988.2|AL732372.1-201|AL732372.1|607|lincRNA| +TACGGCAGCTTTAGGGAGGTGCTCTGAGACCCGAAACTAGACTCGACTTTAACAGACACA +GACGACCCTGAAGATGGAAACGTTTTCTTCCAAATTGTGCTGCACGTTTTTGGCGAGAGC +ATGGGGCTGTGCGGCGTCCCCTCCCTGGCGCCCACCTGTGCCCTGCACACTGGCCTGCAC +TGTGGTGATCTCGCTTGGCCCCCACCTGATTCCCGACATACAGCAGAGGAACCTTAGGCT +CAGGTGGAACAGCCTCAACTGATTCTGTCCCTGAACTTCCGTACACAGCCCTGGAGTCGT +CTTAGAGCCATGATTTATTTAACTGTTCTTTCATTTTACAGAACATAAAATGTATTGTTT +CCAACTTTTTTCCTATGGTAAATAATACTAAAGTAAATATCTCTGTGCATGAATCTTTTT +GTATATGTTGGAATATCCTTAAGATAAGGCCCCAGAACTAAAAGTACCCTGTCAAAAGGT +GAGCATTTCCGGTTCCCCTGCTGTGCTTTGCTGCGTTGTTCTCTCCTGCTGCAACGTTCT +CACTCCACAATCCTGGGGCAGGGAGGGGAGGCCCAGCTGAGTTTGGATCATAATCCTGAA +AGACACA +>ENST00000412666.1|ENSG00000236601.2|OTTHUMG00000002855.3|OTTHUMT00000007987.2|AL732372.1-202|AL732372.1|426|lincRNA| +GAGGTGCTCTGAGACCCGAAACTAGACTCGACTTTAACAGACACAGACGACCCTGAAGGC +GAGACTGTCTGCTGGTGGGATGCTGGATGGAAACGTTTTCTTCCAAATTGTGCTGCACGT +TTTTGGCGAGAGCATGGGGCTGTGCGGCGTCCCCTCCCTGGCGCCCACCTGTGCCCTGCA 
+CACTGGCCTGCACTGTGGTGATCTCGCTTGGCCCCCACCTGATTCCCGACATACAGCAGA +GGAACCTTAGGCTCAGGTGGAACAGCCTCAACTGATTCTGTCCCTGAACTTCCGTACACA +GCCCTGGAGTCGTCTTAGAGCCATGATTTATTTAACTGTTCTTTCATTTTACAGAACATA +AAATGTATTGTTTCCAACTTTTTTCCTATGGTAAATAATACTAAAGTAAATATCTCTGTG +CATGAA +>ENST00000635159.1|ENSG00000236601.2|OTTHUMG00000002855.3|OTTHUMT00000488661.1|AL732372.1-203|AL732372.1|994|lincRNA| +CACACACAGATACGGATTCAAAGAGACATGCACACTCTGAGTTTCTGAGAGTAAGCCACT +GTCAGTTCCTGGGGTGAGCCACCAGCCACATGGACACAATTTCCTCTTTTTGATGGAAAC +GTTTTCTTCCAAATTGTGCTGCACGTTTTTGGCGAGAGCATGGGGCTGTGCGGCGTCCCC +TCCCTGGCGCCCACCTGTGCCCTGCACACTGGCCTGCACTGTGGTGATCTCGCTTGGCCC +CCACCTGATTCCCGACATACAGCAGAGGAACCTTAGGCTCAGGTGGAACAGCCTCAACTG +ATTCTGTCCCTGAACTTCCGTACACAGCCCTGGAGTCGTCTTAGAGCCATGATTTATTTA +ACTGTTCTTTCATTTTACAGAACATAAAATGTATTGTTTCCAACTTTTTTCCTATGGTAA +ATAATACTAAAGTAAATATCTCTGTGCATGAATCTTTTTGTATATGTTGGAATATCCTTA +AGATAAGGCCCCAGAACTAAAAGTACCCTGTCAAAAGGTGAGCATTTCCGGTTCCCCTGC +TGTGCTTTGCTGCGTTGTTCTCTCCTGCTGCAACGTTCTCACTCCACAATCCTGGGGCAG +GGAGGGGAGGCCCAGCTGAGTTTGGATCATAATCCTGAAAGACACAATCCCAAGCACCAT +AATGTGGAATGTTGAAATCCCTAAAGATCAAAATCCCTCAAGTCTAAAATCCCTGATATT +TCAGATGACCACAGCTACAGGGCTAGGTGCACACAATTAGTAACCGTAGCGATATACGTG +TACACGTTTCTCTTTTGACTTATTTCTTTATGGTCTGTCTTCTTATAACTGCTACACCCA +TGCCGCCGTCGTTAGTTACCTCAGTGTTTATGCAAAAATACCTGTTATCATTGCCTATTT +TATTGTGTAAAGTGGCCTATGAAATGTTCTGTTGTGTTTTTATGTTTCTCAAATACATAC +CTTTTAAAAATGTAAATAAATAACATCGACATTA +>ENST00000431321.2|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000007989.2|AL732372.2-201|AL732372.2|437|processed_transcript| +CGGCACAGGAGCCAGTCATCATCCCCAAAGACACAGCTCCGAATACCTCATCCTGAAATC +CCAAAGATCAAAATCCCAAAAGTATAAATCTGGAAAAAATAATGTCGATGTTATTTATTT +ACATTTTTAAAAGGATTATGATCCAAACTCAGCTGGGCCTCCCCTCCCTGCCCCAGGATT +GTGGAGTGAGAACGTTGCAGCAGGAGAGAACAACGCAGCAAAGCACAGCAGGGGAACCGG +AAATGCTCACCTTTTGACAGGGTACTTTTAGTTCTGGGGCCTTATCTTAAGGATATTCCA +ACATATACAAAAAGATTCATGCACAGAGATATTTACTTTAGTATTATTTACCATAGGAAA +AAAGTTGGAAACAATACATTTTATGTTCTGTAAAATGAAAGAACAGTTAAATAAATCATG +GCTCTAAGACGACTCCA 
+>ENST00000453935.1|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000007990.1|AL732372.2-202|AL732372.2|498|processed_transcript| +ATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCACTTGAACCCGGGAGGCAGAGGTTG +CAGTGAGCCGAGATCGCGCCCCTGCACTCCAGCCTGGGTGACAGAGCAAGACTCCATCTC +AAAAAAAATAAAGGAAATATGCGTCGTTGGATGCTGTATGACAATCAAGCTACTTATAAC +AAACAAAATTGAGAATGAAGGATTATGATCCAAACTCAGCTGGGCCTCCCCTCCCTGCCC +CAGGATTGTGGAGTGAGAACGTTGCAGCAGGAGAGAACAACGCAGCAAAGCACAGCAGGG +GAACCGGAAATGCTCACCTTTTGACAGGGTACTTTTAGTTCTGGGGCCTTATCTTAAGGA +TATTCCAACATATACAAAAAGATTCATGCACAGAGATATTTACTTTAGTATTATTTACCA +TAGGAAAAAAGTTGGAAACAATACATTTTATGTTCTGTAAAATGAAAGAACAGTTAAATA +AATCATGGCTCTAAGACG +>ENST00000440163.1|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000346889.1|AL732372.2-203|AL732372.2|462|processed_transcript| +TGGTGCTCTACAAGAAAAATCTGCTTTATAGATGAGGAAAGACCCTTGGGGCAGACAAGA +CAGTCCCTGTTTGACATAATTCCTTTATCTCTTCTCCCCCAGCTCTGCTGCCCCCTGCCC +CATACACGTGATGGAGCAGAAAACGTGCTGTGTGAACCTGTGACTTCAGGGCCTGTTGAC +GTGGTCGTGCTTGCATACTCTCTGGACTGGACCTCACTGTGGGAACAACAAGATCAACAA +GAGGAGCAAGAACAACATCAAGAGTCAGGGCCCGGGGGTCCTGACGGGTACAGGACGGGT +ACAGACCCACACAGGAATCCCAGAGTGTGTTCCACAGCAGGACACGCCTGCGCTGAAAGA +GTGGGCAGAAAGGAGCTGACCTGGGATTATGATCCAAACTCAGCTGGGCCTCCCCTCCCT +GCCCCAGGATTGTGGAGTGAGAACGTTGCAGCAGGAGAGAAC +>ENST00000455207.5|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000007991.1|AL732372.2-204|AL732372.2|413|processed_transcript| +CTGCAGGACAAGTTCGAGCATCTTAAAATGATTCAACAGGAGGAGATAAGGAAGCTCGAG +GAAGAGAAAAAACAACTGGAAGGAGAAATCATAGATTTTTATAAAATGAAAGCTGCCTCT +GAAGCACTGCAGACTCAGCTGAGCACCGATACAAAGAAAGACAAACATCCTCTGCTGCCC +CCTGCCCCATACACGTGATGGAGCAGAAAACGTGCTGTGTGAACCTGTGACTTCAGGGCC +TGTTGACGTGGTCGTGCTTGCATACTCTCTGGACTGGACCTCACTGTGGGAACAACAAGA +TCAACAAGAGGAGCAAGAACAACATCAAGAGTCAGGGCCCGGGGGTCCTGACGGGTACAG +GACGGGTACAGACCCACACAGGAATCCCAGAGTGTGTTCCACAGCAGGACACG +>ENST00000455464.7|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000346907.3|AL732372.2-205|AL732372.2|902|processed_transcript| 
+ATCGCAGCTGACTGCAGCCTCAACCTTCCAGGCTGAAGCGATCCTCCCACCTCAACCTCC +CACGTGGCTGAGACTACAGGTGCTTGCCACTATGCCCAACTAACATTTGGAATTTTCGTA +TACGTGGATTCCAGAGGGGTGACAGCGAAACCTGCAGGACAAGTTCGAGCATCTTAAAAT +GATTCAACAGGAGGAGATAAGGAAGCTCGAGGAAGAGAAAAAACAACTGGAAGGAGAAAT +CATAGATTTTTATAAAATGAAAGCTGCCTCTGAAGCACTGCAGACTCAGCTGAGCACCGA +TACAAAGAAAGACAAACATCCTGATCCATATGAATTCCTCTTATTAAGAAAAATAAAGCA +TCCAGGATTCAATGAAGAACTGACTATCACCTTGTTAATCATTCAGAAACATGTTGCAGG +CTTAAGCCATTTTTGATATAGATACTGAAACAATTACTTGCTAAGAGCAAACTTGAAGGT +ATGGATAAGGCCCTGAGTCATCTTCCTGAGCTGAATGATAGTTAAGCTGAATGTACGTAT +AAAATATGATTTTCTAACCACTTGCTCGCCAACAAGGAAAACTTTTAAGTAGAGCAGAAC +CTGAATAGACAAGACATTTCTTTCTTTTGGTAGAAAATGATTTACCATCACTGTGTAGTT +AATTGTAGACTAGGTAATTTTAACTTTGTGATTTATTGCCGGAGACATTTTCTTCTGTAC +TGTAAAGTGTGTGTCAAAAAAAAAAAATAGCGATTTTGGAGGATTAGGGGACTTTGATAA +ATTGCCTGCAATTCTGGCAGTATGAACTGCATATTAATTTCTCTCTTTCAAGAACATTTT +TATTTATTAATTCCTTACAAAAACTCCCTAAACTTTGGAACAGCTCTCAATTGCCTGTAT +TC +>ENST00000601814.5|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000462548.1|AL732372.2-206|AL732372.2|635|processed_transcript| +TAGGTGACCTCACAGTGACTCAAGCTACCACTTACTGTTGATTGTGACGAAATGCCAGCT +GAGGCACATGCCTTGGGAGCTAAGTGGTTGCTGCCCTTGACCACTGTGAAGACTGGTGTG +GGAAGGGTCGCTTTGGATGCACTTGAGCAGGGGTCCCCAACCCCTGAGCCATGGAGCCGC +AAGGAGCCACACAGCAGGAGGTGGGACCATCCAGTTGCAGGAAAACAAGCTTAACACGCC +CACTAATTCTACATTATGCTGCAGGACAAGTTCGAGCATCTTAAAATGATTCAACAGGAG +GAGATAAGGAAGCTCGAGGAAGAGAAAAAACAACTGGAAGGAGAAATCATAGATTTTTAT +AAAATGAAAGCTGCCTCTGAAGCACTGCAGACTCAGCTGAGCACCGATACAAAGAAAGAC +AAACATCGTAAGAAGCAATAGTTTCTCTTACTATTCTGAGAGCCTTATCATTCTACATCC +CATCTTCCTGTGAGTTTGTCTTTGTAGCATTTAACTCTAATTGCAGTTCTCATTTTAAAA +ACTGGCTTGCTTATTGTATATTTTCCCCAACTAAAGCGTGAACTCCTAGCAGGGCGTGGT +GGCTCATGCCTGTAATCTCAGCACTGTGGGAGGCC +>ENST00000445840.2|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000346882.2|AL732372.2-207|AL732372.2|183|transcribed_unprocessed_pseudogene| +CTGCAGGACAAGTTCGAGCATCTTAAAATGATTCAACAGGAGGAGATAAGGAAGCTCGAG +GAAGAGAAAAAACAACTGGAAGGAGAAATCATAGATTTTTATAAAATGAAAGCTGCCTCT 
+GAAGCACTGCAGACTCAGCTGAGCACCGATACAAAGAAAGACAAACATCGTAAGAAGCAA +TAG +>ENST00000431812.1|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000007996.2|AL732372.2-208|AL732372.2|336|processed_transcript| +GAAGATCCGTGAGGTGCCCAGAAGATCATGCAGTCATCAGTCCCACGGAGCAGCCTGCGA +GGCTGAGGCTCCTCCCACTGGACCGCCCCCCAACTGGCACCACTGCTGCCCCTGCCCCTA +CTCTCAGCCTCACGTGACTCTCGGGCAGAAGCAGTGGTGGGGCAGCCAGGGCAGCGTCAA +GAGTCTGAGCCAGCTGCAGGACAAGTTCGAGCATCTTAAAATGATTCAACAGGAGGAGAT +AAGGAAGCTCGAGGAAGAGAAAAAACAACTGGAAGGAGAAATCATAGATTTTTATAAAAT +GAAAGCTGCCTCTGAAGCACTGCAGACTCAGCTGAG +>ENST00000419160.4|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000346881.4|AL732372.2-209|AL732372.2|547|processed_transcript| +TTTTACCAGGTGCCTATTGATACCATAGTTTAATTTCTTATAACTGTTTCTTATTTCACT +TACCAACTCTGTCTTCAGTTACTCCCAGATTTTTACTGTGTGTGTACAGATGACCTTTTG +TTTAGATTGAATTGTCTCCCCAGAAGTAAGATTACTGTGAGTCATGGTGAATGGACATTC +TCCTTACCCTTGATGTAAATTGACAGGGTTTTGGGTGCCTCCCAGCTATAATCTTAGCAC +TTTGGGAGGCTAAGAGAGGAGGATTGCTTGAGGCCAAGAGTTGGAGGAGGCAGTATGGCA +GTATGGTGAGACCCTGTCTCCATTATTTTAAAAAATTGACAGGCCCAGCCTCTGCCTCCC +GTCGGCCTCTGCAGTCCCAACGTCTGCCTCACAGCAGATTCTTCACGCCCAGCATCTACC +TCACTGTGGACCCCCCAAGCCAAGCTCCCAACCTTTCAGCAGCTTCTACACACCCAGCTC +CTGCCACCCAGTGGCCTCTTTAGGCCAAGCTCATGCTTCACAAGGGCCTTTCCAGGCCCA +ACTTTTG +>ENST00000440038.7|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000346880.3|AL732372.2-210|AL732372.2|793|processed_transcript| +GCACTTGAGCAGCGGTCTCCAACCACAGGGCCACAGAGCTGGAGCTGGATCTACCATGAA +AGACTTGTGAATCCAGGAAGAGAGACTGACTGGGCAACATGTTATTCAGGGTCTCCCTCT +GTTGTCCAAGGCTGGAGTGTAGTAGTGCTATCGCAGCTGACTGCAGCCTCAACCTTCCAG +GCTGAAGCGATCCTCCCACCTCAACCTCCCACGTGGCTGAGACTACAGGTGCTTGCCACT +ATGCCCAACTAACATTTGGAATTTTCGTATACGTGGATTCCAGAGGGGTGACAGCGAAAC +GTGGGACCATCCAGTTGCAGGAAAACAAGCTTAACACGCCCACTAATTCTACATTATGCT +CCTACCTCCCGGCAGCCTCTCCAGGCCCAGAACTTTCTCCAGTCAGCCTCTACAGACCAA +GCTCATGACTCACAATGGCCTATTTAGGCCCATACCCTACGTCACGGCAGCCTCCGCAGA +TGAGGCTACTGCCTCACAACAGCCTCCACAGGCACAGCTCCATCGTTACAATGGCCTCTT 
+TAGACCCAGCTCCTGCCTCCCAGCCTTCTCTCCAGGCCCTGAACTTTCTCAAGTCGACCT +CACCAGGCCCAGCTCATGCTTCTTTGCAGCCTCTCCAGGCCCAGCTCCTGCATCTTGGTG +GCCCCTCCAGGCCCAGCCTCTGCCTCCCGTCGGCCTCTGCAGTCCCAACGTCTGCCTCAC +AGCAGATTCTTCACGCCCAGCATCTACCTCACTGTGGACCCCCCAAGCCAAGCTCCCAAC +CTTTCAGCAGCTT +>ENST00000642074.1|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000493597.1|AL732372.2-211|AL732372.2|574|processed_transcript| +AAGACTGGTGTGGGAAGGGTCGCTTTGGATGCACTTGAGCAGGGGTCCCCAACCCCTGAG +CCATGGAGCCGCAAGGAGCCACACAGCAGGAGGTGGGACCATCCAGTTGCAGGAAAACAA +GCTTAACACGCCCACTAATTCTACATTATGCTCCTACCTCCCGGCAGCCTCTCCAGGCCC +AGAACTTTCTCCAGTCAGCCTCTACAGACCAAGCTCATGACTCACAATGGCCTATTTAGG +CCCATACCCTACGTCACGGCAGCCTCCGCAGATGAGGCTACTGCCTCACAACAGCCTCCA +CAGGCACAGCTCCATCGTTACAATGGCCTCTTTAGACCCAGCTCCTGCCTCCCAGCCTTC +TCTCCAGGCCCTGAACTTTCTCAAGTCGACCTCACCAGGCCCAGCTCATGCTTCTTTGCA +GCCTCTCCAGGCCCAGCTCCTGCATCTTGGTGGCCCCTCCAGGCCCAGCCTCTGCCTCCC +GTCGGCCTCTGCAGTCCCAACGTCTGCCTCACAGCAGATTCTTCACGCCCAGCATCTACC +TCACTGTGGACCCCCCAAGCCAAGCTCCCAACCT +>ENST00000423728.6|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000346879.2|AL732372.2-212|AL732372.2|838|processed_transcript| +AGATGGGGTCTTCTTTTGTTGCCCAGGCTGGCCACAAATTCCTGGGCTCAAGTGATCCTC +CCACCTCGTCCTTGTAGAGATGAGATTTAGTTACGTCGTCCAGGCTGATCTCAAACTCCT +GGGCTAAATCGATTGTCTCACCTCAGCCTCTCAAGTATGTTATGAAGGTTATATGTTAGG +AAGGGTCCCAGGAGGTAGACCCACACAGATGGGATTTGGGCATAGGTTTGGTTTCCCAGG +GGGCAGTGCTGAGCTCTTTGCCAGTGGGAAATGGGATGCTGGTGATTTCCAGTAGGTGAC +CTCACAGTGACTCAAGCTACCACTTACTGTTGATTGTGACGAAATGCCAGCTGAGGCACA +TGCCTTGGGAGCTAAGTGGTTGCTGCCCTTGACCACTGTGAAGACTGGTGTGGGAAGGGT +CGTTTTGGATGCACTTGAGCAGGGGTCCCCAACCCCTGAGCCATGGAGCCGCAAGGAGCC +ACACAGCAGGAGGTGGGACCATCCAGTTGCAGGAAAACAAGCTTAACACGCCCACTAATT +CTACATTATGCTCCTACCTCCCGGCAGCCTCTCCAGGCCCAGAACTTTCTCCAGTCAGCC +TCTACAGACCAAGCTCATGACTCACAATGGCCTATTTAGGCCCATACCCTACGTCACGGC +AGCCTCCGCAGATGAGGCTACTGCCTCACAACAGCCTCCACAGGCACAGCTCCATCGTTA +CAATGGCCTCTTTAGACCCAGCTCCTGCCTCCCAGCCTTCTCTCCAGGCCCTGAACTTTC +TCAAGTCGACCTCACCAGGCCCAGCTCATGCTTCTTTGCAGCCTCTCCAGGCCCAGCT 
+>ENST00000616311.5|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000480470.2|AL732372.2-213|AL732372.2|728|processed_transcript| +CCCACACAGATGGGATTTGGGCATAGGTTTGGTTTCCCAGGGGGCAGTGCTGAGCTCTTT +GCCAGTGGGAAATGGGATGCTGGTGATTTCCAGTAGGTGACCTCACAGTGACTCAAGCTA +CCACTTACTGTTGATTGTGACGAAATGCCAGCTGAGGCACATGCCTTGGGAGCTAAGTGG +TTGCTGCCCTTGACCACTGTGAAGACTGGTGTGGGAAGGGTCGTTTTGGATGCACTTGAG +CAGGGGTCCCCAACCCCTGAGCCATGGAGCCGCAAGGAGCCACACAGCAGGAGGTGGGAA +CATCCAGTTGCGGGAAAACAAGCTTAACACGCCCACTGATTCTACATTATGGGTCTCCCT +CTGTTGTCCAAGGCTGGAGTGTAGTAGTGCTATCGCAGCTGACTGCAGCCTCAACCTTCC +AGGCTGAAGCGATCCTCCCACCTCAACCTCCCACGTGGCTGAGACTACAGGTGCTTGCCA +CTATGCCCAACTAACATTTGGAATTTTCGTATACGTGGATTCCAGAGGGGTGACAGCGAA +ACGTGGGACCATCCAGTTGCAGGAAAACAAGCTTAACACGCCCACTAATTCTACATTATG +CTCCTACCTCCCGGCAGCCTCTCCAGGCCCAGAACTTTCTCCAGTCAGCCTCTACAGACC +AAGCTCATGACTCACAATGGCCTATTTAGGCCCATACCCTACGTCACGGCAGCCTCCGCA +GATGAGGC +>ENST00000599771.6|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000461272.1|AL732372.2-214|AL732372.2|457|processed_transcript| +GTGGGAACATCCAGTTGCGGGAAAACAAGCTTAACACGCCCACTGATTCTACATTATGCT +CCTACCTCCCGGCAGCCTCTCCAGGCCCAGAACTTTCTCCAGTCAGCCTCTACAGACCAA +GCTCATGACTCACAATGGCCTATTTAGGCCCATACCCTACGTCACGGCAGCCTCCGCAGA +TGAGCCTACTGCCTCACAACAGCCTCCACAGGCACAGCTCCATCGTTACAATGGCCTCTT +TAGACCCAGCTCCTGCCTCCCAGCCTTCTCTCCAGGCTCTGAACTTTCTCAGGTCTCCCT +CTGTTGTCCAAGGCTGGAGTGTAGTAGTGCTATCGCAGCTGACTGCAGCCTCAACCTTCC +AGGCTGAAGCGATCCTCCCACCTCAACCTCCCACGTGGCTGAGACTACAGGTGCTTGCCA +CTATGCCCAACTAACATTTGGAATTTTCGTATACGTG +>ENST00000641845.1|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000493598.1|AL732372.2-215|AL732372.2|571|processed_transcript| +TATGTCCTGCAAGCTTAGGGGTGCTACAAGTTGACCACTGCAGCAGTAAAGATGACTCTG +AAGAATGGCGTGGGATGGTTCCTTTCAAATGCACTTGAGCAGCGGTCTCCAACCACAGGG +CCACAGAGCTGGAGGTGGACAGTCTAGTTGCAGGAAAACAAGCTCAGAGATCCCACTGAG +TCTACGTTATACTGGATCTACCATGAAAGACTTGTGAATCCAGGAAGAGAGACTGACTGG +GCAACATGTTATTCAGGTGGGAACATCCAGTTGCGGGAAAACAAGCTTAACACGCCCACT +GATTCTACATTATGCTCCTACCTCCCGGCAGCCTCTCCAGGCCCAGAACTTTCTCCAGTC 
+AGCCTCTACAGACCAAGCTCATGACTCACAATGGCCTATTTAGGCCCATACCCTACGTCA +CGGCAGCCTCCGCAGATGAGCCTACTGCCTCACAACAGCCTCCACAGGCACAGCTCCATC +GTTACAATGGCCTCTTTAGACCCAGCTCCTGCCTCCCAGCCTTCTCTCCAGGCTCTGAAC +TTTCTCAGCTGAAGCGATCCTCCCACCTCAA +>ENST00000641916.1|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000493599.1|AL732372.2-216|AL732372.2|622|processed_transcript| +CTGGGATTACAGGTGTGAGCCATCTTGCTCATTCTAGTTTAAACTTTTGAGTGGTTTGTG +TCTCCTGATTGGACTCCTACAAATACAGAATTGATGCTAGGAAGGGTACCAGGAGATAGA +CGCACACAGATGGGATTTGGGAATAGGTTTGGTTATCCAAGGAGCAGTGCTGAGCTCCTT +GCAATGGGATATGGGATGCTGGTGATTTCTAGGAAGTGAGCTCACAATGACTCAAGCTGC +CACATACTGTTGATTGTGAAATGCCAGTTGAAGCATATGTCCTGCAAGCTTAGGGGTGCT +ACAAGTTGACCACTGCAGCAGTAAAGATGACTCTGAAGAATGGCGTGGGATGGTTCCTTT +CAAATGCACTTGAGCAGCGGTCTCCAACCACAGGGCCACAGAGCTGGAGCTGGATCTACC +ATGAAAGACTTGTGAATCCAGGAAGAGAGACTGACTGGGCAACATGTTATTCAGGTGGGA +ACATCCAGTTGCGGGAAAACAAGCTTAACACGCCCACTGATTCTACATTATGGGTCTCCC +TCTGTTGTCCAAGGCTGGAGTGTAGTAGTGCTATCGCAGCTGACTGCAGCCTCAACCTTC +CAGGCTGAAGCGATCCTCCCAC +>ENST00000601486.5|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000461271.1|AL732372.2-217|AL732372.2|696|processed_transcript| +TGTTAGGAAGGGTCCCAGGAGGTAGACCCACACAGATGGGATTTGGGCATAGGTTTGGTT +TCCCAGGGGGCAGTGCTGAGCTCTTTGCCAGTGGGAAATGGGATGCTGGTGATTTCCAGT +AGGTGACCTCACAGTGACTCAAGCTACCACTTACTGTTGATTGTGACGAAATGCCAGCTG +AGGCACATGCCTTGGGAGCTAAGTGGTTGCTGCCCTTGACCACTGTGAAGACTGGTGTGG +GAAGGGTCGTTTTGGATGCACTTGAGCAGGGGTCCCCAACCCCTGAGCCATGGAGCCGCA +AGGAGCCACACAGCAGGAGGTGGGAACATCCAGTTGCGGGAAAACAAGCTTAACACGCCC +ACTGATTCTACATTATGCTCCTACCTCCCGGCAGCCTCTCCAGGCCCAGAACTTTCTCCA +GTCAGCCTCTACAGACCAAGCTCATGACTCACAATGGCCTATTTAGGCCCATACCCTACG +TCACGGCAGCCTCCGCAGATGAGCCTACTGCCTCACAACAGCCTCCACAGGCACAGCTCC +ATCGTTACAATGGCCTCTTTAGACCCAGCTCCTGCCTCCCAGCCTTCTCTCCAGGCTCTG +AACTTTCTCAGTAAGTTCAGGTAGCTGGGACTGTAGGGTCTCCCTCTGTTGTCCAAGGCT +GGAGTGTAGTAGTGCTATCGCAGCTGACTGCAGCCT +>ENST00000641579.1|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000493600.1|AL732372.2-218|AL732372.2|545|processed_transcript| 
+TATGTCCTGCAAGCTTAGGGGTGCTACAAGTTGACCACTGCAGCAGTAAAGATGACTCTG +AAGAATGGCGTGGGATGGTTCCTTTCAAATGCACTTGAGCAGCGGTCTCCAACCACAGGG +CCACAGAGCTGGAGCTGGATCTACCATGAAAGACTTGTGAATCCAGGAAGAGAGACTGAC +TGGGCAACATGTTATTCAGGTGGGAACATCCAGTTGCGGGAAAACAAGCTTAACACGCCC +ACTGATTCTACATTATGCTCCTACCTCCCGGCAGCCTCTCCAGGCCCAGAACTTTCTCCA +GTCAGCCTCTACAGACCAAGCTCATGACTCACAATGGCCTATTTAGGCCCATACCCTACG +TCACGGCAGCCTCCGCAGATGAGCCTACTGCCTCACAACAGCCTCCACAGGCACAGCTCC +ATCGTTACAATGGCCTCTTTAGACCCAGCTCCTGCCTCCCAGCCTTCTCTCCAGGCTCTG +AACTTTCTCAGGTCTCCCTCTGTTGTCCAAGGCTGGAGTGTAGTAGTGCTATCGCAGCTG +ACTGC +>ENST00000616947.2|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000480469.1|AL732372.2-219|AL732372.2|603|processed_transcript| +CCCACACAGATGGGATTTGGGCATAGGTTTGGTTTCCCAGGGGGCAGTGCTGAGCTCTTT +GCCAGTGGGAAATGGGATGCTGGTGATTTCCAGTAGGTGACCTCACAGTGACTCAAGCTA +CCACTTACTGTTGATTGTGACGAAATGCCAGCTGAGGCACATGCCTTGGGAGCTAAGTGG +TTGCTGCCCTTGACCACTGTGAAGACTGGTGTGGGAAGGGTCGTTTTGGATGCACTTGAG +CAGGGGTCCCCAACCCCTGAGCCATGGAGCCGCAAGGAGCCACACAGCAGGAGGTGAGCG +GTGTCGAGTGAGGGAGTGAGGGAAGCTTCGTCTGTATTTACAGCCACTCCCCTTTGCTCA +CATTCCCACCTGAGCTCCACCTTCTCAGATGAGCAGCAGCGTTAGATTCTCATAGGAGAA +CGCACCCTGTTGTGAACCGTGCATGTGAGGGATCTAGGTTGCGCTGTCCTTATGAGAGTC +TAATACCTATTGATCTGTCACTTTCTCCCATCACGCTCAGGTGGGAACATCCAGTTGCGG +GAAAACAAGCTTAACACGCCCACTGATTCTACATTATGGGTCTCCCTCTGTTGTCCAAGG +CTG +>ENST00000432964.1|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000346878.1|AL732372.2-220|AL732372.2|575|processed_transcript| +AGATGGGGTCTTCTTTTGTTGCCCAGGCTGGCCACAAATTCCTGGGCTCAAGTGATCCTC +CCACCTCGTCCTTGTAGAGATGAGATTTAGTTACGTCGTCCAGGCTGATCTCAAACTCCT +GGGCTAAATCGATTGTCTCACCTCAGCCTCTCAAGTATGTTATGAAGGTTATATGTTAGG +AAGGGTCCCAGGAGGTAGACCCACACAGATGGGATTTGGGCATAGGTTTGGTTTCCCAGG +GGGCAGTGCTGAGCTCTTTGCCAGTGGGAAATGGGATGCTGGTGATTTCCAGTAGGTGAC +CTCACAGTGACTCAAGCTACCACTTACTGTTGATTGTGACGAAATGCCAGCTGAGGCACA +TGCCTTGGGAGCTAAGTGGTTGCTGCCCTTGACCACTGTGAAGACTGGTGTGGGAAGGGT +CGTTTTGGATGCACTTGAGCAGGGGTCCCCAACCCCTGAGCCATGGAGCCGCAAGGAGCC 
+ACACAGCAGGAGGTGGGAACATCCAGTTGCGGGAAAACAAGCTTAACACGCCCACTGATT +CTACATTATGCTCCTACCTCCCGGCAGCCTCTCCA +>ENST00000608420.1|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000472556.1|AL732372.2-221|AL732372.2|438|processed_transcript| +GAAAGACTTGTGAATCCAGGAAGAGAGACTGACTGGGCAACATGTTATTCAGAATCTCCC +TGTGCCATCCAGGCTGGAGTGCAGTGATGTGATCATAGCTCACTATAGCTTTGGCCTTCT +GAGATCAAGCAATCCTCCCATCTCAACCTCCCAAGTAGCTAGGACTACACATGCATGTCA +CCCATGCCCAGATCATTTTTGTAGAGTCAGAGTTTCACCGTGGTGGCCAGGTTGGCCATG +TTGGCCAGATGGGGTCTTCTTTTGTTGCCCAGGCTGGCCACAAATTCCTGGGCTCAAGTG +ATCCTCCCACCTCGTCCTTGTAGAGATGAGATTTAGTTACGTCGTCCAGGCTGATCTCAA +ACTCCTGGGCTAAATCGATTGTCTCACCTCAGCCTCTCAAGTATGTTATGAAGGTTATAT +GTTAGGAAGGGTCCCAGG +>ENST00000641303.1|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000493601.1|AL732372.2-222|AL732372.2|384|processed_transcript| +AAGATTCCCTTGAGAACAAGTACTGTCCCTAGTTTCCCAGTGCTGGAATATAGAAAATGG +ATGGACAACTGGACCTACCATGAAAGACTTCTGAATCCAAGAAGAGAAACTGACTGGGCA +ACATGTTATTCAGAAACAGGACCTTGCCCTGTCACTCAGGATGGAGTTCAGTGGTCCTAT +CATGGCTCATTATAGCCTCAAACTCCCAGGCTCAAGCAATCCTACCATGTCAGCCTTCCC +AGTAGCTGGGACTACAGAGACGAGGTTTCGCCATGTTTCCCAGACTGTTCTCAAACTCCT +GAGCTCAAAGCAGTCCACCCACCTTGGCCTCCCAGAGTTCTGGGATTACAGCTGGATCTA +CCATGAAAGACTTGTGAATCCAGG +>ENST00000641063.1|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000493602.1|AL732372.2-223|AL732372.2|473|processed_transcript| +CCTAGTTTCCCAGTGCTGGAATATAGAAAATGGATGGACAAGTAAATCCCACTCAGCACC +CATAGTCCAGCTGGACCTACCATGAAAGACTTCTGAATCCAAGAAGAGAAACTGACTGGG +CAACATGTTATTCAGAAACAGGACCTTGCCCTGTCACTCAGGATGGAGTTCAGTGGTCCT +ATCATGGCTCATTATAGCCTCAAACTCCCAGGCTCAAGCAATCCTACCATGTCAGCCTTC +CCAGTAGCTGGGACTACAGGAAGTGAGCTCACAATGACTCAAGCTGCCACATACTGTTGA +TTGTGAAATGCCAGTTGAAGCATATGTCCTGCAAGCTTAGGGGTGCTACAAGTTGACCAC +TGCAGCAGTAAAGATGACTCTGAAGAATGGCGTGGGATGGTTCCTTTCAAATGCACTTGA +GCAGCGGTCTCCAACCACAGGGCCACAGAGCTGGAGCTGGATCTACCATGAAA +>ENST00000641049.1|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000493603.1|AL732372.2-224|AL732372.2|519|processed_transcript| 
+CATGAAAGACTTCTGAATCCAAGAAGAGAAACTGACTGGGCAACATGTTATTCAGAGATG +GGACCTCAGTATGTTGCCATGGCTGACCTTGAACTCCTGCACTCAAGGGATTTTCCTACC +CTGGCCTCCCAAAGTATTGGTATTACAGGCATGAGCCATTGTGCCCACCGTCTCTGGTTC +TTAACCTTCTGCCTCCCTCTTCCAGTTTTAAAGAATGCTTGTAATTACATGGGCTCTCCT +AGATACTCCAGGATAATCTTGTTTTAAGGTCAGCTGATGAGCAACATTAATTTTATCTGC +ACTCTTAATTCCCCCTTCCTATGTAATTGTGCTGTGTAACATAGGACATGAGCAATTGGT +GGCGGTGGGGGTTATTACTTTGGCCACCACAGTAACTATTTTATGCCAGGTACTCAGCTA +AGCACTGGTGAATTAAGCATGAATAACACACACTCCTTAATCTCCATCCATTCATGGGAG +GAGCACTTCACCTGCCATGCTCCTGAGAATCTCGGGAGT +>ENST00000642124.1|ENSG00000237094.12|OTTHUMG00000002857.7|OTTHUMT00000493604.1|AL732372.2-225|AL732372.2|456|processed_transcript| +TCTACTAATGGAATTCCTGGCCTTGAGAGGATGTTACTTTAGAAGGAAAGGATTTTTTTG +TTATTAAAAGAGCCACCTAAGCTGAAGATTCCCTTGAGAACAAGTACTGTCCCTAGTTTC +CCAGTGCTGGAATATAGAAAATGGATGGACAACTGGACCTACCATGAAAGACTTCTGAAT +CCAAGAAGAGAAACTGACTGGGCAACATGTTATTCAGAAACAGGACCTTGCCCTGTCACT +CAGGATGGAGTTCAGTGGTCCTATCATGGCTCATTATAGCCTCAAACTCCCAGGCTCAAG +CAATCCTACCATGTCAGCCTTCCCAGTAGCTGGGACTACAGGTAAGCATCGTGACACTCA +GTGAATTTTGTTTTTATTTTGTTGTAGAGATGGGACCTCAGTATGTTGCCATGGCTGACC +TTGAACTCCTGCACTCAAGGGATTTTCCTACCCTGG +>ENST00000437905.2|ENSG00000269732.1|OTTHUMG00000002859.2|OTTHUMT00000007998.2|WBP1LP7-201|WBP1LP7|363|processed_pseudogene| +GACTCCTGGGTGGCATGGAGCTCTTGCACCTCTAGGCACTGCCCAGCCCTGTGTCAGCCA +GGGCTGAACCCCCACAGGATAAGGAAGCCTGTGTGTGTACCAACAATCAAAGCTACATCT +GTGACACAACAGGACACTGCTATGGGCAGTCTCAGTGTTGTAACTACTACTATGAACATT +GGTGGTTCTGGCTCGCGTGGACCATCACCATCATCCTGAGCTGCTGCTGTGTCTGCCACC +ACAGCCAAGCCAGCCCTCAAGTCCAGCAGTAGCAACATGAAATCAACCTGCCTGCCTATC +CAGAAGCCCGCAATTACTCAGTGCTACCATTTTATTTCACCAAACTATTTATTACCTTCT +TAT +>ENST00000426406.3|ENSG00000284733.1|OTTHUMG00000002860.3|OTTHUMT00000007999.3|OR4F29-201|OR4F29|995|protein_coding| +AGCCCAGTTGGCTGGACCAATGGATGGAGAGAATCACTCAGTGGTATCTGAGTTTTTGTT +TCTGGGACTCACTCATTCATGGGAGATCCAGCTCCTCCTCCTAGTGTTTTCCTCTGTGCT +CTATGTGGCAAGCATTACTGGAAACATCCTCATTGTGTTTTCTGTGACCACTGACCCTCA 
+CTTACACTCCCCCATGTACTTTCTACTGGCCAGTCTCTCCTTCATTGACTTAGGAGCCTG +CTCTGTCACTTCTCCCAAGATGATTTATGACCTGTTCAGAAAGCGCAAAGTCATCTCCTT +TGGAGGCTGCATCGCTCAAATCTTCTTCATCCACGTCGTTGGTGGTGTGGAGATGGTGCT +GCTCATAGCCATGGCCTTTGACAGATATGTGGCCCTATGTAAGCCCCTCCACTATCTGAC +CATTATGAGCCCAAGAATGTGCCTTTCATTTCTGGCTGTTGCCTGGACCCTTGGTGTCAG +TCACTCCCTGTTCCAACTGGCATTTCTTGTTAATTTAGCCTTCTGTGGCCCTAATGTGTT +GGACAGCTTCTACTGTGACCTTCCTCGGCTTCTCAGACTAGCCTGTACCGACACCTACAG +ATTGCAGTTCATGGTCACTGTTAACAGTGGGTTTATCTGTGTGGGTACTTTCTTCATACT +TCTAATCTCCTACGTCTTCATCCTGTTTACTGTTTGGAAACATTCCTCAGGTGGTTCATC +CAAGGCCCTTTCCACTCTTTCAGCTCACAGCACAGTGGTCCTTTTGTTCTTTGGTCCACC +CATGTTTGTGTATACACGGCCACACCCTAATTCACAGATGGACAAGTTTCTGGCTATTTT +TGATGCAGTTCTCACTCCTTTTCTGAATCCAGTTGTCTATACATTCAGGAATAAGGAGAT +GAAGGCAGCAATAAAGAGAGTATGCAAACAGCTAGTGATTTACAAGAGGATCTCATAAAT +GATATAATAAGCCCTTCTCATTAAACATGATATGG +>ENST00000432723.3|ENSG00000233653.3|OTTHUMG00000057429.3|OTTHUMT00000127609.3|CICP7-201|CICP7|2477|processed_pseudogene| +CGGCCCCCAGGCCTGCGTTCAGTGAGGCCTCCCGTGGCGTCAGCATGTTCGTGTGGAGGA +ATGTGGAAGGTCACTCTGCGGCCGTGTTCTCCTGGTACTCCATCCCCTTCCTGACCCCTC +CCTGCAGCCACACGAGGCCCAGCAACCTGCCAGTCACTCAGTGGCCTCCAACCAGAGAAA +ACAACCTGCCAAGTTGGCAGCCGTTGCTCATGAGCGTCCACCAGGTGGGACAGGGAGTGT +TGACCCTGGGCGGCCCCCTGGAGCCACCTGCCCTGAAAGCCCAGGGCCCGCAACCCCACA +CACTTTGGGGGTGGTGGAACCTGGTAAAAGCTCACCTCCCACCATGGAGGAGGAGCCCTG +GGCCCCTCAGGGGAGTCCCTGCTGGACAGTGAGACAGAGAATGACCATGATGATGCTTTC +CTCTCCATCATGTCTCCTGACACCCAGTTGCCTCTACCACTCAGATGATGTCAGGCCCAG +TCCCTCAGTGCCCTGCGCAAGGAACAGGACTCATCTTCTGAGAAGGATGGACGCAGCCCC +AACAAATCAGACAAGGACCACATCCGGTGGCCCATGAGTGGCGCTCATGATCTTCAGCAG +GCGGCACCAGGCCCTGGCGGGGCGCACCAGGGTCACCCCAACCAGGATAACCGGACCGTC +AGCCAGATGCTGAGCGAGCGGTGGTACACCCTGGGGCCCAATGAGATGCAGAAATACAAC +CTGGCCTTCCAGGTGAAGGTGGCCCACTTGCAACAAGGACCGAAAGAAGTCCAGCTCAGA +GGCCAAGCCCACAAGCCAGGGGCTAGCAGGAGTGTAACAAGGGCTCGTGGGAGCGGAGCA +TATCAGAGACGGGCACGGCCACTGCCCCTGGGGTGTCCTCTGAACTCCTGTCAGTTGCAG +CCCAAACACTCCAGAGCTCGGATACCAAGGAGCAGCTTCTGTGGGGCAGAACGGCTGCAC 
+ACAGTCAGGGAACCTGGCTCAGCCTGGCCCAAGCCTTCTCCCACAGCGGGGTACACAGCC +TGGACGGCAGGGAAATAGACCGTCAGGCACTACGGGAACTGACACAGGTGGTGTCTGGCA +CTGCATCATACTCTGGCCCAAAGCCTTCTACTCAGCATGGAGCTCCAGGCCACTTTGCAG +CCCCTGGTGAGGGAGGTGACCCGTGGGCAGCCCTGCTGCCGCCCACGTGAGCTGCTCATT +CCCAGCACATGGCCAGCGAGGTCATAGCGAGTGACGAAGAGCACACGGTCATCCATGAGG +AGGAGGGGGTGATGATGTCATTGCTGATGATGGCTTTAGCACCACCGACACCGATCTCAA +GTTCAAGGAGTGGGTGACCGACTGAGAGTGGGGACAACTCTGGGGAGGAGCCAGAGGGCA +ACAAGGGCTTTGGTGGGAAGGTATTTGCACCTGTCATTCCTTCCTCCTTTACTCCTGCCG +CCCCTTGCTGGATCCTGAGCCCCCAGGGTCCCCCGATCCACCTGCAGCTTTTGGCAGTCT +ATGGTCACACCCTGTCCTCCTCCTACACGTACTCGGATGCTTCCTCCTCAACCTTGGCAC +CCACCTCCTTCTTACTGGGCCCAGGAGCCTTCAAAGCCCAGGAGTCTGGTCAACGCAGCA +GAGCGGGCCCCCTACGGCCCCAACCCCTGGGGATGGGGGCCCAGGGACGCCTTCCAAGGT +GGCCTGTTTCCTCCCAATGGATCCTGCCACCTTCTGGTGCAAGAGACCTGAAAGTGTGGG +CGACCTGGAGCTACCAGGCTCCTCAGTCATCAGGGTCCCTCCCAACACTAAGGCTTTCCT +AGGCAGGAGCTGGGCTGAGCCACCCGGGGGGCAGAGCCTGAAGAGAAACTGACTGGGCTT +TCGGGGTCGGGGCAGAGGGAACCCCACGGACATGGATCCCACACTGGAGGACCCCACCGC +GCCCAAATGCAAGACGAGAAGATGCTCCAGCTGCAGTCCAAAGCCCAACACCCCCAAGTG +TGCCATGTGTGATGGGGACAGCTTCCCCTTTGCCTGTACAGGTGGAGAAGCCGAGGACAG +GCTCAGGGAACCGGAGACCGAGAAGGCGCTGTCCTCTTCACTGCACGTACCCTGGACCAG +TGCCGGCCCTGATCATGCAGCTCTTCCAGGCCCACTGCTTCTTCCTGTCCACTAGGCCAC +AGCCGCCCTCCAGGCCCACTATGCACACATCTTCCCCTCCAAGGTTTGTTCTGCCCCTGC +CCTGACTCCCAGCCCTGTGGGGGTCCTGACCGCACCTCACCTGGCTCAGACTCTTGACGC +TGCCCTGGTCCACTGCTGCTGCCCCTGCCCTGACTCCCAGCCCTGCCTGACCCCACCTCA +ACCTGCTCAGGCTCTGGCACAACCCTGGCTGCCCTGCCACTGCCTCTGCCCCAGAGTTGG +GGCCTTGACAGCCTGGTTGGAAGGGGACACCCCAGCCCTGCCTCAACACCTGGGGGTCTC +CATAACTACCACAGGCA +>ENST00000514436.1|ENSG00000250575.1|OTTHUMG00000002861.2|OTTHUMT00000008000.2|AL732372.3-201|AL732372.3|1239|unprocessed_pseudogene| +ATGGGGCTCATTCCTGACAACGGCCTTTCCAGGCCCAGTTTTTCCCTTCCGGCGGCCTCT +CCGGGCCCAGAACCTCCTCAAGTCGGCCTCTCCAGACCCACTTGCACCCTCCGGGCGTTC +TCTCCGGGCCCAGCTCTTCTTCCTGGTTGGGTCTCCAGGCCCGATTCCTGCCTCTCAACA +ACCTCTTTGGACTCAGTGCCTACCCATCTCCTGGCGGCCTTGGTCGGCCCACAGCTTCCT 
+CAAGCCAAGCTCCCCAGGCCCAGGTCAGGCCTCACGGTGGCCTCTCCAGGATGAGCTCCT +GCCCTCCGATGGCATCTCCAGGCCCCAAATGGTCTCCGGTCGGTGGGCTCCTCCACGCCA +AGGTTGGGCCTCCCGGCGACCGCCGCAGGCCCAAGTTGTCCTGAAGTCGGGCTCTCCCGG +CCCTGCCTCCCAGCAAGTAAGCAAGCTCTTTTGGCTCAACTCCTGCCCAGCTCCTGAAGA +GCTTGGTTGCAGAAACTTTGGGGTCTACAAACGCAGGCGGGAGCTGAGCCAAAAGAGCTT +GTTTGCTGGGAGGTGGGAGATGCAGCCAGGAGGAACAGCTGGGCAATGCGGGAGGCAGAG +GCCAGGCCTCCTTAAGTTGGCCTCTCAGACCCACTTGCAGCCTCCCGGCGCCCCCTCCGG +GCCCAGCTCTTCCTCCCGGCTGCATCTCCAGGCCGGACTCTGGCCCGACTCCAGGTCCCA +ACAACGTCTTTGGACTCAGCTCCTGCCCAGCTCCCAGCGGCCCTGGTAGGCCCACAACTT +CCCTAAGCCAAGCTCCCCAGGCCCAGCTCAGGCCTCGCGGTGGCCTCTCCAGGCTCAGCT +CCTGGCCCTCCGATGACATCTGCAGGCCCCAAATGGCCTCCGGTCGGTGGGCTCCTCTAG +GCCCAGCTTGGGCCTCCCGGCGGCCTCCGCAGGCCCAAATCGTCCCGAAGTCAGTCTCTC +CAGGCTTAGCTCCAGCCTCCCGGCGGCCTCTGCAGGCCCAAGTCGTCCTCAAGTCGGCCT +GGAAGTGGGCCTGGAAGAGCAGCAAGTCGGCCTCCCTGGGCCCAGCTCCGTCCTCTCGAC +GGCCTCTCCAGGTGCAAAACTTCCTCGAGTCAGCCTCTCCAGGCCCAGCTCCTCCTGCCT +CCCAGTGGCCTCTTTCGGCCCAGCCCAGCTCATGGCTCTCGGCGGCCTTCCCAGGCCCCG +CTTTTGACTTTTGGCAGCCTCTTCAGGCGCAGAACTTGA +>ENST00000614007.1|ENSG00000278757.1|-|-|U6.90-201|U6|104|snRNA| +ATGCTTGCCTCAGTAGCACACATACTTAAGTTGGAACAATAGAGAGATTGGCACGGCCTC +TGTGAAAGAATGACATGCAAATTTGTGAAGCATTCCATATTTTT +>ENST00000634833.2|ENSG00000230021.9|OTTHUMG00000191652.2|OTTHUMT00000488646.2|AL669831.3-201|AL669831.3|906|processed_transcript| +ACTCACTATAACCTCAAACTCCTGGGCTCAAGCAATCCTCCCACCTCAGCCTCCGGAGTA +GCTGGGACTACAGCTCTGCTGCCCCCTGCCCCATACACGTGATGGAGCAGAAAACGTGCT +GTGTGAACCTGTGACTTCAGGGCCTGTTGACGTGGTCGTGCTTGCATACTCTCTGGACTG +GACCTCACTGTGGGAACAACAAGATCAACAAGAGGAGCAAGAACAACATCAAGAGTCAGG +GCCCGGGGGTCCTGACGGGTACAGGATGGGTACAGACCCACACAGGAATCCCAGAGTGTG +TTCCACAGCAGGACACGCCTGCGCTGAAAGAGTGGGCAGAAAGGAGCTGACCTGGGATTA +TGATCCAAACTCAGCTGGGCCTCCCCTACCTGCCCCAGGATTGTGGAGTGAGAACGTTGC +AGCAGGGGAGAACAACGCAGCAAAGCACAGCAGGGGAACCGGAAATGCTCACCCTTTGAC +AGGACCAAACCAATGCACAACTCCTACGTACTGATGGTGGTCTTACGTTTCCCTAAGTTT +CTGCCGACTAAACTGTGCACACGTTCTCAGGACCTCCTGAAGCTGCGTCACAGGCGCTGA 
+TCAAAGAACACAACCAAGAGTTTGGCCTTTTCTTCAGCACTGGGAATTGTGATCCAAAGC +TTTTCCTGATGAGGCACAAAGTTGGAGAAACAAAACGCAAACTAAGCAACAATGAAACAG +AACAGAGTGAATCTGCTGTAGCTCAAGAGAGGACGTAGCTGCCCCCACCCCGCATCCCCG +GGCTCGGGTTTGCCTTGCTGACCTCTGCTGCCACCTGGTGCCGCACAGAGAAACTGAGGA +GAAACCACATCAGTCTCCTTCAGCCTCAGCTTCACATCTGTGGGTCAAGCAACCCTTTCA +GAAGCT +>ENST00000417636.2|ENSG00000230021.9|OTTHUMG00000191652.2|OTTHUMT00000006453.2|AL669831.3-202|AL669831.3|553|processed_transcript| +AATATCTCGTCATGGACTGTGCCCCGCTCGAGCCTCTCCACAGGCAGCCGGAAGGAAAGT +GGAGGGAGCTGCTCCTTTCCGTAGCCGGGGTGCCCACCCCAACCAGGCTGCCTCTGCCAC +CCAAGACAGAGGTTCTCTGATAATAATTTGTGGGGCTTGTTTCCAGAGACCACACCTGAA +GCTGCCAACTCCCCGGAGGGAAGGTCCTGATTAATGGCCGATGAATTTCTCCTTAAGGCC +CTGAAACTGCCTACTCAGAACCAAGCCAGTTTTTCCTGCCTGTCCTGTTTGGGCAGGCAG +AGGAGGCAGCTAGAAACCCATTATGCAGGGGATGGGGACCAAACCAATGCACAACTCCTA +CGTACTGATGGTGGTCTTACGTTTCCCTAAGTTTCTGCCGACTAAACTGTGCACACGTTC +TCAGGACCTCCTGAAGCTGCGTCACAGGCGCTGATCAAAGAACACAACCAAGAGTTTGGC +CTTTTCTTCAGCACTGGGAATTGTGATCCAAAGCTTTTCCTGATGAGGCACAAAGTTGGA +GAAACAAAACGCA +>ENST00000419394.2|ENSG00000230021.9|OTTHUMG00000191652.2|OTTHUMT00000008017.2|AL669831.3-203|AL669831.3|491|processed_transcript| +GACAAGTTCGAGCATCTTAAAATGATTCAACAGGAGGAGATAAGGAAGCTCGAGGAAGAG +AAAAAACAACTGGAAGGAGAAATCATAGATTTTTATAAAATGAAAGCTGCCTCTGAAGCA +CTGCAGACTCAGCTGAGCACCGATACAAAGAAAGACAAACATCCTGATCCATATGAATTC +CTCTTATTAAGAAAAATAAAGCATCCAGGATTCAATGAAGAACTGACTATCACCTTGTTA +ATCATTCAGAAACATGTTGCAGGCTTAAGCCATTTTTGATATAGATACTGAAACAATTAC +TTGCTAAGAGCAAACTTGAAGGTATGGATAAGGCCCTGAGTCATCTTCCTGAGCTGAATG +ATAGTTAAGCTGAATTAACAATTTGGACAAGACAGCAAATGCTATTGTCCAAGTTTTCTA +AAGAAGAATCTGAAGTGAAATGACATCAAGAGACCTATCAAGACCTGTATCCAGGAAAAG +ACCAAACCAAT +>ENST00000440196.3|ENSG00000230021.9|OTTHUMG00000191652.2|OTTHUMT00000493605.1|AL669831.3-204|AL669831.3|1022|processed_transcript| +GGGGAAGTATGTAGGAGTTGAAGATTAGTCCGCCGTAGTCGGTGTATTCGTAGGTTCAGT +ACCATTGATGGCCAATTGATTTGATGACCTTAGTTTAGGTATTGGGGCCAAAGGATGGAT +GACCATTTCAAACGATCCAGGCTAAGCCAGGAGGAGAGCTCAAAGTCTGATCTGCTCTGC 
+TGCCCCCTGCCCCATACACGTGATGGAGCAGAAAACGTGCTGTGTGAACCTGTGACTTCA +GGGCCTGTTGACGTGGTCGTGCTTGCATACTCTCTGGACTGGACCTCACTGTGGGAACAA +CAAGATCAACAAGAGGAGCAAGAACAACATCAAGAGTCAGGGCCCGGGGGTCCTGACGGG +TACAGGATGGGTACAGACCCACACAGGAATCCCAGAGTGTGTTCCACAGCAGGACACGCC +TGCGCTGAAAGAGTGGGCAGAAAGGAGCTGACCTGGGATTATGATCCAAACTCAGCTGGG +CCTCCCCTACCTGCCCCAGGATTGTGGAGTGAGAACGTTGCAGCAGGGGAGAACAACGCA +GCAAAGCACAGCAGGGGAACCGGAAATGCTCACCCTTTGACAGGGTACTTTTAGTTCTGG +GGCCTTATCTTAAGGATATTCCAACATATACAAAAAGATTCATGCACAGAGATATTTACT +TTAGTATTATTTACCATAGGAAAAAAGTTGGAAACAATACATTTTATGTTCTGTAAAATG +AAAGAACAGTTAAATAAATCATGGCTCTAAGACGACTCCAGGGCTGTGTACGGAAGTTCA +GGGACTGAATCAGTTGAGGCTGTTCCACCTGAGCCTAAGCTTCCTCTGCTGTATGTCAGG +AATCAGGTGGGGGCCAAGCGAGATCACCACAGTGCAGGCCAGTGTGCAGGGCACAGGTGG +GCGCCAGGGAGGGGACGCCGCACAGCCCCATGCTCTCGCCAAAAACGTGCAGCACAATTT +GGAAGAAAACATTTCCATCTGTTAATAAAGAGCAACGGCCTCTGGTCATAAGTGACACAT +GA +>ENST00000641296.1|ENSG00000230021.9|OTTHUMG00000191652.2|OTTHUMT00000493606.1|AL669831.3-205|AL669831.3|1127|processed_transcript| +GGCTGGCCCAGTTCGGCTCGAATAAGGAGGCTTAGAGCTGTGCCTAGGACTCCAGCTCAT +GCGCCGAATAATAGACCTTAGTTTAGGTATTGGGGCCAAAGGATGGATGACCATTTCAAA +CGATCCAGGCTAAGCCAGGAGGAGAGCTCAAAGTCTGATCTGGGCTGGACTCTTGGTCTG +TCGTCCAGGATAGAGGGCAGGGATGCCATCACTACTCACTATAACCTCAAACTCCTGGGC +TCAAGCAATCCTCCCACCTCAGCCTCCGGAGTAGCTGGGACTACAGCTCTGCTGCCCCCT +GCCCCATACACGTGATGGAGCAGAAAACGTGCTGTGTGAACCTGTGACTTCAGGGCCTGT +TGACGTGGTCGTGCTTGCATACTCTCTGGACTGGACCTCACTGTGGGAACAACAAGATCA +ACAAGAGGAGCAAGAACAACATCAAGAGTCAGGGCCCGGGGGTCCTGACGGGTACAGGAT +GGGTACAGACCCACACAGGAATCCCAGAGTGTGTTCCACAGCAGGACACGCCTGCGCTGA +AAGAGTGGGCAGAAAGGAGCTGACCTGGGATTATGATCCAAACTCAGCTGGGCCTCCCCT +ACCTGCCCCAGGATTGTGGAGTGAGAACGTTGCAGCAGGGGAGAACAACGCAGCAAAGCA +CAGCAGGGGAACCGGAAATGCTCACCCTTTGACAGGGTACTTTTAGTTCTGGGGCCTTAT +CTTAAGGATATTCCAACATATACAAAAAGATTCATGCACAGAGATATTTACTTTAGTATT +ATTTACCATAGGAAAAAAGTTGGAAACAATACATTTTATGTTCTGTAAAATGAAAGAACA +GTTAAATAAATCATGGCTCTAAGACGACTCCAGGGCTGTGTACGGAAGTTCAGGGACTGA +ATCAGTTGAGGCTGTTCCACCTGAGCCTAAGCTTCCTCTGCTGTATGTCAGGAATCAGGT 
+GGGGGCCAAGCGAGATCACCACAGTGCAGGCCAGTGTGCAGGGCACAGGTGGGCGCCAGG +GAGGGGACGCCGCACAGCCCCATGCTCTCGCCAAAAACGTGCAGCACAATTTGGAAGAAA +ACATTTCCATCTGTTAATAAAGAGCAACGGCCTCTGGTCATAAGTGA +>ENST00000357876.6|ENSG00000230021.9|OTTHUMG00000191652.2|OTTHUMT00000006709.3|AL669831.3-206|AL669831.3|1702|processed_transcript| +CACACCGTGAGCTGCTGAGACGGCACCCGCGTGAGTGTCGCAGTTTCCACACCGTGAGCT +GCTGAGACGGCACCCGCGTGAGTGTCGCAGTTTCCACACCGTGAGCTGCTGAGACGGCAC +CCGCGTGAGTGTCGCAGTTTCCACACCGTGAGCTGCTGAGACGGCACCCGCGTGAGTGTC +GCAGTTTCCACACCGTGAGCTGCTGAGACGGCACCCGCGTGAGTGTCGCAGTTTCCACAC +CGTGAGCTGCTGAGACGGCACCCGCGTGAGTGTCGCAGTTTCCACACCGTGAGCTGCTGA +GACGGCACCCGCGTGAGTGTCGCAGTTTCCACACCGTGAGCTGCTGAGACGGCACCCGCG +TGAGTGTCGCAGTTTCCACACCGTGAGCTGCTGAGACGGCACCCGCGTGAGTGTCGCAGT +TTCCACACCGTGAGCTGCTGAGACGGCACCCGCGTGAGTGTCGCAGTTTCCACACCGTGA +GCTGCTGAGACGGCACCCGCGTGAGTGTCGCAGTTTCCACACCGTGAGCTGCTGAGACGG +CACCCGCGTGAGTGTCGCAGTTTCCACACCGTGAGCTGCTGAGACGGCACCCGCGTGAGT +GTCGCAGTTTCCACACCGTGAGCTGCTGAGACGGCACCCGCGTGAGTGTCGCAGTTTCCA +CACCGTGAGCTGCTGAGATGGCACCCGCGTGAGTGTCGCAGTTTCCACACCGTGAGCTGC +TGAGATGGCACCCGTGTGAGTGTCGCAGTTTCTACACGTGCCTCATTGCGTGTAAGATGC +TCAAGTGAGAGGAAGCTGGTGAACGGGTCTGTGGGAAGTTGCAGTACTGTCTTTGCAACT +CTTCTGGACATCTTTTTTTTTTTTTTAAATAAAACATTTTTAACGTGAAAATATGCAGAG +CACGGTGGCTCGCACCTGTAATCCCAGCACTTTGGGAGGCCGAGGCAGGTGGATCATGAG +GTCAGGAGTTCAAGACCAGCCTAGCCAACATGGTGAAACCCCATCTCTACTAAGAATACA +AAAATTAGCTGGGCGTGGTGATGGGCATCTGTAATCCCAGCTACTCGGGAGGCTGAGGCA +GGAGAATCACTTGAACCCGGGAGGCAGAGGTTGCAGTGAGCCGAGATCGCGCCCCTGCAC +TCCAGCCTGGGTGACAGAGCAAGACTCCATCTCAAAAAAAATAAAGGAAATATGCGTCGT +TGGATGCTGTATGACAATCAAGCTACTTATAACAAACAAAATTGAGAATGAAGGATTATG +ATCCAAACTCAGCTGGGCCTCCCCTACCTGCCCCAGGATTGTGGAGTGAGAACGTTGCAG +CAGGGGAGAACAACGCAGCAAAGCACAGCAGGGGAACCGGAAATGCTCACCCTTTGACAG +GGTACTTTTAGTTCTGGGGCCTTATCTTAAGGATATTCCAACATATACAAAAAGATTCAT +GCACAGAGATATTTACTTTAGTATTATTTACCATAGGAAAAAAGTTGGAAACAATACATT +TTATGTTCTGTAAAATGAAAGAACAGTTAAATAAATCATGGCTCTAAGACGACTCCAGGG +CTGTGTACGGAAGTTCAGGGACTGAATCAGTTGAGGCTGTTCCACCTGAGCCTAAGCTTC 
+CTCTGCTGTATGTCAGGAATCAGGTGGGGGCCAAGCGAGATCACCACAGTGCAGGCCAGT +GTGCAGGGCACAGGTGGGCGCC +>ENST00000635509.2|ENSG00000230021.9|OTTHUMG00000191652.2|OTTHUMT00000488653.2|AL669831.3-207|AL669831.3|941|processed_transcript| +GGAAGTTGGGTGGAAGCAGCGCGGACCCACGGCGCACCGAACGCACTCCAACAGAACCCG +ACGCAGACACGCGCTTTCAACCGGCGGAGACACTGGCAGGCTGATCCATATGAATTCCTC +TTATTAAGAAAAATAAAGCATCCAGGATTCAATGAAGAACTGACTATCACCTTGTTAATC +ATTCAGAAACATGTTGCAGGCTTAAGCCATTTTTGATATAGATACTGAAACAATTACTTG +CTAAGAGCAAACTTGAAGGTATGGATAAGGCCCTGAGTCATCTTCCTGAGCTGAATGATA +GTTAAGCTGAATGACTGGACCTCACTGTGGGAACAACAAGATCAACAAGAGGAGCAAGAA +CAACATCAAGAGTCAGGGCCCGGGGGTCCTGACGGGTACAGGATGGGTACAGACCCACAC +AGGAATCCCAGAGTGTGTTCCACAGCAGGACACGCCTGCGCTGAAAGAGTGGGCAGAAAG +GAGCTGACCTGGGATTATGATCCAAACTCAGCTGGGCCTCCCCTACCTGCCCCAGGATTG +TGGAGTGAGAACGTTGCAGCAGGGGAGAACAACGCAGCAAAGCACAGCAGGGGAACCGGA +AATGCTCACCCTTTGACAGGGTACTTTTAGTTCTGGGGCCTTATCTTAAGGATATTCCAA +CATATACAAAAAGATTCATGCACAGAGATATTTACTTTAGTATTATTTACCATAGGAAAA +AAGTTGGAAACAATACATTTTATGTTCTGTAAAATGAAAGAACAGTTAAATAAATCATGG +CTCTAAGACGACTCCAGGGCTGTGTACGGAAGTTCAGGGACTGAATCAGTTGAGGCTGTT +CCACCTGAGCCTAAGCTTCCTCTGCTGTATGTCAGGAATCAGGTGGGGGCCAAGCGAGAT +CACCACAGTGCAGGCCAGTGTGCAGGGCACAGGTGGGCGCC +>ENST00000634337.2|ENSG00000230021.9|OTTHUMG00000191652.2|OTTHUMT00000488647.3|AL669831.3-208|AL669831.3|891|processed_transcript| +GGAGCCTCACGCGCCTCTTACCTAACAGGAAGTTGGGTGGAAGCAGCGCGGACCCACGGC +GCACCGAACGCACTCCAACAGAACCCGACGCAGACACGCGCTTTCAACCGGCGGAGACAC +TGGCAGGGTTTATATGTGGAAGAGACTTGCCCTGTCTCAGCTCTGCTGCCCCCTGCCCCA +TACACGTGATGGAGCAGAAAACGTGCTGTGTGAACCTGTGACTTCAGGGCCTGTTGACGT +GGTCGTGCTTGCATACTCTCTGGACTGGACCTCACTGTGGGAACAACAAGATCAACAAGA +GGAGCAAGAACAACATCAAGAGTCAGGGCCCGGGGGTCCTGACGGGTACAGGATGGGTAC +AGACCCACACAGGAATCCCAGAGTGTGTTCCACAGCAGGACACGCCTGCGCTGAAAGAGT +GGGCAGAAAGGAGCTGACCTGGGATTATGATCCAAACTCAGCTGGGCCTCCCCTACCTGC +CCCAGGATTGTGGAGTGAGAACGTTGCAGCAGGGGAGAACAACGCAGCAAAGCACAGCAG +GGGAACCGGAAATGCTCACCCTTTGACAGGGTACTTTTAGTTCTGGGGCCTTATCTTAAG 
+GATATTCCAACATATACAAAAAGATTCATGCACAGAGATATTTACTTTAGTATTATTTAC +CATAGGAAAAAAGTTGGAAACAATACATTTTATGTTCTGTAAAATGAAAGAACAGTTAAA +TAAATCATGGCTCTAAGACGACTCCAGGGCTGTGTACGGAAGTTCAGGGACTGAATCAGT +TGAGGCTGTTCCACCTGAGCCTAAGCTTCCTCTGCTGTATGTCAGGAATCAGGTGGGGGC +CAAGCGAGATCACCACAGTGCAGGCCAGTGTGCAGGGCACAGGTGGGCGCC +>ENST00000440200.5|ENSG00000230021.9|OTTHUMG00000191652.2|OTTHUMT00000006710.2|AL669831.3-209|AL669831.3|413|processed_transcript| +CTGCAGGACAAGTTCGAGCATCTTAAAATGATTCAACAGGAGGAGATAAGGAAGCTCGAG +GAAGAGAAAAAACAACTGGAAGGAGAAATCATAGATTTTTATAAAATGAAAGCTGCCTCT +GAAGCACTGCAGACTCAGCTGAGCACCGATACAAAGAAAGACAAACATCCTCTGCTGCCC +CCTGCCCCATACACGTGATGGAGCAGAAAACGTGCTGTGTGAACCTGTGACTTCAGGGCC +TGTTGACGTGGTCGTGCTTGCATACTCTCTGGACTGGACCTCACTGTGGGAACAACAAGA +TCAACAAGAGGAGCAAGAACAACATCAAGAGTCAGGGCCCGGGGGTCCTGACGGGTACAG +GATGGGTACAGACCCACACAGGAATCCCAGAGTGTGTTCCACAGCAGGACACG +>ENST00000452176.2|ENSG00000230021.9|OTTHUMG00000191652.2|OTTHUMT00000006714.2|AL669831.3-210|AL669831.3|818|processed_transcript| +GCTCATGCGCCGAATAATAGACCTTAGTTTAGGTATTGGGGCCAAAGGATGGATGACCAT +TTCAAACGATCCAGGCTAAGCCAGGAGGAGAGCTCAAAGTCTGATCTGGTACGTTGCAAG +GATCGAGGGACAGAGCGTGCAGCAAGTGGCCAATGAATAGAAGTCTGGTTTTCTGAGCAT +CCTTGCAGCTGCAGGCTTCAGTCTACCAGAGAATGTGAGGTGTTATTCTTCTAGGGCAGT +GGTTAGAAAAGAAAATGAAAGTAGCAGTACTCTTTTCCTAATGCAACCATAGATGATCAG +AATTTGTAATCCATAAGGTAGAAGCTGCTGTGCCTGAAGAAATAGAAAGTGGGCAGAGGT +GGAGGGGCAAGAGCAGGAGGTGACCAATCTTGGAAATAAGGAAGGGAGAGAGGCAGCCAA +GCCAGCAGCTCCTAGGATTAGTCACTGCCTGGAGCCAGCTCTTGGAAGTTCCCCAGGAGC +TGTCCAGTCTTATGTCATGTCTAGTCAGCAGAGTCCCAAAGAAGCTTGTCATTCTCTAGG +CATTTGTGCTTACATTCTGATGGGCCTAATAGCAGGGAGATGACATGGAGCCCAGGCAGA +ACTGCTGAGATTTCTACTGGTCATGACCTCCATCTTCTCCTTCACACCTTTCCTACCTTT +CTTTTTTCATGCATTCAACAGACATTTATTACCCAATAAGTGCCAGGTAGTAAGCGAGGA +CCTGGGGAGAGCAGATGAGTAAGACACCGTCTCTGTCTCTCAGGAGCTCTCAGATTCTTA +GGGACACATGTACATCCTAATAAACACAGTGCATCTCA +>ENST00000441245.5|ENSG00000230021.9|OTTHUMG00000191652.2|OTTHUMT00000006711.1|AL669831.3-211|AL669831.3|629|processed_transcript| 
+GAAGCTCGAGGAAGAGAAAAAACAACTGGAAGGAGAAATCATAGATTTTTATAAAATGAA +AGCTGCCTCTGAAGCACTGCAGACTCAGCTGAGCACCGATACAAAGAAAGACAAACATCT +AACAATTTGGACAAGACAGCAAATGCTATTGTCCAAGTTTTCTAAAGAAGAATCTGAAGT +GAAATGACATCAAGAGACCTATCAAGACCTGTATCCAGGAAAAGAGTCTTGCTCTTGTTG +CCCAGGCTGGAGTGCAATGGCGTGATTTTGGCTCACTGTAAACTCCACCTCCTGGGTTCA +AGCGATTCTCCTGCCTCAGCCTCCCTAGTAGCTGGTATTACAGGTGCCTGCCACCACACC +CAGCTAATTTTTGTATTTTTAGTAGAGATGGGGGTTTCACCATGTTGGCCAGCCTGGTCC +CGAACTCCTGACCTTAGGTGATCCACCAGCATCGGCCTCCCAAAGTGCTGGGATTACAGG +TGTGAGCCACTGCGTCCAGCCAGTGGTGGGTCTCATATCTCAATGTGGACTTTTACTAAC +TCCCGATGCCTCATTTTCCTCATCAGTTGAAAGGAATGAATGAAAGATTTGTGTTTTTCA +TATTACCAGGTAGATGATAAGGAGATTTT +>ENST00000414688.6|ENSG00000230021.9|OTTHUMG00000191652.2|OTTHUMT00000493607.1|AL669831.3-212|AL669831.3|421|processed_transcript| +GACAGGGTCTCCCTCTGTTGTCCAAGGCTGGAGTGTAGTAGTGCTATCGCAGCTGACTGC +AGCCTCAACCTTCCAGGCTGAAGCGATCCTCCCACCTCAACCTCCCACGTGGCTGAGACT +ACAGGTGCTTGCCACTATGCCCAACTAACATTTGGAATTTTCGTATACGTGGATTCTAGA +GGGGTGACAGCGAAACCTGCAGGACAAGTTCGAGCATCTTAAAATGATTCAACAGGAGGA +GATAAGGAAGCTCGAGGAAGAGAAAAAACAACTGGAAGGAGAAATCATAGATTTTTATAA +AATGAAAGCTGCCTCTGAAGCACTGCAGACTCAGCTGAGCACCGATACAAAGAAAGACAA +ACATCCTGATCCATATGAATTCCTCTTATTAAGAAAAATAAAGCATCCAGGATTCAATGA +A +>ENST00000636676.1|ENSG00000230021.9|OTTHUMG00000191652.2|OTTHUMT00000489043.1|AL669831.3-213|AL669831.3|183|transcribed_processed_pseudogene| +TCACAGCTGCAGGACAAGTTCGAGCATCTTAAAATGATTCAACAGGAGGAGATAAGGAAG +CTCGAGGAAGAGAAAAAACAACTGGAAGGAGAAATCATAGATTTTTATAAAATGAAAGCT +GCCTCTGAAGCACTGCAGACTCAGCTGAGCACCGATACAAAGAAAGACAAACATCGTAAG +AAG +>ENST00000447954.2|ENSG00000230021.9|OTTHUMG00000191652.2|OTTHUMT00000006878.3|AL669831.3-214|AL669831.3|355|processed_transcript| +TTTCACCTGCAGTTGAAGATCCGTGAGGTGCCCAGAAGATCATGCAGTCATCAGTCCCAC +GGAGCAGCCTGCGAGGCTGAGGCTCCTCCCACTGGACCGCCCCCCAACTGGCACCACTGC +TGCCCCTGCCCCTACTCTCAGCCTCACGTGACTCTCGGGCAGAAGCAGTGGTGGGGCAGC +CAGGGCAGCGTCAAGAGTCTGAGCCAGCTGCAGGACAAGTTCGAGCATCTTAAAATGATT +CAACAGGAGGAGATAAGGAAGCTCGAGGAAGAGAAAAAACAACTGGAAGGAGAAATCATA 
+GATTTTTATAAAATGAAAGCTGCCTCTGAAGCACTGCAGACTCAGCTGAGCACCG +>ENST00000423796.1|ENSG00000235146.2|OTTHUMG00000002329.1|OTTHUMT00000006707.1|AC114498.1-201|AC114498.1|607|lincRNA| +TACGGAAGCTTTAGGGAGGTGCTCTGAGACCCGAAACTAGACTCGACTTTAACAGACACA +GACGACCCTGAAGATGGAAATGTTTTCTTCCAAATTGTGCTGCACGTTTTTGGCGAGAGC +ATGGGGCTGTGCGGCGTCCCCTCCCTGGCGCCCACCTGTGCCCTGCACACTGGCCTGCAC +TGTGGTGATCTCGCTTGGCCCCCACCTGATTCCTGACATACAGCAGAGGAAGCTTAGGCT +CAGGTGGAACAGCCTCAACTGATTCAGTCCCTGAACTTCCGTACACAGCCCTGGAGTCGT +CTTAGAGCCATGATTTATTTAACTGTTCTTTCATTTTACAGAACATAAAATGTATTGTTT +CCAACTTTTTTCCTATGGTAAATAATACTAAAGTAAATATCTCTGTGCATGAATCTTTTT +GTATATGTTGGAATATCCTTAAGATAAGGCCCCAGAACTAAAAGTACCCTGTCAAAGGGT +GAGCATTTCCGGTTCCCCTGCTGTGCTTTGCTGCGTTGTTCTCCCCTGCTGCAACGTTCT +CACTCCACAATCCTGGGGCAGGTAGGGGAGGCCCAGCTGAGTTTGGATCATAATCCTGAA +AGACACA +>ENST00000450696.1|ENSG00000235146.2|OTTHUMG00000002329.1|OTTHUMT00000006706.1|AC114498.1-202|AC114498.1|402|lincRNA| +GACTCGACTTTAACAGACACAGACGACCCTGAAGGTGAGACTGTCTGCTGGTGGGATGCT +GGATGGAAATGTTTTCTTCCAAATTGTGCTGCACGTTTTTGGCGAGAGCATGGGGCTGTG +CGGCGTCCCCTCCCTGGCGCCCACCTGTGCCCTGCACACTGGCCTGCACTGTGGTGATCT +CGCTTGGCCCCCACCTGATTCCTGACATACAGCAGAGGAAGCTTAGGCTCAGGTGGAACA +GCCTCAACTGATTCAGTCCCTGAACTTCCGTACACAGCCCTGGAGTCGTCTTAGAGCCAT +GATTTATTTAACTGTTCTTTCATTTTACAGAACATAAAATGTATTGTTTCCAACTTTTTT +CCTATGGTAAATAATACTAAAGTAAATATCTCTGTGCATGAA +>ENST00000416931.1|ENSG00000225972.1|OTTHUMG00000002338.1|OTTHUMT00000006720.1|MTND1P23-201|MTND1P23|372|unprocessed_pseudogene| +TTTGACCTTCAGCAAGGTCAAAGGGAGTCCGAACTAGTCTCAGGCTTCAACATCGAATAC +GCCGCAGGCCCCTTCGCCCTATTCTTCATAGCCGAATACACAAACATTATTATAATAAAC +ACCCTCACCACTACAATCTTCCTAGGAACAACATATAACGCACTCTCCCCTGAACTCTAC +ACAACATATTTTGTCACCAAGACCCTACTTCTGACCTCCCTGTTCTTATGAATTCGAACA +GCATACCCCCGATTCCGCTACGACCAACTCATACACCTCCTATGAAAAAACTTCCTACCA +CTCACCCTAGCATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTCCC +CCTCAAACCTAA +>ENST00000457540.1|ENSG00000225630.1|OTTHUMG00000002336.1|OTTHUMT00000006718.1|MTND2P28-201|MTND2P28|1044|unprocessed_pseudogene| 
+ATTAATCCCCTGGCCCAACCCGTCATCTACTCTACCATCTTTGCAGGCACACTCATCACA +GCGCTAAGCTCGCACTGATTTTTTACCTGAGTAGGCCTAGAAATAAACATGCTAGCTTTT +ATTCCAGTTCTAACCAAAAAAATAAACCCTCGTTCCACAGAAGCTGCCATCAAGTATTTC +CTCACGCAAGCAACCGCATCCATAATCCTTCTAATAGCTATCCTCTTCAACAATATACTC +TCCGGACAATGAACCATAACCAATACCACCAATCAATACTCATCATTAATAATCATAATG +GCTATAGCAATAAAACTAGGAATAGCCCCCTTTCACTTCTGAGTCCCAGAGGTTACCCAA +GGCACCCCTCTGACATCCGGCCTGCTCCTTCTCACATGACAAAAACTAGCCCCCATCTCA +ATCATATACCAAATTTCTCCCTCATTAAACGTAAGCCTTCTCCTCACTCTTTCAATCTTA +TCCATCATGGCAGGCAGTTGAGGTGGATTAAACCAAACCCAACTACGCAAAATCTTAGCA +TACTCCTCAATTACCCACATAGGATGAATAACAGCAGTTCTACCGTACAACCCTAACATA +ACCATTCTTAATTTAACTATTTATATTATCCTAACTACTACCGCATTCCTACTACTCAAC +TTAAACTCCAGCACCACAACCCTACTACTATCTCGCACCTGAAACAAGCTAACATGACTA +ACACCCTTAATTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTT +TTGCCCAAATGGGCCATTATCGAAGAATTCACAAAAAACAATAGCCTCATCATCCCCACC +ATCATAGCCATCATCACCCTCCTTAACCTCTACTTCTACCTGCGCCTAATCTACTCCACC +TCAATCACACTACTCCCTATATCTAACAACGTAAAAATAAAATGACAGTTTGAACACACA +AAACCCACCCCATTCCTCCCCACACTCATCGCCCTTACCACACTGCTCCTACCTATCTCC +CCTTTTATGCTAATAATCTTATAG +>ENST00000414273.1|ENSG00000237973.1|OTTHUMG00000002333.2|OTTHUMT00000006715.2|MTCO1P12-201|MTCO1P12|1543|unprocessed_pseudogene| +ATGTTCGCCGACCGTTGACTATTCTCTACAAACCACAAAGACATTGGAACACTATACCTA +TTATTCGGCGCATGAGCTGGAGTCCTAGGCACAGCTCTAAGCCTCCTTATTCGAGCCGAA +CTGGGCCAGCCAGGCAACCTTCTAGGTAACGACCACATCTACAACGTTATCGTCACAGCC +CATGCATTTGTAATAATCTTCTTCATAGTAATACCCATCATAATCGGAGGCTTTGGCAAC +TGACTAGTTCCCCTAATAATCGGTGCCCCCGATATGGCGTTTCCCCGCATAAACAACATA +AGCTTCTGACTCTTACCCCCCTCTCTCCTACTCCTGCTTGCATCTGCTATAGTGGAGGCC +GGCGCAGGAACAGGTTGAACAGTCTACCCTCCCTTGGCAGGGAACTACTCCCACCCTGGA +GCCTCCGTAGACCTAACCATCTTCTCCTTACACCTAGCAGGTATCTCCTCTATCTTAGGA +GCCATCAATTTCATCACAACAATTATTAATATAAAACCCCCTGCCATAACCCAATACCAA +ACGCCCCTTTTCGTCTGATCCGTCCTAATCACAGCAGTCTTACTTCTCCTATCTCTCCCA +GTCCTAGCCGCTGGCATCACTATACTACTAACAGACCGTAACCTCAACACCACCTTCTTC +GACCCAGCCGGAGGAGGAGACCCCATTCTATACCAACACCTATTCTGATTTTTCGGTCAC 
+CCTGAAGTTTATATTCTCATCCTACCAGGCTTCGGAATAATCTCCCATATTGTAACTTAC +TACTCCGGGAAAAAAAGAACCATTTGGATACATAGGTATGGTCTGAGCTATGATATCAAT +TGGCTTCCTAGGGTTTATCGTGTGAGCACACCATATATTTACAGTAGGAATAGACGTAGA +CACACGAGCATATTTCACCTCCGCTACCATAATCATCGCTATCCCCACCGGCGTCAAAGT +ATTTAGCTGACTCGCCACACTCCACGGAAGCAATATGAAATGATCTGCTGCAGTGCTCTG +AGCCCTAGGATTTATTTTTCTTTTCACCGTAGGTGGCCTGACTGGCATTGTATTAGCAAA +CTCATCACTAGACATCGTACTACACGACACGTACTACGTTGTAGCCCACTTCCACTATGT +CCTATCAATAGGAGCTGTATTTGCCATCATAGGAGGCTTCATTCACTGATTTCCCCTATT +CTCAGGCTACACCCTAGACCAAACCTACGCCAAAATCCATTTCGCTATCATATTCATCGG +CGTAAATCTAACTTTCTTCCCACAACACTTTCTCGGCCTATCCGGAATGCCCCGACGTTA +CTCGGACTATCCCGATGCATACACCACATGAAATATCCTATCATCTGTAGGCTCATTCAT +TTCTCTAACAGCAGTAATATTAATAATTTTCATAATTTGAGAAGCCTTCGCTTCGAAGCG +AAAAGTCCTAATAGTAGAAGAACCCTCCATAAACCTGGAGTGACTATATGGATGCCCCCC +ACCCTACCACACATTCGAAGAACCCGTATACATAAAATCTAGA +>ENST00000621981.1|ENSG00000278791.1|-|-|MIR6723-201|MIR6723|89|miRNA| +ATGCATCGGGATAGTCCGAGTAACGTCGGGGCATTCCGGATAGGCCGAGAAAGTGTTGTG +GGAAGAAAGTTAGATTTACGCCGATGAAT +>ENST00000427426.1|ENSG00000229344.1|OTTHUMG00000002334.1|OTTHUMT00000006716.1|MTCO2P12-201|MTCO2P12|682|unprocessed_pseudogene| +ATGGCACATGCAGCGCAAGTAGGTCTACAAGACGCTACTTCCCCTATCATAGAAGAGCTT +ATCATCTTTCATGATCACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTCCTGTAC +GCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAA +ATAGAAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTTATCGCCCTCCCATCC +CTACGCATCCTTTACATAACAGACGAGGTCAACGATCCCTCCTTTACCATCAAATCAATT +GGCCATCAATGGTACTGAACCTACGAATACACCGACTACGGCGGACTAATCTTCAACTCC +TACATACTTCCCCCATTATTCCTAGAACCAGGCGACCTGCGACTCCTTGACGTTGACAAT +CGAGTAGTACTCCCGGTTGAAGCCCCCATTCGTATAATAATTACATCACAAGACGTCTTA +CACTCATGAGCTGTCCCCACATTAGGCTTAAAAACAGATGCAATTCCCGGACGTCTAAAC +CAAACCACTTTCACTGCTACACGACCAGGGGTATACTACGGCCAATGCTCTGAAATCTGT +GGAGCAAACCAGTTTTATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAAT +AGGGCCCGTATTTACCCTATAG +>ENST00000467115.1|ENSG00000240409.1|OTTHUMG00000002473.1|OTTHUMT00000007027.1|MTATP8P1-201|MTATP8P1|207|unprocessed_pseudogene| 
+ATGCCCCAACTAAATACTACCGTATGACCCACCATAATTACCCCCATACTCCTTACACTA +TTCCTCATCACCCAACTAAAAATATTAAATACAAATTACCACCTACCTCCCTCACCAAAG +CCCATAAAAATAAAAAACTATAACAAACCCTGAGAACCAAAATGAACGAAAATCTGTTCA +CTTCATTCATTGCCCCCACAATCCTAG +>ENST00000514057.1|ENSG00000248527.1|OTTHUMG00000002335.2|OTTHUMT00000006717.2|MTATP6P1-201|MTATP6P1|681|unprocessed_pseudogene| +ATGAACGAAAATCTGTTCACTTCATTCATTGCCCCCACAATCCTAGGCCTACCCGCCGCA +GTACTGATCATTCTATTTCCCCCTCTATTGATCCCCACCTCCAAATATCTCATCAACAAC +CGACTAATTACCACCCAACAATGACTAATCCAACTAACCTCAAAACAAATGATAGCCATA +CACAACACTAAGGGACGAACCTGATCTCTTATACTAGTATCCTTAATCATTTTTATTGCC +ACAACTAACCTCCTCGGACTCCTGCCTCACTCATTTACACCAACCACCCAACTATCTATA +AACCTAGCCATGGCCATCCCCTTATGAGCGGGCGCAGTGATTATAGGCTTTCGCTCTAAG +ATTAAAAATGCCCTAGCCCACTTCTTACCACAAGGCACACCTACACCCCTTATCCCTATA +CTAGTTATTATCGAAACCATCAGCCTACTCATTCAACCAATAGCCCTGGCCGTACGCCTA +ACCGCTAACATTACTGCAGGCCACCTACTCATGCACCTAATTGGAAGCGCCACACTAGCA +ATATCAACTATTAACCTTCCCTCTACACTTATCATCTTCACAATTCTAATTCTACTGACT +ATCCTAGAAATCGCTGTCGCCTTAATCCAAGCCTACGTTTTTACACTTCTAGTAAGCCTC +TACCTGCACGACAACACATAA +>ENST00000416718.2|ENSG00000198744.5|OTTHUMG00000002337.2|OTTHUMT00000006719.2|MTCO3P12-201|MTCO3P12|547|unprocessed_pseudogene| +ATGACCCACCAATCACATGCCTATCATATAGTAAAACCCAGCCCATGGCCCCTAACAGGG +GCCCTCTCAGCCCTCCTAATGACCTCCGGCCTAGCCATGTGATTTCACTTCCACTCCACA +ACCCTCCTCATACTAGGCCTACTAACCAACACACTAACCATATACCAATGATGGCGCGAT +GTAACACGAGAAAGCACATACCAAGGCCACCACACACCACCTGTCCAGAAAGGCCTTCGA +TACGGGATAATCCTATTTATTACCTCAGAAGTTTTTTTCTTCGCAGGATTTTTCTGAGCC +TTTTACCACTCCAGCCTAGCTCCCACCCCCCAACTAGGGGGACACTGGCCCCCAACAGGC +ATCACCCCGCTAAATCCCCTAGAAGTCCCACTCCTAAACACATCCGTATTACTCGCATCA +GGGGTATCAATCACCTGAGCTCACCATAGTCTAATAGAAAACAACCGAAACCAAATAATT +CAAGCACTGCTTATTACAATTTTACTGGGTCTCTATTTTACCCTCCTACAAGCCTCAGAG +TACTTCG +>ENST00000438434.2|ENSG00000268663.1|OTTHUMG00000002340.3|OTTHUMT00000006722.3|WBP1LP6-201|WBP1LP6|424|processed_pseudogene| +GGAAGACTCCTGGGTGGCATGGAGCTCTTGCACCTCTAGGCACTGCCCAGCCCTGTGTCA +GCCAGGGCTGAACCCCCACAGGATAAGGAAGCCTGTGTGTGTACCAACAATCAAAGCTAC 
+ATCTGTGACACAACAGGACACTGCTATGGGCAGTCTCAGTGTTGTAACTACTACTATGAA +CATTGGTGGTTCTGGCTCGCGTGGACCATCACCATCATCCTGAGCTGCTGCTGTGTCTGC +CACCACAGCCAAGCCAGCCCTCAAGTCCAGCAGTAGCAACATGAAATCAACCTGCCTGCC +TATCCAGAAGCCCGCAATTACTCAGTGCTACCATTTTATTTCACCAAACTATTTATTACC +TTCTTATGAGGAAGTGGTGAACTAACCTCCACCTGTTTCCCTCCCTGTCTGTCCATTGTG +GATG +>ENST00000332831.4|ENSG00000284662.1|OTTHUMG00000002581.3|OTTHUMT00000007334.3|OR4F16-201|OR4F16|995|protein_coding| +AGCCCAGTTGGCTGGACCAATGGATGGAGAGAATCACTCAGTGGTATCTGAGTTTTTGTT +TCTGGGACTCACTCATTCATGGGAGATCCAGCTCCTCCTCCTAGTGTTTTCCTCTGTGCT +CTATGTGGCAAGCATTACTGGAAACATCCTCATTGTGTTTTCTGTGACCACTGACCCTCA +CTTACACTCCCCCATGTACTTTCTACTGGCCAGTCTCTCCTTCATTGACTTAGGAGCCTG +CTCTGTCACTTCTCCCAAGATGATTTATGACCTGTTCAGAAAGCGCAAAGTCATCTCCTT +TGGAGGCTGCATCGCTCAAATCTTCTTCATCCACGTCGTTGGTGGTGTGGAGATGGTGCT +GCTCATAGCCATGGCCTTTGACAGATATGTGGCCCTATGTAAGCCCCTCCACTATCTGAC +CATTATGAGCCCAAGAATGTGCCTTTCATTTCTGGCTGTTGCCTGGACCCTTGGTGTCAG +TCACTCCCTGTTCCAACTGGCATTTCTTGTTAATTTAGCCTTCTGTGGCCCTAATGTGTT +GGACAGCTTCTACTGTGACCTTCCTCGGCTTCTCAGACTAGCCTGTACCGACACCTACAG +ATTGCAGTTCATGGTCACTGTTAACAGTGGGTTTATCTGTGTGGGTACTTTCTTCATACT +TCTAATCTCCTACGTCTTCATCCTGTTTACTGTTTGGAAACATTCCTCAGGTGGTTCATC +CAAGGCCCTTTCCACTCTTTCAGCTCACAGCACAGTGGTCCTTTTGTTCTTTGGTCCACC +CATGTTTGTGTATACACGGCCACACCCTAATTCACAGATGGACAAGTTTCTGGCTATTTT +TGATGCAGTTCTCACTCCTTTTCTGAATCCAGTTGTCTATACATTCAGGAATAAGGAGAT +GAAGGCAGCAATAAAGAGAGTATGCAAACAGCTAGTGATTTACAAGAGGATCTCATAAAT +GATATAATAAGCCCTTCTCATTAAACATGATATGG +>ENST00000440782.3|ENSG00000229376.3|OTTHUMG00000057431.3|OTTHUMT00000127611.3|CICP3-201|CICP3|2455|processed_pseudogene| +CGGCCCCCAGGCCTGCGTTCAGTGAGGCCTCCCGTGGCGTCAGCATGTTCGTGTGGAGGA +ATGTGGAAGGTCACTCTTCGGCCGTGTTCTCCTGGTACTCCATCCCCTTCCTGACCCCTC +CCTGCAGCCACACGAGGCCCAGCAACCTGCCAGTCACTCAGAGTCTCTGGCCTCCAACCA +GAGAAAACAACCTGCCAAGTTGGCAGCCGTTGCTCATGAGCGTCCACCAAGTGGGACAGG +GAGTGTTGACCCTGGGCGGCCCCCTGGAGCCACCTGCCCTGAAAGCCCAGGGCCCGCAAC +CCCACACACTTTGGGGGTGGTGGAACCTGGTAAAAGCTCACCTCCCACCATGGAGGAGGA 
+GCCCTGGGCCCCTCAGGGGAGTCCCTGCTGGACAGTGAGACAGAGAATGACCATGATGAT +GCTTTCCTCTCCATCATGTCTCCTGACACCCAGTTGCCTCTACCACTCAGATGATGTCAG +GCCCAGTCCCTCAGTGCCCTGCGCAAGGAAAAGGACTCATCTTCTGAGAAGGATGGACGC +AGCCCCAACAAATCAGACAAGGACCACATCCGGTGGCCCATGAGTGGCGCTCATGATCTT +CAGCAGGCGGCACCAGGCCCTGGCGGGGCGCACCAGGGTCACCCCAACCAGGATAACCGG +ACCGTCAGCCAGATGCTGAGCGAGCGGTGGTACACCCTGGGGCCCAATGAGATGCAGAAA +TACAACCTGGCCTTCCAGGTGAAGGTGGCCCACTTGCAACAAGGACCGAAAGAAGTCCAG +CTCAGAGGCCAAGCCCACAAGCCAGGGGCTAGCAGGAGTGTAACAAGGGCTCGTGGGAGC +GGAGCATATCAGAGACGGGCACTGCCACTGCCCCTGGGGTGTCCTCTGAACTCCTGTCAG +TTGCAGCCCAAACACTCCAGAGCTCGGATACCAAGGAGCAGCTTCTATGGGGCAGAACGG +CTGCACACAGTCAGGGAACCTGGCTCAGCCTGGCCCAAGCCTTCTCCCACAGCGGGGTAC +ACAGCCTGGACGGCAGGGAAATAGACCGTCAGGCACTACGGGAACTGACACAGGTGGTGT +CTGGCACTGCATCATACTCTGGCCCAAAGCCTTCTACTCAGCATGGAGCTCCAGGCCACT +TTGCAGCCCCTGGTGAGGGAGGTGACCCGTGGGCAGCCCTGCTGCCGCCCACGTGAGCTG +CTCATTCCCAGCACATGGCCAGCGAGGTCATAGCGAGTGACGAGGAGCACACGGTCATCC +ATGAGGAGGAGGGGGTGATGATGTCATTGCTGATGATGGCTTTAGCACCACCGACACCGA +TCTCAAGTTCAAGGAGTGGGTGACCGACTGAGAGTGGGGACAACTCTGGGGAGGAGCCAG +AGGGCAACAAGGGCTTTGGTGGGAAGGTATTTGCACCTGTCATTCCTTCCTCCTTTACTC +CTGCCGCCCCTTGCTGGATCCTGAGCCCCCAGGGTCCCCCGATCCACCTGCAGCTTTTGG +CAGTCTATGGTCACACCCTGTCCTCCTCCTACACATACTCGGATGCTTCCTCCTCAACCT +TGGCACCCACCTCCTTCTTACTGGGCCCAGGAGCCTTCAAAGCCCAGGAGTCTGGTCAAC +GCAGCAGAGCGGGCCCCCTACGGCCCCAACCCCTGGGGATGGGGGCCCAGGGACGCCTTC +CAAGGTGGCCTGTTTCCTCCCAATGGATCCTGCCACCTTCTGGTGCAAGAGACCTGAAAG +TGTGGGTGACCTGGAGCTACCAGGCTCCTCAGTCATCAGGGTCCCTCCCAACACTAAGGC +TTTCCTAGGCAGGAGCTGGGCTGAGCCACCCGGGGGGCAGAGCCTGAAGAGAAACTGACT +GGGCTTTCGGGGTCGGGGCAGAGGGAACCCCACGGACATGGATCCCACACTGGAGGACCC +CACCGCGCCCAAATGCAAGATGAGAAGATGCTCCAGCTGCAGTCCAAAGCCCAACACCCC +CAAGTGTGCCATGTGTGATGGGGACAGCTTCCCCTTTGCCTGTACAGGTGGAGAAGCCGA +GGACAGGCTCAGGGAACCGGAGACCGAGAAGGCGCTGTCCTCTTCACTGCACGTACCCTG +GACCAGTGCCGGCCCTGATCATGCAGCTCTTCCAGGCCCACTGCTTCTTCCTGTCCACTA +GGCCACAGCCGCCCTCCAGGCCCACTATGCACACATCTTCCCCTCCAAGGTTTGTTCTGC +CCCTGCCCTGACTCCCAGCCCTGTGGGGGTCCTGACCGCACCTCACCTGACTGCTGCTGC 
+CCCTGCCCTGACTCCCAGCCCTGCCTGACCCCACCTCAACCTGCTCAGGCTCTGGCACAA +CCCTGGCTGCCCTGCCACTGCCTCTGCCCCAGAGTTGGGGCCTTGACAGCCTGGTTGGAA +GGGGACACCCCAGCCCTGCCTCAACACCTGGGGGTCTCCATAACTACCACAGGCA diff --git a/metagraph/workflows/metagraph_workflows/snakemake/test_workflow/dummy_staging_script.sh b/metagraph/workflows/metagraph_workflows/snakemake/test_workflow/dummy_staging_script.sh new file mode 100644 index 0000000000..950ecd48c3 --- /dev/null +++ b/metagraph/workflows/metagraph_workflows/snakemake/test_workflow/dummy_staging_script.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +SAMPLE_ID="$1" +OUTPUT_FILE="$2" + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +ln -s ${SCRIPT_DIR}/../test_data/${SAMPLE_ID}.fa ${OUTPUT_FILE} \ No newline at end of file diff --git a/metagraph/workflows/metagraph_workflows/snakemake/test_workflow/sample_ids.txt b/metagraph/workflows/metagraph_workflows/snakemake/test_workflow/sample_ids.txt new file mode 100644 index 0000000000..68b03f8c9b --- /dev/null +++ b/metagraph/workflows/metagraph_workflows/snakemake/test_workflow/sample_ids.txt @@ -0,0 +1,2 @@ +metasub_fake_data +transcripts_100 diff --git a/metagraph/workflows/metagraph_workflows/snakemake/test_workflow/test.yml b/metagraph/workflows/metagraph_workflows/snakemake/test_workflow/test.yml new file mode 100644 index 0000000000..ac27022e24 --- /dev/null +++ b/metagraph/workflows/metagraph_workflows/snakemake/test_workflow/test.yml @@ -0,0 +1,21 @@ +k: 10 +base_name: 'test_k10' +build_primary_graph: True +primarize_samples_separately: True + +output_directory: 'output_dir_test' +seqs_file_list_path: '' # using dummy_staging_script.sh staging script, to "stage" data + +annotation_labels_source: 'sequence_file_names' + +sample_ids_path: 'test_workflow/sample_ids.txt' +sample_staging_script_path: 'test_workflow/dummy_staging_script.sh' +sample_staging_file_ending: '.fasta.gz' + +brwt_relax_arity: 4 + +tmpdir: '/tmp' +rules: + build: + mem_mb: 1234 + diff --git 
a/metagraph/workflows/metagraph_workflows/snakemake/test_workflow/transcript_paths.txt b/metagraph/workflows/metagraph_workflows/snakemake/test_workflow/transcript_paths.txt new file mode 100644 index 0000000000..b3f46bdce0 --- /dev/null +++ b/metagraph/workflows/metagraph_workflows/snakemake/test_workflow/transcript_paths.txt @@ -0,0 +1,2 @@ +test_data/metasub_fake_data.fa +test_data/transcripts_100.fa diff --git a/metagraph/workflows/metagraph_workflows/utils.py b/metagraph/workflows/metagraph_workflows/utils.py new file mode 100644 index 0000000000..5680a38ab4 --- /dev/null +++ b/metagraph/workflows/metagraph_workflows/utils.py @@ -0,0 +1,151 @@ +import itertools +import logging +import re +import subprocess +from pathlib import Path +from typing import Union + +from metagraph_workflows import workflow_configs, utils +from metagraph_workflows.workflow_configs import GNU_TIME_CMD, TMP_DIR, \ + RULE_CONFIGS_KEY, SEQS_FILE_LIST_PATH, SEQS_DIR_PATH + +logger = logging.getLogger("metagraph_workflow") + + +def get_seqs_file_list_path(wdir, config): + if SEQS_FILE_LIST_PATH in config: + return config[SEQS_FILE_LIST_PATH] + + seqs_file_list_path = wdir/'sequence_file_list_path.txt' + seqs_dir_path = config.get(SEQS_DIR_PATH, None) + + if not seqs_dir_path: + raise ValueError(f"Neither {SEQS_FILE_LIST_PATH} nor {SEQS_DIR_PATH} parameter are set. 
Need either to proceed") + + utils.create_transcript_path_list(seqs_dir_path, seqs_file_list_path) + return seqs_file_list_path + + +def take_value_or_default(key, default, config): + return config[key] if (key in config.keys() and config[key]) else default + + +def create_transcript_path_list(path: Union[Path, str], transcript_path: Union[Path, str], suffix=''): + paths = [str(p.absolute()) for p in Path(path).glob(f'*{suffix}')] + + with open(transcript_path, 'w') as f: + f.write('\n'.join(paths)) + + +def get_sample_name(l): + file_name = Path(l.strip()).name + + m = re.compile(r'^([^.]*)\.(fasta|[a-zA-Z]{2,4})(\.gz)?$').match(file_name) + if m: + return m.groups()[0] + + return file_name + + +def derive_sample_dictionary(transcript_path_list_path: Union[Path, str]): + with open(transcript_path_list_path) as f: + ret = {get_sample_name(l): l.strip() for l in f} + return ret + + +def get_build_single_sample_input(config, orig_samples_path, seq_ids_dict): + def _sample_input(wildcards): + sample_id = wildcards[0] # TODO: + + if config[workflow_configs.SAMPLE_IDS_PATH]: + return orig_samples_path / f"{{sample_id}}{config[workflow_configs.SAMPLE_STAGING_FILE_ENDING]}" + else: + return seq_ids_dict[sample_id] + + return _sample_input + + +def get_build_joint_input(config, contigs_dir, seq_ids_dict, seqs_file_list_path): + sample_ids = set() + if workflow_configs.SAMPLE_IDS_PATH in config and config[workflow_configs.SAMPLE_IDS_PATH]: + with open(config[workflow_configs.SAMPLE_IDS_PATH]) as f: + sample_ids = {f"{l.strip()}" for l in f} + + def _get_build_graph_input(wildcards): + if config[workflow_configs.PRIMARIZE_SAMPLES_SEPARATELY]: + all_samples = sample_ids if sample_ids else seq_ids_dict.keys() + return [contigs_dir/f"{sample_id}_primary.fasta.gz" for sample_id in all_samples] + else: + return seqs_file_list_path + + return _get_build_graph_input + + +def generate_col_paths(annotation_cols_path, seqs_file_list_path, config): + sample_names = set() + + if 
workflow_configs.SAMPLE_IDS_PATH in config and config[workflow_configs.SAMPLE_IDS_PATH]: + with open(config[workflow_configs.SAMPLE_IDS_PATH]) as f: + sample_names = { f"{l.strip()}_primary.fasta.gz" for l in f} + + else: + with open(seqs_file_list_path) as f: + column_names = [f"{f.strip().rstrip('/').split('/')[-1]}" for f in + f.readlines()] + + duplicate_col_names = [grp_key for (grp_key, names_lst) in + itertools.groupby(sorted(column_names)) if + len(list(names_lst)) > 1] + + assert not duplicate_col_names, f"Found duplicate filenames: {', '.join(duplicate_col_names)}" + + if config[workflow_configs.PRIMARIZE_SAMPLES_SEPARATELY]: + sample_names = {f"{get_sample_name(c)}_primary.fasta.gz" for c in column_names} + else: + sample_names = set(column_names) + + return [annotation_cols_path / f"{c}.column.annodbg" for c in + sample_names] + + +def get_wdir(config): + return Path(config['output_directory']) + + +def get_gnu_time_command(config): + EMTPY_CMD = '' + cmd = config.get(GNU_TIME_CMD, EMTPY_CMD) + + if cmd: + test_cmd=[cmd, '--version'] + proc = subprocess.run(test_cmd, capture_output=True) + if proc.returncode == 0: + return f"{cmd} --verbose" + else: + logger.warning(f"Command {' '.join(test_cmd)} for GNU time could not be executed successfully: {proc.stderr}." 
+ f" No timing information collected") + else: + logger.warning("No GNU Time command provided.") + + return EMTPY_CMD + + +def get_log_path(rule_name, config, wildcards=None): + log_dir = get_wdir(config)/'logs' + + if wildcards: + wildcard_str = '_'.join([f"{{{w}}}" for w in wildcards]) + return f"{log_dir}/{rule_name}/{rule_name}_{wildcard_str}.log" + else: + return f"{log_dir}/{rule_name}.log" + + +def temp_dir_config(config): + return f"--disk-swap {config[TMP_DIR]}" if TMP_DIR in config else '', + + +def get_rule_specific_config(rule, key, config): + if RULE_CONFIGS_KEY in config and rule in config[ + RULE_CONFIGS_KEY] and key in config[RULE_CONFIGS_KEY][rule]: + return config[RULE_CONFIGS_KEY][rule][key] + return None \ No newline at end of file diff --git a/metagraph/workflows/metagraph_workflows/workflow_configs.py b/metagraph/workflows/metagraph_workflows/workflow_configs.py new file mode 100644 index 0000000000..e07aeb5ebd --- /dev/null +++ b/metagraph/workflows/metagraph_workflows/workflow_configs.py @@ -0,0 +1,69 @@ +""" +Config parameters used by the snakemake workflow. 
+These can all appear in a workflow config/config files +""" + +from enum import Enum + +SEQS_FILE_LIST_PATH = 'seqs_file_list_path' +SEQS_DIR_PATH = "seqs_dir_path" + +TMP_DIR = 'tmpdir' + +PRIMARIZE_SAMPLES_SEPARATELY = 'primarize_samples_separately' + +KMC_MAX_BINS="kmc_max_bins" +KMC_MEM_MB_PER_THREAD="kmc_mem_mb_per_thread" +KMC_MEM_OVERHEAD_FACTOR= "kmc_mem_overhead_factor" + +SAMPLE_IDS_PATH="sample_ids_path" +SAMPLE_STAGING_SCRIPT_PATH="sample_staging_script_path" +SAMPLE_STAGING_SCRIPT_ADDITIONAL_OPTIONS="sample_staging_script_additional_options" +SAMPLE_STAGING_FILE_ENDING='sample_staging_file_ending' + +BRWT_RELAX_ARITY="brwt_relax_arity" +BRWT_PARALLEL_NODES="brwt_parallel_nodes" +BRWT_LINKAGE_SUBSAMPLE="brwt_linkage_subsample" + +MAX_THREADS = 'max_threads' +MAX_MEMORY_MB = 'max_memory_mb' +MAX_DISK_MB = 'max_disk_mb' +MAX_BUFFER_SIZE_MB = 'max_buffer_size_mb' + +RULE_CONFIGS_KEY = 'rules' +THREADS_KEY = 'threads' +MEM_MB_KEY = 'mem_mb' +DISK_MB_KEY = 'disk_mb' + +MEM_BUFFER_MB_KEY = 'mem_buffer_mb' +DISK_CAP_MB_KEY = 'disk_cap_mb' + +GNU_TIME_CMD = 'gnu_time_cmd' + + +class AnnotationLabelsSource(Enum): + SEQUENCE_HEADERS = 'sequence_headers' + SEQUENCE_FILE_NAMES = 'sequence_file_names' + + def to_annotation_cmd_option(self): + if self == self.SEQUENCE_FILE_NAMES: + return '--anno-filename' + elif self == self.SEQUENCE_HEADERS: + return '--anno-header' + else: + raise ValueError(f"Invalid value of AnnotationLabelsSource: got {self}") + + +class AnnotationFormats(Enum): + # COLUMN = 'column' # TODO: need special case in the workflow + ROW = 'row' + BIN_REL_WT_SDSL = 'bin_rel_wt_sdsl' + BIN_REL_WT = 'bin_rel_wt' + FLAT = 'flat' + RBFISH = 'rbfish' + BRWT = 'brwt' + RELAXED_BRWT = 'relax.brwt' + RB_BRWT = 'rb_brwt' + #RELAXED_RB_BRWT = 'relax.rb_brwt' # not possible + ROW_DIFF_BRWT = 'row_diff_brwt' + RELAXED_ROW_DIFF_BRWT = 'relax.row_diff_brwt' \ No newline at end of file diff --git a/metagraph/workflows/notebooks/workflow_end_to_end_example.ipynb 
b/metagraph/workflows/notebooks/workflow_end_to_end_example.ipynb new file mode 100644 index 0000000000..b2cdb09439 --- /dev/null +++ b/metagraph/workflows/notebooks/workflow_end_to_end_example.ipynb @@ -0,0 +1,920 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "proper-norway", + "metadata": { + "ExecuteTime": { + "end_time": "2021-10-19T12:37:26.053378Z", + "start_time": "2021-10-19T12:37:26.016001Z" + }, + "execution": { + "iopub.execute_input": "2021-03-31T07:55:44.254485Z", + "iopub.status.busy": "2021-03-31T07:55:44.253980Z", + "iopub.status.idle": "2021-03-31T07:55:44.393638Z", + "shell.execute_reply": "2021-03-31T07:55:44.392335Z", + "shell.execute_reply.started": "2021-03-31T07:55:44.254352Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "verified-apache", + "metadata": { + "ExecuteTime": { + "end_time": "2021-10-19T12:37:26.082089Z", + "start_time": "2021-10-19T12:37:26.056666Z" + } + }, + "outputs": [], + "source": [ + "# needed to call snakemake from jupyter notebook\n", + "import nest_asyncio\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "hungarian-frequency", + "metadata": { + "ExecuteTime": { + "end_time": "2021-10-19T12:37:26.329909Z", + "start_time": "2021-10-19T12:37:26.086548Z" + }, + "execution": { + "iopub.execute_input": "2021-03-31T07:55:44.508512Z", + "iopub.status.busy": "2021-03-31T07:55:44.507820Z", + "iopub.status.idle": "2021-03-31T07:55:44.622865Z", + "shell.execute_reply": "2021-03-31T07:55:44.621694Z", + "shell.execute_reply.started": "2021-03-31T07:55:44.508477Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from metagraph_workflows import cli\n", + "from pathlib import Path\n", + "\n", + "from metagraph_workflows import workflow_configs\n", + "\n", + "import urllib" + ] + }, + { + "cell_type": "markdown", + "id": 
"intended-oklahoma", + "metadata": { + "execution": { + "iopub.execute_input": "2021-03-26T08:52:06.016430Z", + "iopub.status.busy": "2021-03-26T08:52:06.015907Z", + "iopub.status.idle": "2021-03-26T08:52:06.056002Z", + "shell.execute_reply": "2021-03-26T08:52:06.053820Z", + "shell.execute_reply.started": "2021-03-26T08:52:06.016390Z" + }, + "tags": [] + }, + "source": [ + "## Building Index" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "printable-sight", + "metadata": { + "ExecuteTime": { + "end_time": "2021-10-19T12:37:26.355841Z", + "start_time": "2021-10-19T12:37:26.333200Z" + } + }, + "outputs": [], + "source": [ + "temp_dir = Path('/tmp/metagraph_workflows')\n", + "temp_dir.mkdir(parents=True, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "young-syndication", + "metadata": { + "ExecuteTime": { + "end_time": "2021-10-19T12:37:26.385422Z", + "start_time": "2021-10-19T12:37:26.361187Z" + }, + "execution": { + "iopub.execute_input": "2021-03-31T07:55:46.048662Z", + "iopub.status.busy": "2021-03-31T07:55:46.047835Z", + "iopub.status.idle": "2021-03-31T07:55:46.118796Z", + "shell.execute_reply": "2021-03-31T07:55:46.117843Z", + "shell.execute_reply.started": "2021-03-31T07:55:46.048574Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "sample_list_path = Path('/tmp/paths.txt')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "colored-intervention", + "metadata": { + "ExecuteTime": { + "end_time": "2021-10-19T12:37:26.413353Z", + "start_time": "2021-10-19T12:37:26.387666Z" + } + }, + "outputs": [], + "source": [ + "# download can be unreliable at times. 
Try several times, possibly changing the protocol between http and ftp\n", + "\n", + "protocol = \"ftp\"\n", + "sample_download = [\n", + " f\"{protocol}://ftp.sra.ebi.ac.uk/vol1/fastq/SRR512/000/SRR5122830/SRR5122830_subreads.fastq.gz\",\n", + " f\"{protocol}://ftp.sra.ebi.ac.uk/vol1/fastq/SRR512/006/SRR5122826/SRR5122826_subreads.fastq.gz\"\n", + "]\n", + "\n", + "input_seqs = temp_dir/'input_seqs'\n", + "\n", + "input_seqs.mkdir(parents=True, exist_ok=True)\n", + "\n", + "for s in sample_download:\n", + " file_name = Path(urllib.parse.urlparse(s).path).name\n", + " sample_name = file_name.split('_')[0]\n", + " \n", + " if not (input_seqs/file_name).exists():\n", + " urllib.request.urlretrieve(s, input_seqs/file_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "reflected-specification", + "metadata": { + "ExecuteTime": { + "end_time": "2021-10-19T12:37:26.444026Z", + "start_time": "2021-10-19T12:37:26.418382Z" + }, + "execution": { + "iopub.execute_input": "2021-03-31T07:55:46.122595Z", + "iopub.status.busy": "2021-03-31T07:55:46.121935Z", + "iopub.status.idle": "2021-03-31T07:55:46.191302Z", + "shell.execute_reply": "2021-03-31T07:55:46.190377Z", + "shell.execute_reply.started": "2021-03-31T07:55:46.122439Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def create_sample_list_path(path_list, out_path):\n", + " with open(out_path, \"w\") as f:\n", + " f.write('\\n'.join([str(p) for p in path_list]))\n", + "\n", + "create_sample_list_path(input_seqs.glob('*fastq*'), sample_list_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "healthy-hamilton", + "metadata": { + "ExecuteTime": { + "end_time": "2021-10-19T12:37:26.481634Z", + "start_time": "2021-10-19T12:37:26.447955Z" + }, + "execution": { + "iopub.execute_input": "2021-03-31T07:55:46.194052Z", + "iopub.status.busy": "2021-03-31T07:55:46.193386Z", + "iopub.status.idle": "2021-03-31T07:55:46.252664Z", + "shell.execute_reply": 
"2021-03-31T07:55:46.251673Z", + "shell.execute_reply.started": "2021-03-31T07:55:46.194012Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "output_dir = temp_dir /'output_dir'" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "portuguese-employer", + "metadata": { + "ExecuteTime": { + "end_time": "2021-10-19T12:37:26.949507Z", + "start_time": "2021-10-19T12:37:26.484629Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "#############################\r\n", + "### Welcome to MetaGraph! ###\r\n", + "#############################\r\n", + "\r\n", + "Metagraph: comprehensive metagenome graph representation -- Version 0.1\r\n", + "\r\n", + "Usage: metagraph [command specific options]\r\n", + "\r\n", + "Available commands:\r\n", + "\tbuild\t\tconstruct a graph object from input sequence\r\n" + ] + } + ], + "source": [ + "# checking metagraph version\n", + "!metagraph --help 2>&1 | head" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "reported-march", + "metadata": { + "ExecuteTime": { + "end_time": "2021-10-19T12:37:57.440009Z", + "start_time": "2021-10-19T12:37:26.953032Z" + }, + "execution": { + "iopub.execute_input": "2021-03-31T07:55:46.465433Z", + "iopub.status.busy": "2021-03-31T07:55:46.465147Z", + "iopub.status.idle": "2021-03-31T07:55:50.278245Z", + "shell.execute_reply": "2021-03-31T07:55:50.277387Z", + "shell.execute_reply.started": "2021-03-31T07:55:46.465399Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2021-10-19 14:37:27,393 - WARNING: Command /usr/bin/time --version for GNU time could not be executed successfully: b'/usr/bin/time: illegal option -- -\\nusage: time [-lp] \\n'. 
No timing information collected\n", + "Building DAG of jobs...\n", + "2021-10-19 14:37:27,452 - WARNING: Building DAG of jobs...\n", + "Using shell: /usr/local/bin/bash\n", + "2021-10-19 14:37:27,481 - WARNING: Using shell: /usr/local/bin/bash\n", + "Provided cores: 1 (use --cores to define parallelism)\n", + "2021-10-19 14:37:27,482 - WARNING: Provided cores: 1 (use --cores to define parallelism)\n", + "Rules claiming more threads will be scaled down.\n", + "2021-10-19 14:37:27,484 - WARNING: Rules claiming more threads will be scaled down.\n", + "Job counts:\n", + "\tcount\tjobs\n", + "\t1\tall\n", + "\t1\tannotate\n", + "\t1\tannotate_row_diff_brwt\n", + "\t1\tbuild_joint_graph\n", + "\t1\tbuild_joint_primary\n", + "\t1\tgenerate_column_list\n", + "\t1\tprimarize_joint_graph\n", + "\t1\trelax_row_diff_brwt\n", + "\t1\ttransform_rd_stage0\n", + "\t1\ttransform_rd_stage1\n", + "\t1\ttransform_rd_stage2\n", + "\t11\n", + "2021-10-19 14:37:27,486 - WARNING: Job counts:\n", + "\tcount\tjobs\n", + "\t1\tall\n", + "\t1\tannotate\n", + "\t1\tannotate_row_diff_brwt\n", + "\t1\tbuild_joint_graph\n", + "\t1\tbuild_joint_primary\n", + "\t1\tgenerate_column_list\n", + "\t1\tprimarize_joint_graph\n", + "\t1\trelax_row_diff_brwt\n", + "\t1\ttransform_rd_stage0\n", + "\t1\ttransform_rd_stage1\n", + "\t1\ttransform_rd_stage2\n", + "\t11\n", + "\n", + "2021-10-19 14:37:27,489 - INFO: \n", + "[Tue Oct 19 14:37:27 2021]\n", + "2021-10-19 14:37:27,494 - INFO: [Tue Oct 19 14:37:27 2021]\n", + "rule build_joint_graph:\n", + " input: /tmp/paths.txt\n", + " output: /tmp/metagraph_workflows/output_dir/graph_canonical.dbg\n", + " log: /tmp/metagraph_workflows/output_dir/logs/build_joint_graph.log\n", + " jobid: 3\n", + " resources: mem_mb=4048, disk_mb=10240\n", + "2021-10-19 14:37:27,498 - INFO: rule build_joint_graph:\n", + " input: /tmp/paths.txt\n", + " output: /tmp/metagraph_workflows/output_dir/graph_canonical.dbg\n", + " log: 
/tmp/metagraph_workflows/output_dir/logs/build_joint_graph.log\n", + " jobid: 3\n", + " resources: mem_mb=4048, disk_mb=10240\n", + "\n", + "2021-10-19 14:37:27,501 - INFO: \n", + "[Tue Oct 19 14:37:27 2021]\n", + "2021-10-19 14:37:27,898 - INFO: [Tue Oct 19 14:37:27 2021]\n", + "Finished job 3.\n", + "2021-10-19 14:37:27,900 - INFO: Finished job 3.\n", + "1 of 11 steps (9%) done\n", + "2021-10-19 14:37:27,903 - INFO: 1 of 11 steps (9%) done\n", + "\n", + "2021-10-19 14:37:27,906 - INFO: \n", + "[Tue Oct 19 14:37:27 2021]\n", + "2021-10-19 14:37:27,908 - INFO: [Tue Oct 19 14:37:27 2021]\n", + "rule primarize_joint_graph:\n", + " input: /tmp/metagraph_workflows/output_dir/graph_canonical.dbg\n", + " output: /tmp/metagraph_workflows/output_dir/graph_primary.fasta.gz\n", + " log: /tmp/metagraph_workflows/output_dir/logs/primarize_joint_graph.log\n", + " jobid: 2\n", + " resources: mem_mb=4048\n", + "2021-10-19 14:37:27,911 - INFO: rule primarize_joint_graph:\n", + " input: /tmp/metagraph_workflows/output_dir/graph_canonical.dbg\n", + " output: /tmp/metagraph_workflows/output_dir/graph_primary.fasta.gz\n", + " log: /tmp/metagraph_workflows/output_dir/logs/primarize_joint_graph.log\n", + " jobid: 2\n", + " resources: mem_mb=4048\n", + "\n", + "2021-10-19 14:37:27,914 - INFO: \n", + "Removing temporary output file /tmp/metagraph_workflows/output_dir/graph_canonical.dbg.\n", + "2021-10-19 14:37:28,208 - WARNING: Removing temporary output file /tmp/metagraph_workflows/output_dir/graph_canonical.dbg.\n", + "[Tue Oct 19 14:37:28 2021]\n", + "2021-10-19 14:37:28,211 - INFO: [Tue Oct 19 14:37:28 2021]\n", + "Finished job 2.\n", + "2021-10-19 14:37:28,214 - INFO: Finished job 2.\n", + "2 of 11 steps (18%) done\n", + "2021-10-19 14:37:28,217 - INFO: 2 of 11 steps (18%) done\n", + "\n", + "2021-10-19 14:37:28,221 - INFO: \n", + "[Tue Oct 19 14:37:28 2021]\n", + "2021-10-19 14:37:28,227 - INFO: [Tue Oct 19 14:37:28 2021]\n", + "rule build_joint_primary:\n", + " input: 
/tmp/metagraph_workflows/output_dir/graph_primary.fasta.gz\n", + " output: /tmp/metagraph_workflows/output_dir/graph.dbg\n", + " log: /tmp/metagraph_workflows/output_dir/logs/build_joint_primary.log\n", + " jobid: 1\n", + " resources: mem_mb=4048, disk_mb=10240\n", + "2021-10-19 14:37:28,239 - INFO: rule build_joint_primary:\n", + " input: /tmp/metagraph_workflows/output_dir/graph_primary.fasta.gz\n", + " output: /tmp/metagraph_workflows/output_dir/graph.dbg\n", + " log: /tmp/metagraph_workflows/output_dir/logs/build_joint_primary.log\n", + " jobid: 1\n", + " resources: mem_mb=4048, disk_mb=10240\n", + "\n", + "2021-10-19 14:37:28,246 - INFO: \n", + "Removing temporary output file /tmp/metagraph_workflows/output_dir/graph_primary.fasta.gz.\n", + "2021-10-19 14:37:28,561 - WARNING: Removing temporary output file /tmp/metagraph_workflows/output_dir/graph_primary.fasta.gz.\n", + "[Tue Oct 19 14:37:28 2021]\n", + "2021-10-19 14:37:28,565 - INFO: [Tue Oct 19 14:37:28 2021]\n", + "Finished job 1.\n", + "2021-10-19 14:37:28,567 - INFO: Finished job 1.\n", + "3 of 11 steps (27%) done\n", + "2021-10-19 14:37:28,570 - INFO: 3 of 11 steps (27%) done\n", + "\n", + "2021-10-19 14:37:28,575 - INFO: \n", + "[Tue Oct 19 14:37:28 2021]\n", + "2021-10-19 14:37:28,578 - INFO: [Tue Oct 19 14:37:28 2021]\n", + "rule annotate:\n", + " input: /tmp/paths.txt, /tmp/metagraph_workflows/output_dir/graph.dbg\n", + " output: /tmp/metagraph_workflows/output_dir/columns/DONE, /tmp/metagraph_workflows/output_dir/columns/SRR5122826_subreads.fastq.gz.column.annodbg, /tmp/metagraph_workflows/output_dir/columns/SRR5122830_subreads.fastq.gz.column.annodbg\n", + " log: /tmp/metagraph_workflows/output_dir/logs/annotate.log\n", + " jobid: 8\n", + " resources: mem_mb=4048\n", + "2021-10-19 14:37:28,592 - INFO: rule annotate:\n", + " input: /tmp/paths.txt, /tmp/metagraph_workflows/output_dir/graph.dbg\n", + " output: /tmp/metagraph_workflows/output_dir/columns/DONE, 
/tmp/metagraph_workflows/output_dir/columns/SRR5122826_subreads.fastq.gz.column.annodbg, /tmp/metagraph_workflows/output_dir/columns/SRR5122830_subreads.fastq.gz.column.annodbg\n", + " log: /tmp/metagraph_workflows/output_dir/logs/annotate.log\n", + " jobid: 8\n", + " resources: mem_mb=4048\n", + "\n", + "2021-10-19 14:37:28,595 - INFO: \n", + "Touching output file /tmp/metagraph_workflows/output_dir/columns/DONE.\n", + "2021-10-19 14:37:29,548 - WARNING: Touching output file /tmp/metagraph_workflows/output_dir/columns/DONE.\n", + "[Tue Oct 19 14:37:29 2021]\n", + "2021-10-19 14:37:29,581 - INFO: [Tue Oct 19 14:37:29 2021]\n", + "Finished job 8.\n", + "2021-10-19 14:37:29,585 - INFO: Finished job 8.\n", + "4 of 11 steps (36%) done\n", + "2021-10-19 14:37:29,590 - INFO: 4 of 11 steps (36%) done\n", + "\n", + "2021-10-19 14:37:29,595 - INFO: \n", + "[Tue Oct 19 14:37:29 2021]\n", + "2021-10-19 14:37:29,599 - INFO: [Tue Oct 19 14:37:29 2021]\n", + "localrule generate_column_list:\n", + " input: /tmp/metagraph_workflows/output_dir/columns/SRR5122826_subreads.fastq.gz.column.annodbg, /tmp/metagraph_workflows/output_dir/columns/SRR5122830_subreads.fastq.gz.column.annodbg\n", + " output: /tmp/metagraph_workflows/output_dir/columns.txt\n", + " jobid: 7\n", + "2021-10-19 14:37:29,603 - INFO: localrule generate_column_list:\n", + " input: /tmp/metagraph_workflows/output_dir/columns/SRR5122826_subreads.fastq.gz.column.annodbg, /tmp/metagraph_workflows/output_dir/columns/SRR5122830_subreads.fastq.gz.column.annodbg\n", + " output: /tmp/metagraph_workflows/output_dir/columns.txt\n", + " jobid: 7\n", + "\n", + "2021-10-19 14:37:29,609 - INFO: \n", + "[Tue Oct 19 14:37:30 2021]\n", + "2021-10-19 14:37:30,408 - INFO: [Tue Oct 19 14:37:30 2021]\n", + "Finished job 7.\n", + "2021-10-19 14:37:30,411 - INFO: Finished job 7.\n", + "5 of 11 steps (45%) done\n", + "2021-10-19 14:37:30,415 - INFO: 5 of 11 steps (45%) done\n", + "\n", + "2021-10-19 14:37:30,442 - INFO: \n", + "[Tue Oct 19 
14:37:30 2021]\n", + "2021-10-19 14:37:30,445 - INFO: [Tue Oct 19 14:37:30 2021]\n", + "rule transform_rd_stage0:\n", + " input: /tmp/metagraph_workflows/output_dir/graph.dbg, /tmp/metagraph_workflows/output_dir/columns.txt\n", + " output: /tmp/metagraph_workflows/output_dir/rd_cols/vector.row_count\n", + " log: /tmp/metagraph_workflows/output_dir/logs/transform_rd_stage0.log\n", + " jobid: 10\n", + " resources: mem_mb=2048\n", + "2021-10-19 14:37:30,450 - INFO: rule transform_rd_stage0:\n", + " input: /tmp/metagraph_workflows/output_dir/graph.dbg, /tmp/metagraph_workflows/output_dir/columns.txt\n", + " output: /tmp/metagraph_workflows/output_dir/rd_cols/vector.row_count\n", + " log: /tmp/metagraph_workflows/output_dir/logs/transform_rd_stage0.log\n", + " jobid: 10\n", + " resources: mem_mb=2048\n", + "\n", + "2021-10-19 14:37:30,457 - INFO: \n", + "[Tue Oct 19 14:37:30 2021]\n", + "2021-10-19 14:37:30,752 - INFO: [Tue Oct 19 14:37:30 2021]\n", + "Finished job 10.\n", + "2021-10-19 14:37:30,754 - INFO: Finished job 10.\n", + "6 of 11 steps (55%) done\n", + "2021-10-19 14:37:30,756 - INFO: 6 of 11 steps (55%) done\n", + "\n", + "2021-10-19 14:37:30,762 - INFO: \n", + "[Tue Oct 19 14:37:30 2021]\n", + "2021-10-19 14:37:30,783 - INFO: [Tue Oct 19 14:37:30 2021]\n", + "rule transform_rd_stage1:\n", + " input: /tmp/metagraph_workflows/output_dir/graph.dbg, /tmp/metagraph_workflows/output_dir/columns.txt, /tmp/metagraph_workflows/output_dir/rd_cols/vector.row_count\n", + " output: /tmp/metagraph_workflows/output_dir/graph.dbg.pred, /tmp/metagraph_workflows/output_dir/graph.dbg.pred_boundary, /tmp/metagraph_workflows/output_dir/graph.dbg.rd_succ, /tmp/metagraph_workflows/output_dir/graph.dbg.succ, /tmp/metagraph_workflows/output_dir/graph.dbg.succ_boundary, /tmp/metagraph_workflows/output_dir/rd_cols/vectors.row_reduction\n", + " log: /tmp/metagraph_workflows/output_dir/logs/transform_rd_stage1.log\n", + " jobid: 9\n", + " resources: mem_mb=4048\n" + ] + }, + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "2021-10-19 14:37:30,800 - INFO: rule transform_rd_stage1:\n", + " input: /tmp/metagraph_workflows/output_dir/graph.dbg, /tmp/metagraph_workflows/output_dir/columns.txt, /tmp/metagraph_workflows/output_dir/rd_cols/vector.row_count\n", + " output: /tmp/metagraph_workflows/output_dir/graph.dbg.pred, /tmp/metagraph_workflows/output_dir/graph.dbg.pred_boundary, /tmp/metagraph_workflows/output_dir/graph.dbg.rd_succ, /tmp/metagraph_workflows/output_dir/graph.dbg.succ, /tmp/metagraph_workflows/output_dir/graph.dbg.succ_boundary, /tmp/metagraph_workflows/output_dir/rd_cols/vectors.row_reduction\n", + " log: /tmp/metagraph_workflows/output_dir/logs/transform_rd_stage1.log\n", + " jobid: 9\n", + " resources: mem_mb=4048\n", + "\n", + "2021-10-19 14:37:30,809 - INFO: \n", + "[Tue Oct 19 14:37:38 2021]\n", + "2021-10-19 14:37:38,427 - INFO: [Tue Oct 19 14:37:38 2021]\n", + "Finished job 9.\n", + "2021-10-19 14:37:38,430 - INFO: Finished job 9.\n", + "7 of 11 steps (64%) done\n", + "2021-10-19 14:37:38,431 - INFO: 7 of 11 steps (64%) done\n", + "\n", + "2021-10-19 14:37:38,435 - INFO: \n", + "[Tue Oct 19 14:37:38 2021]\n", + "2021-10-19 14:37:38,441 - INFO: [Tue Oct 19 14:37:38 2021]\n", + "rule transform_rd_stage2:\n", + " input: /tmp/metagraph_workflows/output_dir/graph.dbg, /tmp/metagraph_workflows/output_dir/columns.txt, /tmp/metagraph_workflows/output_dir/rd_cols/vectors.row_reduction\n", + " output: /tmp/metagraph_workflows/output_dir/graph.dbg.anchors, /tmp/metagraph_workflows/output_dir/rd_cols/DONE\n", + " log: /tmp/metagraph_workflows/output_dir/logs/transform_rd_stage2.log\n", + " jobid: 6\n", + " resources: mem_mb=4048\n", + "2021-10-19 14:37:38,453 - INFO: rule transform_rd_stage2:\n", + " input: /tmp/metagraph_workflows/output_dir/graph.dbg, /tmp/metagraph_workflows/output_dir/columns.txt, /tmp/metagraph_workflows/output_dir/rd_cols/vectors.row_reduction\n", + " output: 
/tmp/metagraph_workflows/output_dir/graph.dbg.anchors, /tmp/metagraph_workflows/output_dir/rd_cols/DONE\n", + " log: /tmp/metagraph_workflows/output_dir/logs/transform_rd_stage2.log\n", + " jobid: 6\n", + " resources: mem_mb=4048\n", + "\n", + "2021-10-19 14:37:38,465 - INFO: \n", + "Touching output file /tmp/metagraph_workflows/output_dir/rd_cols/DONE.\n", + "2021-10-19 14:37:50,273 - WARNING: Touching output file /tmp/metagraph_workflows/output_dir/rd_cols/DONE.\n", + "[Tue Oct 19 14:37:50 2021]\n", + "2021-10-19 14:37:50,294 - INFO: [Tue Oct 19 14:37:50 2021]\n", + "Finished job 6.\n", + "2021-10-19 14:37:50,296 - INFO: Finished job 6.\n", + "8 of 11 steps (73%) done\n", + "2021-10-19 14:37:50,300 - INFO: 8 of 11 steps (73%) done\n", + "\n", + "2021-10-19 14:37:50,327 - INFO: \n", + "[Tue Oct 19 14:37:50 2021]\n", + "2021-10-19 14:37:50,331 - INFO: [Tue Oct 19 14:37:50 2021]\n", + "rule annotate_row_diff_brwt:\n", + " input: /tmp/metagraph_workflows/output_dir/graph.dbg.anchors, /tmp/metagraph_workflows/output_dir/rd_cols/DONE, /tmp/metagraph_workflows/output_dir/graph.dbg\n", + " output: /tmp/metagraph_workflows/output_dir/graph.row_diff_brwt.annodbg, /tmp/metagraph_workflows/output_dir/graph.row_diff_brwt.annodbg.linkage\n", + " log: /tmp/metagraph_workflows/output_dir/logs/annotate_row_diff_brwt.log\n", + " jobid: 5\n", + " resources: mem_mb=4048\n", + "2021-10-19 14:37:50,334 - INFO: rule annotate_row_diff_brwt:\n", + " input: /tmp/metagraph_workflows/output_dir/graph.dbg.anchors, /tmp/metagraph_workflows/output_dir/rd_cols/DONE, /tmp/metagraph_workflows/output_dir/graph.dbg\n", + " output: /tmp/metagraph_workflows/output_dir/graph.row_diff_brwt.annodbg, /tmp/metagraph_workflows/output_dir/graph.row_diff_brwt.annodbg.linkage\n", + " log: /tmp/metagraph_workflows/output_dir/logs/annotate_row_diff_brwt.log\n", + " jobid: 5\n", + " resources: mem_mb=4048\n", + "\n", + "2021-10-19 14:37:50,345 - INFO: \n", + "[Tue Oct 19 14:37:57 2021]\n", + "2021-10-19 
14:37:57,042 - INFO: [Tue Oct 19 14:37:57 2021]\n", + "Finished job 5.\n", + "2021-10-19 14:37:57,045 - INFO: Finished job 5.\n", + "9 of 11 steps (82%) done\n", + "2021-10-19 14:37:57,047 - INFO: 9 of 11 steps (82%) done\n", + "\n", + "2021-10-19 14:37:57,049 - INFO: \n", + "[Tue Oct 19 14:37:57 2021]\n", + "2021-10-19 14:37:57,052 - INFO: [Tue Oct 19 14:37:57 2021]\n", + "rule relax_row_diff_brwt:\n", + " input: /tmp/metagraph_workflows/output_dir/graph.row_diff_brwt.annodbg\n", + " output: /tmp/metagraph_workflows/output_dir/graph.relax.row_diff_brwt.annodbg\n", + " log: /tmp/metagraph_workflows/output_dir/logs/relax_row_diff_brwt.log\n", + " jobid: 4\n", + " resources: mem_mb=4048\n", + "2021-10-19 14:37:57,063 - INFO: rule relax_row_diff_brwt:\n", + " input: /tmp/metagraph_workflows/output_dir/graph.row_diff_brwt.annodbg\n", + " output: /tmp/metagraph_workflows/output_dir/graph.relax.row_diff_brwt.annodbg\n", + " log: /tmp/metagraph_workflows/output_dir/logs/relax_row_diff_brwt.log\n", + " jobid: 4\n", + " resources: mem_mb=4048\n", + "\n", + "2021-10-19 14:37:57,067 - INFO: \n", + "[Tue Oct 19 14:37:57 2021]\n", + "2021-10-19 14:37:57,377 - INFO: [Tue Oct 19 14:37:57 2021]\n", + "Finished job 4.\n", + "2021-10-19 14:37:57,379 - INFO: Finished job 4.\n", + "10 of 11 steps (91%) done\n", + "2021-10-19 14:37:57,380 - INFO: 10 of 11 steps (91%) done\n", + "\n", + "2021-10-19 14:37:57,383 - INFO: \n", + "[Tue Oct 19 14:37:57 2021]\n", + "2021-10-19 14:37:57,385 - INFO: [Tue Oct 19 14:37:57 2021]\n", + "localrule all:\n", + " input: /tmp/metagraph_workflows/output_dir/graph.dbg, /tmp/metagraph_workflows/output_dir/graph.relax.row_diff_brwt.annodbg\n", + " jobid: 0\n", + "2021-10-19 14:37:57,390 - INFO: localrule all:\n", + " input: /tmp/metagraph_workflows/output_dir/graph.dbg, /tmp/metagraph_workflows/output_dir/graph.relax.row_diff_brwt.annodbg\n", + " jobid: 0\n", + "\n", + "2021-10-19 14:37:57,395 - INFO: \n", + "[Tue Oct 19 14:37:57 2021]\n", + "2021-10-19 
14:37:57,398 - INFO: [Tue Oct 19 14:37:57 2021]\n", + "Finished job 0.\n", + "2021-10-19 14:37:57,403 - INFO: Finished job 0.\n", + "11 of 11 steps (100%) done\n", + "2021-10-19 14:37:57,408 - INFO: 11 of 11 steps (100%) done\n", + "Complete log: /Users/marc/git/projects2014-metagenome/metagraph/workflows/notebooks/.snakemake/log/2021-10-19T143727.055182.snakemake.log\n", + "2021-10-19 14:37:57,415 - WARNING: Complete log: /Users/marc/git/projects2014-metagenome/metagraph/workflows/notebooks/.snakemake/log/2021-10-19T143727.055182.snakemake.log\n" + ] + } + ], + "source": [ + "cli.run_build_workflow(output_dir, sample_list_path, build_primary_graph=True, \n", + " force=True, k=5, verbose=False, threads=2,\n", + " annotation_labels_source=workflow_configs.AnnotationLabelsSource.SEQUENCE_HEADERS);" + ] + }, + { + "cell_type": "markdown", + "id": "prostate-plaza", + "metadata": { + "execution": { + "iopub.execute_input": "2021-03-26T08:53:52.948433Z", + "iopub.status.busy": "2021-03-26T08:53:52.947770Z", + "iopub.status.idle": "2021-03-26T08:53:52.995178Z", + "shell.execute_reply": "2021-03-26T08:53:52.993944Z", + "shell.execute_reply.started": "2021-03-26T08:53:52.948384Z" + }, + "tags": [] + }, + "source": [ + "## Querying Index" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "photographic-incentive", + "metadata": { + "ExecuteTime": { + "end_time": "2021-10-19T12:37:57.643953Z", + "start_time": "2021-10-19T12:37:57.448265Z" + }, + "execution": { + "iopub.execute_input": "2021-03-31T07:55:53.024150Z", + "iopub.status.busy": "2021-03-31T07:55:53.023769Z", + "iopub.status.idle": "2021-03-31T07:55:53.213976Z", + "shell.execute_reply": "2021-03-31T07:55:53.212880Z", + "shell.execute_reply.started": "2021-03-31T07:55:53.024121Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34mcolumns\u001b[m\u001b[m graph.dbg.succ\r\n", + "columns.txt graph.dbg.succ_boundary\r\n", + "graph.dbg 
graph.relax.row_diff_brwt.annodbg\r\n", + "graph.dbg.anchors graph.row_diff_brwt.annodbg\r\n", + "graph.dbg.pred graph.row_diff_brwt.annodbg.linkage\r\n", + "graph.dbg.pred_boundary \u001b[34mlogs\u001b[m\u001b[m\r\n", + "graph.dbg.rd_succ \u001b[34mrd_cols\u001b[m\u001b[m\r\n" + ] + } + ], + "source": [ + "!ls {output_dir}" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "neither-balance", + "metadata": { + "ExecuteTime": { + "end_time": "2021-10-19T12:37:57.678758Z", + "start_time": "2021-10-19T12:37:57.646744Z" + }, + "execution": { + "iopub.execute_input": "2021-03-31T07:55:53.216106Z", + "iopub.status.busy": "2021-03-31T07:55:53.215739Z", + "iopub.status.idle": "2021-03-31T07:55:53.268315Z", + "shell.execute_reply": "2021-03-31T07:55:53.267389Z", + "shell.execute_reply.started": "2021-03-31T07:55:53.216043Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "graph_path = output_dir / 'graph.dbg'\n", + "annotations = output_dir / 'graph.relax.row_diff_brwt.annodbg'" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "herbal-portugal", + "metadata": { + "ExecuteTime": { + "end_time": "2021-10-19T12:37:57.735200Z", + "start_time": "2021-10-19T12:37:57.681257Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "<function __main__.kill_server()>" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# some \"magic\" to start a metagraph server from the command line\n", + "import asyncio\n", + "import atexit\n", + "\n", + "s = asyncio.create_subprocess_shell(f\"metagraph server_query -i {graph_path} -a {annotations}\")\n", + "process = asyncio.run(s)\n", + "\n", + "def kill_server():\n", + " process.kill()\n", + "\n", + "atexit.register(kill_server) # shut down the server when the Jupyter kernel exits" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "heated-edmonton", + "metadata": { + "ExecuteTime": { + "end_time": "2021-10-19T12:37:58.754758Z", + "start_time":
"2021-10-19T12:37:57.739594Z" + }, + "execution": { + "iopub.execute_input": "2021-03-31T07:55:53.331447Z", + "iopub.status.busy": "2021-03-31T07:55:53.331031Z", + "iopub.status.idle": "2021-03-31T07:55:53.576023Z", + "shell.execute_reply": "2021-03-31T07:55:53.572471Z", + "shell.execute_reply.started": "2021-03-31T07:55:53.331400Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# https://metagraph.ethz.ch/static/docs/api.html#install-api\n", + "from metagraph import client\n", + "cl = client.GraphClient('localhost', '5555')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "sunset-tours", + "metadata": { + "ExecuteTime": { + "end_time": "2021-10-19T12:37:58.792925Z", + "start_time": "2021-10-19T12:37:58.757139Z" + }, + "execution": { + "iopub.status.busy": "2021-03-31T07:55:53.581790Z", + "iopub.status.idle": "2021-03-31T07:55:53.582293Z", + "shell.execute_reply": "2021-03-31T07:55:53.582052Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "example_seq = 'ACCACCCAAGACTGTAATTGTTCCATCTACACAGGACATATCACAGGACACAGAATGGCTTGGGACATGATGATGAATTGGAGCCCCACCGCGACGCTGGTCCTCGCCCAACT'" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "entitled-watershed", + "metadata": { + "ExecuteTime": { + "end_time": "2021-10-19T12:37:58.895090Z", + "start_time": "2021-10-19T12:37:58.795984Z" + }, + "execution": { + "iopub.status.busy": "2021-03-31T07:55:53.586151Z", + "iopub.status.idle": "2021-03-31T07:55:53.587415Z", + "shell.execute_reply": "2021-03-31T07:55:53.586566Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [
+ "<div>\n", + "<style scoped>\n", + "    .dataframe tbody tr th:only-of-type {\n", + "        vertical-align: middle;\n", + "    }\n", + "\n", + "    .dataframe tbody tr th {\n", + "        vertical-align: top;\n", + "    }\n", + "\n", + "    .dataframe thead th {\n", + "        text-align: right;\n", + "    }\n", + "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n", + "  <thead>\n", + "    <tr style=\"text-align: right;\">\n", + "      <th></th>\n", + "      <th>kmer_count</th>\n", + "      <th>sample</th>\n", + "      <th>seq_description</th>\n", + "    </tr>\n", + "  </thead>\n", + "  <tbody>\n",
+ "    <tr>\n", + "      <th>0</th>\n", + "      <td>109</td>\n", + "      <td>SRR5122826.1</td>\n", + "      <td>0</td>\n", + "    </tr>\n",
+ "    <tr>\n", + "      <th>1</th>\n", + "      <td>109</td>\n", + "      <td>SRR5122826.2</td>\n", + "      <td>0</td>\n", + "    </tr>\n",
+ "    <tr>\n", + "      <th>2</th>\n", + "      <td>109</td>\n", + "      <td>SRR5122826.3</td>\n", + "      <td>0</td>\n", + "    </tr>\n",
+ "    <tr>\n", + "      <th>3</th>\n", + "      <td>109</td>\n", + "      <td>SRR5122826.4</td>\n", + "      <td>0</td>\n", + "    </tr>\n",
+ "    <tr>\n", + "      <th>4</th>\n", + "      <td>109</td>\n", + "      <td>SRR5122826.5</td>\n", + "      <td>0</td>\n", + "    </tr>\n",
+ "    <tr>\n", + "      <th>...</th>\n", + "      <td>...</td>\n", + "      <td>...</td>\n", + "      <td>...</td>\n", + "    </tr>\n",
+ "    <tr>\n", + "      <th>95</th>\n", + "      <td>109</td>\n", + "      <td>SRR5122826.108</td>\n", + "      <td>0</td>\n", + "    </tr>\n",
+ "    <tr>\n", + "      <th>96</th>\n", + "      <td>109</td>\n", + "      <td>SRR5122826.109</td>\n", + "      <td>0</td>\n", + "    </tr>\n",
+ "    <tr>\n", + "      <th>97</th>\n", + "      <td>109</td>\n", + "      <td>SRR5122826.111</td>\n", + "      <td>0</td>\n", + "    </tr>\n",
+ "    <tr>\n", + "      <th>98</th>\n", + "      <td>109</td>\n", + "      <td>SRR5122826.112</td>\n", + "      <td>0</td>\n", + "    </tr>\n",
+ "    <tr>\n", + "      <th>99</th>\n", + "      <td>109</td>\n", + "      <td>SRR5122826.113</td>\n", + "      <td>0</td>\n", + "    </tr>\n",
+ "  </tbody>\n", + "</table>\n", + "<p>100 rows × 3 columns</p>\n", + "</div>"
+ ], + "text/plain": [ + " kmer_count sample seq_description\n", + "0 109 SRR5122826.1 0\n", + "1 109 SRR5122826.2 0\n", + "2 109 SRR5122826.3 0\n", + "3 109 SRR5122826.4 0\n", + "4 109 SRR5122826.5 0\n", + ".. ... ... ...\n", + "95 109 SRR5122826.108 0\n", + "96 109 SRR5122826.109 0\n", + "97 109 SRR5122826.111 0\n", + "98 109 SRR5122826.112 0\n", + "99 109 SRR5122826.113 0\n", + "\n", + "[100 rows x 3 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# we should find the sequence back!\n", + "cl.search([example_seq])" + ] + } + ], + "metadata": { + "hide_input": false, + "kernelspec": { + "display_name": "Python (metagraph-workflows)", + "language": "python", + "name": "metagraph-workflows" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/metagraph/workflows/requirements.txt b/metagraph/workflows/requirements.txt new file mode 100644 index 0000000000..e451cad2a6 --- /dev/null +++ b/metagraph/workflows/requirements.txt @@ -0,0 +1 @@ +snakemake>=5 diff --git a/metagraph/workflows/setup.cfg b/metagraph/workflows/setup.cfg new file mode 100644 index 0000000000..254a0ed4f6 --- /dev/null +++ b/metagraph/workflows/setup.cfg @@ -0,0 +1,27 @@ +[bumpversion] +current_version = 0.1.0 +commit = True +tag = True + +[bumpversion:file:setup.py] +search = version='{current_version}' +replace = version='{new_version}' +
+[bumpversion:file:metagraph_workflows/__init__.py] +search = __version__ = '{current_version}' +replace = __version__ = '{new_version}' + +[bdist_wheel] +universal = 1 + +[flake8] +exclude = docs + +[aliases] +# Define setup.py command aliases here +test = pytest + +[tool:pytest] +collect_ignore = ['setup.py'] + + diff --git a/metagraph/workflows/setup.py b/metagraph/workflows/setup.py new file mode 100644 index 0000000000..e3c4cc1c3f --- /dev/null +++ b/metagraph/workflows/setup.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""The setup script.""" + +from setuptools import setup, find_packages + +with open('README.rst') as readme_file: + readme = readme_file.read() + +setup_requirements = ['pytest-runner'] + +with open('requirements.txt') as f: + requirements = list(f.readlines()) + +test_requirements = ['pytest'] + +setup( + author="Marc Zimmermann", + author_email='marc.zimmermann@inf.ethz.ch', + maintainer="Mikhail Karasikov", + maintainer_email='mikhaika@inf.ethz.ch', + classifiers=[ + 'Development Status :: 2 - Pre-Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Natural Language :: English', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + ], + description="Metagraph workflows", + entry_points={ + 'console_scripts': [ + 'metagraph-workflows=metagraph_workflows.cli:main' + ], + }, + install_requires=requirements, + license="MIT license", + long_description=readme, + include_package_data=True, + keywords='metagraph_workflows', + name='metagraph_workflows', + packages=find_packages(include=['metagraph_workflows']), + setup_requires=setup_requirements, + test_suite='tests', + tests_require=test_requirements, + url='https://github.com/ratschlab/metagraph', + version='0.1.0', + zip_safe=False, +) diff --git a/metagraph/workflows/snakemake b/metagraph/workflows/snakemake new 
# -*- coding: utf-8 -*-

"""Test package for metagraph_workflows."""

# ---------------------------------------------------------------------------
# metagraph/workflows/tests/test_build_cli.py
# ---------------------------------------------------------------------------
import os
import subprocess
import sys
from itertools import product
from pathlib import Path

import pytest

import metagraph_workflows
from metagraph_workflows import cli, utils
from metagraph_workflows.workflow_configs import (
    AnnotationFormats,
    AnnotationLabelsSource,
)

# The `snakemake` directory located next to the package's __init__ module.
WORKFLOW_ROOT = Path(metagraph_workflows.__file__).parent / 'snakemake'


def run_wrapper(args_list):
    """Run ``python -m metagraph_workflows.cli`` with *args_list* in a subprocess.

    Every argument is converted with ``str`` before being passed on. The
    process runs with the parent directory of this tests directory as its
    working directory, and stderr is merged into stdout.

    Returns the ``subprocess.CompletedProcess``; its ``stdout`` holds the
    combined output as bytes and its ``stderr`` is always ``None``.
    """
    code_base = Path(os.path.realpath(__file__)).parent.parent

    # Use the interpreter that is running the tests instead of whatever
    # `python` happens to resolve to on PATH (which may not exist at all or
    # may lack the installed package).
    process_args = [sys.executable, '-m', 'metagraph_workflows.cli'] + args_list

    return subprocess.run([str(a) for a in process_args],
                          cwd=code_base,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT)


@pytest.fixture
def output_dir(tmpdir):
    """Path of a (not yet created) ``output`` entry inside the pytest tmpdir."""
    return tmpdir / 'output'


@pytest.fixture
def sample_list_path(tmpdir):
    """Write the transcript path list for the bundled test data and return its path."""
    list_path = tmpdir / 'transcript_paths.txt'
    utils.create_transcript_path_list(WORKFLOW_ROOT / 'test_data', list_path)
    return list_path


@pytest.mark.parametrize(
    'primary,annotation_format,annotation_label_src',
    list(product([False],
                 [AnnotationFormats.ROW_DIFF_BRWT],
                 [AnnotationLabelsSource.SEQUENCE_HEADERS])) +
    list(product([False, True],
                 AnnotationFormats,
                 [AnnotationLabelsSource.SEQUENCE_FILE_NAMES])))
def test_build_workflow(primary, annotation_format, annotation_label_src,
                        sample_list_path, output_dir):
    """The `build` sub-command succeeds and populates the output directory."""
    base_args = ['build',
                 '--seqs-file-list-path', sample_list_path,
                 '-k', 5,
                 '--annotation-format', annotation_format.value,
                 '--annotation-labels-source', annotation_label_src.value]

    base_args += ['--build-primary-graph'] if primary else []

    ret = run_wrapper(base_args + [output_dir])

    # run_wrapper merges stderr into stdout, so `ret.stderr` is always None;
    # report the combined output in the assertion message instead.
    output = ret.stdout.decode(errors='replace')
    assert ret.returncode == 0, "Workflow test was not successful:\n" + output

    assert len(output_dir.listdir()) > 1


def test_workflow_invocation_via_python(sample_list_path, output_dir):
    """Calling ``cli.run_build_workflow`` directly returns ``None``."""
    assert cli.run_build_workflow(output_dir, seqs_file_list_path=sample_list_path) is None


def test_workflow_invocation_additional_args(sample_list_path, output_dir):
    """Additional snakemake arguments are forwarded by the CLI."""
    base_args = ['build',
                 '--seqs-file-list-path', sample_list_path,
                 '-k', 5,
                 '--additional-snakemake-args="summary=True"']

    proc = run_wrapper(base_args + [output_dir])

    assert proc.returncode == 0
    assert 'missing\tupdate' in proc.stdout.decode('UTF-8')
    assert not output_dir.exists()  # workflow should not run in snakemake 'summary' mode


# ---------------------------------------------------------------------------
# metagraph/workflows/tests/test_resource_management.py
# ---------------------------------------------------------------------------
import math

import pytest

from metagraph_workflows import resource_management as rm
from metagraph_workflows import workflow_configs


@pytest.fixture()
def config():
    """Minimal workflow configuration with memory and buffer size limits."""
    return {
        workflow_configs.MAX_MEMORY_MB: 16000,
        workflow_configs.MAX_BUFFER_SIZE_MB: 50000
    }


def test_TransformRdStage1Resources(config):
    """Memory settings of ``TransformRdStage1Resources`` follow the config."""
    rule_name = 'transform_rd_stage1'
    inst = rm.TransformRdStage1Resources(config)

    # by default get max available memory
    assert inst.get_mem()(None, None, None) == 16000

    # now explicitly setting available memory for the rule
    mem = 8000
    config['rules'] = {rule_name: {'mem_mb': mem}}
    assert inst.get_mem()(None, None, None) == mem

    # without an explicit buffer setting, 80% of the rule memory is expected,
    # rounded up to whole GiB
    resources = {'mem_mb': mem}
    assert inst.get_mem_buffer_gib()(None, None, None, resources) == int(math.ceil(0.8 * mem / 1024))

    # now additionally setting mem cap explicitly
    mem_buffer = 2048
    config['rules'][rule_name]['mem_buffer_mb'] = mem_buffer
    assert inst.get_mem()(None, None, None) == mem
    assert inst.get_mem_buffer_gib()(None, None, None, resources) == int(math.ceil(mem_buffer / 1024))


# ---------------------------------------------------------------------------
# metagraph/workflows/tests/test_utils.py
# ---------------------------------------------------------------------------
import pytest

import metagraph_workflows.utils


@pytest.mark.parametrize("case, expected",
    [
        ('/my/path/sample.fasta', 'sample'),
        ('/my/path/sample.fasta.gz', 'sample'),
        ('/my/path/sample.txt', 'sample'),
        ('/my/path/sample', 'sample'),
        ('/my/path/sample/', 'sample'),
    ]
)
def test_get_sample_name(case, expected):
    """``get_sample_name`` yields ``'sample'`` for each of the listed paths."""
    assert metagraph_workflows.utils.get_sample_name(case) == expected


# ---------------------------------------------------------------------------
# metagraph/workflows/tests/test_workflows.py
# ---------------------------------------------------------------------------
from metagraph_workflows import cli


def test_parse_additional_snakemake_args():
    """Additional snakemake args are parsed into a dict of typed values."""
    assert cli._parse_additional_snakemake_args('') == {}
    assert cli._parse_additional_snakemake_args('dryrun=1') == {
        'dryrun': True}

    assert cli._parse_additional_snakemake_args(
        'some_param="hello world" another=123') == {
        'some_param': 'hello world', 'another': 123}