diff --git a/beacon/conf.py b/beacon/conf.py index b8f2e876..16a8771a 100644 --- a/beacon/conf.py +++ b/beacon/conf.py @@ -9,8 +9,8 @@ # # Beacon general info # -beacon_id = 'org.ega-archive.ga4gh-approval-beacon-test' # ID of the Beacon -beacon_name = 'GA4GH Approval Beacon Test' # Name of the Beacon service +beacon_id = 'org.ega-archive.beacon-ri-demo' # ID of the Beacon +beacon_name = 'Beacon Reference Implementation demo' # Name of the Beacon service api_version = 'v2.0.0' # Version of the Beacon implementation uri = 'https://beacon-apis-demo.ega-archive.org/api/' diff --git a/beacon/db/cohorts.py b/beacon/db/cohorts.py index d07d7fa0..37b72d79 100644 --- a/beacon/db/cohorts.py +++ b/beacon/db/cohorts.py @@ -82,7 +82,7 @@ def get_analyses_of_cohort(entry_id: Optional[str], qparams: RequestParams, data count = get_count(client.beacon.cohorts, query) with open("/beacon/beacon/request/cohorts.yml", 'r') as datasets_file: datasets_dict = yaml.safe_load(datasets_file) - cohort_ids=get_cross_query(datasets_dict[entry_id],'biosampleIds','id') + cohort_ids=get_cross_query(datasets_dict[entry_id],'biosampleIds','biosampleId') query = apply_filters(cohort_ids, qparams.query.filters, collection, {}) schema = DefaultSchemas.ANALYSES with open("/beacon/beacon/request/datasets.yml", 'r') as datasets_file: diff --git a/deploy/Makefile b/deploy/Makefile index 40072f42..a7326b38 100644 --- a/deploy/Makefile +++ b/deploy/Makefile @@ -1,29 +1,43 @@ build: - docker-compose up -d --build + docker-compose up -d --build beacon permissions db sleep 10 - docker exec deploy-db-1 /bin/bash -c 'mongo beacon -u root -p example --authenticationDatabase admin --eval "db.filtering_terms.deleteMany({})"' - docker exec deploy-db-1 /bin/bash -c 'mongo beacon -u root -p example --authenticationDatabase admin --eval "db.analyses.deleteMany({})"' - docker exec deploy-db-1 /bin/bash -c 'mongo beacon -u root -p example --authenticationDatabase admin --eval "db.biosamples.deleteMany({})"' - docker exec deploy-db-1 /bin/bash -c 'mongo beacon -u root -p example --authenticationDatabase admin --eval "db.cohorts.deleteMany({})"' - docker exec deploy-db-1 /bin/bash -c 'mongo beacon -u root -p example --authenticationDatabase admin --eval "db.datasets.deleteMany({})"' - docker exec deploy-db-1 /bin/bash -c 'mongo beacon -u root -p example --authenticationDatabase admin --eval "db.genomicVariations.deleteMany({})"' - docker exec deploy-db-1 /bin/bash -c 'mongo beacon -u root -p example --authenticationDatabase admin --eval "db.individuals.deleteMany({})"' - docker exec deploy-db-1 /bin/bash -c 'mongo beacon -u root -p example --authenticationDatabase admin --eval "db.runs.deleteMany({})"' - docker cp cancer_dataset/analyses.json deploy-db-1:tmp/analyses.json - docker cp cancer_dataset/biosamples.json deploy-db-1:tmp/biosamples.json - docker cp cancer_dataset/cohorts.json deploy-db-1:tmp/cohorts.json - docker cp cancer_dataset/datasets.json deploy-db-1:tmp/datasets.json - docker cp cancer_dataset/genomicVariations.json deploy-db-1:tmp/genomicVariations.json - docker cp cancer_dataset/individuals.json deploy-db-1:tmp/individuals.json - docker cp cancer_dataset/runs.json deploy-db-1:tmp/runs.json - docker cp cancer_dataset/filtering_terms.json deploy-db-1:tmp/filtering_terms.json - docker exec deploy-db-1 mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/datasets.json --collection datasets - docker exec deploy-db-1 mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/individuals.json --collection individuals - docker exec deploy-db-1 mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/cohorts.json --collection cohorts - docker exec deploy-db-1 mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/analyses.json --collection analyses - docker exec deploy-db-1 mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/biosamples.json --collection biosamples - docker exec deploy-db-1 mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/runs.json --collection runs - docker exec deploy-db-1 mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/genomicVariations.json --collection genomicVariations - docker exec deploy-db-1 mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/filtering_terms.json --collection filtering_terms + docker exec rimongo /bin/bash -c 'mongo beacon -u root -p example --authenticationDatabase admin --eval "db.filtering_terms.deleteMany({})"' + docker exec rimongo /bin/bash -c 'mongo beacon -u root -p example --authenticationDatabase admin --eval "db.analyses.deleteMany({})"' + docker exec rimongo /bin/bash -c 'mongo beacon -u root -p example --authenticationDatabase admin --eval "db.biosamples.deleteMany({})"' + docker exec rimongo /bin/bash -c 'mongo beacon -u root -p example --authenticationDatabase admin --eval "db.cohorts.deleteMany({})"' + docker exec rimongo /bin/bash -c 'mongo beacon -u root -p example --authenticationDatabase admin --eval "db.datasets.deleteMany({})"' + docker exec rimongo /bin/bash -c 'mongo beacon -u root -p example --authenticationDatabase admin --eval "db.genomicVariations.deleteMany({})"' + docker exec rimongo /bin/bash -c 'mongo beacon -u root -p example --authenticationDatabase admin --eval "db.individuals.deleteMany({})"' + docker exec rimongo /bin/bash -c 'mongo beacon -u root -p example --authenticationDatabase admin --eval "db.runs.deleteMany({})"' + docker cp /data/vault/bio-scratch/arnau/beacon/beacon2-ri-tools-v2_test_anot/analyses.json rimongo:tmp/analyses.json + docker cp /data/vault/bio-scratch/arnau/beacon/beacon2-ri-tools-v2_test_anot/biosamples.json rimongo:tmp/biosamples.json + docker cp /data/vault/bio-scratch/arnau/beacon/beacon2-ri-tools-v2_test_anot/cohorts.json rimongo:tmp/cohorts.json + docker cp /data/vault/bio-scratch/arnau/beacon/beacon2-ri-tools-v2_test_anot/datasets.json rimongo:tmp/datasets.json + docker cp /data/vault/bio-scratch/arnau/beacon/beacon2-ri-tools-v2_test_anot/genomicVariations.json rimongo:tmp/genomicVariations.json + docker cp /data/vault/bio-scratch/arnau/beacon/beacon2-ri-tools-v2_test_anot/individuals.json rimongo:tmp/individuals.json + docker cp /data/vault/bio-scratch/arnau/beacon/beacon2-ri-tools-v2_test_anot/runs.json rimongo:tmp/runs.json + docker cp /data/vault/bio-scratch/arnau/beacon/beacon2-ri-tools-v2_test_anot/filtering_terms.json rimongo:tmp/filtering_terms.json + docker exec rimongo mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/datasets.json --collection datasets + docker exec rimongo mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/individuals.json --collection individuals + docker exec rimongo mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/cohorts.json --collection cohorts + docker exec rimongo mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/analyses.json --collection analyses + docker exec rimongo mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/biosamples.json --collection biosamples + docker exec rimongo mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/runs.json --collection runs + docker exec rimongo mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/genomicVariations.json --collection genomicVariations + docker exec rimongo mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/filtering_terms.json --collection filtering_terms + docker cp data/analyses.json rimongo:tmp/analyses.json + docker cp data/biosamples.json rimongo:tmp/biosamples.json + docker cp data/cohorts.json rimongo:tmp/cohorts.json + docker cp data/datasets.json rimongo:tmp/datasets.json + docker cp data/genomicVariations.json rimongo:tmp/genomicVariations.json + docker cp data/individuals.json rimongo:tmp/individuals.json + docker cp data/runs.json rimongo:tmp/runs.json + docker exec rimongo mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/datasets.json --collection datasets + docker exec rimongo mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/individuals.json --collection individuals + docker exec rimongo mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/cohorts.json --collection cohorts + docker exec rimongo mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/analyses.json --collection analyses + docker exec rimongo mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/biosamples.json --collection biosamples + docker exec rimongo mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/runs.json --collection runs + docker exec rimongo mongoimport --jsonArray --uri "mongodb://root:example@127.0.0.1:27017/beacon?authSource=admin" --file /tmp/genomicVariations.json --collection genomicVariations docker exec beacon python beacon/reindex.py docker exec beacon python beacon/db/extract_filtering_terms.py \ No newline at end of file diff --git a/deploy/conf.py b/deploy/conf.py index 448fd20f..b89be94e 100644 --- a/deploy/conf.py +++ b/deploy/conf.py @@ -3,8 +3,8 @@ # # Beacon general info # -beacon_id = 'org.ega-archive.ga4gh-approval-beacon-test' # ID of the Beacon -beacon_name = 'GA4GH Approval Beacon Test' # Name of the Beacon service +beacon_id = 'org.ega-archive.beacon-ri-demo' # ID of the Beacon +beacon_name = 'Beacon Reference Implementation demo' # Name of the Beacon service api_version = 'v2.0.0' # Version of the Beacon implementation uri = 'https://beacon-apis-demo.ega-archive.org/api/' diff --git a/deploy/data/cohorts_av.json b/deploy/data/cohorts_av.json deleted file mode 100644 index bab8368a..00000000 --- a/deploy/data/cohorts_av.json +++ /dev/null @@ -1,115 +0,0 @@ -[ - { - "cohortType": "study-defined", - "collectionEvents": [ - { - "eventDiseases": { - "availability": true, - "availabilityCount": 1700, - "distribution": { - "diseases": { - "acutebronchitis": 121, - "agranulocytosis": 111, - "asthma": 134, - "bipolaraffectivedisorder": 134, - "cardiomyopathy": 133, - "dentalcaries": 135, - "eatingdisorders": 134, - "fibrosisandcirrhosisofliver": 132, - "gastro-oesophagealrefluxdisease": 140, - "haemorrhoids": 127, - "influenzaduetocertainidentifiedinfluenzavirus": 135, - "insulin-dependentdiabetesmellitus": 165, - "irondeficiencyanaemia": 142, - "multiplesclerosis": 125, - "obesity": 136, - "sarcoidosis": 136, - "schizophrenia": 138, - "thyroiditis": 141, - "varicoseveinsoflowerextremities": 139 - } - } - }, - "eventEthnicities": { - "availability": true, - "availabilityCount": 54, - "distribution": { - "ethnicities": { - "African": 3, - "AnyotherAsianbackground": 4, - "AnyotherBlackbackground": 4, - "Anyothermixedbackground": 2, - "Anyotherwhitebackground": 2, - "AsianorAsianBritish": 4, - "Bangladeshi": 3, - "BlackorBlackBritish": 2, - "British": 4, - "Caribbean": 2, - "Chinese": 4, - "Indian": 2, - "Irish": 3, - "Mixed": 2, - "Otherethnicgroup": 4, - "Pakistani": 2, - "White": 2, - "WhiteandAsian": 3, - "WhiteandBlackAfrican": 3, - "WhiteandBlackCaribbean": 1 - } - } - }, - "eventGenders": { - "availability": true, - "availabilityCount": 3, - "distribution": { - "genders": { - "female": 2, - "male": 1 - } - } - }, - "eventLocations": { - "availability": true, - "availabilityCount": 1597, - "distribution": { - "locations": { - "England": 2, - "NorthernIreland": 2, - "RepublicofIreland": 3, - "Scotland": 4, - "Wales": 5 - } - } - } - } - ], - "id": "AV_synthetic_cohort", - "inclusionCriteria": { - "ageRange": { - "end": { - "iso8601duration": "P65Y" - }, - "start": { - "iso8601duration": "P18Y" - } - }, - "genders": [ - { - "id": "NCIT:C16576", - "label": "female" - }, - { - "id": "NCIT:C20197", - "label": "male" - } - ], - "locations": [ - { - "id": "GAZ:00150372", - "label": "UK" - } - ] - }, - "name": "AV synthetic cohort" - } -] \ No newline at end of file diff --git a/deploy/data/datasets_av.json b/deploy/data/datasets_av.json deleted file mode 100644 index 54aa9ae7..00000000 --- a/deploy/data/datasets_av.json +++ /dev/null @@ -1,79 +0,0 @@ -[ -{ - "createDateTime" : "2021-12-29T20:33:40Z", - "dataUseConditions" : { - "duoDataUse" : [ - { - "id" : "DUO:0000019", - "label" : "publication required", - "version" : "2019-01-07" - }, - { - "id" : "DUO:0000042", - "label" : "general research use", - "version" : "2019-01-07" - }, - { - "id" : "DUO:0000026", - "label" : "user specific restriction", - "version" : "2019-01-07" - }, - { - "id" : "DUO:0000028", - "label" : "institution specific restriction", - "version" : "2019-01-07" - } - ] - }, - "description" : "Please note: This synthetic data set (with cohort participants / subjects marked with FAKE) has no identifiable data and cannot be used to make any inference about cohort data or results. The purpose of this dataset is to aid development of technical implementations for cohort data discovery, harmonization, access, and federated analysis. In support of FAIRness in data sharing, this dataset is made freely available under the Creative Commons Licence (CC-BY). Please ensure this preamble is included with this dataset and that the CINECA project (funding: EC H2020 grant 825775) is acknowledged. For any questions please contact isuru@ebi.ac.uk or cthomas@ebi.ac.uk This dataset (CINECA_synthetic_cohort_EUROPE_UK1) consists of 2521 samples which have genetic data based on 1000 Genomes data (https://www.nature.com/articles/nature15393), and synthetic subject attributes and phenotypic data derived from UKBiobank (https://journals.plos.org/plosmedicine/article?id=10.1371/journal.pmed.1001779). These data were initially derived using the TOFU tool (https://github.com/spiros/tofu), which generates randomly generated values based on the UKBiobank data dictionary. Categorical values were randomly generated based on the data dictionary, continuous variables generated based on the distribution of values reported by the UK Biobank showcase, and date / time values were random. Additionally we split the phenotypes and attributes into 4 main classes - general, cancer, diabetes mellitus, and cardiac. We assigned the general attributes to all the samples, and the cardiac / diabetes mellitus / cancer attributes to a proportion of the total samples. Once the initial set of phenotypes and attributes were generated, the data data was checked for consistency and where possible dependent attributes were calculated from the independent variables generated by TOFU. For example, BMI was calculated from height and weight data, and age at death generated by date of death and date of birth. These data were then loaded to the development instance of Biosamples (https://www.ebi.ac.uk/biosamples/) which accessioned each of the samples. The genetic data are derived from the 1000 Genomes Phase 3 release (https://www.internationalgenome.org/category/phase-3/). The genotype data consists of a single joint call vcf files with call genotypes for all 2504 samples, plus bed, bim, fam, and nosex files generated via plink for these samples and genotypes. The genotype data has had a variety of errors introduced to mimic real data and as a test for quality control pipelines. These include gender mismatches, ethnic background mislabelling and low call rates for a randomly chosen subset of sample data as well as deviations from Hardy Weinberg equilibrium and low call rates for a random selection of variants. Additionally 40 samples have raw genetic data available in the form of both bam and cram files, including unmapped data. The gender of the samples in the 1000 genomes data has been matched to the synthetic phenotypic data generated for these samples. The genetic data was then linked to the synthetic data in BioSamples, and submitted to EGA.", - "externalUrl" : "https://ega-archive.org/datasets/EGAD00001006673/", - "id" : "AV_Dataset", - "info" : { - "beacon" : { - "contact" : "manuel.rueda@crg.eu", - "mapping" : "Manuel Rueda", - "version" : "v2.0" - }, - "dataset" : { - "derived" : [ - { - "EGA" : { - "contact" : "helpdesk@ega-archive.org", - "externalUrl" : "https://ega-archive.org/datasets/EGAD00001006673", - "license" : { - "$ref" : "#/dataUseConditions/duoDataUse" - } - } - }, - { - "BioSamples" : { - "contact" : "biosamples@ebi.ac.uk", - "externalUrl" : "https://www.ebi.ac.uk/biosamples", - "license" : "Creative Commons Licence (CC-BY)" - } - } - ], - "origin" : [ - { - "CINECAprojectEU" : { - "contact" : "cthomas@ebi.ac.uk", - "externalUrl" : "https://www.cineca-project.eu/cineca-synthetic-dataset", - "license" : "Creative Commons Licence (CC-BY)", - "managers" : " Coline Thomas, Isuru Liyanage and Dylan Spalding" - } - }, - { - "1000Genomes" : { - "externalUrl" : "https://www.internationalgenome.org/category/phase-3", - "license" : "CC BY-NC-SA 3.0", - "version" : "v5a phase 3 VCF" - } - } - ] - } - }, - "name" : "AV_Dataset", - "updateDateTime" : "2021-12-29T20:33:40Z", - "version" : "v1.0" -} -] diff --git a/deploy/data/genomicVariations2.json b/deploy/data/genomicVariations2.json deleted file mode 100644 index e5ff127b..00000000 --- a/deploy/data/genomicVariations2.json +++ /dev/null @@ -1,28 +0,0 @@ -[{ - "caseLevelData": [{"biosampleId": "HG03771", "zygosity": {"id": "GENO:GENO_0000458", "label": "0/1"}}], - "identifiers": { - "genomicHGVSId": "NC_000022.22:g.76T>TC" - }, - "variantInternalId": "15128712-ca70-11ee-bc69-0242ac170002:T:TC", - "variation": { - "location": { - "type": "SequenceLocation", - "sequence_id": "HGVSid:22:g.76T>TC", - "interval": { - "type": "SequenceInterval", - "start": { - "type": "Number", - "value": 76 - }, - "end": { - "type": "Number", - "value": 98 - } - } - }, - "alternateBases": "TC", - "referenceBases": "T", - "variantType": "SNP" - } - } -] diff --git a/deploy/data/genomicVariations_av.json b/deploy/data/genomicVariations_av.json deleted file mode 100644 index 011e63a8..00000000 --- a/deploy/data/genomicVariations_av.json +++ /dev/null @@ -1,51 +0,0 @@ -[ - { - "caseLevelData": [ - { - "biosampleId": "AV0101", - "zygosity": { - "id": "GENO:GENO_0000458", - "label": "0/1" - } - } - ], - "identifiers": { - "genomicHGVSId": "NC_000022.11:g.16050075A>G" - }, - "molecularAttributes": { - "aminoacidChanges": [ - "." - ], - "geneIds": [ - "CHR_START-DUXAP8" - ], - "molecularEffects": [ - { - "id": "ENSGLOSSARY:0000174", - "label": "intergenic_region" - } - ] - }, - "variantInternalId": "da5a95e4-bc26-11ee-b6b0-0242ac170002:A:G", - "variation": { - "location": { - "type": "SequenceLocation", - "sequence_id": "HGVSid:22:g.16050075A>G", - "interval": { - "type": "SequenceInterval", - "start": { - "type": "Number", - "value": 16050074 - }, - "end": { - "type": "Number", - "value": 16050075 - } - } - }, - "alternateBases": "A", - "referenceBases": "G", - "variantType": "SNP" - } - } -] \ No newline at end of file diff --git a/deploy/data/individuals_av.json b/deploy/data/individuals_av.json deleted file mode 100644 index 263c434e..00000000 --- a/deploy/data/individuals_av.json +++ /dev/null @@ -1,65 +0,0 @@ -[ - { - "ethnicity": { - "id": "NCIT:C42331", - "label": "African" - }, - "id": "AV0101", - "interventionsOrProcedures": [ - { - "procedureCode": { - "id": "OPCS4:L46.3", - "label": "OPCS(v4-0.0):Ligation of visceral branch of abdominal aorta NEC" - } - } - ], - "measures": [ - { - "assayCode": { - "id": "LOINC:35925-4", - "label": "BMI" - }, - "date": "2021-09-24", - "measurementValue": { - "unit": { - "id": "NCIT:C49671", - "label": "Kilogram per Square Meter" - }, - "value": 26.63838307 - } - }, - { - "assayCode": { - "id": "LOINC:3141-9", - "label": "Weight" - }, - "date": "2021-09-24", - "measurementValue": { - "unit": { - "id": "NCIT:C28252", - "label": "Kilogram" - }, - "value": 85.6358 - } - }, - { - "assayCode": { - "id": "LOINC:8308-9", - "label": "Height-standing" - }, - "date": "2021-09-24", - "measurementValue": { - "unit": { - "id": "NCIT:C49668", - "label": "Centimeter" - }, - "value": 179.2973 - } - } - ], - "sex": { - "id": "NCIT:C20197", - "label": "male" - } - } -] \ No newline at end of file