From d769b0dfbad96068730bde51a4b5e77a5fcd1beb Mon Sep 17 00:00:00 2001 From: Miro Cupak Date: Mon, 5 Dec 2016 16:29:44 -0500 Subject: [PATCH 1/2] Added first version of consent codes model from beacon. --- .gitignore | 75 +++++++ .travis.yml | 13 ++ CONTRIBUTING.md | 89 ++++++++ LICENSE | 1 + README.md | 19 ++ pom.xml | 252 +++++++++++++++++++++++ requirements.txt | 6 + src/main/resources/avro/consentcode.avdl | 88 ++++++++ tests/compile_schemas.py | 126 ++++++++++++ tests/test_maven.py | 31 +++ tests/test_protocol.py | 62 ++++++ tests/utils.py | 129 ++++++++++++ 12 files changed, 891 insertions(+) create mode 100644 .gitignore create mode 100644 .travis.yml create mode 100644 CONTRIBUTING.md create mode 100644 README.md create mode 100644 pom.xml create mode 100644 requirements.txt create mode 100644 src/main/resources/avro/consentcode.avdl create mode 100644 tests/compile_schemas.py create mode 100644 tests/test_maven.py create mode 100644 tests/test_protocol.py create mode 100644 tests/utils.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fded6d9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,75 @@ +*.py[cod] +target +*~ +#* +doc/source/schemas/*.avpr +build + +#********** windows template********** + +# Windows image file caches +Thumbs.db + +# Folder config file +Desktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + + +#********** osx template********** + +.DS_Store + +# Thumbnails +._* + +# Files that might appear on external disk +.Spotlight-V100 +.Trashes + + +#********** linux template********** + +.* +!.gitignore +*~ + +# KDE +.directory + + +#********** emacs template********** + +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +.elc +auto-save-list +tramp +.\#* + +# Org-mode +.org-id-locations +*_archive + + +#********** vim template********** + +.*.sw[a-z] +*.un~ +Session.vim + + +#********** maven template********** + +target/ + +#********** Travis CI ********** + +!.travis.yml + +#********** IntelliJ files ****** 
+*.iml + diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..57d6378 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,13 @@ +language: python +sudo: true +python: + - 2.7 +branches: + only: + - master + - develop +install: + - pip install -r requirements.txt +script: + - flake8 tests + - nosetests -v tests diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..39ff347 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,89 @@ +# How to contribute + +Thank you for taking the time to contribute. We appreciate it! + +There are two ways to contribute - via issues, which are used for discussion, and pull requests, which are concrete proposals of change. + +## Issues + +The project's [Issues Page](https://github.com/ga4gh/ga4gh-consent-policy/issues) is a forum to discuss both major and minor issues. It also serves as the means for collaborating with the group and discussing contributions that will ultimately lead to changes to the API. See the [Issue](#issue_resolution) section below for specifics on how issues are resolved by the community. + +## Pull Requests + +The way to contribute development effort and code to the project is via GitHub pull requests. GitHub provides a nice [overview on how to create a pull request](https://help.github.com/articles/creating-a-pull-request). + +Some general rules to follow: + +- [Fork](https://help.github.com/articles/fork-a-repo) the main project into your personal GitHub space to work on. +- Create a branch for each update that you're working on. These branches are often called "feature" or "topic" branches. Any changes that you push to your feature branch will automatically be shown in the pull request. +- Keep your pull requests as small as possible. Large pull requests are hard to review. Try to break up your changes into self-contained and incremental pull requests. 
+- The first line of commit messages should be a short (<80 character) summary, followed by an empty line and then any details that you want to share about the commit. +- Please try to follow the [existing syntax style](#syntax_style). + +## Issue Resolution + +Once a pull request or issue has been submitted, anyone can comment or vote on it to express their opinion following the Apache voting system. Quick summary: + +- **+1** something you agree with +- **-1** if you have a strong objection to an issue, which will be taken very seriously. A -1 vote should provide an alternative solution. +- **+0** or **-0** for neutral comments or weak opinions. +- It's okay to have input without voting. +- Silence gives assent. + +A pull request with at least two **+1** votes, no **-1** votes, that has been open for at least 3 days is ready to be merged. We sometimes waive the 3 days for cosmetic-only changes -- use good judgment. If an issue gets any **-1** votes, the comments on the issue need to reach consensus before the issue can be resolved one way or the other. There isn't any strict time limit on a contentious issue. + +The project will strive for full consensus on everything until it runs into a problem with this model. + +## Syntax Style and Conventions + +The current code conventions for the source files are as follows: + +- Use two-space indentation, and no tabs. +- Hard-wrap code to 80 characters per line. +- Use `UpperCamelCase` for object or record names. +- Use `lowerCamelCase` for attribute or method names. +- Use `CONSTANT_CASE` for global and constant values. +- Comments: + - Comments should be indented at the same level as the surrounding code. + - Comments should precede the code that they make a comment on. Documentation comments will not work otherwise. + - Documentation comments, which are intended to be processed by avrodoc and displayed in the user-facing API documentation, must use the `/** ... 
*/` style, and must not have a leading `*` on each internal line: + + /** + This documentation comment will be + processed correctly by avrodoc. + */ + + /** + * This documentation comment will have a + * bullet point at the start of every line + * when processed by avrodoc. + */ + + - Block and multi-line non-documentation comments, intended for schema developers only, must use the `/* ... */` style. + + /* + This multi-line comment will not appear in the + avrodoc documentation and is intended for + schema developers. + */ + + - All multi-line comments should have the comment text at the same indent level as the comment delimiters. + - One-line non-documentation comments, intended for schema developers only, must use the `// ...` style. + +## Gitflow Workflow + +Our workflow is based on [Gitflow](https://www.atlassian.com/git/tutorials/comparing-workflows/gitflow-workflow), which defines a strict branching model designed around the project release. This workflow uses two branches to record the history of the project. The master branch stores the official release history, and the develop branch serves as an integration branch for features. Aside from these two main branches, the workflow utilizes topic and release branches. + +### Topic Branches + +If you wish to collaborate on a new feature with other GA4GH members you can ask that a topic branch be created. Since GitHub does not allow pull requests against branches that do not yet exist, you will have to create an issue asking for the topic branch to be created. + +Once a topic branch exists, pull requests can be made against it in the usual way. It may also be brought up to date with new changes merged into develop by anyone with commit access, if the changes produce merely a fast-forward merge for each constituent branch. However, if changes from the develop branch create a new merge commit in one or more of the repositories, that commit needs to be reviewed in a pull request. 
+ +Changes made in a topic branch can be merged into develop by creating and then [resolving in the normal way](#issue_resolution) a pull request against the develop branch. + +Topic branches that have been merged into develop and that are no longer being developed upon should be [deleted](https://github.com/blog/1335-tidying-up-after-pull-requests) (they will still appear in the git history). + +### Release Branches + +From time to time the group will make a release. This is achieved by creating a branch named "release-foo", where foo is the release name. Only bug fixes are allowed to release branches. To refer to a specific version of a release branch either the commit id can be used, or alternatively (better), a tag can be created (which should be replicated across repositories). diff --git a/LICENSE b/LICENSE index 8dada3e..8f71f43 100644 --- a/LICENSE +++ b/LICENSE @@ -199,3 +199,4 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..a446dcb --- /dev/null +++ b/README.md @@ -0,0 +1,19 @@ +# Consent Code Schemas [![Build Status](https://travis-ci.org/ga4gh/ga4gh-consent-policy.svg?branch=develop)](https://travis-ci.org/ga4gh/ga4gh-consent-policy) [![GitHub license](https://img.shields.io/badge/license-Apache%202-blue.svg)](https://raw.githubusercontent.com/ga4gh/ga4gh-consent-policy/develop/LICENSE) + +Data model for the data use conditions based on consent codes as introduced in [Consent Codes: Upholding Standard Data Use Conditions](http://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1005772). + +## How to contribute + +Guidelines for contributing to this repository are listed in the [CONTRIBUTING.md](CONTRIBUTING.md) document. + +## How to build + +Prerequisites: Maven 3+, Java 1.6+. 
+ +To generate Java code, run `mvn package` and check the output in the `target` directory. + +## How to test + +Prerequisites: Python 2.7 (incl. pip). + +Install dependencies with `pip install -r requirements.txt`. To run the test suite, use `nosetests -v tests`. To check the test code style violations, run `flake8 tests`. diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..6ec050d --- /dev/null +++ b/pom.xml @@ -0,0 +1,252 @@ + + + 4.0.0 + org.ga4gh + consentcode + jar + 0.1.0-SNAPSHOT + + Consent Code + GA4GH consent code data models + https://github.com/ga4gh/ga4gh-consent-policy + 2015 + + Global Alliance for Genomics and Health + http://genomicsandhealth.org + + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + + + + scm:git:https://github.com/ga4gh/ga4gh-consent-policy + scm:git:https://github.com/ga4gh/ga4gh-consent-policy + https://github.com/ga4gh/ga4gh-consent-policy + + + + GA4GH developers + https://github.com/orgs/ga4gh/people + Global Alliance Data Working Group + http://ga4gh.org/ + + + + + 1.8.0 + [1.6,) + [3.0.4,) + UTF-8 + UTF-8 + + + + + + + org.apache.avro + avro-maven-plugin + ${avro.version} + + + org.apache.maven.plugins + maven-clean-plugin + 2.6.1 + + + org.apache.maven.plugins + maven-compiler-plugin + 3.3 + + + org.apache.maven.plugins + maven-deploy-plugin + 2.8.2 + + + org.apache.maven.plugins + maven-enforcer-plugin + 1.4 + + + org.apache.maven.plugins + maven-gpg-plugin + 1.6 + + + org.apache.maven.plugins + maven-install-plugin + 2.5.2 + + + org.apache.maven.plugins + maven-jar-plugin + 2.6 + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.9.1 + + true + true + true + + + + org.apache.maven.plugins + maven-release-plugin + 2.5.1 + + true + false + release + deploy + + + + org.apache.maven.plugins + maven-resources-plugin + 2.7 + + + org.apache.maven.plugins + maven-source-plugin + 2.4 + + + org.apache.maven.plugins + maven-surefire-plugin + 2.18.1 + + + + + + 
org.apache.avro + avro-maven-plugin + + + schemas + generate-sources + + schema + protocol + idl-protocol + + + ${project.basedir}/src/main/resources/avro + String + + + + + + org.apache.maven.plugins + maven-enforcer-plugin + + + enforce-java + + enforce + + + + + ${maven.enforcer.maven-version} + + + ${maven.enforcer.jdk-version} + + + + + + + + + + + + org.apache.avro + avro + ${avro.version} + + + org.apache.avro + avro-ipc + ${avro.version} + + + + + + org.apache.avro + avro + compile + + + org.apache.avro + avro-ipc + compile + + + + + ossrh + https://oss.sonatype.org/content/repositories/snapshots + + + ossrh + https://oss.sonatype.org/service/local/staging/deploy/maven2/ + + + + + release + + + + org.apache.maven.plugins + maven-gpg-plugin + + + sign-artifacts + verify + + sign + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + attach-javadocs + + jar + + + + + + org.apache.maven.plugins + maven-source-plugin + + + attach-sources + + jar-no-fork + + + + + + + + + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f665fc5 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +PyYAML +avro +flake8 +humanize +nose +requests diff --git a/src/main/resources/avro/consentcode.avdl b/src/main/resources/avro/consentcode.avdl new file mode 100644 index 0000000..6e314d2 --- /dev/null +++ b/src/main/resources/avro/consentcode.avdl @@ -0,0 +1,88 @@ +@namespace("org.ga4gh.consentcode") + +/** +Data use conditions based on consent codes as introduced in +http://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1005772. +*/ +protocol ConsentCodeDataUseConditions { + +/** Data use condition. */ +record ConsentCodeDataUseCondition { + /** + Consent code abbreviation, e.g. `NRES` for no restrictions primary category. + */ + string code; + + /** Description of the condition. */ + union{ null, string } description = null; +} + +/** Data use of a resource based on consent codes. 
*/ +record ConsentCodeDataUse { + /** + Primary data use category. + + Accepted values (- code: name - description): + - NRES: no restrictions - No restrictions on data use. + - GRU(CC): general research use and clinical care - For + health/medical/biomedical purposes and other biological research, + including the study of population origins or ancestry. + - HMB(CC): health/medical/biomedical research and clinical care - Use of the + data is limited to health/medical/biomedical purposes, does not include + the study of population origins or ancestry. + - DS-[XX](CC): disease-specific research and clinical care - Use of the data + must be related to [disease]. + - POA: population origins/ancestry research - Use of the data is limited to + the study of population origins or ancestry. + */ + ConsentCodeDataUseCondition primaryCategory; + + /** + Secondary data use categories. + + Accepted values (- code: name - description): + - RS-[XX]: other research-specific restrictions - Use of the data is limited + to studies of [research type] (e.g., pediatric research). + - RUO: research use only - Use of data is limited to research purposes + (e.g., does not include its use in clinical care). + - NMDS: no “general methods” research - Use of the data includes methods + development research (e.g., development of software or algorithms) ONLY + within the bounds of other data use limitations. + - GSO: genetic studies only - Use of the data is limited to genetic studies + only (i.e., no research using only the phenotype data). + */ + array secondaryCategories = []; + + /** + Data use requirements. + + Accepted values (- code: name - description): + - NPU: not-for-profit use only - Use of the data is limited to + not-for-profit organizations. + - PUB: publication required - Requestor agrees to make results of studies + using the data available to the larger scientific community. 
+ - COL-[XX]: collaboration required - Requestor must agree to collaboration + with the primary study investigator(s). + - RTN: return data to database/resource - Requestor must return + derived/enriched data to the database/resource. + - IRB: ethics approval required - Requestor must provide documentation of + local IRB/REC approval. + - GS-[XX]: geographical restrictions - Use of the data is limited to within + [geographic region]. + - MOR-[XX]: publication moratorium/embargo - Requestor agrees not to publish + results of studies until [date]. + - TS-[XX]: time limits on use - Use of data is approved for [x months]. + - US: user-specific restrictions - Use of data is limited to use by approved + users. + - PS: project-specific restrictions - Use of data is limited to use within + an approved project. + - IS: institution-specific restrictions - Use of data is limited to use + within an approved institution. + */ + array requirements = []; + + /** Version of the data use specification. */ + string version; +} + +} diff --git a/tests/compile_schemas.py b/tests/compile_schemas.py new file mode 100644 index 0000000..35075df --- /dev/null +++ b/tests/compile_schemas.py @@ -0,0 +1,126 @@ +""" +Compiles avro schemas into python representations of those schemas +""" +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import glob +import shutil +import os.path +import tempfile +import re + +import avro.schema + +import utils + + +class SchemaClass(object): + """ + Representation of an avro class + """ + def __init__(self, sourceFile): + self.sourceFile = sourceFile + with open(sourceFile) as sf: + self.schemaSource = sf.read() + self.schema = avro.schema.parse(self.schemaSource) + self.name = self.schema.name + + def getFields(self): + """ + Returns the list of avro fields sorted in order of name. 
+ """ + return sorted(self.schema.fields, key=lambda f: f.name) + + def isSearchRequest(self): + """ + Returns True if the class we are converting is a subclass of + SearchRequest, and False otherwise. + """ + return re.search('Search.+Request', self.name) is not None + + def isSearchResponse(self): + """ + Returns True if the class we are converting is a subclass of + SearchResponse, and False otherwise. + """ + return re.search('Search.+Response', self.name) is not None + + +class SchemaProcessor(object): + """ + Compiles avro schemas into python classes + """ + def __init__(self, args): + self.version = args.version + self.tmpDir = tempfile.mkdtemp(prefix="ga4gh_") + self.avroJarPath = args.avro_tools_jar + # Note! The tarball does not contain the leading v + string = "schemas-{0}".format(self.version[1:]) + self.schemaDir = os.path.join(self.tmpDir, string) + self.avroJar = os.path.join(self.schemaDir, "avro-tools.jar") + self.avroPath = "src/main/resources/avro" + self.avdlDirectory = os.path.join(self.schemaDir, self.avroPath) + + def run(self): + self._getSchemaFromLocal() + self._compileSchemas() + self._initClasses() + self._initPostSignatures() + + def cleanup(self): + shutil.rmtree(self.tmpDir) + + def getClasses(self): + return self.classes + + def getPostSignatures(self): + return self.postSignatures + + def _compileSchemas(self): + url = "http://central.maven.org/maven2/org/apache/avro/avro-tools/"\ + "1.8.0/avro-tools-1.8.0.jar" + fileDownloader = utils.FileDownloader(url, self.avroJar) + fileDownloader.download() + cwd = os.getcwd() + os.chdir(self.avdlDirectory) + for avdlFile in glob.glob("*.avdl"): + self._convertAvro(avdlFile) + os.chdir(cwd) + + def _convertAvro(self, avdlFile): + args = ["java", "-jar", self.avroJar, "idl2schemata", avdlFile] + stdoutLines, stderrLines = utils.runCommandSplitsOutput(args) + printableArgs = "'{}'".format(" ".join(args)) + utils.ensureNoWarnings( + stdoutLines, "stdout of {}".format(printableArgs)) + 
utils.ensureNoWarnings( + stderrLines, "stderr of {}".format(printableArgs)) + + def _getSchemaFromLocal(self): + if not os.path.exists(self.avdlDirectory): + os.makedirs(self.avdlDirectory) + avdlFiles = glob.iglob(os.path.join(self.avroPath, "*.avdl")) + for avdlFile in avdlFiles: + if os.path.isfile(avdlFile): + shutil.copy2(avdlFile, self.avdlDirectory) + + def _initClasses(self): + self.classes = [] + for avscFile in glob.glob(os.path.join(self.avdlDirectory, "*.avsc")): + self.classes.append(SchemaClass(avscFile)) + self.requestClassNames = [ + cls.name for cls in self.classes if cls.isSearchRequest()] + self.responseClassNames = [ + cls.name for cls in self.classes if cls.isSearchResponse()] + + def _initPostSignatures(self): + self.postSignatures = [] + for request, response in zip( + self.requestClassNames, self.responseClassNames): + objname = re.search('Search(.+)Request', request).groups()[0] + url = '/{0}/search'.format(objname.lower()) + tup = (url, request, response) + self.postSignatures.append(tup) + self.postSignatures.sort() diff --git a/tests/test_maven.py b/tests/test_maven.py new file mode 100644 index 0000000..8b6467b --- /dev/null +++ b/tests/test_maven.py @@ -0,0 +1,31 @@ +""" +Runs the maven tests +""" +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import shlex +import subprocess +import unittest + +import utils + + +class TestMaven(unittest.TestCase): + """ + Uses maven to run tests + """ + def testMaven(self): + # ensure the maven tests don't fail or issue warnings + mvnInstall = \ + "mvn install -DskipTests=true -Dmaven.javadoc.skip=true -B -V" + self.runCommandCheckWarnings(mvnInstall) + mvnTest = "mvn test -B" + self.runCommandCheckWarnings(mvnTest) + + def runCommandCheckWarnings(self, cmd): + utils.log("Running '{}'".format(cmd)) + splits = shlex.split(cmd) + output = subprocess.check_output(splits).split('\n') + utils.ensureNoWarnings(output, cmd) diff --git 
a/tests/test_protocol.py b/tests/test_protocol.py new file mode 100644 index 0000000..36d48b1 --- /dev/null +++ b/tests/test_protocol.py @@ -0,0 +1,62 @@ +""" +Runs tests that ensure protocol invariants + +TODO add other tests including: +- some CI on postSignatures +""" +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import unittest + +import avro.schema + +import compile_schemas + + +class TestValidateSchemas(unittest.TestCase): + """ + Ensure the schemas conform to certain rules + """ + @classmethod + def setupClass(cls): + args = cls._makeArgs() + cls.schemaProcessor = compile_schemas.SchemaProcessor(args) + cls.schemaProcessor.run() + + @classmethod + def tearDownClass(cls): + cls.schemaProcessor.cleanup() + + @classmethod + def getClasses(cls): + return cls.schemaProcessor.getClasses() + + @classmethod + def _makeArgs(self): + class FakeArgs(object): + pass + args = FakeArgs() + args.version = "test" + args.avro_tools_jar = None + return args + + def testSchemaProperties(self): + for schemaClass in self.getClasses(): + self._checkProperties(schemaClass) + + def _checkProperties(self, schemaClass): + """ + Checks that the class schema satisfies certain properties: + - every union must have null as the first type + """ + if isinstance(schemaClass.schema, avro.schema.RecordSchema): + for field in schemaClass.getFields(): + if isinstance(field.type, avro.schema.UnionSchema): + t0 = field.type.schemas[0] + if not (isinstance(t0, avro.schema.PrimitiveSchema) and + t0.type == "null"): + msg = "Schema union assumptions violated: {}.{}" + raise Exception(msg.format( + schemaClass.name, field.name)) diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 0000000..ff90a4f --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,129 @@ +""" +Utilities for tests +""" +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import os +import 
shlex +import subprocess +import sys +import tempfile + +import requests +import humanize + + +def log(message): + print(message) + + +def getLinesFromLogFile(stream): + stream.flush() + stream.seek(0) + lines = stream.readlines() + return lines + + +def ensureNoWarnings(lines, streamName): + pattern = '[WARNING]' + matchingLines = [] + for line in lines: + if pattern in line: + matchingLines.append(line[:-1]) + if len(matchingLines) != 0: + raise Exception("warning(s) detected in {}:\n{}".format( + streamName, '\n'.join(matchingLines))) + + +def runCommandSplitsOutput(splits): + """ + Run a shell command and return the stdout and stderr of the command + """ + stdout = tempfile.NamedTemporaryFile() + stderr = tempfile.NamedTemporaryFile() + try: + process = subprocess.Popen(splits, stdout=stdout, stderr=stderr) + process.wait() + stdoutLines = getLinesFromLogFile(stdout) + stderrLines = getLinesFromLogFile(stderr) + finally: + stdout.close() + stderr.close() + return stdoutLines, stderrLines + + +def runCommandSplits(splits, silent=False): + """ + Run a shell command given the command's parsed command line + """ + if silent: + with open(os.devnull, 'w') as devnull: + subprocess.check_call(splits, stdout=devnull, stderr=devnull) + else: + subprocess.check_call(splits) + + +def runCommand(command, silent=False): + """ + Run a shell command + """ + splits = shlex.split(command) + runCommandSplits(splits, silent=silent) + + +class FileDownloader(object): + """ + Provides a wget-like file download and terminal display + """ + defaultChunkSize = 1048576 # 1MB + defaultStream = sys.stdout + + def __init__(self, url, path, chunkSize=defaultChunkSize, + stream=defaultStream): + self.url = url + self.path = path + self.basename = os.path.basename(url) + self.basenameLength = len(self.basename) + self.chunkSize = chunkSize + self.stream = stream + self.bytesWritten = 0 + self.displayIndex = 0 + self.displayWindowSize = 20 + + def download(self): + 
self.stream.write("Downloading '{}' to '{}'\n".format( + self.url, self.path)) + response = requests.get(self.url, stream=True) + response.raise_for_status() + self.contentLength = int(response.headers['content-length']) + with open(self.path, 'wb') as outputFile: + for chunk in response.iter_content(chunk_size=self.chunkSize): + self.bytesWritten += self.chunkSize + self._updateDisplay() + outputFile.write(chunk) + self.stream.write("\n") + self.stream.flush() + + def _getFileNameDisplayString(self): + if self.basenameLength <= self.displayWindowSize: + return self.basename + else: + return self.basename # TODO scrolling window here + + def _updateDisplay(self): + fileName = self._getFileNameDisplayString() + + # TODO contentLength seems to slightly under-report how many bytes + # we have to download... hence the min functions + percentage = min(self.bytesWritten / self.contentLength, 1) + numerator = humanize.filesize.naturalsize( + min(self.bytesWritten, self.contentLength)) + denominator = humanize.filesize.naturalsize( + self.contentLength) + + displayString = "{} {:<6.2%} ({:>9} / {:<9})\r" + self.stream.write(displayString.format( + fileName, percentage, numerator, denominator)) + self.stream.flush() From a2d84c2396312470595014530256d0400ae78386 Mon Sep 17 00:00:00 2001 From: Miro Cupak Date: Tue, 6 Dec 2016 23:15:14 -0500 Subject: [PATCH 2/2] Moved tests to an external module. 
--- .gitignore | 3 + .travis.yml | 6 +- README.md | 4 +- requirements.txt | 11 ++-- tests/compile_schemas.py | 126 -------------------------------------- tests/test_maven.py | 31 ---------- tests/test_protocol.py | 62 ------------------- tests/utils.py | 129 --------------------------------------- 8 files changed, 11 insertions(+), 361 deletions(-) delete mode 100644 tests/compile_schemas.py delete mode 100644 tests/test_maven.py delete mode 100644 tests/test_protocol.py delete mode 100644 tests/utils.py diff --git a/.gitignore b/.gitignore index fded6d9..afd2a51 100644 --- a/.gitignore +++ b/.gitignore @@ -73,3 +73,6 @@ target/ #********** IntelliJ files ****** *.iml + +#********** Dependency files ******** +src/avro-schema-test-base diff --git a/.travis.yml b/.travis.yml index 57d6378..7b52296 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,13 +1,9 @@ language: python -sudo: true python: - 2.7 branches: only: - master - develop -install: - - pip install -r requirements.txt script: - - flake8 tests - - nosetests -v tests + - nosetests -v $VIRTUAL_ENV/src/avro-schema-test-base \ No newline at end of file diff --git a/README.md b/README.md index a446dcb..f96bc5f 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,6 @@ To generate Java code, run `mvn package` and check the output in the `target` di ## How to test -Prerequisites: Python 2.7 (incl. pip). +Prerequisites: Python 2.7 (incl. Pip 7+). -Install dependencies with `pip install -r requirements.txt`. To run the test suite, use `nosetests -v tests`. To check the test code style violations, run `flake8 tests`. +Install dependencies with `pip install -r requirements.txt`. To run the test suite, use `nosetests -v`. 
diff --git a/requirements.txt b/requirements.txt index f665fc5..0e9a309 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ -PyYAML -avro -flake8 -humanize -nose -requests +-e git+https://github.com/mcupak/avro-schema-test-base.git@v0.1.0#egg=avro-schema-test-base +avro==1.8.0 +humanize==0.5.1 +nose==1.3.7 +requests==2.12.3 diff --git a/tests/compile_schemas.py b/tests/compile_schemas.py deleted file mode 100644 index 35075df..0000000 --- a/tests/compile_schemas.py +++ /dev/null @@ -1,126 +0,0 @@ -""" -Compiles avro schemas into python representations of those schemas -""" -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import glob -import shutil -import os.path -import tempfile -import re - -import avro.schema - -import utils - - -class SchemaClass(object): - """ - Representation of an avro class - """ - def __init__(self, sourceFile): - self.sourceFile = sourceFile - with open(sourceFile) as sf: - self.schemaSource = sf.read() - self.schema = avro.schema.parse(self.schemaSource) - self.name = self.schema.name - - def getFields(self): - """ - Returns the list of avro fields sorted in order of name. - """ - return sorted(self.schema.fields, key=lambda f: f.name) - - def isSearchRequest(self): - """ - Returns True if the class we are converting is a subclass of - SearchRequest, and False otherwise. - """ - return re.search('Search.+Request', self.name) is not None - - def isSearchResponse(self): - """ - Returns True if the class we are converting is a subclass of - SearchResponse, and False otherwise. - """ - return re.search('Search.+Response', self.name) is not None - - -class SchemaProcessor(object): - """ - Compiles avro schemas into python classes - """ - def __init__(self, args): - self.version = args.version - self.tmpDir = tempfile.mkdtemp(prefix="ga4gh_") - self.avroJarPath = args.avro_tools_jar - # Note! 
The tarball does not contain the leading v - string = "schemas-{0}".format(self.version[1:]) - self.schemaDir = os.path.join(self.tmpDir, string) - self.avroJar = os.path.join(self.schemaDir, "avro-tools.jar") - self.avroPath = "src/main/resources/avro" - self.avdlDirectory = os.path.join(self.schemaDir, self.avroPath) - - def run(self): - self._getSchemaFromLocal() - self._compileSchemas() - self._initClasses() - self._initPostSignatures() - - def cleanup(self): - shutil.rmtree(self.tmpDir) - - def getClasses(self): - return self.classes - - def getPostSignatures(self): - return self.postSignatures - - def _compileSchemas(self): - url = "http://central.maven.org/maven2/org/apache/avro/avro-tools/"\ - "1.8.0/avro-tools-1.8.0.jar" - fileDownloader = utils.FileDownloader(url, self.avroJar) - fileDownloader.download() - cwd = os.getcwd() - os.chdir(self.avdlDirectory) - for avdlFile in glob.glob("*.avdl"): - self._convertAvro(avdlFile) - os.chdir(cwd) - - def _convertAvro(self, avdlFile): - args = ["java", "-jar", self.avroJar, "idl2schemata", avdlFile] - stdoutLines, stderrLines = utils.runCommandSplitsOutput(args) - printableArgs = "'{}'".format(" ".join(args)) - utils.ensureNoWarnings( - stdoutLines, "stdout of {}".format(printableArgs)) - utils.ensureNoWarnings( - stderrLines, "stderr of {}".format(printableArgs)) - - def _getSchemaFromLocal(self): - if not os.path.exists(self.avdlDirectory): - os.makedirs(self.avdlDirectory) - avdlFiles = glob.iglob(os.path.join(self.avroPath, "*.avdl")) - for avdlFile in avdlFiles: - if os.path.isfile(avdlFile): - shutil.copy2(avdlFile, self.avdlDirectory) - - def _initClasses(self): - self.classes = [] - for avscFile in glob.glob(os.path.join(self.avdlDirectory, "*.avsc")): - self.classes.append(SchemaClass(avscFile)) - self.requestClassNames = [ - cls.name for cls in self.classes if cls.isSearchRequest()] - self.responseClassNames = [ - cls.name for cls in self.classes if cls.isSearchResponse()] - - def 
_initPostSignatures(self): - self.postSignatures = [] - for request, response in zip( - self.requestClassNames, self.responseClassNames): - objname = re.search('Search(.+)Request', request).groups()[0] - url = '/{0}/search'.format(objname.lower()) - tup = (url, request, response) - self.postSignatures.append(tup) - self.postSignatures.sort() diff --git a/tests/test_maven.py b/tests/test_maven.py deleted file mode 100644 index 8b6467b..0000000 --- a/tests/test_maven.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -Runs the maven tests -""" -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import shlex -import subprocess -import unittest - -import utils - - -class TestMaven(unittest.TestCase): - """ - Uses maven to run tests - """ - def testMaven(self): - # ensure the maven tests don't fail or issue warnings - mvnInstall = \ - "mvn install -DskipTests=true -Dmaven.javadoc.skip=true -B -V" - self.runCommandCheckWarnings(mvnInstall) - mvnTest = "mvn test -B" - self.runCommandCheckWarnings(mvnTest) - - def runCommandCheckWarnings(self, cmd): - utils.log("Running '{}'".format(cmd)) - splits = shlex.split(cmd) - output = subprocess.check_output(splits).split('\n') - utils.ensureNoWarnings(output, cmd) diff --git a/tests/test_protocol.py b/tests/test_protocol.py deleted file mode 100644 index 36d48b1..0000000 --- a/tests/test_protocol.py +++ /dev/null @@ -1,62 +0,0 @@ -""" -Runs tests that ensure protocol invariants - -TODO add other tests including: -- some CI on postSignatures -""" -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import unittest - -import avro.schema - -import compile_schemas - - -class TestValidateSchemas(unittest.TestCase): - """ - Ensure the schemas conform to certain rules - """ - @classmethod - def setupClass(cls): - args = cls._makeArgs() - cls.schemaProcessor = compile_schemas.SchemaProcessor(args) - cls.schemaProcessor.run() 
- - @classmethod - def tearDownClass(cls): - cls.schemaProcessor.cleanup() - - @classmethod - def getClasses(cls): - return cls.schemaProcessor.getClasses() - - @classmethod - def _makeArgs(self): - class FakeArgs(object): - pass - args = FakeArgs() - args.version = "test" - args.avro_tools_jar = None - return args - - def testSchemaProperties(self): - for schemaClass in self.getClasses(): - self._checkProperties(schemaClass) - - def _checkProperties(self, schemaClass): - """ - Checks that the class schema satisfies certain properties: - - every union must have null as the first type - """ - if isinstance(schemaClass.schema, avro.schema.RecordSchema): - for field in schemaClass.getFields(): - if isinstance(field.type, avro.schema.UnionSchema): - t0 = field.type.schemas[0] - if not (isinstance(t0, avro.schema.PrimitiveSchema) and - t0.type == "null"): - msg = "Schema union assumptions violated: {}.{}" - raise Exception(msg.format( - schemaClass.name, field.name)) diff --git a/tests/utils.py b/tests/utils.py deleted file mode 100644 index ff90a4f..0000000 --- a/tests/utils.py +++ /dev/null @@ -1,129 +0,0 @@ -""" -Utilities for tests -""" -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import os -import shlex -import subprocess -import sys -import tempfile - -import requests -import humanize - - -def log(message): - print(message) - - -def getLinesFromLogFile(stream): - stream.flush() - stream.seek(0) - lines = stream.readlines() - return lines - - -def ensureNoWarnings(lines, streamName): - pattern = '[WARNING]' - matchingLines = [] - for line in lines: - if pattern in line: - matchingLines.append(line[:-1]) - if len(matchingLines) != 0: - raise Exception("warning(s) detected in {}:\n{}".format( - streamName, '\n'.join(matchingLines))) - - -def runCommandSplitsOutput(splits): - """ - Run a shell command and return the stdout and stderr of the command - """ - stdout = tempfile.NamedTemporaryFile() - 
stderr = tempfile.NamedTemporaryFile() - try: - process = subprocess.Popen(splits, stdout=stdout, stderr=stderr) - process.wait() - stdoutLines = getLinesFromLogFile(stdout) - stderrLines = getLinesFromLogFile(stderr) - finally: - stdout.close() - stderr.close() - return stdoutLines, stderrLines - - -def runCommandSplits(splits, silent=False): - """ - Run a shell command given the command's parsed command line - """ - if silent: - with open(os.devnull, 'w') as devnull: - subprocess.check_call(splits, stdout=devnull, stderr=devnull) - else: - subprocess.check_call(splits) - - -def runCommand(command, silent=False): - """ - Run a shell command - """ - splits = shlex.split(command) - runCommandSplits(splits, silent=silent) - - -class FileDownloader(object): - """ - Provides a wget-like file download and terminal display - """ - defaultChunkSize = 1048576 # 1MB - defaultStream = sys.stdout - - def __init__(self, url, path, chunkSize=defaultChunkSize, - stream=defaultStream): - self.url = url - self.path = path - self.basename = os.path.basename(url) - self.basenameLength = len(self.basename) - self.chunkSize = chunkSize - self.stream = stream - self.bytesWritten = 0 - self.displayIndex = 0 - self.displayWindowSize = 20 - - def download(self): - self.stream.write("Downloading '{}' to '{}'\n".format( - self.url, self.path)) - response = requests.get(self.url, stream=True) - response.raise_for_status() - self.contentLength = int(response.headers['content-length']) - with open(self.path, 'wb') as outputFile: - for chunk in response.iter_content(chunk_size=self.chunkSize): - self.bytesWritten += self.chunkSize - self._updateDisplay() - outputFile.write(chunk) - self.stream.write("\n") - self.stream.flush() - - def _getFileNameDisplayString(self): - if self.basenameLength <= self.displayWindowSize: - return self.basename - else: - return self.basename # TODO scrolling window here - - def _updateDisplay(self): - fileName = self._getFileNameDisplayString() - - # TODO 
contentLength seems to slightly under-report how many bytes - # we have to download... hence the min functions - percentage = min(self.bytesWritten / self.contentLength, 1) - numerator = humanize.filesize.naturalsize( - min(self.bytesWritten, self.contentLength)) - denominator = humanize.filesize.naturalsize( - self.contentLength) - - displayString = "{} {:<6.2%} ({:>9} / {:<9})\r" - self.stream.write(displayString.format( - fileName, percentage, numerator, denominator)) - self.stream.flush()