# Source code for ga4gh.dos.test.compliance

# -*- coding: utf-8 -*-
import functools
import hashlib
import json
import logging
import random
import time
import unittest
try:
    import urllib.parse as urllib  # For Python 3 compat
except ImportError:
    import urllib
import uuid

import ga4gh.dos.schema

# Emit debug-level logs by default so each compliance request/response is
# visible when the suite runs.
# NOTE(review): basicConfig() at import time mutates the host application's
# root logger configuration — confirm this side effect is intended for a
# library module.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


def test_requires(*operations):
    """
    Decorator declaring which DOS operations a test case exercises.

    Each operation is named by its `operationId` in the schema (e.g.
    ListDataBundles, UpdateDataObject, GetServiceInfo, etc.). At call
    time, the wrapped test is skipped via :class:`unittest.SkipTest`
    unless every named operation appears in ``self.supports`` on the
    test instance.

    For example, given this test setup::

        class Test(AbstractComplianceTest):
            supports = ['UpdateDataBundles']

            @test_requires('UpdateDataBundles')
            def test_update_data_bundles(self):
                self.dos_request('PUT', '/databundles/1234')

            @test_requires('ListDataBundles', 'UpdateDataBundles')
            def test_list_and_update_data_bundles(self):
                self.dos_request('GET', '/databundles')
                self.dos_request('PUT', '/databundles/1234')

    ``test_update_data_bundles`` would run and
    ``test_list_and_update_data_bundles`` would be skipped.

    :param str \*operations: the operations required by the decorated
                             test case
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(self):
            # Build the comma-separated list of missing operations up
            # front; an empty string means everything is supported.
            missing = ", ".join(
                op for op in operations if op not in self.supports)
            if missing:
                raise unittest.SkipTest("not supported: " + missing)
            return func(self)
        return wrapper
    return decorator


class AbstractComplianceTest(unittest.TestCase):
    """
    This class implements a number of compliance tests for Data Object
    Service implementations. It is meant to provide a single, standardized
    test harness to verify that a given DOS implementation acts in a
    manner consistent with the schema.

    Using the test harness is pretty straightforward, and only requires
    implementing a method that can make requests to the service under test
    (:meth:`~AbstractComplianceTest._make_request`). As this class
    subclasses :class:`unittest.TestCase`, all the functions exposed to a
    subclass of :class:`unittest.TestCase`
    (e.g. :meth:`~unittest.TestCase.setUpClass`) are available for use.

    This test suite does not perform any authentication testing. Requests
    made during testing are made with the assumption that they will be
    properly authenticated in :meth:`_make_request` or similar.

    For a service built using Chalice, you would likely be able to write
    something similar to this::

        from ga4gh.dos.test.compliance import AbstractComplianceTest
        from chalice import LocalGateway, Config
        from my_chalice_app import chalice_app

        class TestApp(AbstractComplianceTest):
            @classmethod
            def setUpClass(cls):
                cls.lg = LocalGateway(chalice_app, Config())

            @classmethod
            def _make_request(cls, meth, path, headers=None, body=None):
                headers = headers or {}
                r = cls.lg.handle_request(method=meth,
                                          path='/ga4gh/dos/v1' + path,
                                          headers=headers, body=body)
                return r['body'], r['statusCode']

    You would then be able to run the compliance test suite however you
    normally run your tests (e.g. ``nosetests`` or
    ``python -m unittest discover``).

    :var supports: a list of supported DOS operations. By default, this
                   is the list of all DOS operations, named by the
                   `operationId` key in the schema::

                       supports = ['GetServiceInfo', 'GetDataBundleVersions',
                                   'CreateDataBundle', 'ListDataBundles',
                                   'UpdateDataObject', 'GetDataObject', ...]

                   Adding / removing operations from this list will adjust
                   which tests are run. So, doing something like::

                       class Test(AbstractComplianceTest):
                           supports = ['ListDataObjects']

                   would skip all tests calling UpdateDataBundle,
                   GetDataBundle, and any other endpoint that is not
                   ListDataObjects.
    """
    # Populate :var:`supports` with the `operationId` of each DOS endpoint
    # specified in the schema. A comprehension is used instead of a
    # class-level for loop so the loop variables don't leak into the class
    # namespace as spurious attributes.
    supports = [method['operationId']
                for path in ga4gh.dos.schema.present_schema()['paths'].values()
                for method in path.values()]
[docs] @classmethod def _make_request(cls, meth, path, headers=None, body=None): """ Method that makes requests to a DOS implementation under test given a method, path, request headers, and a request body. The provided path is the path provided in the Data Object Service schema - this means that in your implementation of this method, you might need to prepend the provided path with your ``basePath``, e.g. ``/ga4gh/dos/v1``. This method should return a tuple of the raw request content as a string and the return code of the request as an int. :param str meth: the HTTP method to use in the request (i.e. GET, PUT, etc.) :param str path: path to make a request to, sans hostname (e.g. `/databundles`) :param dict headers: headers to include with the request :param dict body: data to be included in the request body (serialized as JSON) :rtype: tuple :returns: a tuple of the response body as a JSON-formatted string and the response code as an int """ raise NotImplementedError
@classmethod def dos_request(cls, meth, path, headers=None, body=None, expected_status=200): """ Wrapper function around :meth:`AbstractComplianceTest._make_request`. Logs the request being made, makes the request with :meth:`._make_request`, checks for errors, and performs transparent JSON de/serialization. It is assumed that any request made through this function is a request made to the underlying DOS implementation - e.g., ``self.dos_request('https://example.com/')`` should be expected to fail. :param str meth: the HTTP method to use in the request (i.e. GET, PUT, etc.) :param str path: path to make a request to, sans hostname (e.g. `/databundles`) :param dict headers: headers to include with the request :param dict body: data to be included in the request body (**not** serialized as JSON) :param int expected_status: expected HTTP status code. If the status code is not expected, an error will be raised. :rtype: dict :returns: the response body """ # Log the request being made, make the request itself, then log the response. logger.debug("%s %s", meth, path) # DOS only really speaks JSON, so we can assume that if data is being # sent with a request, that data will be JSON headers = headers or {} if body and 'Content-Type' not in headers: headers['Content-Type'] = 'application/json' request, status = cls._make_request(meth=meth, path=path, headers=headers, body=json.dumps(body)) logger.info("{meth} {path} [{status}]".format(**locals())) # Check to make sure the return code is what we expect msg = "{meth} {path} returned {status}, expected {expected_status}: {request}" # We could use :meth:`assertEqual` here, but if we do, # :meth:`dos_request` must be an instance method. Since the only # advantage we really lose is a prettier error message, we can # be a little verbose this one time. 
# It's preferable that :meth:`dos_request` be defined as a class method # to allow one-time server setup to be performed in meth:`setUpClass`, # which must necessarily be a class method. if not status == expected_status: raise AssertionError(msg.format(**locals())) # Return the deserialized request body return json.loads(request) @staticmethod def get_query_url(path, **kwargs): """ Returns the given path with the provided kwargs concatenated as query parameters, e.g.:: >>> self.get_query_url('/dataobjects', alias=123) '/dataobjects?alias=123' :param str path: URL path without query parameters :param kwargs: query parameters :rtype: str """ return path + '?' + urllib.urlencode(kwargs) @staticmethod def generate_data_objects(amount): """ Yields a specified number of data objects with random attributes. :param int amount: the amount of data objects to generate """ for _ in range(amount): yield { 'id': str(uuid.uuid1()), 'name': str(uuid.uuid1()), 'size': str(random.randint(2**0, 2**32)), 'created': '2018-08-29T19:58:52.648Z', 'updated': '2018-08-29T19:58:52.648Z', 'version': str(uuid.uuid1()), 'mime_type': 'application/json', 'checksums': [{ 'checksum': hashlib.md5(str(uuid.uuid1()).encode('utf-8')).hexdigest(), 'type': 'md5' }], 'urls': [ {'url': str(uuid.uuid1())}, {'url': str(uuid.uuid1())} ], 'description': str(uuid.uuid1()), 'aliases': [str(uuid.uuid1())], } @staticmethod def generate_data_bundles(amount): """ Yields a specified number of data bundles with random attributes. :param int amount: the amount of data bundles to generate """ for bdl in AbstractComplianceTest.generate_data_objects(amount): del bdl['name'] del bdl['size'] del bdl['mime_type'] del bdl['urls'] bdl.update({'data_object_ids': [str(uuid.uuid1()), str(uuid.uuid1())]}) yield bdl def get_random_data_object(self): """ Retrieves a 'random' data object by performing a ListDataObjects request with a large page size then randomly selecting a data object from the response. 
As this test utilizes the ListDataObjects operation, be sure to specify that as a test requirement with :func:`test_requires` when using this context manager in a test case. Usage:: obj, url = self.get_random_data_object() :returns: a random data object as a dict and its relative URL (e.g. '/dataobjects/abcdefg-12345') as a string :rtype: tuple """ r = self.dos_request('GET', self.get_query_url('/dataobjects', page_size=100)) data_obj = random.choice(r['data_objects']) url = '/dataobjects/' + data_obj['id'] return data_obj, url def get_random_data_bundle(self): """ Retrieves a 'random' data bundle. Similar to :meth:`get_random_data_object` but retrieves a data bundle instead. """ r = self.dos_request('GET', self.get_query_url('/databundles', page_size=100)) data_bdl = random.choice(r['data_bundles']) url = '/databundles/' + data_bdl['id'] return data_bdl, url # # ListDataObject tests @test_requires('ListDataObjects') def test_list_data_objects_simple(self): """ Smoke test to verify that `GET /dataobjects` returns a response. """ r = self.dos_request('GET', '/dataobjects') self.assertTrue(r) @test_requires('ListDataObjects') def test_list_data_objects_by_checksum(self): """ Test that filtering by checksum in ListDataObjects works nicely. Since we can assume that checksums are unique between data objects, we can test this functionality by selecting a random data object then using ListDataObjects with a checksum parameter and asserting that only one result is returned and that the result returned is the same as the one queried. """ obj, _ = self.get_random_data_object() for cs in obj['checksums']: url = self.get_query_url('/dataobjects', checksum=cs['checksum'], checksum_type=cs['type']) r = self.dos_request('GET', url) self.assertEqual(len(r['data_objects']), 1) self.assertEqual(r['data_objects'][0]['id'], obj['id']) @test_requires('ListDataObjects') def test_list_data_objects_by_alias(self): """ Tests that filtering by alias in ListDataObjects works. 
We do this by selecting a random data object with ListDataObjects then performing another ListDataObjects query but filtering by the alias, then checking that every returned object contains the proper aliases. """ reference_obj, _ = self.get_random_data_object() url = self.get_query_url('/dataobjects', alias=reference_obj['aliases'][0]) queried_objs = self.dos_request('GET', url)['data_objects'] for queried_obj in queried_objs: self.assertIn(reference_obj['aliases'][0], queried_obj['aliases']) @test_requires('ListDataObjects') def test_list_data_objects_with_nonexist_alias(self): """ Test to ensure that looking up a nonexistent alias returns an empty list. """ alias = str(uuid.uuid1()) # An alias that is unlikely to exist body = self.dos_request('GET', self.get_query_url('/dataobjects', alias=alias)) self.assertEqual(len(body['data_objects']), 0) @test_requires('ListDataObjects') def test_list_data_objects_paging(self): """ Demonstrates basic paging features. """ # Test the page_size parameter r = self.dos_request('GET', self.get_query_url('/dataobjects', page_size=3)) self.assertEqual(len(r['data_objects']), 3) r = self.dos_request('GET', self.get_query_url('/dataobjects', page_size=7)) self.assertEqual(len(r['data_objects']), 7) # Next, given that the adjusting page_size works, we can test that paging # works by making a ListDataObjects request with page_size=2, then making # two requests with page_size=1, and comparing that the results are the same. 
both = self.dos_request('GET', self.get_query_url('/dataobjects', page_size=2)) self.assertEqual(len(both['data_objects']), 2) first = self.dos_request('GET', self.get_query_url('/dataobjects', page_size=1)) self.assertEqual(len(first['data_objects']), 1) second = self.dos_request('GET', self.get_query_url('/dataobjects', page_size=1, page_token=first['next_page_token'])) self.assertEqual(len(second['data_objects']), 1) self.assertEqual(first['data_objects'][0], both['data_objects'][0]) self.assertEqual(second['data_objects'][0], both['data_objects'][1]) @test_requires('ListDataObjects') def test_list_data_object_querying(self): """ Tests if ListDataObject handles multiple query parameters correctly. """ # ListDataObjects supports querying by checksum, URL, and alias. # To test this, let us take a data object with a unique checksum, # URL, and alias: obj, _ = self.get_random_data_object() def query(expected_results, expected_object=None, **kwargs): """ Makes a ListDataObject query with parameters specifying the checksum, URL, and alias of the ``obj`` data object above. 
:param int expected_results: the amount of results to expect from the ListDataObjects request :param dict expected_object: if expected_results is 1, then if only one object is returned from the query, assert that the returned object is this object :param kwargs: query parameters for the ListDataObjects request """ args = { 'url': obj['urls'][0]['url'], 'alias': obj['aliases'][0], 'checksum': obj['checksums'][0]['checksum'], 'checksum_type': obj['checksums'][0]['type'] } args.update(kwargs) url = self.get_query_url('/dataobjects', **args) r = self.dos_request('GET', url) self.assertEqual(len(r['data_objects']), expected_results) if expected_object and expected_results == 1: self.assertEqual(expected_object, r['data_objects'][0]) rand = str(uuid.uuid1()) # If the data object we selected has a unique checksum, alias, and URL, # then when we make a ListDataObjects requesting all three of those # parameters, we should receive exactly one data object back - the one # we chose above. query(expected_results=1, expected_object=obj) # That said, if we query for the above checksum and alias but also # query for a URL that is unlikely to exist, then we should receive # no results, as the search criteria should be logically ANDed together. # If `expected_results != 0`, then it is likely that the criteria are # being ORed. query(expected_results=0, url=rand) # And to finish up the test, we repeat the test directly aforementioned # on the other two attributes we expect to be unique. query(expected_results=0, alias=rand) query(expected_results=0, checksum=rand) # # GetDataObject tests @test_requires('ListDataObjects', 'GetDataObject') def test_get_data_object(self): """ Lists Data Objects and then gets one by ID. 
""" data_obj_1, url = self.get_random_data_object() data_obj_2 = self.dos_request('GET', url)['data_object'] # Test that the data object randomly chosen via `/dataobjects` # can be retrieved via `/dataobjects/{data_object_id}` self.assertEqual(data_obj_1, data_obj_2) @test_requires('ListDataBundles', 'GetDataBundle') def test_get_data_bundle(self): """ Lists data bundles and then gets one by ID. """ data_bdl_1, url = self.get_random_data_bundle() data_bdl_2 = self.dos_request('GET', url)['data_bundle'] # Test that the data object randomly chosen via `/databundles` # can be retrieved via `/databundles/{data_bundle_id}` self.assertEqual(data_bdl_1, data_bdl_2) @test_requires('ListDataBundles') def test_list_data_bundles_with_nonexist_alias(self): """ Test to ensure that searching for data bundles with a nonexistent alias returns an empty list. """ alias = str(uuid.uuid1()) # An alias that is unlikely to exist body = self.dos_request('GET', self.get_query_url('/databundles', alias=alias)) self.assertEqual(len(body['data_bundles']), 0) @test_requires('GetDataBundle') def test_get_nonexistent_data_bundle(self): """ Verifies that requesting a data bundle that doesn't exist results in HTTP 404 """ bdl, url = self.get_random_data_bundle() self.dos_request('GET', '/databundles/NonexistentDataBundle', body={'data_bundle': bdl}, expected_status=404) @test_requires('UpdateDataObject') def test_update_nonexistent_data_object(self): """ Verifies that trying to update a data object that doesn't exist returns HTTP 404 """ obj, url = self.get_random_data_object() self.dos_request('PUT', '/dataobjects/NonexistentObjID', expected_status=404, body={'data_object': obj, 'data_object_id': obj['id']}) @test_requires('GetDataObject', 'ListDataObjects') def test_update_data_object_with_bad_request(self): """ Verifies that attempting to update a data object with a malformed request returns HTTP 400 """ _, url = self.get_random_data_object() self.dos_request('PUT', url, expected_status=400, 
body={'abc': ''}) @test_requires('ListDataObjects', 'UpdateDataObject', 'GetDataObject') def test_alias_update(self): """ Demonstrates updating a data object with a given alias. """ alias = 'daltest:' + str(uuid.uuid1()) # First, select a "random" object that we can test data_object, url = self.get_random_data_object() # Try and update with no changes. self.dos_request('PUT', url, body={'data_object': data_object}) # We specify the Content-Type since Chalice looks for it when # deserializing the request body server-side # Test adding an alias (acceptably unique to try # retrieving the object by the alias) data_object['aliases'].append(alias) # Try and update, this time with a change. update_response = self.dos_request('PUT', url, body={'data_object': data_object}) self.assertEqual(data_object['id'], update_response['data_object_id']) time.sleep(2) # Test and see if the update took place by retrieving the object # and checking its aliases get_response = self.dos_request('GET', url) self.assertEqual(update_response['data_object_id'], get_response['data_object']['id']) self.assertIn(alias, get_response['data_object']['aliases']) # Testing the update again by using a DOS ListDataObjectsRequest # to locate the object by its new alias. list_request = { 'alias': alias, # We know the alias is unique, so even though page_size > 1 # we expect only one result. 'page_size': 10 } list_url = self.get_query_url('/dataobjects', **list_request) list_response = self.dos_request('GET', list_url) self.assertEqual(1, len(list_response['data_objects'])) self.assertIn(alias, list_response['data_objects'][0]['aliases']) # # Tear down and remove the test alias # params['body']['data_object']['aliases'].remove(alias) # self.dos_request('PUT', url, **params) @test_requires('ListDataObjects', 'UpdateDataObject') def test_full_data_object_update(self): """ Demonstrates updating multiple fields of a data object at once. This incidentally also tests object conversion. 
""" # First, select a "random" object that we can test data_object, url = self.get_random_data_object() # Make a new data object that is different from the data object we retrieved attributes = { # 'name' and 'description' are optional fields and might not be present 'name': data_object.get('name', '') + 'test-suffix', # See DataBiosphere/dos-azul-lambda#87 # 'description': data_object.get('description', '') + 'Change This', 'urls': [ {'url': 'https://cgl.genomics.ucsc.edu/'}, {'url': 'https://github.com/DataBiosphere'} ] } data_object.update(attributes) # Now update the old data object with the new attributes we added self.dos_request('PUT', url, body={'data_object': data_object}) time.sleep(2) # Give the server some time to catch up # Test and see if the update took place get_response = self.dos_request('GET', url)['data_object'] # We only compare the change attributes as DOS implementations # can update timestamps server-side self.assertEqual(get_response['name'], data_object['name']) self.assertEqual(get_response['urls'], data_object['urls'])