# -*- coding: utf-8 -*-
import functools
import hashlib
import json
import logging
import random
import time
import unittest
try:
import urllib.parse as urllib # For Python 3 compat
except ImportError:
import urllib
import uuid
import ga4gh.dos.schema
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
def test_requires(*operations):
    """
    Decorator that declares which DOS operations a test case exercises
    (each operation named by its `operationId` in the schema, e.g.
    ListDataBundles, UpdateDataObject, GetServiceInfo, etc.). At call
    time, the wrapped test is skipped unless every named operation
    appears in ``self.supports`` on the implementation under test.

    For example, given this test setup::

        class Test(AbstractComplianceTest):
            supports = ['UpdateDataBundles']

            @test_requires('UpdateDataBundles')
            def test_update_data_bundles(self):
                self.dos_request('PUT', '/databundles/1234')

            @test_requires('ListDataBundles', 'UpdateDataBundles')
            def test_list_and_update_data_bundles(self):
                self.dos_request('GET', '/databundles')
                self.dos_request('PUT', '/databundles/1234')

    ``test_update_data_bundles`` would run and
    ``test_list_and_update_data_bundles`` would be skipped.

    :param str \*operations: the operations required by the decorated
                             test case
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(self):
            # Collect every required operation the implementation lacks;
            # if there are any, skip rather than fail.
            missing = [op for op in operations if op not in self.supports]
            if missing:
                raise unittest.SkipTest("not supported: " + ", ".join(missing))
            return func(self)
        return wrapper
    return decorator
class AbstractComplianceTest(unittest.TestCase):
    """
    This class implements a number of compliance tests for Data Object Service
    implementations. It is meant to provide a single, standardized test
    harness to verify that a given DOS implementation acts in a manner
    consistent with the schema.

    Using the test harness is pretty straightforward, and only requires
    implementing a method that can make requests to the service under test
    (:meth:`~AbstractComplianceTest._make_request`). As this class subclasses
    :class:`unittest.TestCase`, all the functions exposed to a subclass
    of :class:`unittest.TestCase` (e.g. :meth:`~unittest.TestCase.setUpClass`)
    are available for use.

    This test suite does not perform any authentication testing. Requests made
    during testing are made with the assumption that they will be properly
    authenticated in :meth:`_make_request` or similar.

    For a service built using Chalice, you would likely be able to write
    something similar to this::

        from ga4gh.dos.test.compliance import AbstractComplianceTest
        from chalice import LocalGateway, Config
        from my_chalice_app import chalice_app

        class TestApp(AbstractComplianceTest):
            @classmethod
            def setUpClass(cls):
                cls.lg = LocalGateway(chalice_app, Config())

            @classmethod
            def _make_request(cls, meth, path, headers=None, body=None):
                headers = headers or {}
                r = cls.lg.handle_request(method=meth, path='/ga4gh/dos/v1' + path,
                                          headers=headers, body=body)
                return r['body'], r['statusCode']

    You would then be able to run the compliance test suite however you
    normally run your tests (e.g. ``nosetests`` or ``python -m unittest discover``).

    :var supports: a list of supported DOS operations. By default, this is
                   the list of all DOS operations, named by the `operationId`
                   key in the schema::

                       supports = ['GetServiceInfo', 'GetDataBundleVersions',
                                   'CreateDataBundle', 'ListDataBundles',
                                   'UpdateDataObject', 'GetDataObject', ...]

                   Adding / removing operations from this list will adjust
                   which tests are run. So, doing something like::

                       class Test(AbstractComplianceTest):
                           supports = ['ListDataObjects']

                   would skip all tests calling UpdateDataBundle, GetDataBundle,
                   and any other endpoint that is not ListDataObjects.
    """
    # Populate :var:`supports` with the `operationId` of each DOS endpoint
    # specified in the schema. A comprehension is used (rather than a
    # class-body for loop) so no loop variables leak into the class namespace.
    supports = [operation['operationId']
                for endpoint in ga4gh.dos.schema.present_schema()['paths'].values()
                for operation in endpoint.values()]

    @classmethod
    def _make_request(cls, meth, path, headers=None, body=None):
        """
        Method that makes requests to a DOS implementation under test
        given a method, path, request headers, and a request body.

        The provided path is the path provided in the Data Object Service
        schema - this means that in your implementation of this method,
        you might need to prepend the provided path with your ``basePath``,
        e.g. ``/ga4gh/dos/v1``.

        This method should return a tuple of the raw request content as a
        string and the return code of the request as an int.

        :param str meth: the HTTP method to use in the request (i.e. GET,
                         PUT, etc.)
        :param str path: path to make a request to, sans hostname (e.g.
                         `/databundles`)
        :param dict headers: headers to include with the request
        :param str body: JSON-serialized data to be included in the request
                         body, or None if no body was provided
        :rtype: tuple
        :returns: a tuple of the response body as a JSON-formatted string and the
                  response code as an int
        """
        raise NotImplementedError

    @classmethod
    def dos_request(cls, meth, path, headers=None, body=None, expected_status=200):
        """
        Wrapper function around :meth:`AbstractComplianceTest._make_request`.
        Logs the request being made, makes the request with
        :meth:`._make_request`, checks for errors, and performs transparent
        JSON de/serialization.

        It is assumed that any request made through this function is a
        request made to the underlying DOS implementation - e.g.,
        ``self.dos_request('https://example.com/')`` should be expected
        to fail.

        :param str meth: the HTTP method to use in the request (i.e. GET,
                         PUT, etc.)
        :param str path: path to make a request to, sans hostname (e.g.
                         `/databundles`)
        :param dict headers: headers to include with the request
        :param dict body: data to be included in the request body
                          (**not** serialized as JSON)
        :param int expected_status: expected HTTP status code. If the status
                                    code is not expected, an error will be
                                    raised.
        :rtype: dict
        :returns: the response body
        :raises AssertionError: if the response status differs from
                                `expected_status`
        """
        # Log the request being made, make the request itself, then log the response.
        logger.debug("%s %s", meth, path)
        # DOS only really speaks JSON, so we can assume that if data is being
        # sent with a request, that data will be JSON
        headers = headers or {}
        if body and 'Content-Type' not in headers:
            headers['Content-Type'] = 'application/json'
        # Only serialize when a body was actually provided; json.dumps(None)
        # would otherwise send the literal string 'null' with body-less
        # requests such as GET.
        serialized = json.dumps(body) if body is not None else None
        request, status = cls._make_request(meth=meth, path=path, headers=headers,
                                            body=serialized)
        logger.info("%s %s [%s]", meth, path, status)
        # Check to make sure the return code is what we expect.
        # We could use :meth:`assertEqual` here, but if we do,
        # :meth:`dos_request` must be an instance method. Since the only
        # advantage we really lose is a prettier error message, we can
        # be a little verbose this one time.
        # It's preferable that :meth:`dos_request` be defined as a class method
        # to allow one-time server setup to be performed in meth:`setUpClass`,
        # which must necessarily be a class method.
        if status != expected_status:
            msg = "{meth} {path} returned {status}, expected {expected_status}: {request}"
            raise AssertionError(msg.format(meth=meth, path=path, status=status,
                                            expected_status=expected_status,
                                            request=request))
        # Return the deserialized request body
        return json.loads(request)

    @staticmethod
    def get_query_url(path, **kwargs):
        """
        Returns the given path with the provided kwargs concatenated as
        query parameters, e.g.::

            >>> self.get_query_url('/dataobjects', alias=123)
            '/dataobjects?alias=123'

        :param str path: URL path without query parameters
        :param kwargs: query parameters
        :rtype: str
        """
        return path + '?' + urllib.urlencode(kwargs)

    @staticmethod
    def generate_data_objects(amount):
        """
        Yields a specified number of data objects with random attributes.

        :param int amount: the amount of data objects to generate
        """
        for _ in range(amount):
            yield {
                'id': str(uuid.uuid1()),
                'name': str(uuid.uuid1()),
                'size': str(random.randint(2**0, 2**32)),
                'created': '2018-08-29T19:58:52.648Z',
                'updated': '2018-08-29T19:58:52.648Z',
                'version': str(uuid.uuid1()),
                'mime_type': 'application/json',
                'checksums': [{
                    'checksum': hashlib.md5(str(uuid.uuid1()).encode('utf-8')).hexdigest(),
                    'type': 'md5'
                }],
                'urls': [
                    {'url': str(uuid.uuid1())},
                    {'url': str(uuid.uuid1())}
                ],
                'description': str(uuid.uuid1()),
                'aliases': [str(uuid.uuid1())],
            }

    @staticmethod
    def generate_data_bundles(amount):
        """
        Yields a specified number of data bundles with random attributes.

        :param int amount: the amount of data bundles to generate
        """
        for bdl in AbstractComplianceTest.generate_data_objects(amount):
            # A data bundle is a data object minus the object-specific
            # attributes, plus a list of member data object IDs.
            del bdl['name']
            del bdl['size']
            del bdl['mime_type']
            del bdl['urls']
            bdl.update({'data_object_ids': [str(uuid.uuid1()), str(uuid.uuid1())]})
            yield bdl

    def get_random_data_object(self):
        """
        Retrieves a 'random' data object by performing a ListDataObjects
        request with a large page size then randomly selecting a data
        object from the response.

        As this test utilizes the ListDataObjects operation, be sure to
        specify that as a test requirement with :func:`test_requires`
        when using this context manager in a test case.

        Usage::

            obj, url = self.get_random_data_object()

        :returns: a random data object as a dict and its relative URL
                  (e.g. '/dataobjects/abcdefg-12345') as a string
        :rtype: tuple
        """
        r = self.dos_request('GET', self.get_query_url('/dataobjects', page_size=100))
        data_obj = random.choice(r['data_objects'])
        url = '/dataobjects/' + data_obj['id']
        return data_obj, url

    def get_random_data_bundle(self):
        """
        Retrieves a 'random' data bundle. Similar to :meth:`get_random_data_object`
        but retrieves a data bundle instead.
        """
        r = self.dos_request('GET', self.get_query_url('/databundles', page_size=100))
        data_bdl = random.choice(r['data_bundles'])
        url = '/databundles/' + data_bdl['id']
        return data_bdl, url

    # ListDataObjects tests

    @test_requires('ListDataObjects')
    def test_list_data_objects_simple(self):
        """
        Smoke test to verify that `GET /dataobjects` returns a response.
        """
        r = self.dos_request('GET', '/dataobjects')
        self.assertTrue(r)

    @test_requires('ListDataObjects')
    def test_list_data_objects_by_checksum(self):
        """
        Test that filtering by checksum in ListDataObjects works nicely.
        Since we can assume that checksums are unique between data
        objects, we can test this functionality by selecting a random
        data object then using ListDataObjects with a checksum parameter
        and asserting that only one result is returned and that the
        result returned is the same as the one queried.
        """
        obj, _ = self.get_random_data_object()
        for cs in obj['checksums']:
            url = self.get_query_url('/dataobjects', checksum=cs['checksum'],
                                     checksum_type=cs['type'])
            r = self.dos_request('GET', url)
            self.assertEqual(len(r['data_objects']), 1)
            self.assertEqual(r['data_objects'][0]['id'], obj['id'])

    @test_requires('ListDataObjects')
    def test_list_data_objects_by_alias(self):
        """
        Tests that filtering by alias in ListDataObjects works. We do
        this by selecting a random data object with ListDataObjects
        then performing another ListDataObjects query but filtering
        by the alias, then checking that every returned object contains
        the proper aliases.
        """
        reference_obj, _ = self.get_random_data_object()
        url = self.get_query_url('/dataobjects', alias=reference_obj['aliases'][0])
        queried_objs = self.dos_request('GET', url)['data_objects']
        for queried_obj in queried_objs:
            self.assertIn(reference_obj['aliases'][0], queried_obj['aliases'])

    @test_requires('ListDataObjects')
    def test_list_data_objects_with_nonexist_alias(self):
        """
        Test to ensure that looking up a nonexistent alias returns an
        empty list.
        """
        alias = str(uuid.uuid1())  # An alias that is unlikely to exist
        body = self.dos_request('GET', self.get_query_url('/dataobjects', alias=alias))
        self.assertEqual(len(body['data_objects']), 0)

    @test_requires('ListDataObjects')
    def test_list_data_objects_paging(self):
        """
        Demonstrates basic paging features.
        """
        # Test the page_size parameter
        r = self.dos_request('GET', self.get_query_url('/dataobjects', page_size=3))
        self.assertEqual(len(r['data_objects']), 3)
        r = self.dos_request('GET', self.get_query_url('/dataobjects', page_size=7))
        self.assertEqual(len(r['data_objects']), 7)
        # Next, given that the adjusting page_size works, we can test that paging
        # works by making a ListDataObjects request with page_size=2, then making
        # two requests with page_size=1, and comparing that the results are the same.
        both = self.dos_request('GET', self.get_query_url('/dataobjects', page_size=2))
        self.assertEqual(len(both['data_objects']), 2)
        first = self.dos_request('GET', self.get_query_url('/dataobjects', page_size=1))
        self.assertEqual(len(first['data_objects']), 1)
        second = self.dos_request('GET', self.get_query_url('/dataobjects', page_size=1,
                                                            page_token=first['next_page_token']))
        self.assertEqual(len(second['data_objects']), 1)
        self.assertEqual(first['data_objects'][0], both['data_objects'][0])
        self.assertEqual(second['data_objects'][0], both['data_objects'][1])

    @test_requires('ListDataObjects')
    def test_list_data_object_querying(self):
        """
        Tests if ListDataObject handles multiple query parameters correctly.
        """
        # ListDataObjects supports querying by checksum, URL, and alias.
        # To test this, let us take a data object with a unique checksum,
        # URL, and alias:
        obj, _ = self.get_random_data_object()

        def query(expected_results, expected_object=None, **kwargs):
            """
            Makes a ListDataObject query with parameters specifying
            the checksum, URL, and alias of the ``obj`` data object above.

            :param int expected_results: the amount of results to expect
                                         from the ListDataObjects request
            :param dict expected_object: if expected_results is 1, then
                                         if only one object is returned
                                         from the query, assert that the
                                         returned object is this object
            :param kwargs: query parameters for the ListDataObjects request
            """
            args = {
                'url': obj['urls'][0]['url'],
                'alias': obj['aliases'][0],
                'checksum': obj['checksums'][0]['checksum'],
                'checksum_type': obj['checksums'][0]['type']
            }
            args.update(kwargs)
            url = self.get_query_url('/dataobjects', **args)
            r = self.dos_request('GET', url)
            self.assertEqual(len(r['data_objects']), expected_results)
            if expected_object and expected_results == 1:
                self.assertEqual(expected_object, r['data_objects'][0])

        rand = str(uuid.uuid1())
        # If the data object we selected has a unique checksum, alias, and URL,
        # then when we make a ListDataObjects requesting all three of those
        # parameters, we should receive exactly one data object back - the one
        # we chose above.
        query(expected_results=1, expected_object=obj)
        # That said, if we query for the above checksum and alias but also
        # query for a URL that is unlikely to exist, then we should receive
        # no results, as the search criteria should be logically ANDed together.
        # If `expected_results != 0`, then it is likely that the criteria are
        # being ORed.
        query(expected_results=0, url=rand)
        # And to finish up the test, we repeat the test directly aforementioned
        # on the other two attributes we expect to be unique.
        query(expected_results=0, alias=rand)
        query(expected_results=0, checksum=rand)

    # GetDataObject tests

    @test_requires('ListDataObjects', 'GetDataObject')
    def test_get_data_object(self):
        """
        Lists Data Objects and then gets one by ID.
        """
        data_obj_1, url = self.get_random_data_object()
        data_obj_2 = self.dos_request('GET', url)['data_object']
        # Test that the data object randomly chosen via `/dataobjects`
        # can be retrieved via `/dataobjects/{data_object_id}`
        self.assertEqual(data_obj_1, data_obj_2)

    @test_requires('ListDataBundles', 'GetDataBundle')
    def test_get_data_bundle(self):
        """
        Lists data bundles and then gets one by ID.
        """
        data_bdl_1, url = self.get_random_data_bundle()
        data_bdl_2 = self.dos_request('GET', url)['data_bundle']
        # Test that the data bundle randomly chosen via `/databundles`
        # can be retrieved via `/databundles/{data_bundle_id}`
        self.assertEqual(data_bdl_1, data_bdl_2)

    @test_requires('ListDataBundles')
    def test_list_data_bundles_with_nonexist_alias(self):
        """
        Test to ensure that searching for data bundles with a nonexistent
        alias returns an empty list.
        """
        alias = str(uuid.uuid1())  # An alias that is unlikely to exist
        body = self.dos_request('GET', self.get_query_url('/databundles', alias=alias))
        self.assertEqual(len(body['data_bundles']), 0)

    # This test uses :meth:`get_random_data_bundle`, which performs a
    # ListDataBundles request, so that operation is declared as well.
    @test_requires('ListDataBundles', 'GetDataBundle')
    def test_get_nonexistent_data_bundle(self):
        """
        Verifies that requesting a data bundle that doesn't exist results in HTTP 404
        """
        bdl, url = self.get_random_data_bundle()
        self.dos_request('GET', '/databundles/NonexistentDataBundle',
                         body={'data_bundle': bdl}, expected_status=404)

    # This test uses :meth:`get_random_data_object`, which performs a
    # ListDataObjects request, so that operation is declared as well.
    @test_requires('ListDataObjects', 'UpdateDataObject')
    def test_update_nonexistent_data_object(self):
        """
        Verifies that trying to update a data object that doesn't exist
        returns HTTP 404
        """
        obj, url = self.get_random_data_object()
        self.dos_request('PUT', '/dataobjects/NonexistentObjID', expected_status=404,
                         body={'data_object': obj, 'data_object_id': obj['id']})

    # The PUT request here is an UpdateDataObject operation, so that
    # (rather than GetDataObject, which this test never performs) is declared.
    @test_requires('ListDataObjects', 'UpdateDataObject')
    def test_update_data_object_with_bad_request(self):
        """
        Verifies that attempting to update a data object with a malformed
        request returns HTTP 400
        """
        _, url = self.get_random_data_object()
        self.dos_request('PUT', url, expected_status=400, body={'abc': ''})

    @test_requires('ListDataObjects', 'UpdateDataObject', 'GetDataObject')
    def test_alias_update(self):
        """
        Demonstrates updating a data object with a given alias.
        """
        alias = 'daltest:' + str(uuid.uuid1())
        # First, select a "random" object that we can test
        data_object, url = self.get_random_data_object()
        # Try and update with no changes.
        self.dos_request('PUT', url, body={'data_object': data_object})
        # Test adding an alias (acceptably unique to try
        # retrieving the object by the alias)
        data_object['aliases'].append(alias)
        # Try and update, this time with a change.
        update_response = self.dos_request('PUT', url,
                                           body={'data_object': data_object})
        self.assertEqual(data_object['id'], update_response['data_object_id'])
        # Give the server a moment to apply the update before reading it back.
        time.sleep(2)
        # Test and see if the update took place by retrieving the object
        # and checking its aliases
        get_response = self.dos_request('GET', url)
        self.assertEqual(update_response['data_object_id'], get_response['data_object']['id'])
        self.assertIn(alias, get_response['data_object']['aliases'])
        # Testing the update again by using a DOS ListDataObjectsRequest
        # to locate the object by its new alias.
        list_request = {
            'alias': alias,
            # We know the alias is unique, so even though page_size > 1
            # we expect only one result.
            'page_size': 10
        }
        list_url = self.get_query_url('/dataobjects', **list_request)
        list_response = self.dos_request('GET', list_url)
        self.assertEqual(1, len(list_response['data_objects']))
        self.assertIn(alias, list_response['data_objects'][0]['aliases'])
        # NOTE(review): alias teardown is intentionally left disabled here;
        # the test alias remains attached to the object after the test runs.

    @test_requires('ListDataObjects', 'UpdateDataObject')
    def test_full_data_object_update(self):
        """
        Demonstrates updating multiple fields of a data object at once.
        This incidentally also tests object conversion.
        """
        # First, select a "random" object that we can test
        data_object, url = self.get_random_data_object()
        # Make a new data object that is different from the data object we retrieved
        attributes = {
            # 'name' and 'description' are optional fields and might not be present
            'name': data_object.get('name', '') + 'test-suffix',
            # See DataBiosphere/dos-azul-lambda#87
            # 'description': data_object.get('description', '') + 'Change This',
            'urls': [
                {'url': 'https://cgl.genomics.ucsc.edu/'},
                {'url': 'https://github.com/DataBiosphere'}
            ]
        }
        data_object.update(attributes)
        # Now update the old data object with the new attributes we added
        self.dos_request('PUT', url, body={'data_object': data_object})
        time.sleep(2)  # Give the server some time to catch up
        # Test and see if the update took place
        get_response = self.dos_request('GET', url)['data_object']
        # We only compare the changed attributes as DOS implementations
        # can update timestamps server-side
        self.assertEqual(get_response['name'], data_object['name'])
        self.assertEqual(get_response['urls'], data_object['urls'])