Source code for covid19_inference.model.model

# ------------------------------------------------------------------------------ #
# This file implements the abstract model base class.
# It has some helper properties to access date ranges and hierarchical details
# ------------------------------------------------------------------------------ #

import datetime
import logging

import numpy as np
from pymc3 import Model  # this import is needed to get pymc3-style "with ... as model:"

# we cannot import utility, would create recursive dependencies
# from . import utility as ut

log = logging.getLogger(__name__)


# can we rename this guy to model base or something?
[docs]class Cov19Model(Model):
    """
        Abstract base class for the dynamic model of covid-19 propagation.
        Derived from :class:`pymc3.Model`.

        Parameters below are passed to the constructor.

        Attributes (Variables) are available after creation and can be accessed from
        every instance. Some background:

            * The simulation starts `diff_data_sim` days before the data.
            * The data has a certain length, on which the inference is based. This
              length is given by `new_cases_obs`.
            * After the inference, a forecast takes of length `fcast_len` takes
              place, starting on the day after the last data point in `new_cases_obs`.
            * In total, traces produced by a model run have the length
              `sim_len = diff_data_sim + data_len + fcast_len`
            * Date ranges include both boundaries. For example, if `data_begin` is March
              1 and `data_end` is March 3 then `data_len` will be 3.

        Parameters
        ----------
        new_cases_obs : 1 or 2d array
            If the array is two-dimensional, an hierarchical model will be constructed.
            First dimension is then time, the second the region/country.
        data_begin : datatime.datetime
            Date of the first data point
        fcast_len : int
            Number of days the simulations runs longer than the data
        diff_data_sim : int
            Number of days the simulation starts earlier than the data. Should be
            significantly longer than the delay between infection and report of cases.
        N_population : number or 1d array
            Number of inhabitance in region, needed for the S(E)IR model. Is ideally 1
            dimensional if new_cases_obs is 2 dimensional
        name : string
            suffix appended to the name of random variables saved in the trace
        model :
            specify a model, if this one should expand another
        shifted_cases : bool
            when enabled (True), interprets short intervals of zero cases as days,
            where no reporting happens and adds model cases to next non-zero-case day

        Attributes
        ----------
        new_cases_obs : 1 or 2d array
            as passed during construction

        data_begin : datatime.datetime
            date of the first data point in the data

        data_end : datatime.datetime
            date of the last data point in the data

        sim_begin : datatime.datetime
            date at which the simulation begins

        sim_end : datatime.datetime
            date at which the simulation ends (should match fcast_end)

        fcast_begin : datatime.datetime
            date at which the forecast starts (should be one day after data_end)

        fcast_end : datatime.datetime
            data at which the forecast ends

        data_len : int
            total number of days in the data

        sim_len : int
            total number of days in the simulation

        fcast_len : int
            total number of days in the forecast

        diff_data_sim : int
            difference in days between the simulation begin and the data begin.
            The simulation starting time is usually earlier than the data begin.

        Example
        -------
        .. code-block::

            with Cov19Model(**params) as model:
                # Define model here
    """

    def __init__(
        self,
        new_cases_obs,
        data_begin,
        fcast_len,
        diff_data_sim,
        N_population,
        data_end=None,
        name="",
        model=None,
        shifted_cases=True,
    ):

        super().__init__(name=name, model=model)

        new_cases_obs[
            new_cases_obs < 0
        ] = 0  # set negative values to 0 (dirty fix, but better than nothing...)

        # first dim time, second might be state
        self.shifted_cases = shifted_cases
        self.new_cases_obs = np.array(new_cases_obs, dtype=np.dtype(float))
        self.sim_ndim = new_cases_obs.ndim
        self.N_population = N_population

        # these are dates specifying the bounds of data, simulation and forecast.
        # Jonas Sebastian and Paul agreed to use fully inclusive intervals this makes
        # calculating ranges a bit harder but function arguments are more intuitive.
        # 01 Mar, 02 Mar, 03 Mar
        # data_begin = 01 Mar
        # data_end = 03 Mar
        # [data_begin, data_end]
        # (data_end - data_begin).days = 2

        # assign properties
        self._data_begin = data_begin
        self._sim_begin = self.data_begin - datetime.timedelta(days=diff_data_sim)
        if data_end is None:
            self._data_end = self.data_begin + datetime.timedelta(
                days=len(new_cases_obs) - 1
            )
            # totel length of simulation, get later via the shape
            sim_len = len(new_cases_obs) + diff_data_sim + fcast_len
            if sim_len < len(new_cases_obs) + diff_data_sim:
                raise RuntimeError(
                    "Simulation ends before the end of the data. Increase num_days_sim."
                )
        else:
            self._data_end = data_end
            sim_len = (
                (self.data_end - self.data_begin).days + 1 + diff_data_sim + fcast_len
            )

        self._sim_end = self.data_end + datetime.timedelta(days=fcast_len)

        # shape and dimension of simulation
        if self.sim_ndim == 1:
            self.sim_shape = (sim_len,)
        elif self.sim_ndim == 2:
            self.sim_shape = (sim_len, self.new_cases_obs.shape[1])
        elif self.sim_ndim == 3:
            self.sim_shape = (
                sim_len,
                self.new_cases_obs.shape[1],
                self.new_cases_obs.shape[2],
            )

        if self.data_end > datetime.datetime.today():
            log.warning(
                f"Your last data point is in the future ({self.data_end}). "
                + "Are you traveling faster than light?"
            )

    """
        Properties
        ----------
        Useful properties, mainly used by the plot module.
    """

    """
        Utility properties
    """

    @property
    def shape_of_regions(self):
        # Number of regions as tuple of int
        return () if self.sim_ndim == 1 else self.sim_shape[-1]

    @property
    def is_hierarchical(self):
        return self.new_cases_obs.ndim >= 2

    """
        Forecast properties
    """

    @property
    def fcast_begin(self):
        # Returns date on which the forecast starts i.e. the day after the data ends
        return self.data_end + datetime.timedelta(days=1)

    @property
    def fcast_end(self):
        # Returns date on which the simulation and the forecast end
        return self.sim_end

    @property
    def fcast_len(self):
        # Returns the length of the forecast in days
        return (self.sim_end - self.data_end).days

    """
        Data properties
    """

    @property
    def data_len(self):
        return self.new_cases_obs.shape[0]

    @property
    def data_dim(self):
        return self.new_cases_obs.shape[1]

    @property
    def data_begin(self):
        return self._data_begin

    @property
    def data_end(self):
        return self._data_end

    """
        Simulation properties
    """

    @property
    def sim_len(self):
        return self.sim_shape[0]

    @property
    def sim_begin(self):
        return self._sim_begin

    @property
    def sim_end(self):
        return self._sim_end

    @property
    def diff_data_sim(self):
        return (self.data_begin - self.sim_begin).days

    """
        Other helpfull properties
    """

    @property
    def untransformed_freeRVs(self):
        """
            Returns the names of all free parameters of the model, usefull for plotting!

            Returns
            -------
            : list
                all variable names
        """

        """
            For every free random varibale in pymc3 with the _log__ suffix there exists
            an Random Variable without logscale i.e. without _log__. There is no
            easy function to return the names of these function that's why we
            Get the names like that.
        """
        varnames = [str(x).replace("_log__", "") for x in self.free_RVs]
        return varnames


def modelcontext(model):
    """
        return the given model or try to find it in the context if there was
        none supplied.
    """
    if model is None:
        return Cov19Model.get_context()
    return model


def set_missing_priors_with_default(priors_dict, default_priors):
    """
        Takes a dict with custom priors and a dict with defaults and sets keys that
        are not given
    """
    for prior_name in priors_dict.keys():
        if prior_name not in default_priors:
            log.warning(f"Prior with name {prior_name} not known")

    for prior_name, value in default_priors.items():
        if prior_name not in priors_dict:
            priors_dict[prior_name] = value
            log.info(f"{prior_name} was set to default value {value}")