from __future__ import annotations
import argparse
import logging
from abc import ABC, abstractmethod
from collections.abc import Iterable
from functools import lru_cache
from typing import Any, Callable, Dict, Optional
from dae.genomic_resources.gene_models import (
GeneModels,
build_gene_models_from_resource,
)
from dae.genomic_resources.reference_genome import (
ReferenceGenome,
build_reference_genome_from_resource,
)
from dae.genomic_resources.repository import GenomicResourceRepo
from dae.genomic_resources.repository_factory import (
build_genomic_resource_repository,
)
logger = logging.getLogger(__name__)

# Keys under which the well-known objects are stored in a genomic context.
GC_GRR_KEY = "genomic_resources_repository"
GC_REFERENCE_GENOME_KEY = "reference_genome"
GC_GENE_MODELS_KEY = "gene_models"

# Module-level registries; they start empty and are filled by
# register_context_provider() and register_context() respectively.
_REGISTERED_CONTEXT_PROVIDERS: list[GenomicContextProvider] = []
_REGISTERED_CONTEXTS: list[GenomicContext] = []
class GenomicContext(ABC):
    """Interface of an object that exposes genomic resources by key.

    Concrete contexts implement ``get_context_object``,
    ``get_context_keys`` and ``get_source``; the typed accessors defined
    here are thin, type-checked wrappers around ``get_context_object``.
    """

    def get_reference_genome(self) -> Optional[ReferenceGenome]:
        """Look up the reference genome stored in this context.

        Returns:
            The object stored under ``GC_REFERENCE_GENOME_KEY`` or ``None``
            when the context has no such object.

        Raises:
            ValueError: the stored object is not a ``ReferenceGenome``.
        """
        genome = self.get_context_object(GC_REFERENCE_GENOME_KEY)
        if genome is not None and not isinstance(genome, ReferenceGenome):
            raise ValueError(
                "The context returned a wrong type for a reference genome: "
                f"{type(genome)}")
        return genome

    def get_gene_models(self) -> Optional[GeneModels]:
        """Look up the gene models stored in this context.

        Returns:
            The object stored under ``GC_GENE_MODELS_KEY`` or ``None`` when
            the context has no such object.

        Raises:
            ValueError: the stored object is not a ``GeneModels``.
        """
        gene_models = self.get_context_object(GC_GENE_MODELS_KEY)
        if gene_models is not None \
                and not isinstance(gene_models, GeneModels):
            raise ValueError(
                "The context returned a wrong type for gene models: "
                f"{type(gene_models)}")
        return gene_models

    def get_genomic_resources_repository(
            self) -> Optional[GenomicResourceRepo]:
        """Look up the genomic resources repository stored in this context.

        Returns:
            The object stored under ``GC_GRR_KEY`` or ``None`` when the
            context has no such object.

        Raises:
            ValueError: the stored object is not a ``GenomicResourceRepo``.
        """
        repository = self.get_context_object(GC_GRR_KEY)
        if repository is not None \
                and not isinstance(repository, GenomicResourceRepo):
            raise ValueError(
                "The context returned a wrong type for GRR: "
                f"{type(repository)}")
        return repository

    @abstractmethod
    def get_context_object(self, key: str) -> Optional[Any]:
        """Return the context object registered under ``key``.

        Implementations return ``None`` when there is no such object.
        """

    @abstractmethod
    def get_context_keys(self) -> set[str]:
        """Return the set of all keys that can be found in the context."""

    @abstractmethod
    def get_source(self) -> tuple[str, ...]:
        """Return a tuple of strings identifying this genomic context."""
class GenomicContextProvider(ABC):
    """Interface of an object able to supply genomic contexts."""

    @abstractmethod
    def get_context_provider_priority(self) -> int:
        """Return the integer priority of this provider."""

    @abstractmethod
    def get_context_provider_type(self) -> str:
        """Return a string naming the type of this provider."""

    @abstractmethod
    def get_contexts(self) -> Iterable[GenomicContext]:
        """Return the genomic contexts supplied by this provider."""
class SimpleGenomicContext(GenomicContext):
    """Genomic context backed by a plain dictionary of objects."""

    def __init__(
            self,
            context_objects: Dict[str, Any],
            source: tuple[str, ...]):
        """Remember the mapping of context objects and the source tuple.

        The passed dictionary is stored as is (it is not copied).
        """
        self._source = source
        self._context: Dict[str, Any] = context_objects

    def get_context_object(self, key: str) -> Optional[Any]:
        """Return the object stored under ``key`` or None if absent."""
        return self._context.get(key)

    def get_context_keys(self) -> set[str]:
        """Return a new set holding the keys of the stored mapping."""
        return set(self._context)

    def get_source(self) -> tuple[str, ...]:
        """Return the source tuple given at construction time."""
        return self._source

    def get_all_context_objects(self) -> Dict[str, Any]:
        """Return the underlying dictionary of context objects."""
        return self._context
class SimpleGenomicContextProvider(GenomicContextProvider):
    """Provider that wraps a single context-building callable."""

    def __init__(
            self,
            context_builder: Callable[[], Optional[GenomicContext]],
            provider_type: str,
            priority: int):
        """Store the builder, the provider type and the priority.

        The builder is not called here; it runs on the first call to
        ``get_contexts``.
        """
        self._context_builder = context_builder
        self._type: str = provider_type
        self._priority: int = priority
        # None means "builder not run yet"; afterwards a (possibly empty)
        # list of contexts.
        self._contexts: Optional[Iterable[GenomicContext]] = None

    def get_context_provider_priority(self) -> int:
        """Return the priority given at construction time."""
        return self._priority

    def get_context_provider_type(self) -> str:
        """Return the provider type wrapped in a fixed label."""
        return f"SingleGenomicContextProvider[{self._type}]"

    def get_contexts(self) -> Iterable[GenomicContext]:
        """Return the contexts of this provider, building them once.

        The builder is invoked on the first call only. If it returns None
        or raises an ``Exception`` the provider yields no contexts; the
        exception is logged and not propagated.
        """
        if self._contexts is not None:
            return self._contexts

        built: Optional[GenomicContext] = None
        try:
            built = self._context_builder()
        except Exception:  # pylint: disable=broad-except
            # Best effort: a failing builder must not break the caller.
            logger.exception("problem while building genomic context")

        self._contexts = [] if built is None else [built]
        return self._contexts
def register_context_provider(
        context_provider: GenomicContextProvider) -> None:
    """Append a genomic context provider to the module registry.

    The provider's type and priority are reported at debug level.
    """
    provider_type = context_provider.get_context_provider_type()
    provider_priority = context_provider.get_context_provider_priority()
    logger.debug(
        "Registering the %s genomic context generator with priority %s",
        provider_type, provider_priority)
    _REGISTERED_CONTEXT_PROVIDERS.append(context_provider)
def register_context(context: GenomicContext) -> None:
    """Put a genomic context at the front of the module registry.

    The context's source is reported at debug level.
    """
    source = context.get_source()
    logger.debug("Registering the %s genomic context", source)
    # In-place prepend, so the most recently registered context comes first.
    _REGISTERED_CONTEXTS[:0] = [context]
class PriorityGenomicContext(GenomicContext):
    """Genomic context that delegates to an ordered list of contexts.

    Lookups walk the wrapped contexts in the order they were given; the
    first context that has an object for the requested key wins.
    """

    def __init__(self, contexts: Iterable[GenomicContext]):
        """Store the wrapped contexts and log which ones are in use.

        Args:
            contexts: the contexts to consult, highest priority first.
        """
        # Snapshot the iterable: a one-shot iterator (e.g. a generator)
        # would be exhausted by the logging loop below and would always
        # pass the truth test, even when it yields nothing.
        self.contexts: list[GenomicContext] = list(contexts)
        if self.contexts:
            logger.info("Using the following genomic context:")
            for context in self.contexts:
                logger.info("\t%s", context.get_source())
        else:
            logger.info("No genomic contexts are available.")

    def get_context_object(self, key: str) -> Optional[Any]:
        """Return the first object found for ``key`` in the contexts.

        Returns:
            The object from the first context that has one, or None when
            no wrapped context has an object for ``key``.
        """
        for context in self.contexts:
            obj = context.get_context_object(key)
            # Only None means "missing"; an object that happens to be
            # falsy (e.g. one defining __len__ and being empty) is still
            # a valid hit and must not be skipped.
            if obj is not None:
                logger.info(
                    "object with key %s found in the context %s",
                    key, context.get_source())
                return obj
        return None

    def get_context_keys(self) -> set[str]:
        """Return the union of the keys of all wrapped contexts."""
        keys: set[str] = set()
        for context in self.contexts:
            keys.update(context.get_context_keys())
        return keys

    def get_source(self) -> tuple[str, ...]:
        """Return a tuple identifying this context and its components.

        The first element is the fixed label ``"PriorityGenomicContext"``,
        followed by the stringified source of every wrapped context.
        """
        # Computed on demand instead of being memoised with lru_cache: a
        # cache on a method is keyed on ``self``, keeps every instance
        # alive for the lifetime of the class and can return a stale value.
        return (
            "PriorityGenomicContext",
            *(str(context.get_source()) for context in self.contexts),
        )
def get_genomic_context() -> GenomicContext:
    """Assemble a priority genomic context from everything registered.

    Explicitly registered contexts come first, in registry order. They are
    followed by the contexts of the registered providers, with providers
    ordered by ascending (priority, provider type).
    """
    def _provider_order(
            provider: GenomicContextProvider) -> tuple[int, str]:
        return (
            provider.get_context_provider_priority(),
            provider.get_context_provider_type(),
        )

    # Copy, so the module registry itself is never extended.
    collected = list(_REGISTERED_CONTEXTS)
    ordered_providers = sorted(
        _REGISTERED_CONTEXT_PROVIDERS, key=_provider_order)
    for provider in ordered_providers:
        collected.extend(provider.get_contexts())
    return PriorityGenomicContext(collected)
class CLIGenomicContext(SimpleGenomicContext):
    """Genomic context built from command line arguments."""

    @staticmethod
    def add_context_arguments(parser: argparse.ArgumentParser) -> None:
        """Add the genomic context command line arguments to ``parser``.

        Args:
            parser: the argument parser to extend in place.
        """
        parser.add_argument(
            "-g", "--grr-filename", "--grr", default=None,
            help="The GRR configuration file. If the argument is absent, "
            "the GRR repository from the current genomic context "
            "(i.e. gpf_instance) will be used or, if that fails, the "
            "default GRR configuration will be used.")
        parser.add_argument(
            "--grr-directory", default=None,
            help="Local GRR directory to use as repository.")
        parser.add_argument(
            "-R", "--reference-genome-resource-id", "--ref", default=None,
            help="The resource id for the reference genome. If the argument "
            "is absent the reference genome from the current genomic "
            "context will be used.")
        parser.add_argument(
            "-G", "--gene-models-resource-id", "--genes", default=None,
            help="The resource id of the gene models resource. If the "
            "argument is absent the gene models from the current genomic "
            "context will be used.")
        parser.add_argument(
            "-ar", "--allow-repeated-attributes", default=False,
            action="store_true",
            help="Rename repeated attributes instead of raising"
            " an error.")

    @staticmethod
    def register(args: argparse.Namespace) -> None:
        """Build a CLI genomic context from ``args`` and register it."""
        context = CLIGenomicContext.context_builder(args)
        register_context(context)

    @staticmethod
    def context_builder(args: argparse.Namespace) -> CLIGenomicContext:
        """Build a CLI genomic context from parsed arguments.

        The GRR is taken from ``args.grr_filename`` if given, otherwise
        from ``args.grr_directory`` if given, otherwise from the current
        genomic context. A reference genome and gene models are added only
        when their resource ids are present in ``args``.

        Args:
            args: namespace holding ``grr_filename``, ``grr_directory``,
                ``reference_genome_resource_id`` and
                ``gene_models_resource_id``.

        Returns:
            A context whose source is ``("CLIGenomicContext",)``.

        Raises:
            ValueError: no GRR could be resolved.
        """
        context_objects: Dict[str, Any] = {}

        # The configuration file wins over the directory when both are set.
        if args.grr_filename is not None:
            logger.info(
                "Using the GRR configured in the file "
                "%s as requested on the "
                "command line.", args.grr_filename)
            grr = build_genomic_resource_repository(
                file_name=args.grr_filename)
        elif args.grr_directory is not None:
            logger.info(
                "Using local GRR directory "
                "%s as requested on the "
                "command line.", args.grr_directory)
            grr = build_genomic_resource_repository({
                "id": "local",
                "type": "directory",
                "directory": args.grr_directory,
            })
        else:
            # Nothing on the command line: fall back to whatever the
            # already registered contexts and providers can offer.
            grr = get_genomic_context().get_context_object(GC_GRR_KEY)

        if grr is None:
            raise ValueError("Can't resolve genomic context GRR")
        context_objects[GC_GRR_KEY] = grr

        if args.reference_genome_resource_id is not None:
            logger.info(
                "Using the reference genome from resource "
                "%s provided on the command line.",
                args.reference_genome_resource_id)
            resource = grr.get_resource(args.reference_genome_resource_id)
            genome = build_reference_genome_from_resource(resource)
            genome.open()
            context_objects[GC_REFERENCE_GENOME_KEY] = genome

        if args.gene_models_resource_id is not None:
            logger.info(
                "Using the gene models from resource "
                "%s provided on the command line.",
                args.gene_models_resource_id)
            resource = grr.get_resource(args.gene_models_resource_id)
            gene_models = build_gene_models_from_resource(resource).load()
            context_objects[GC_GENE_MODELS_KEY] = gene_models

        return CLIGenomicContext(
            context_objects, source=("CLIGenomicContext", ))
class DefaultRepositoryContextProvider(SimpleGenomicContextProvider):
    """Provider of a genomic context holding the default GRR."""

    def __init__(self) -> None:
        """Configure the provider with its builder, type and priority."""
        super().__init__(
            context_builder=DefaultRepositoryContextProvider.context_builder,
            provider_type="DefaultGRRProvider",
            priority=1000,
        )

    @staticmethod
    def context_builder() -> GenomicContext:
        """Build a context containing only the default GRR.

        The repository is created by calling
        ``build_genomic_resource_repository`` without arguments; the
        context source carries the repository's ``repo_id``.
        """
        repository = build_genomic_resource_repository()
        source = ("default_genomic_resources_repository", repository.repo_id)
        return SimpleGenomicContext({GC_GRR_KEY: repository}, source)

    @staticmethod
    def register() -> None:
        """Register a new instance of this provider."""
        register_context_provider(DefaultRepositoryContextProvider())