Source code for FEV_KEGG.Evolution.Clade

from FEV_KEGG.Graph.SubstanceGraphs import SubstanceEcGraph, SubstanceEnzymeGraph
from FEV_KEGG.Evolution.Taxonomy import NCBI, Taxonomy
from FEV_KEGG.KEGG.Organism import Group 
from FEV_KEGG.Evolution.Events import GeneFunctionAddition, GeneFunctionLoss, GeneFunctionDivergence, GeneFunctionConservation, SimpleGeneDuplication,\
    NeofunctionalisedECs, NeofunctionalisedEnzymes, Neofunctionalisation, FunctionChange
from FEV_KEGG import settings
from builtins import str
from FEV_KEGG.Drawing import Export
import math
from typing import Dict, Set, Tuple
from FEV_KEGG.Graph.Elements import Enzyme, GeneID, EcNumber

# Module-wide defaults. Each one can be overridden per call through the
# like-named parameter of the methods in this module.

defaultExcludeUnclassified = True
"""
If *True*, ignore taxons with a path containing the string 'unclassified'.
This can be overridden in each relevant method's `excludeUnclassified` parameter in this module.
"""

defaultExcludeMultifunctionalEnzymes = settings.defaultNoMultifunctional
"""
If *True*, ignore enzymes with more than one EC number.
This can be overridden in each relevant method's `excludeMultifunctionalEnzymes` parameter in this module.
"""

defaultMajorityPercentageCoreMetabolism = 80
"""
Default percentage of organisms in the clade, which have to possess an EC number, for it to be included in the core metabolism of the clade.
See :func:`FEV_KEGG.KEGG.Organism.Group.majorityEcGraph`.
This can be overridden in each relevant method's `majorityPercentageCoreMetabolism` parameter in this module.
"""

defaultMajorityPercentageNeofunctionalisation = 0
"""
Default percentage of organisms in the clade, which have to possess the same "neofunctionalised" EC number, for it to be included in the set of "neofunctionalised" EC numbers of the clade.
See :class:`FEV_KEGG.Evolution.Events.NeofunctionalisedECs`.
This can be overridden in each relevant method's `majorityPercentageNeofunctionalisation` parameter in this module.
"""

defaultEValue = settings.defaultEvalue
"""
Default threshold for the statistical expectation value (E-value), below which a sequence alignment is considered significant.
"""

defaultOneOrganismPerSpecies = settings.defaultOneOrganismPerSpecies
"""
Default decision whether to use only the first organism for each species in NCBI taxonomy.
"""

class Clade(object):

    def __init__(self, ncbiNames: 'e.g. Enterobacter or Proteobacteria/Gammaproteobacteria. Allows list of names, e.g. ["Gammaproteobacteria", "/Archaea"]', excludeUnclassified = defaultExcludeUnclassified, oneOrganismPerSpecies = defaultOneOrganismPerSpecies):
        """
        A clade in NCBI taxonomy, containing all leaf taxon's KEGG organisms.

        Parameters
        ----------
        ncbiNames : str or Iterable[str]
            String(s) a taxon's path must contain to be included in this clade.
        excludeUnclassified : bool, optional
            If *True*, ignore taxons with a path containing the string 'unclassified'.
        oneOrganismPerSpecies : bool, optional
            If *True*, use only the first organism of each species.

        Attributes
        ----------
        self.ncbiNames : List[str]
            Part of the path of each leaf taxon to be included in this clade.
            A single string is wrapped in a list, any other iterable is copied into a list.
        self.group
            The :class:`FEV_KEGG.KEGG.Organism.Group` of KEGG organisms created from the found leaf taxons.

        Raises
        ------
        ValueError
            If no clade with one of the `ncbiNames` in its path could be found.

        Warnings
        --------
        It is possible to include organisms of several clades in the same Clade object!
        For example, if you were to search for `ncbiNames` == 'Donaldus Duckus', you would get every organism within
        '/Bacteria/Donaldus Duckus' **and** '/Archaea/Order/Donaldus Duckus'.
        Use the slash (/) notation to make sure you only get the taxon you want,
        e.g. 'Proteobacteria/Gammaproteobacteria' or '/Archaea'.
        """
        taxonomy = NCBI.getTaxonomy()

        if isinstance(ncbiNames, str):
            ncbiNames = [ncbiNames]
        else:
            # Materialise exactly once. A one-shot iterable (e.g. a generator) would otherwise be
            # exhausted by the loop below, leaving self.ncbiNames empty when graphs are named later.
            ncbiNames = list(ncbiNames)
        self.ncbiNames = ncbiNames

        # the same exclusion applies to every requested path
        exceptPaths = 'unclassified' if excludeUnclassified else None

        allOrganisms = set()
        for ncbiName in ncbiNames:
            organisms = taxonomy.getOrganismAbbreviationsByPath(ncbiName, exceptPaths=exceptPaths, oneOrganismPerSpecies=oneOrganismPerSpecies)
            if organisms is None or len(organisms) == 0:
                raise ValueError("No clade of this path found: " + ncbiName)
            allOrganisms.update(organisms)

        self.group = Group( allOrganisms )

        # cache slots; filled lazily, see _neofunctionalisedEnzymes()
        self._lastNeofunctionalisedEnzymesCache = None
        self._lastGeneDuplicatedEnzymesMatches = None
[docs] def collectiveMetabolism(self, excludeMultifunctionalEnzymes = defaultExcludeMultifunctionalEnzymes, addEcDescriptions = False) -> SubstanceEcGraph: """ The Substance-EC graph representing the collective metabolic network, occuring in any organism of the clade. This includes each and every EC number which occurs in any organism of this clade. Parameters ---------- excludeMultifunctionalEnzymes : bool, optional If *True*, ignore enzymes with more than one EC number. Returns ------- SubstanceEcGraph Collective metabolic network of EC numbers, including counts of occurence in each of the clade's organisms. Raises ------ TypeError If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`. HTTPError If fetching any of the underlying graphs fails. URLError If connection to KEGG fails. """ graph = self.group.collectiveEcGraph(noMultifunctional = excludeMultifunctionalEnzymes, addCount = True, keepOnHeap = True, addEcDescriptions = addEcDescriptions) graph.name = 'Collective metabolism ECs ' + ' '.join(self.ncbiNames) return graph
[docs] def collectiveMetabolismEnzymes(self, excludeMultifunctionalEnzymes = defaultExcludeMultifunctionalEnzymes) -> SubstanceEnzymeGraph: """ The Substance-Enzyme graph representing the collective metabolic network, occuring in any organism of the clade. This includes each and every enzyme of every organism of this clade. Parameters ---------- excludeMultifunctionalEnzymes : bool, optional If *True*, ignore enzymes with more than one EC number. Returns ------- SubstanceEnzymeGraph Collective metabolic network of enzymes. Raises ------ TypeError If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`. HTTPError If fetching any of the underlying graphs fails. URLError If connection to KEGG fails. """ graph = self.group.collectiveEnzymeGraph(noMultifunctional = excludeMultifunctionalEnzymes, keepOnHeap = True) graph.name = 'Collective metabolism enzymes ' + ' '.join(self.ncbiNames) return graph
[docs] def coreMetabolism(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, excludeMultifunctionalEnzymes = defaultExcludeMultifunctionalEnzymes) -> SubstanceEcGraph: """ The Substance-EC graph representing the common metabolic network, shared among all organisms of the clade. This includes only EC numbers which occur in at least `majorityPercentageCoreMetabolism` % of all organisms of this clade. Parameters ---------- majorityPercentageCoreMetabolism : int, optional A path (substance -> EC -> product) has to occur in `majorityPercentageCoreMetabolism` % of the clade's organisms to be included. excludeMultifunctionalEnzymes : bool, optional If *True*, ignore enzymes with more than one EC number. Returns ------- SubstanceEcGraph Core metabolic network of EC numbers. Raises ------ TypeError If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`. HTTPError If fetching any of the underlying graphs fails. URLError If connection to KEGG fails. """ graph = self.group.majorityEcGraph(majorityPercentage = majorityPercentageCoreMetabolism, noMultifunctional = excludeMultifunctionalEnzymes, keepOnHeap = True) graph.name = 'Core metabolism ECs ' + ' '.join(self.ncbiNames) return graph
[docs] def coreMetabolismEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, excludeMultifunctionalEnzymes = defaultExcludeMultifunctionalEnzymes) -> SubstanceEnzymeGraph: """ The Substance-Enzyme graph representing the common metabolic network, shared among all organisms of the clade. This includes every Enzyme associated with an EC number occuring in core metabolism (see :func:`substanceEcGraph`), no matter from which organism it stems. Parameters ---------- majorityPercentageCoreMetabolism : int, optional A path (substance -> EC -> product) has to occur in `majorityPercentageCoreMetabolism` % of the clade's organisms to be included. excludeMultifunctionalEnzymes : bool, optional If *True*, ignore enzymes with more than one EC number. Returns ------- SubstanceEnzymeGraph Core metabolic network of enzymes. Raises ------ TypeError If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`. HTTPError If fetching any of the underlying graphs fails. URLError If connection to KEGG fails. """ graph = self.group.collectiveEnzymeGraphByEcMajority(majorityPercentage = majorityPercentageCoreMetabolism, majorityTotal = None, noMultifunctional = excludeMultifunctionalEnzymes) graph.name = 'Core metabolism Enzymes ' + ' '.join(self.ncbiNames) return graph
@property def organismsCount(self) -> int: """ The number of organisms (leaf taxons) in this clade. Returns ------- int The number of organisms (leaf taxons) in this clade. """ return self.group.organismsCount # gene duplication
[docs] def geneDuplicatedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False) -> SubstanceEnzymeGraph: """ The substance-Enzyme graph of all gene duplicated enzymes of the core metabolism. Parameters ---------- majorityPercentageCoreMetabolism : int, optional Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of organisms constituting the clade, to be included in the core metabolism. colour : bool, optional If *True*, colours the gene-duplicated enzyme edges in green. The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured. Alternatively, you can specify a :class:`Export.Colour`. Returns ------- SubstanceEnzymeGraph Substance-Enzyme graph containing all gene-duplicated enzymes, and nothing else. If `colour` == *True*, returns the full core metabolism enzyme graph, colouring gene-duplicated enzymes green. Raises ------ TypeError If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`. HTTPError If fetching any of the underlying graphs fails. URLError If connection to KEGG fails. 
""" enzymeGraph = self.coreMetabolismEnzymes(majorityPercentageCoreMetabolism) geneDuplicationModel = SimpleGeneDuplication # geneDuplicationModel = SimpleGroupGeneDuplication(sameGroupOrganisms = self.group) # filter core metabolism enzyme graph geneDuplicatedEnzymes = geneDuplicationModel.filterEnzymes(enzymeGraph, eValue = defaultEValue, ignoreDuplicatesOutsideSet = True, preCalculatedEnzymes = None) # colour core metabolism if colour is not False: if colour is True: colourToUse = Export.Colour.GREEN else: colourToUse = colour geneDuplicatedEnzymesOnly = geneDuplicatedEnzymes geneDuplicatedEnzymes = enzymeGraph Export.addColourAttribute(geneDuplicatedEnzymes, colourToUse, nodes = False, edges = geneDuplicatedEnzymesOnly.getEdges()) geneDuplicatedEnzymes.name = 'Gene-duplicated core metabolism enzymes ' + ' '.join(self.ncbiNames) return geneDuplicatedEnzymes
[docs] def geneDuplicatedEnzymesDict(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> Dict[Enzyme, Set[GeneID]]: """ All gene duplicated enzymes of the core metabolism, pointing to all their duplicates. Parameters ---------- majorityPercentageCoreMetabolism : int, optional Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of organisms constituting the clade, to be included in the core metabolism. Returns ------- Dict[Enzyme, Set[GeneID]] Each gene ID on the right usually has an entry of its own, as an enzyme object, on the left, because they are each others homologs. Raises ------ TypeError If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`. HTTPError If fetching any of the underlying graphs fails. URLError If connection to KEGG fails. """ enzymeGraph = self.coreMetabolismEnzymes(majorityPercentageCoreMetabolism) geneDuplicationModel = SimpleGeneDuplication geneIDsForEnzyme = geneDuplicationModel.getEnzymes(enzymeGraph, returnMatches = True, ignoreDuplicatesOutsideSet = True, preCalculatedEnzymes = None) # if keepOnHeap is True: # self._geneDuplicatedEnzymesObject = geneIDsForEnzyme return geneIDsForEnzyme
[docs] def geneDuplicatedEnzymePairs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> Set[Tuple[Enzyme, Enzyme]]: """ All gene duplicated enzymes of the core metabolism, paired with each of their duplicates. If enzyme A is a duplicate of enzyme B and vice versa, this does not return duplicates, but returns only one pair, with the "smaller" enzyme as the first value. An enzyme is "smaller" if its gene ID string is "smaller". Parameters ---------- majorityPercentageCoreMetabolism : int, optional Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of organisms constituting the clade, to be included in the core metabolism. Returns ------- Set[Tuple[Enzyme, Enzyme]] Set of gene-duplicated enzymes, broken down into pairs of enzymes. Can obviously create many duplicates left and right. Raises ------ TypeError If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`. HTTPError If fetching any of the underlying graphs fails. URLError If connection to KEGG fails. """ enzymes = self.coreMetabolismEnzymes(majorityPercentageCoreMetabolism).getEnzymes() geneDuplicationModel = SimpleGeneDuplication geneIdToEnzyme = dict() for enzyme in enzymes: geneIdToEnzyme[enzyme.geneID] = enzyme enzymePairs = geneDuplicationModel.getEnzymePairs(enzymes, ignoreDuplicatesOutsideSet = True, geneIdToEnzyme = geneIdToEnzyme, preCalculatedEnzymes = None) return enzymePairs
# neofunctionalisation def _neofunctionalisedEnzymes(self, majorityPercentageCoreMetabolism, eValue = defaultEValue, considerOnlyECs = None): # check if the last calculation can be returned if hasattr(self, '_lastNeofunctionalisedEnzymesCache') and self._lastNeofunctionalisedEnzymesCache is not None and considerOnlyECs is None: lastMajorityPercentage, lastNeofunctionalisedEnzymes = self._lastNeofunctionalisedEnzymesCache if lastMajorityPercentage == majorityPercentageCoreMetabolism: return lastNeofunctionalisedEnzymes else: self._lastNeofunctionalisedEnzymesCache = None # calculate enzymes = self.coreMetabolismEnzymes(majorityPercentageCoreMetabolism) if considerOnlyECs is not None: enzymes.keepEnzymesByEC(considerOnlyECs) enzymes = enzymes.getEnzymes() geneDuplicationModel = SimpleGeneDuplication # geneDuplicationModel = SimpleGroupGeneDuplication(sameGroupOrganisms = self.group) neofunctionalisedEnzymes = NeofunctionalisedEnzymes(enzymes, geneDuplicationModel, eValue = eValue) # Cache calculation if considerOnlyECs is None: self._lastNeofunctionalisedEnzymesCache = (majorityPercentageCoreMetabolism, neofunctionalisedEnzymes) return neofunctionalisedEnzymes
[docs] def neofunctionalisedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False, eValue = defaultEValue, considerOnlyECs = None) -> SubstanceEnzymeGraph: """ The substance-Enzyme graph of all neofunctionalised enzymes of the core metabolism. Parameters ---------- majorityPercentageCoreMetabolism : int, optional Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of organisms constituting the clade, to be included in the core metabolism. colour : bool, optional If *True*, colours the neofunctionalised enzyme edges in green. The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured. Alternatively, you can specify a :class:`Export.Colour`. eValue : float, optional Threshold for the statistical expectation value (E-value), below which a sequence alignment is considered significant. considerOnlyECs : Iterable[EcNumber], optional If given, only enzymes with an EC number in `considerOnlyECs` are tested for neofunctionalisation. Returns ------- SubstanceEnzymeGraph Substance-Enzyme graph containing all neofunctionalised enzymes, and nothing else. If `colour` == *True*, returns the full core metabolism enzyme graph, colouring neofunctionalised enzymes green. Raises ------ TypeError If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`. HTTPError If fetching any of the underlying graphs fails. URLError If connection to KEGG fails. 
""" # get neofunctionalisations neofunctionalisedEnzymes = self._neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, eValue, considerOnlyECs) # filter core metabolism enzyme graph enzymeGraph = self.coreMetabolismEnzymes(majorityPercentageCoreMetabolism) neofunctionalisedMetabolism = neofunctionalisedEnzymes.filterGraph(enzymeGraph, minimumEcDifference = None) # colour core metabolism if colour is not False: if colour is True: colourToUse = Export.Colour.GREEN else: colourToUse = colour neofunctionalisedMetabolismOnly = neofunctionalisedMetabolism neofunctionalisedMetabolism = enzymeGraph Export.addColourAttribute(neofunctionalisedMetabolism, colourToUse, nodes = False, edges = neofunctionalisedMetabolismOnly.getEdges()) neofunctionalisedMetabolism.name = 'Neofunctionalised core metabolism enzymes ' + ' '.join(self.ncbiNames) return neofunctionalisedMetabolism
[docs] def neofunctionalisedECs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation = defaultMajorityPercentageNeofunctionalisation, colour = False, eValue = defaultEValue, considerOnlyECs = None) -> SubstanceEcGraph: """ The substance-EC graph of EC numbers belonging to function changes of neofunctionalised enzymes of the core metabolism. Only EC numbers which could have actually taken part in a function change are reported. This is because enzymes can have multiple EC numbers, while only some might be eligible for a function change. For example, consider enzyme A (1.2.3.4, 6.5.4.3) and enzyme B (1.2.3.4, 4.5.6.7). 1.2.3.4 can never change its function to itself, which leaves 1.2.3.4 <-> 6.5.4.3, 1.2.3.4 <-> 4.5.6.7, and 4.5.6.7 <-> 6.5.4.3 as possible function changes. This obviously requires a function to change to a single other function, without splitting or merging, which might be biologically inacurate. However, this should happen rarely, plus one could exclude all enzymes with multiple functions from the core metabolism in the first place. The maximum expectation value (e-value) necessary for a sequence alignment to constitute a "similar sequence" can be changed via :attr:`defaultEValue`. Parameters ---------- majorityPercentageCoreMetabolism : int, optional Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of organisms constituting the clade, to be included in the core metabolism. majorityPercentageNeofunctionalisation : int, optional Every EC number considered for neofunctionalisation has to be associated with a function change of neofunctionalisations whose enzymes involve at least `majorityPercentageNeofunctionalisation` % of of the clade's organisms. 
A high `majorityPercentageNeofunctionalisation` disallows us to detect neofunctionalisations which happened a long time ago, with their genes having diverged significantly; or only recently, with not all organisms of the child clade having picked up the new function, yet. colour : bool, optional If *True*, colours the neofunctionalised EC edges in green. The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured. Alternatively, you can specify a :class:`Export.Colour`. eValue : float, optional Threshold for the statistical expectation value (E-value), below which a sequence alignment is considered significant. considerOnlyECs : Iterable[EcNumber], optional If given, only enzymes with an EC number in `considerOnlyECs` are tested for neofunctionalisation. Returns ------- SubstanceEcGraph The substance-EC graph representing the metabolic network which was probably affected due to neofunctionalisations of the core metabolism of the clade. If `colour` == *True*, returns the full union of parent and child, colouring neofunctionalised ECs green. Raises ------ TypeError If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`. HTTPError If fetching any of the underlying graphs fails. URLError If connection to KEGG fails. 
""" # get neofunctionalisations neofunctionalisedECs = NeofunctionalisedECs(self._neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, eValue, considerOnlyECs)) # filter core metabolism EC graph coreMetabolism = self.coreMetabolism(majorityPercentageCoreMetabolism) minimumOrganismsCount = math.ceil(self.organismsCount * (majorityPercentageNeofunctionalisation / 100)) neofunctionalisedMetabolism = neofunctionalisedECs.filterGraph(coreMetabolism, minimumEcDifference = None, minimumOrganismsCount = minimumOrganismsCount) # colour core metabolism if colour is not False: if colour is True: colourToUse = Export.Colour.GREEN else: colourToUse = colour neofunctionalisedMetabolismOnly = neofunctionalisedMetabolism neofunctionalisedMetabolism = coreMetabolism Export.addColourAttribute(neofunctionalisedMetabolism, colourToUse, nodes = False, edges = neofunctionalisedMetabolismOnly.getEdges()) neofunctionalisedMetabolism.name = 'Neofunctionalised core metabolism ' + ' '.join(self.ncbiNames) return neofunctionalisedMetabolism
[docs] def neofunctionalisations(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, eValue = defaultEValue, considerOnlyECs = None) -> Set[Neofunctionalisation]: """ Get neofunctionalisation events of all enzymes in the core metabolism. Parameters ---------- majorityPercentageCoreMetabolism : int, optional Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of organisms constituting the clade, to be included in the core metabolism. eValue : float, optional Threshold for the statistical expectation value (E-value), below which a sequence alignment is considered significant. considerOnlyECs : Iterable[EcNumber], optional If given, only enzymes with an EC number in `considerOnlyECs` are tested for neofunctionalisation. Returns ------- Set[Neofunctionalisation] Set of possible neofunctionalisation events. Raises ------ TypeError If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`. HTTPError If fetching any of the underlying graphs fails. URLError If connection to KEGG fails. """ # get neofunctionalisations return self._neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, eValue, considerOnlyECs).getNeofunctionalisations()
[docs] def neofunctionalisationsForFunctionChange(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation = defaultMajorityPercentageNeofunctionalisation, eValue = defaultEValue, considerOnlyECs = None) -> Dict[FunctionChange, Set[Neofunctionalisation]]: """ Get neofunctionalisation events of all enzymes in the core metabolism, grouped by each possible function change event. Parameters ---------- majorityPercentageCoreMetabolism : int, optional Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of organisms constituting the clade, to be included in the core metabolism. majorityPercentageNeofunctionalisation : int, optional Every EC number considered for neofunctionalisation has to be associated with a function change of neofunctionalisations whose enzymes involve at least `majorityPercentageNeofunctionalisation` % of of the clade's organisms. A high `majorityPercentageNeofunctionalisation` disallows us to detect neofunctionalisations which happened a long time ago, with their genes having diverged significantly; or only recently, with not all organisms of the child clade having picked up the new function, yet. eValue : float, optional Threshold for the statistical expectation value (E-value), below which a sequence alignment is considered significant. considerOnlyECs : Iterable[EcNumber], optional If given, only enzymes with an EC number in `considerOnlyECs` are tested for neofunctionalisation. Returns ------- Dict[FunctionChange, Set[Neofunctionalisation]] Dictionary of function changes, pointing to a set of neofunctionalisations which might have caused them. Since an enzyme of a neofunctionalisation can have multiple EC numbers, all combinations of the two enzymes' EC numbers are formed and treated as separate possible function changes. 
The neofunctionalisation is then saved again for each function change, which obviously leads to duplicated neofunctionalisation objects. Raises ------ TypeError If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`. HTTPError If fetching any of the underlying graphs fails. URLError If connection to KEGG fails. """ # get neofunctionalisations minimumOrganismsCount = math.ceil(self.organismsCount * (majorityPercentageNeofunctionalisation / 100)) return NeofunctionalisedECs(self._neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, eValue, considerOnlyECs)).getNeofunctionalisationsForFunctionChange(minimumOrganismsCount = minimumOrganismsCount)
# redundancy of neofunctionalisation
[docs] def redundantECsForContributingNeofunctionalisation(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation = defaultMajorityPercentageNeofunctionalisation, eValue = defaultEValue, redundancyType: 'RedundancyType' = None, considerOnlyECs = None) -> Dict[Neofunctionalisation, Set[EcNumber]]: """ Get neofunctionalisation events of all enzymes in the core metabolism, which contribute to redundancy, pointing to the EC numbers their function changes' EC numbers provides redundancy for. Parameters ---------- majorityPercentageCoreMetabolism : int, optional Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of organisms constituting the clade, to be included in the core metabolism. majorityPercentageNeofunctionalisation : int, optional Every EC number considered for neofunctionalisation has to be associated with a function change of neofunctionalisations whose enzymes involve at least `majorityPercentageNeofunctionalisation` % of of the clade's organisms. A high `majorityPercentageNeofunctionalisation` disallows us to detect neofunctionalisations which happened a long time ago, with their genes having diverged significantly; or only recently, with not all organisms of the child clade having picked up the new function, yet. eValue : float, optional Threshold for the statistical expectation value (E-value), below which a sequence alignment is considered significant. redundancyType : RedundancyType Definition of redundancy for which to check the neofunctionalisation's contribution. Default to `RedundancyType.default`. considerOnlyECs : Iterable[EcNumber], optional If given, only enzymes with an EC number in `considerOnlyECs` are tested for neofunctionalisation. Returns ------- Dict[FunctionChange, Set[Neofunctionalisation]] Dictionary of function changes, pointing to a set of neofunctionalisations which might have caused them. 
Since an enzyme of a neofunctionalisation can have multiple EC numbers, all combinations of the two enzymes' EC numbers are formed and treated as separate possible function changes. The neofunctionalisation is then saved again for each function change, which obviously leads to duplicated neofunctionalisation objects. Raises ------ TypeError If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`. HTTPError If fetching any of the underlying graphs fails. URLError If connection to KEGG fails. """ from FEV_KEGG.Robustness.Topology.Redundancy import Redundancy, RedundancyContribution, RedundancyType if redundancyType is None: redundancyType = RedundancyType.default #- calculate "neofunctionalised" ECs neofunctionalisedMetabolismSet = self.neofunctionalisedECs(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, eValue, considerOnlyECs).getECs() neofunctionalisationsForFunctionChange = self.neofunctionalisationsForFunctionChange(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, eValue, considerOnlyECs) #- calculate redundancy redundancy = Redundancy( self.coreMetabolism(majorityPercentageCoreMetabolism) ) redundancyContribution = RedundancyContribution(redundancy, neofunctionalisedMetabolismSet) contributedECsForContributingNeofunctionalisedEC = redundancyContribution.getContributedKeysForSpecial(redundancyType) contributingNeofunctionalisedECs = set(contributedECsForContributingNeofunctionalisedEC.keys()) #- REPEAT for each function change consisting of "neofunctionalised" ECs, which also contribute to redundancy contributingNeofunctionalisations = dict() for functionChange, neofunctionalisations in neofunctionalisationsForFunctionChange.items(): #- report enzyme pairs of neofunctionalisations, which caused the EC to be considered "neofunctionalised", and are in return 
contributing to redundancy if functionChange.ecA in contributingNeofunctionalisedECs or functionChange.ecB in contributingNeofunctionalisedECs: # function change contributes to redundancy for neofunctionalisation in neofunctionalisations: currentSetOfContributedECs = contributingNeofunctionalisations.get(neofunctionalisation, None) if currentSetOfContributedECs is None: currentSetOfContributedECs = set() contributingNeofunctionalisations[neofunctionalisation] = currentSetOfContributedECs for ec in functionChange.ecPair: contributedECs = contributedECsForContributingNeofunctionalisedEC.get(ec, None) if contributedECs is not None: currentSetOfContributedECs.update(contributedECs) return contributingNeofunctionalisations
class CladePair(object):

    def __init__(self, parent, child, excludeUnclassified = defaultExcludeUnclassified, oneOrganismPerSpecies = defaultOneOrganismPerSpecies):
        """
        Two clades in NCBI taxonomy, 'child' is assumed younger than 'parent'.

        It is not checked whether the child taxon actually is a child of the parent taxon.
        Therefore, it is possible to pass a list of NCBI names to the underlying :class:`Clade` objects by instantiating `parent` = List[str] and/or `child` = List[str].
        This is useful when comparing groups of organisms which are, according to NCBI, not related.

        Parameters
        ----------
        parent : str or List[str] or Clade
            Path(s) of the parent clade's taxon, as defined by NCBI taxonomy, e.g. 'Proteobacteria/Gammaproteobacteria'. Or a ready :class:`Clade` object.
        child : str or List[str] or Clade
            Path(s) of the child clade's taxon, as defined by NCBI taxonomy, e.g. 'Enterobacter'. Or a ready :class:`Clade` object.
        excludeUnclassified : bool, optional
            If *True*, ignore taxons with a path containing the string 'unclassified'. Only used if `parent` and/or `child` is not already a :class:`Clade`.
        oneOrganismPerSpecies : bool, optional
            If *True*, use only the first organism of each species. Only used if `parent` and/or `child` is not already a :class:`Clade`.

        Attributes
        ----------
        self.childClade : :class:`Clade`
        self.parentClade : :class:`Clade`
        """
        def asClade(taxon):
            # ready-made Clade objects are used as they are, anything else is treated as NCBI name(s)
            if isinstance(taxon, Clade):
                return taxon
            return Clade(taxon, excludeUnclassified, oneOrganismPerSpecies=oneOrganismPerSpecies)

        self.parentClade = asClade(parent)
        self.childClade = asClade(child)

    @property
    def parentNCBInames(self):
        """
        All names/paths in NCBI taxonomy used to create the parent clade.
        """
        parentClade = self.parentClade
        return parentClade.ncbiNames

    @property
    def childNCBInames(self):
        """
        All names/paths in NCBI taxonomy used to create the child clade.
        """
        childClade = self.childClade
        return childClade.ncbiNames

    # set-operations on core metabolism

    ## for EC graphs
def conservedMetabolism(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> SubstanceEcGraph:
    """
    Substance-EC graph of the core metabolism conserved between parent and child.
    
    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        Percentage of the organisms of a clade in which a substance-EC-product edge has to occur, to become part of that clade's core metabolism.
        The threshold is applied to parent clade and child clade individually.
        Assuming the parent clade fully includes the child clade, the occurrence of an edge in the child clade's core metabolism counts towards the percentage of the parent clade.
        Hence, an EC number missing from the child's core metabolism is unlikely to show up in the parent's core metabolism, unless `majorityPercentageCoreMetabolism` is lowered towards 0.
    
    Returns
    -------
    SubstanceEcGraph
        The metabolic network which stayed the same between the core metabolism of the parent (assumed older) and the core metabolism of the child (assumed younger).
        Computed by :class:`GeneFunctionConservation` and named after both clades' NCBI names.
    
    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    coreOfParent = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    coreOfChild = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)
    
    conserved = GeneFunctionConservation.getGraph(coreOfParent, coreOfChild)
    
    parentLabel = ' '.join(self.parentNCBInames)
    childLabel = ' '.join(self.childNCBInames)
    conserved.name = 'Conserved metabolism ' + parentLabel + ' -> ' + childLabel
    return conserved
def addedMetabolism(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> SubstanceEcGraph:
    """
    Substance-EC graph of the core metabolism which the child has, but the parent has not.
    
    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        Percentage of the organisms of a clade in which a substance-EC-product edge has to occur, to become part of that clade's core metabolism.
        The threshold is applied to parent clade and child clade individually.
        Assuming the parent clade fully includes the child clade, the occurrence of an edge in the child clade's core metabolism counts towards the percentage of the parent clade.
        Hence, an EC number missing from the child's core metabolism is unlikely to show up in the parent's core metabolism, unless `majorityPercentageCoreMetabolism` is lowered towards 0.
    
    Returns
    -------
    SubstanceEcGraph
        The metabolic network which was added to the core metabolism of the parent (assumed older) on the way to the core metabolism of the child (assumed younger).
        Computed by :class:`GeneFunctionAddition` and named after both clades' NCBI names.
    
    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    coreOfParent = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    coreOfChild = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)
    
    added = GeneFunctionAddition.getGraph(coreOfParent, coreOfChild)
    
    parentLabel = ' '.join(self.parentNCBInames)
    childLabel = ' '.join(self.childNCBInames)
    added.name = 'Added metabolism ' + parentLabel + ' -> ' + childLabel
    return added
def lostMetabolism(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> SubstanceEcGraph:
    """
    Substance-EC graph of the core metabolism which the parent has, but the child has not.
    
    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        Percentage of the organisms of a clade in which a substance-EC-product edge has to occur, to become part of that clade's core metabolism.
        The threshold is applied to parent clade and child clade individually.
        Assuming the parent clade fully includes the child clade, the occurrence of an edge in the child clade's core metabolism counts towards the percentage of the parent clade.
        Hence, an EC number missing from the child's core metabolism is unlikely to show up in the parent's core metabolism, unless `majorityPercentageCoreMetabolism` is lowered towards 0.
    
    Returns
    -------
    SubstanceEcGraph
        The metabolic network which got lost from the core metabolism of the parent (assumed older) on the way to the core metabolism of the child (assumed younger).
        Computed by :class:`GeneFunctionLoss` and named after both clades' NCBI names.
    
    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    coreOfParent = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    coreOfChild = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)
    
    lost = GeneFunctionLoss.getGraph(coreOfParent, coreOfChild)
    
    parentLabel = ' '.join(self.parentNCBInames)
    childLabel = ' '.join(self.childNCBInames)
    lost.name = 'Lost metabolism ' + parentLabel + ' -> ' + childLabel
    return lost
def divergedMetabolism(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False) -> SubstanceEcGraph:
    """
    Substance-EC graph of the core metabolism which differs between parent and child.
    
    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        Percentage of the organisms of a clade in which a substance-EC-product edge has to occur, to become part of that clade's core metabolism.
        The threshold is applied to parent clade and child clade individually.
        Assuming the parent clade fully includes the child clade, the occurrence of an edge in the child clade's core metabolism counts towards the percentage of the parent clade.
        Hence, an EC number missing from the child's core metabolism is unlikely to show up in the parent's core metabolism, unless `majorityPercentageCoreMetabolism` is lowered towards 0.
    colour : bool, optional
        If *True*, the graph is built as the union of the lost and the added metabolism, with lost EC edges coloured in blue and added EC edges coloured in red.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.
        Any other value yields the uncoloured graph computed by :class:`GeneFunctionDivergence`.
    
    Returns
    -------
    SubstanceEcGraph
        The metabolic network which changed between the core metabolism of the parent (assumed older) and the core metabolism of the child (assumed younger).
    
    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    coreOfParent = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    coreOfChild = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)
    
    if colour is not True:
        diverged = GeneFunctionDivergence.getGraph(coreOfParent, coreOfChild)
    
    else:
        # remember which edges were lost and which were added, before merging both graphs
        lost = GeneFunctionLoss.getGraph(coreOfParent, coreOfChild)
        edgesLost = lost.getEdges()
        
        added = GeneFunctionAddition.getGraph(coreOfParent, coreOfChild)
        edgesAdded = added.getEdges()
        
        diverged = lost.union(added, addCount = False, updateName = False)
        
        Export.addColourAttribute(diverged, colour = Export.Colour.BLUE, nodes = False, edges = edgesLost)
        Export.addColourAttribute(diverged, colour = Export.Colour.RED, nodes = False, edges = edgesAdded)
    
    parentLabel = ' '.join(self.parentNCBInames)
    childLabel = ' '.join(self.childNCBInames)
    diverged.name = 'Diverged metabolism ' + parentLabel + ' -> ' + childLabel
    return diverged
def unifiedMetabolism(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False) -> SubstanceEcGraph:
    """
    Substance-EC graph of the unified core metabolisms of parent and child.
    
    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.
    colour : bool, optional
        If *True*, colours the EC edges lost from the parent in blue, the EC edges added in the child in red, and the EC edges conserved in both in pink.
        The colours are applied in this order.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.
    
    Returns
    -------
    SubstanceEcGraph
        The union of the core metabolisms of both, parent and child.
        If `colour` == *True*, coloured differently for the lost, added, and conserved edges. Nodes are not coloured.
    
    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    
    See Also
    --------
    :mod:`FEV_KEGG.Drawing.Export` : Export the graph into a file, e.g. for visualisation in Cytoscape.
    """
    coreOfParent = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    coreOfChild = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)
    
    unified = coreOfParent.union(coreOfChild, addCount = False, updateName = False)
    unified.name = 'Unified metabolism ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
    
    if colour is not True:
        return unified
    
    # first collect the edges of every kind of event, only then paint them, in the listed order
    eventColours = (
        (GeneFunctionLoss, Export.Colour.BLUE),
        (GeneFunctionAddition, Export.Colour.RED),
        (GeneFunctionConservation, Export.Colour.PINK),
    )
    edgesToPaint = [(paint, event.getGraph(coreOfParent, coreOfChild).getEdges()) for event, paint in eventColours]
    
    for paint, edges in edgesToPaint:
        Export.addColourAttribute(unified, colour = paint, nodes = False, edges = edges)
    
    return unified
## for enzyme graphs
def conservedMetabolismEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False):
    """
    Two Substance-Enzyme graphs derived from the conserved core metabolism, see :func:`conservedMetabolism`.
    
    The EC numbers conserved between both core metabolisms are determined first.
    Then, the enzymes associated with these EC numbers are kept from the collective metabolism of the parent and of the child, individually.
    
    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.
    colour : bool, optional
        If *True*, both graphs are merged into a single :class:`SubstanceEnzymeGraph`, in which the parent's enzyme edges are coloured in blue, and afterwards the child's enzyme edges in red.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.
    
    Returns
    -------
    Tuple[SubstanceEnzymeGraph, SubstanceEnzymeGraph] or SubstanceEnzymeGraph
        Tuple of the parent clade's graph, followed by the child clade's graph. In each graph's name, the clade it stems from is marked with asterisks.
        If `colour` == *True*, a single merged and coloured Substance-Enzyme graph instead.
    
    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    coreOfParent = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    coreOfChild = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)
    
    conservedECs = GeneFunctionConservation.getECs(coreOfParent, coreOfChild)
    
    collectiveOfParent = self.parentClade.collectiveMetabolismEnzymes()
    enzymesOfParent = collectiveOfParent.keepEnzymesByEC(conservedECs)
    collectiveOfChild = self.childClade.collectiveMetabolismEnzymes()
    enzymesOfChild = collectiveOfChild.keepEnzymesByEC(conservedECs)
    
    if colour is not True:
        parentLabel = ' '.join(self.parentNCBInames)
        childLabel = ' '.join(self.childNCBInames)
        enzymesOfParent.name = 'Conserved metabolism enzymes *' + parentLabel + '* -> ' + childLabel
        enzymesOfChild.name = 'Conserved metabolism enzymes ' + parentLabel + ' -> *' + childLabel + '*'
        return (enzymesOfParent, enzymesOfChild)
    
    # remember the origin of each edge, before merging both graphs
    edgesOfParent = enzymesOfParent.getEdges()
    edgesOfChild = enzymesOfChild.getEdges()
    
    merged = enzymesOfParent.union(enzymesOfChild, addCount = False, updateName = False)
    
    Export.addColourAttribute(merged, colour = Export.Colour.BLUE, nodes = False, edges = edgesOfParent)
    Export.addColourAttribute(merged, colour = Export.Colour.RED, nodes = False, edges = edgesOfChild)
    
    merged.name = 'Conserved metabolism enzymes ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
    return merged
def addedMetabolismEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> SubstanceEnzymeGraph:
    """
    Substance-Enzyme graph derived from the added core metabolism, see :func:`addedMetabolism`.
    
    The EC numbers added from parent to child are determined first.
    Then, the enzymes associated with these EC numbers are kept from the child's collective enzyme metabolism.
    
    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`addedMetabolism`.
    
    Returns
    -------
    SubstanceEnzymeGraph
        Substance-Enzyme graph of enzymes from the child clade, restricted to the added EC numbers.
    
    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    coreOfParent = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    coreOfChild = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)
    
    addedECs = GeneFunctionAddition.getECs(coreOfParent, coreOfChild)
    
    collectiveOfChild = self.childClade.collectiveMetabolismEnzymes()
    enzymesOfChild = collectiveOfChild.keepEnzymesByEC(addedECs)
    
    parentLabel = ' '.join(self.parentNCBInames)
    childLabel = ' '.join(self.childNCBInames)
    enzymesOfChild.name = 'Added metabolism enzymes ' + parentLabel + ' -> ' + childLabel
    return enzymesOfChild
def lostMetabolismEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> SubstanceEnzymeGraph:
    """
    Substance-Enzyme graph derived from the lost core metabolism, see :func:`lostMetabolism`.
    
    The EC numbers lost from parent to child are determined first.
    Then, the enzymes associated with these lost EC numbers are kept from the parent's collective enzyme metabolism.
    
    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`lostMetabolism`.
    
    Returns
    -------
    SubstanceEnzymeGraph
        Substance-Enzyme graph of enzymes from the parent clade, restricted to the lost EC numbers.
    
    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    coreOfParent = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    coreOfChild = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)
    
    lostECs = GeneFunctionLoss.getECs(coreOfParent, coreOfChild)
    
    collectiveOfParent = self.parentClade.collectiveMetabolismEnzymes()
    enzymesOfParent = collectiveOfParent.keepEnzymesByEC(lostECs)
    
    parentLabel = ' '.join(self.parentNCBInames)
    childLabel = ' '.join(self.childNCBInames)
    enzymesOfParent.name = 'Lost metabolism enzymes ' + parentLabel + ' -> ' + childLabel
    return enzymesOfParent
def divergedMetabolismEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False):
    """
    Two Substance-Enzyme graphs derived from the diverged core metabolism, see :func:`divergedMetabolism`.
    
    The EC numbers diverged between both core metabolisms are determined first.
    Then, the enzymes associated with these diverged EC numbers are kept from the collective metabolism of the parent and of the child, individually.
    
    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`divergedMetabolism`.
    colour : bool, optional
        If *True*, both graphs are merged into a single :class:`SubstanceEnzymeGraph`, in which the parent's (lost) enzyme edges are coloured in blue, and afterwards the child's (added) enzyme edges in red.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.
    
    Returns
    -------
    Tuple[SubstanceEnzymeGraph, SubstanceEnzymeGraph] or SubstanceEnzymeGraph
        Tuple of the parent clade's graph, followed by the child clade's graph. In each graph's name, the clade it stems from is marked with asterisks.
        If `colour` == *True*, a single merged Substance-Enzyme graph instead, coloured blue for parent and red for child.
    
    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    coreOfParent = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    coreOfChild = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)
    
    divergedECs = GeneFunctionDivergence.getECs(coreOfParent, coreOfChild)
    
    collectiveOfParent = self.parentClade.collectiveMetabolismEnzymes()
    enzymesOfParent = collectiveOfParent.keepEnzymesByEC(divergedECs)
    collectiveOfChild = self.childClade.collectiveMetabolismEnzymes()
    enzymesOfChild = collectiveOfChild.keepEnzymesByEC(divergedECs)
    
    if colour is not True:
        parentLabel = ' '.join(self.parentNCBInames)
        childLabel = ' '.join(self.childNCBInames)
        enzymesOfParent.name = 'Diverged metabolism enzymes *' + parentLabel + '* -> ' + childLabel
        enzymesOfChild.name = 'Diverged metabolism enzymes ' + parentLabel + ' -> *' + childLabel + '*'
        return (enzymesOfParent, enzymesOfChild)
    
    # remember the origin of each edge, before merging both graphs
    edgesOfParent = enzymesOfParent.getEdges()
    edgesOfChild = enzymesOfChild.getEdges()
    
    merged = enzymesOfParent.union(enzymesOfChild, addCount = False, updateName = False)
    
    Export.addColourAttribute(merged, colour = Export.Colour.BLUE, nodes = False, edges = edgesOfParent)
    Export.addColourAttribute(merged, colour = Export.Colour.RED, nodes = False, edges = edgesOfChild)
    
    merged.name = 'Diverged metabolism enzymes ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
    return merged
def unifiedMetabolismEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False) -> SubstanceEnzymeGraph:
    """
    Substance-Enzyme graph derived from the unified core metabolisms of parent and child.
    
    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.
    colour : bool, optional
        If *True*, colours the parent's enzyme edges in blue, and afterwards the child's enzyme edges in red.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.
    
    Returns
    -------
    SubstanceEnzymeGraph
        The union of the core metabolism enzyme graphs of both, parent and child.
        If `colour` == *True*, coloured blue for the parent's edges and red for the child's edges. Nodes are not coloured.
    
    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    enzymesOfParent = self.parentClade.coreMetabolismEnzymes(majorityPercentageCoreMetabolism)
    enzymesOfChild = self.childClade.coreMetabolismEnzymes(majorityPercentageCoreMetabolism)
    
    unified = enzymesOfParent.union(enzymesOfChild, addCount = False, updateName = False)
    unified.name = 'Unified metabolism enzymes ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
    
    if colour is not True:
        return unified
    
    edgesOfParent = enzymesOfParent.getEdges()
    edgesOfChild = enzymesOfChild.getEdges()
    
    # parent first, child second
    Export.addColourAttribute(unified, colour = Export.Colour.BLUE, nodes = False, edges = edgesOfParent)
    Export.addColourAttribute(unified, colour = Export.Colour.RED, nodes = False, edges = edgesOfChild)
    
    return unified
# set-operations on gene-duplicated core metabolism ## for enzymes ### for enzyme graphs
def conservedMetabolismGeneDuplicatedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False):
    """
    Two Substance-Enzyme graphs of gene-duplicated enzymes, derived from the conserved core metabolism.
    
    The enzymes of the conserved core metabolism are taken from :func:`conservedMetabolismEnzymes`.
    The gene-duplicated enzymes are calculated for parent and child, individually.
    Finally, only the gene-duplicated enzymes among the conserved core metabolism enzymes are reported, or highlighted if `colour` == *True*.
    
    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.
    colour : bool, optional
        If *True*, colours the enzyme edges from the parent in blue, and from the child in red, as done by :func:`conservedMetabolismEnzymes`.
        Afterwards, gene-duplicated enzyme edges of the parent are coloured in green, the ones of the child in yellow.
        When doing so, a single :class:`SubstanceEnzymeGraph` is returned, not a :class:`Tuple`.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.
    
    Returns
    -------
    Tuple[SubstanceEnzymeGraph, SubstanceEnzymeGraph] or SubstanceEnzymeGraph
        Tuple of the parent clade's graph, followed by the child clade's graph, each reduced to its clade's gene-duplicated enzymes.
        If `colour` == *True*, a single coloured Substance-Enzyme graph instead.
    
    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    conserved = self.conservedMetabolismEnzymes(majorityPercentageCoreMetabolism, colour = colour)
    
    duplicatedOfParent = self.parentClade.geneDuplicatedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    duplicatedOfChild = self.childClade.geneDuplicatedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    
    if colour is not True:
        # `conserved` is the tuple (parent graph, child graph)
        conservedOfParent, conservedOfChild = conserved
        
        reducedParent = conservedOfParent.removeAllEnzymesExcept(duplicatedOfParent.getEnzymes())
        reducedChild = conservedOfChild.removeAllEnzymesExcept(duplicatedOfChild.getEnzymes())
        
        parentLabel = ' '.join(self.parentNCBInames)
        childLabel = ' '.join(self.childNCBInames)
        reducedParent.name = 'Conserved metabolism gene-duplicated enzymes *' + parentLabel + '* -> ' + childLabel
        reducedChild.name = 'Conserved metabolism gene-duplicated enzymes ' + parentLabel + ' -> *' + childLabel + '*'
        return (reducedParent, reducedChild)
    
    # `conserved` is a single, already blue/red coloured graph
    edgesOfParent = duplicatedOfParent.getEdges()
    edgesOfChild = duplicatedOfChild.getEdges()
    
    Export.addColourAttribute(conserved, colour = Export.Colour.GREEN, nodes = False, edges = edgesOfParent)
    Export.addColourAttribute(conserved, colour = Export.Colour.YELLOW, nodes = False, edges = edgesOfChild)
    
    conserved.name = 'Conserved metabolism gene-duplicated enzymes ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
    return conserved
def addedMetabolismGeneDuplicatedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> SubstanceEnzymeGraph:
    """
    Substance-Enzyme graph of gene-duplicated enzymes, derived from the added core metabolism.
    
    The EC numbers added from parent to child are determined first.
    Then, the enzymes associated with these EC numbers are kept from the child's gene-duplicated enzymes.
    
    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`addedMetabolism`.
    
    Returns
    -------
    SubstanceEnzymeGraph
        Substance-Enzyme graph of gene-duplicated enzymes from the child clade, restricted to the added EC numbers.
    
    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    coreOfParent = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    coreOfChild = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)
    
    addedECs = GeneFunctionAddition.getECs(coreOfParent, coreOfChild)
    
    duplicatedOfChild = self.childClade.geneDuplicatedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    addedDuplicated = duplicatedOfChild.keepEnzymesByEC(addedECs)
    
    parentLabel = ' '.join(self.parentNCBInames)
    childLabel = ' '.join(self.childNCBInames)
    addedDuplicated.name = 'Added metabolism gene-duplicated enzymes ' + parentLabel + ' -> ' + childLabel
    return addedDuplicated
def lostMetabolismGeneDuplicatedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> SubstanceEnzymeGraph:
    """
    Substance-Enzyme graph of gene-duplicated enzymes, derived from the lost core metabolism.
    
    The EC numbers lost from parent to child are determined first.
    Then, the enzymes associated with these lost EC numbers are kept from the parent's gene-duplicated enzymes.
    
    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`lostMetabolism`.
    
    Returns
    -------
    SubstanceEnzymeGraph
        Substance-Enzyme graph of gene-duplicated enzymes from the parent clade, restricted to the lost EC numbers.
    
    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    coreOfParent = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    coreOfChild = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)
    
    lostECs = GeneFunctionLoss.getECs(coreOfParent, coreOfChild)
    
    duplicatedOfParent = self.parentClade.geneDuplicatedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    lostDuplicated = duplicatedOfParent.keepEnzymesByEC(lostECs)
    
    parentLabel = ' '.join(self.parentNCBInames)
    childLabel = ' '.join(self.childNCBInames)
    lostDuplicated.name = 'Lost metabolism gene-duplicated enzymes ' + parentLabel + ' -> ' + childLabel
    return lostDuplicated
def divergedMetabolismGeneDuplicatedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False):
    """
    Two Substance-Enzyme graphs of gene-duplicated enzymes, derived from the diverged core metabolism.
    
    The enzymes of the diverged core metabolism are taken from :func:`divergedMetabolismEnzymes`.
    The gene-duplicated enzymes are calculated for parent and child, individually.
    Finally, only the gene-duplicated enzymes among the diverged core metabolism enzymes are reported, or highlighted if `colour` == *True*.
    
    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`divergedMetabolism`.
    colour : bool, optional
        If *True*, colours the lost enzyme edges in blue, and the added enzyme edges in red, as done by :func:`divergedMetabolismEnzymes`.
        Afterwards, gene-duplicated enzyme edges of the parent are coloured in green, the ones of the child in yellow.
        When doing so, a single :class:`SubstanceEnzymeGraph` is returned, not a :class:`Tuple`.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.
    
    Returns
    -------
    Tuple[SubstanceEnzymeGraph, SubstanceEnzymeGraph] or SubstanceEnzymeGraph
        Tuple of the parent clade's graph, followed by the child clade's graph, each reduced to its clade's gene-duplicated enzymes.
        If `colour` == *True*, a single coloured Substance-Enzyme graph instead.
    
    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    diverged = self.divergedMetabolismEnzymes(majorityPercentageCoreMetabolism, colour = colour)
    
    duplicatedOfParent = self.parentClade.geneDuplicatedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    duplicatedOfChild = self.childClade.geneDuplicatedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    
    if colour is not True:
        # `diverged` is the tuple (parent graph, child graph)
        divergedOfParent, divergedOfChild = diverged
        
        reducedParent = divergedOfParent.removeAllEnzymesExcept(duplicatedOfParent.getEnzymes())
        reducedChild = divergedOfChild.removeAllEnzymesExcept(duplicatedOfChild.getEnzymes())
        
        parentLabel = ' '.join(self.parentNCBInames)
        childLabel = ' '.join(self.childNCBInames)
        reducedParent.name = 'Diverged metabolism gene-duplicated enzymes *' + parentLabel + '* -> ' + childLabel
        reducedChild.name = 'Diverged metabolism gene-duplicated enzymes ' + parentLabel + ' -> *' + childLabel + '*'
        return (reducedParent, reducedChild)
    
    # `diverged` is a single, already blue/red coloured graph
    edgesOfParent = duplicatedOfParent.getEdges()
    edgesOfChild = duplicatedOfChild.getEdges()
    
    Export.addColourAttribute(diverged, colour = Export.Colour.GREEN, nodes = False, edges = edgesOfParent)
    Export.addColourAttribute(diverged, colour = Export.Colour.YELLOW, nodes = False, edges = edgesOfChild)
    
    diverged.name = 'Diverged metabolism gene-duplicated enzymes ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
    return diverged
def unifiedMetabolismGeneDuplicatedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False) -> SubstanceEnzymeGraph:
    """
    Substance-Enzyme graph of gene-duplicated enzymes, derived from the unified core metabolisms.
    
    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.
    colour : bool, optional
        If *False*, only the gene-duplicated enzymes of parent and child are unified, without any colouring.
        Otherwise, the whole unified core metabolism is taken from :func:`unifiedMetabolismEnzymes`, with the parent's enzyme edges coloured in blue, and the child's enzyme edges in red.
        Afterwards, gene-duplicated enzyme edges of the parent are coloured in green, the ones of the child in yellow.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.
    
    Returns
    -------
    SubstanceEnzymeGraph
        If `colour` == *False*, the union of the gene-duplicated enzyme graphs of both, parent and child.
        Otherwise, the coloured graph of the unified core metabolism enzymes, in which the gene-duplicated enzyme edges are highlighted.
        In both cases, the graph is named 'Unified metabolism gene-duplicated enzymes', followed by both clades' NCBI names.
    
    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    parentGeneDuplicated = self.parentClade.geneDuplicatedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    childGeneDuplicated = self.childClade.geneDuplicatedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    
    if colour is False:
        graph = parentGeneDuplicated.union(childGeneDuplicated, addCount = False, updateName = False)
    
    else:
        # start from the already blue/red coloured unified graph and highlight the gene-duplicated edges on top
        graph = self.unifiedMetabolismEnzymes(majorityPercentageCoreMetabolism, colour = True)
        
        parentEdges = parentGeneDuplicated.getEdges()
        childEdges = childGeneDuplicated.getEdges()
        
        Export.addColourAttribute(graph, colour = Export.Colour.GREEN, nodes = False, edges = parentEdges)
        Export.addColourAttribute(graph, colour = Export.Colour.YELLOW, nodes = False, edges = childEdges)
    
    # name the result like every other method of this family does; without this, the coloured graph would keep the name 'Unified metabolism enzymes ...'
    graph.name = 'Unified metabolism gene-duplicated enzymes ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
    
    return graph
### for enzyme pairs
def conservedMetabolismGeneDuplicatedEnzymePairs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> Tuple[Set[Tuple[Enzyme, Enzyme]]]:
    """
    Pairs of gene-duplicated enzymes, derived from the conserved core metabolism.

    First, the conserved enzyme graphs of parent and child are fetched via :func:`conservedMetabolismEnzymes`.
    Then, for parent and child, the gene-duplicated enzyme pairs are calculated.
    Finally, for each clade, only the pairs where both enzymes occur in that clade's conserved enzyme graph are reported.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.

    Returns
    -------
    Tuple[Set[Tuple[Enzyme, Enzyme]]]
        Tuple of two sets of tuples of gene-duplicated enzyme pairs.
        The first set is from the parent clade, the second set from the child clade.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # Ask for the uncoloured form explicitly: it is a (parent graph, child graph) pair,
    # so getEnzymes() has to be called on each graph, not on the pair itself.
    conservedGraphs = self.conservedMetabolismEnzymes(majorityPercentageCoreMetabolism, colour = False)
    conservedEnzymesOfParent = conservedGraphs[0].getEnzymes()
    conservedEnzymesOfChild = conservedGraphs[1].getEnzymes()

    # get gene-duplicated enzyme pairs
    parentGeneDuplicated = self.parentClade.geneDuplicatedEnzymePairs(majorityPercentageCoreMetabolism)
    childGeneDuplicated = self.childClade.geneDuplicatedEnzymePairs(majorityPercentageCoreMetabolism)

    # keep only the pairs with both enzymes in the respective clade's conserved metabolism
    parentGeneDuplicatedConserved = {
        enzymeTuple for enzymeTuple in parentGeneDuplicated
        if enzymeTuple[0] in conservedEnzymesOfParent and enzymeTuple[1] in conservedEnzymesOfParent
    }
    childGeneDuplicatedConserved = {
        enzymeTuple for enzymeTuple in childGeneDuplicated
        if enzymeTuple[0] in conservedEnzymesOfChild and enzymeTuple[1] in conservedEnzymesOfChild
    }

    return (parentGeneDuplicatedConserved, childGeneDuplicatedConserved)
def addedMetabolismGeneDuplicatedEnzymePairs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> Set[Tuple[Enzyme, Enzyme]]:
    """
    Pairs of gene-duplicated enzymes, derived from the added core metabolism.

    The enzymes of the added metabolism are taken from :func:`addedMetabolismEnzymes`.
    The child's gene-duplicated enzyme pairs are then reduced to the pairs in which both enzymes belong to the added metabolism.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`addedMetabolism`.

    Returns
    -------
    Set[Tuple[Enzyme, Enzyme]]
        Pairs of enzymes from the child clade, both of which are part of the added metabolism.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # enzymes which make up the added metabolism
    enzymesOfAddedMetabolism = self.addedMetabolismEnzymes(majorityPercentageCoreMetabolism).getEnzymes()

    # all gene-duplicated pairs of the child, regardless of metabolism
    candidatePairs = self.childClade.geneDuplicatedEnzymePairs(majorityPercentageCoreMetabolism)

    # a pair only counts if both of its enzymes are part of the added metabolism
    return {
        pair for pair in candidatePairs
        if pair[0] in enzymesOfAddedMetabolism and pair[1] in enzymesOfAddedMetabolism
    }
def lostMetabolismGeneDuplicatedEnzymePairs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> Set[Tuple[Enzyme, Enzyme]]:
    """
    Pairs of gene-duplicated enzymes, derived from the lost core metabolism.

    The enzymes of the lost metabolism are taken from :func:`lostMetabolismEnzymes`.
    The parent's gene-duplicated enzyme pairs are then reduced to the pairs in which both enzymes belong to the lost metabolism.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`lostMetabolism`.

    Returns
    -------
    Set[Tuple[Enzyme, Enzyme]]
        Pairs of enzymes from the parent clade, both of which are part of the lost metabolism.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # get lost metabolism
    lostMetabolismEnzymes = self.lostMetabolismEnzymes(majorityPercentageCoreMetabolism).getEnzymes()

    # get gene-duplicated enzyme pairs
    # Lost functions are reported for the parent clade, so the pairs must come from the parent as well.
    # Filtering the child's pairs against the lost enzymes would compare two different clades.
    geneDuplicated = self.parentClade.geneDuplicatedEnzymePairs(majorityPercentageCoreMetabolism)

    # filter gene-duplicated enzyme pairs for the ones with both enzymes in the lost metabolism
    geneDuplicatedLost = {
        enzymeTuple for enzymeTuple in geneDuplicated
        if enzymeTuple[0] in lostMetabolismEnzymes and enzymeTuple[1] in lostMetabolismEnzymes
    }

    return geneDuplicatedLost
def divergedMetabolismGeneDuplicatedEnzymePairs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> Set[Tuple[Enzyme, Enzyme]]:
    """
    Pairs of gene-duplicated enzymes, derived from the diverged core metabolism.

    First, the diverged enzyme graphs of parent and child are fetched via :func:`divergedMetabolismEnzymes`.
    Then, for parent and child, the gene-duplicated enzyme pairs are calculated.
    Then, for each clade, only the pairs where both enzymes occur in that clade's diverged enzyme graph are kept.
    Finally, the kept pairs of both clades are merged into a single set.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`divergedMetabolism`.

    Returns
    -------
    Set[Tuple[Enzyme, Enzyme]]
        Set of tuples of gene-duplicated enzyme pairs of parent and child, both enzymes of which are part of the diverged metabolism.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # Ask for the uncoloured form explicitly: it is a (parent graph, child graph) pair,
    # so getEnzymes() has to be called on each graph, not on the pair itself.
    divergedGraphs = self.divergedMetabolismEnzymes(majorityPercentageCoreMetabolism, colour = False)
    divergedEnzymesOfParent = divergedGraphs[0].getEnzymes()
    divergedEnzymesOfChild = divergedGraphs[1].getEnzymes()

    # get gene-duplicated enzyme pairs
    parentGeneDuplicated = self.parentClade.geneDuplicatedEnzymePairs(majorityPercentageCoreMetabolism)
    childGeneDuplicated = self.childClade.geneDuplicatedEnzymePairs(majorityPercentageCoreMetabolism)

    # keep only the pairs with both enzymes in the respective clade's diverged metabolism
    parentGeneDuplicatedDiverged = {
        enzymeTuple for enzymeTuple in parentGeneDuplicated
        if enzymeTuple[0] in divergedEnzymesOfParent and enzymeTuple[1] in divergedEnzymesOfParent
    }
    childGeneDuplicatedDiverged = {
        enzymeTuple for enzymeTuple in childGeneDuplicated
        if enzymeTuple[0] in divergedEnzymesOfChild and enzymeTuple[1] in divergedEnzymesOfChild
    }

    return parentGeneDuplicatedDiverged.union(childGeneDuplicatedDiverged)
def unifiedMetabolismGeneDuplicatedEnzymePairs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> Set[Tuple[Enzyme, Enzyme]]:
    """
    Pairs of gene-duplicated enzymes, derived from the unified core metabolisms.

    Merges the gene-duplicated enzyme pairs of the parent clade with the ones of the child clade.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.

    Returns
    -------
    Set[Tuple[Enzyme, Enzyme]]
        Set of enzyme pairs representing the gene-duplicated enzymes of both, child and parent.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    pairsOfParent = self.parentClade.geneDuplicatedEnzymePairs(majorityPercentageCoreMetabolism)
    pairsOfChild = self.childClade.geneDuplicatedEnzymePairs(majorityPercentageCoreMetabolism)

    mergedPairs = pairsOfParent.union(pairsOfChild)
    return mergedPairs
# set-operations on neofunctionalised core metabolism ## for enzyme graphs
def conservedMetabolismNeofunctionalisedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False):
    """
    Substance-Enzyme graph(s) of neofunctionalised enzymes, derived from the conserved core metabolism.

    The conserved enzyme metabolism is fetched from :func:`conservedMetabolismEnzymes`, the neofunctionalised enzymes from each clade's `neofunctionalisedEnzymes`.
    Uncoloured, the parent's and child's conserved graphs are each reduced to the neofunctionalised enzymes of the respective clade.
    Coloured, the single conserved graph is kept whole and the neofunctionalised edges are highlighted in it.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.
    colour : bool, optional
        Passed on to :func:`conservedMetabolismEnzymes`. If exactly *True*, neofunctionalised enzyme edges of the parent are additionally coloured in green, the ones of the child in yellow.
        When doing so, a single :class:`SubstanceEnzymeGraph` is returned, not a :class:`Tuple`.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.

    Returns
    -------
    Tuple[SubstanceEnzymeGraph, SubstanceEnzymeGraph] or SubstanceEnzymeGraph
        Tuple of two Substance-Enzyme graphs. The first graph is from the parent clade, the second graph from the child clade.
        If `colour` == *True*, returns a single Substance-Enzyme graph.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    conserved = self.conservedMetabolismEnzymes(majorityPercentageCoreMetabolism, colour = colour)
    neofunctionalisedOfParent = self.parentClade.neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    neofunctionalisedOfChild = self.childClade.neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, colour = False)

    if colour is not True:
        # uncoloured: 'conserved' is a (parent, child) pair, each graph is filtered on its own
        parentGraph = conserved[0].removeAllEnzymesExcept(neofunctionalisedOfParent.getEnzymes())
        childGraph = conserved[1].removeAllEnzymesExcept(neofunctionalisedOfChild.getEnzymes())

        # the asterisks mark which clade the respective graph belongs to
        parentGraph.name = 'Conserved metabolism neofunctionalised enzymes *' + ' '.join(self.parentNCBInames) + '* -> ' + ' '.join(self.childNCBInames)
        childGraph.name = 'Conserved metabolism neofunctionalised enzymes ' + ' '.join(self.parentNCBInames) + ' -> *' + ' '.join(self.childNCBInames) + '*'
        return (parentGraph, childGraph)

    # coloured: 'conserved' is a single graph, the neofunctionalised edges are painted on top
    highlights = (
        (Export.Colour.GREEN, neofunctionalisedOfParent.getEdges()),
        (Export.Colour.YELLOW, neofunctionalisedOfChild.getEdges()),
    )
    for highlightColour, highlightedEdges in highlights:
        Export.addColourAttribute(conserved, colour = highlightColour, nodes = False, edges = highlightedEdges)

    conserved.name = 'Conserved metabolism neofunctionalised enzymes ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
    return conserved
def addedMetabolismNeofunctionalisedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> SubstanceEnzymeGraph:
    """
    Substance-Enzyme graph of neofunctionalised enzymes, derived from the added core metabolism.

    The added EC numbers are determined from the core metabolisms of parent and child via :class:`GeneFunctionAddition`.
    The child's neofunctionalised enzyme graph is then reduced to the enzymes of these EC numbers.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`addedMetabolism`.

    Returns
    -------
    SubstanceEnzymeGraph
        Substance-Enzyme graph of neofunctionalised enzymes from the child clade, restricted to the added EC numbers.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # EC numbers present in the child's core metabolism, but not in the parent's
    ecsAdded = GeneFunctionAddition.getECs(
        self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism),
        self.childClade.coreMetabolism(majorityPercentageCoreMetabolism),
    )

    neofunctionalisedOfChild = self.childClade.neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    resultGraph = neofunctionalisedOfChild.keepEnzymesByEC(ecsAdded)

    resultGraph.name = 'Added metabolism neofunctionalised enzymes ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
    return resultGraph
def lostMetabolismNeofunctionalisedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> SubstanceEnzymeGraph:
    """
    Substance-Enzyme graph of neofunctionalised enzymes, derived from the lost core metabolism.

    The lost EC numbers are determined from the core metabolisms of parent and child via :class:`GeneFunctionLoss`.
    The parent's neofunctionalised enzyme graph is then reduced to the enzymes of these EC numbers.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`lostMetabolism`.

    Returns
    -------
    SubstanceEnzymeGraph
        Substance-Enzyme graph of neofunctionalised enzymes from the parent clade, restricted to the lost EC numbers.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # EC numbers present in the parent's core metabolism, but not in the child's
    ecsLost = GeneFunctionLoss.getECs(
        self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism),
        self.childClade.coreMetabolism(majorityPercentageCoreMetabolism),
    )

    neofunctionalisedOfParent = self.parentClade.neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    resultGraph = neofunctionalisedOfParent.keepEnzymesByEC(ecsLost)

    resultGraph.name = 'Lost metabolism neofunctionalised enzymes ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
    return resultGraph
def divergedMetabolismNeofunctionalisedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False):
    """
    Substance-Enzyme graph(s) of neofunctionalised enzymes, derived from the diverged core metabolism.

    The diverged enzyme metabolism is fetched from :func:`divergedMetabolismEnzymes`, the neofunctionalised enzymes from each clade's `neofunctionalisedEnzymes`.
    Uncoloured, the parent's and child's diverged graphs are each reduced to the neofunctionalised enzymes of the respective clade.
    Coloured, the single diverged graph is kept whole and the neofunctionalised edges are highlighted in it.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`divergedMetabolism`.
    colour : bool, optional
        Passed on to :func:`divergedMetabolismEnzymes`. If exactly *True*, neofunctionalised enzyme edges of the parent are additionally coloured in green, the ones of the child in yellow.
        When doing so, a single :class:`SubstanceEnzymeGraph` is returned, not a :class:`Tuple`.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.

    Returns
    -------
    Tuple[SubstanceEnzymeGraph, SubstanceEnzymeGraph] or SubstanceEnzymeGraph
        Tuple of two Substance-Enzyme graphs. The first graph is from the parent clade, the second graph from the child clade.
        If `colour` == *True*, returns a single Substance-Enzyme graph.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    diverged = self.divergedMetabolismEnzymes(majorityPercentageCoreMetabolism, colour = colour)
    neofunctionalisedOfParent = self.parentClade.neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    neofunctionalisedOfChild = self.childClade.neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, colour = False)

    if colour is not True:
        # uncoloured: 'diverged' is a (parent, child) pair, each graph is filtered on its own
        parentGraph = diverged[0].removeAllEnzymesExcept(neofunctionalisedOfParent.getEnzymes())
        childGraph = diverged[1].removeAllEnzymesExcept(neofunctionalisedOfChild.getEnzymes())

        # the asterisks mark which clade the respective graph belongs to
        parentGraph.name = 'Diverged metabolism neofunctionalised enzymes *' + ' '.join(self.parentNCBInames) + '* -> ' + ' '.join(self.childNCBInames)
        childGraph.name = 'Diverged metabolism neofunctionalised enzymes ' + ' '.join(self.parentNCBInames) + ' -> *' + ' '.join(self.childNCBInames) + '*'
        return (parentGraph, childGraph)

    # coloured: 'diverged' is a single graph, the neofunctionalised edges are painted on top
    highlights = (
        (Export.Colour.GREEN, neofunctionalisedOfParent.getEdges()),
        (Export.Colour.YELLOW, neofunctionalisedOfChild.getEdges()),
    )
    for highlightColour, highlightedEdges in highlights:
        Export.addColourAttribute(diverged, colour = highlightColour, nodes = False, edges = highlightedEdges)

    diverged.name = 'Diverged metabolism neofunctionalised enzymes ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
    return diverged
def unifiedMetabolismNeofunctionalisedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False) -> SubstanceEnzymeGraph:
    """
    Substance-Enzyme graph of neofunctionalised enzymes, derived from the unified core metabolisms.

    Without colouring, the result is the union of the parent's and the child's neofunctionalised enzyme graphs.
    With colouring, the result is the coloured graph returned by :func:`unifiedMetabolismEnzymes`, in which the neofunctionalised enzyme edges are highlighted on top of whatever base colouring that method applies.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.
    colour : bool, optional
        If exactly *False*, no colouring is done. For any other value, neofunctionalised enzyme edges of the parent are coloured in green, the ones of the child in yellow.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.

    Returns
    -------
    SubstanceEnzymeGraph
        The Substance-Enzyme graph of neofunctionalised enzymes of both, child and parent.
        If coloured, this is the full unified enzyme graph with the neofunctionalised edges highlighted.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    parentNeofunctionalised = self.parentClade.neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    childNeofunctionalised = self.childClade.neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, colour = False)

    if colour is False:
        # updateName = False, because the name is set explicitly below
        graph = parentNeofunctionalised.union(childNeofunctionalised, addCount = False, updateName = False)
    else:
        graph = self.unifiedMetabolismEnzymes(majorityPercentageCoreMetabolism, colour = True)
        parentEdges = parentNeofunctionalised.getEdges()
        childEdges = childNeofunctionalised.getEdges()

        Export.addColourAttribute(graph, colour = Export.Colour.GREEN, nodes = False, edges = parentEdges)
        Export.addColourAttribute(graph, colour = Export.Colour.YELLOW, nodes = False, edges = childEdges)

    # This is the *unified* metabolism; the name used to say 'Diverged', copied from the sibling method.
    graph.name = 'Unified metabolism neofunctionalised enzymes ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)

    return graph
## for EC graphs
def conservedMetabolismNeofunctionalisedECs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation = defaultMajorityPercentageNeofunctionalisation, colour = False):
    """
    Substance-EC graph(s) of "neofunctionalised" EC numbers, derived from the conserved core metabolism.

    The conserved metabolism is fetched from :func:`conservedMetabolism`, the "neofunctionalised" EC numbers from each clade's `neofunctionalisedECs`.
    Uncoloured, two graphs are reduced to the "neofunctionalised" EC numbers of the respective clade.
    Coloured, the conserved graph is kept whole and the "neofunctionalised" edges are highlighted in it.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.
    majorityPercentageNeofunctionalisation : int, optional
        Passed on to each clade's `neofunctionalisedECs`.
    colour : bool, optional
        If exactly *True*, "neofunctionalised" EC edges of the parent are coloured in green, the ones of the child in yellow.
        When doing so, a single :class:`SubstanceEcGraph` is returned, not a :class:`Tuple`.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.

    Returns
    -------
    Tuple[SubstanceEcGraph, SubstanceEcGraph] or SubstanceEcGraph
        Tuple of two Substance-EC graphs. The first graph is from the parent clade, the second graph from the child clade.
        If `colour` == *True*, returns a single Substance-EC graph.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # NOTE(review): unlike the sibling methods, `colour` is not passed on here, yet the result is used
    # as a single graph in the coloured branch and indexed like a pair in the uncoloured branch.
    # Presumably one of the two branches cannot work - confirm against conservedMetabolism().
    conserved = self.conservedMetabolism(majorityPercentageCoreMetabolism)
    neofunctionalisedOfParent = self.parentClade.neofunctionalisedECs(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, colour = False)
    neofunctionalisedOfChild = self.childClade.neofunctionalisedECs(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, colour = False)

    if colour is not True:
        # uncoloured: each graph is filtered for the respective clade's "neofunctionalised" EC numbers
        parentGraph = conserved[0].removeAllECsExcept(neofunctionalisedOfParent.getECs())
        childGraph = conserved[1].removeAllECsExcept(neofunctionalisedOfChild.getECs())

        # the asterisks mark which clade the respective graph belongs to
        parentGraph.name = 'Conserved metabolism neofunctionalised ECs *' + ' '.join(self.parentNCBInames) + '* -> ' + ' '.join(self.childNCBInames)
        childGraph.name = 'Conserved metabolism neofunctionalised ECs ' + ' '.join(self.parentNCBInames) + ' -> *' + ' '.join(self.childNCBInames) + '*'
        return (parentGraph, childGraph)

    # coloured: the "neofunctionalised" edges are painted on top of the conserved graph
    highlights = (
        (Export.Colour.GREEN, neofunctionalisedOfParent.getEdges()),
        (Export.Colour.YELLOW, neofunctionalisedOfChild.getEdges()),
    )
    for highlightColour, highlightedEdges in highlights:
        Export.addColourAttribute(conserved, colour = highlightColour, nodes = False, edges = highlightedEdges)

    conserved.name = 'Conserved metabolism neofunctionalised ECs ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
    return conserved
def addedMetabolismNeofunctionalisedECs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation = defaultMajorityPercentageNeofunctionalisation) -> SubstanceEcGraph:
    """
    Substance-EC graph of "neofunctionalised" EC numbers, derived from the added core metabolism.

    The added EC numbers are determined from the core metabolisms of parent and child via :class:`GeneFunctionAddition`.
    The child's graph of "neofunctionalised" EC numbers is then reduced to these EC numbers.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`addedMetabolism`.
    majorityPercentageNeofunctionalisation : int, optional
        Passed on to the child clade's `neofunctionalisedECs`.

    Returns
    -------
    SubstanceEcGraph
        Substance-EC graph of "neofunctionalised" ECs from the child clade, restricted to the added EC numbers.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # EC numbers present in the child's core metabolism, but not in the parent's
    ecsAdded = GeneFunctionAddition.getECs(
        self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism),
        self.childClade.coreMetabolism(majorityPercentageCoreMetabolism),
    )

    neofunctionalisedOfChild = self.childClade.neofunctionalisedECs(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, colour = False)
    resultGraph = neofunctionalisedOfChild.removeAllECsExcept(ecsAdded)

    resultGraph.name = 'Added metabolism neofunctionalised ECs ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
    return resultGraph
def lostMetabolismNeofunctionalisedECs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation = defaultMajorityPercentageNeofunctionalisation) -> SubstanceEcGraph:
    """
    Substance-EC graph of "neofunctionalised" EC numbers, derived from the lost core metabolism.

    The lost EC numbers are determined from the core metabolisms of parent and child via :class:`GeneFunctionLoss`.
    The parent's graph of "neofunctionalised" EC numbers is then reduced to these EC numbers.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`lostMetabolism`.
    majorityPercentageNeofunctionalisation : int, optional
        Passed on to the parent clade's `neofunctionalisedECs`.

    Returns
    -------
    SubstanceEcGraph
        Substance-EC graph of "neofunctionalised" ECs from the parent clade, restricted to the lost EC numbers.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # EC numbers present in the parent's core metabolism, but not in the child's
    ecsLost = GeneFunctionLoss.getECs(
        self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism),
        self.childClade.coreMetabolism(majorityPercentageCoreMetabolism),
    )

    neofunctionalisedOfParent = self.parentClade.neofunctionalisedECs(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, colour = False)
    resultGraph = neofunctionalisedOfParent.removeAllECsExcept(ecsLost)

    resultGraph.name = 'Lost metabolism neofunctionalised ECs ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
    return resultGraph
def divergedMetabolismNeofunctionalisedECs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation = defaultMajorityPercentageNeofunctionalisation, colour = False):
    """
    Substance-EC graph(s) of "neofunctionalised" EC numbers, derived from the diverged core metabolism.

    The diverged metabolism is fetched from :func:`divergedMetabolism`, the "neofunctionalised" EC numbers from each clade's `neofunctionalisedECs`.
    Uncoloured, the parent's and child's diverged graphs are each reduced to the "neofunctionalised" EC numbers of the respective clade.
    Coloured, the single diverged graph is kept whole and the "neofunctionalised" edges are highlighted in it.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`divergedMetabolism`.
    majorityPercentageNeofunctionalisation : int, optional
        Passed on to each clade's `neofunctionalisedECs`.
    colour : bool, optional
        Passed on to :func:`divergedMetabolism`. If exactly *True*, "neofunctionalised" EC edges of the parent are additionally coloured in green, the ones of the child in yellow.
        When doing so, a single :class:`SubstanceEcGraph` is returned, not a :class:`Tuple`.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.

    Returns
    -------
    Tuple[SubstanceEcGraph, SubstanceEcGraph] or SubstanceEcGraph
        Tuple of two Substance-EC graphs. The first graph is from the parent clade, the second graph from the child clade.
        If `colour` == *True*, returns a single Substance-EC graph.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    diverged = self.divergedMetabolism(majorityPercentageCoreMetabolism, colour = colour)
    neofunctionalisedOfParent = self.parentClade.neofunctionalisedECs(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, colour = False)
    neofunctionalisedOfChild = self.childClade.neofunctionalisedECs(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, colour = False)

    if colour is not True:
        # uncoloured: 'diverged' is a (parent, child) pair, each graph is filtered on its own
        parentGraph = diverged[0].removeAllECsExcept(neofunctionalisedOfParent.getECs())
        childGraph = diverged[1].removeAllECsExcept(neofunctionalisedOfChild.getECs())

        # the asterisks mark which clade the respective graph belongs to
        parentGraph.name = 'Diverged metabolism neofunctionalised ECs *' + ' '.join(self.parentNCBInames) + '* -> ' + ' '.join(self.childNCBInames)
        childGraph.name = 'Diverged metabolism neofunctionalised ECs ' + ' '.join(self.parentNCBInames) + ' -> *' + ' '.join(self.childNCBInames) + '*'
        return (parentGraph, childGraph)

    # coloured: 'diverged' is a single graph, the "neofunctionalised" edges are painted on top
    highlights = (
        (Export.Colour.GREEN, neofunctionalisedOfParent.getEdges()),
        (Export.Colour.YELLOW, neofunctionalisedOfChild.getEdges()),
    )
    for highlightColour, highlightedEdges in highlights:
        Export.addColourAttribute(diverged, colour = highlightColour, nodes = False, edges = highlightedEdges)

    diverged.name = 'Diverged metabolism neofunctionalised ECs ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
    return diverged
def unifiedMetabolismNeofunctionalisedECs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation = defaultMajorityPercentageNeofunctionalisation, colour = False) -> SubstanceEcGraph:
    """
    Substance-EC graph of "neofunctionalised" EC numbers, derived from the unified core metabolisms.

    Without colouring, the result is the union of the parent's and the child's graphs of "neofunctionalised" EC numbers.
    With colouring, the result is the coloured graph returned by :func:`unifiedMetabolism`, in which the "neofunctionalised" EC edges are highlighted on top of whatever base colouring that method applies.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.
    majorityPercentageNeofunctionalisation : int, optional
        Passed on to each clade's `neofunctionalisedECs`.
    colour : bool, optional
        If exactly *False*, no colouring is done. For any other value, "neofunctionalised" EC edges of the parent are coloured in green, the ones of the child in yellow.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.

    Returns
    -------
    SubstanceEcGraph
        The Substance-EC graph of "neofunctionalised" EC numbers of both, child and parent.
        If coloured, this is the full unified graph with the "neofunctionalised" edges highlighted.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    parentNeofunctionalised = self.parentClade.neofunctionalisedECs(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, colour = False)
    childNeofunctionalised = self.childClade.neofunctionalisedECs(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, colour = False)

    if colour is False:
        # updateName = False, because the name is set explicitly below
        graph = parentNeofunctionalised.union(childNeofunctionalised, addCount = False, updateName = False)
    else:
        graph = self.unifiedMetabolism(majorityPercentageCoreMetabolism, colour = True)
        parentEdges = parentNeofunctionalised.getEdges()
        childEdges = childNeofunctionalised.getEdges()

        Export.addColourAttribute(graph, colour = Export.Colour.GREEN, nodes = False, edges = parentEdges)
        Export.addColourAttribute(graph, colour = Export.Colour.YELLOW, nodes = False, edges = childEdges)

    # This is the *unified* metabolism; the name used to say 'Diverged', copied from the sibling method.
    graph.name = 'Unified metabolism neofunctionalised ECs ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)

    return graph
class NestedCladePair(CladePair):

    def __init__(self, parent, child, excludeUnclassified = defaultExcludeUnclassified):
        """
        Two clades in NCBI taxonomy, 'child' is assumed younger and must be nested somewhere inside 'parent'.

        This only checks nestedness for the first node found in taxonomy, by the first parent's/child's NCBI name, respectively.
        The latter being relevant if you pass a :class:`Clade`, which has a list of NCBI names, or a list of NCBI names itself.

        Parameters
        ----------
        parent : str or List[str] or Clade
            Path(s) of the parent clade's taxon, as defined by NCBI taxonomy, e.g. 'Proteobacteria/Gammaproteobacteria'. Or a ready :class:`Clade` object.
        child : str or List[str] or Clade
            Path(s) of the child clade's taxon, as defined by NCBI taxonomy, e.g. 'Enterobacter'. Or a ready :class:`Clade` object.
        excludeUnclassified : bool, optional
            If *True*, ignore taxons with a path containing the string 'unclassified'.

        Attributes
        ----------
        self.childClade : :class:`Clade`
        self.parentClade : :class:`Clade`

        Raises
        ------
        ValueError
            If parent or child are unknown taxons. Or if the child taxon is not actually a child of the parent taxon.
        IndexError
            If `parent` or `child` is an empty list, or a :class:`Clade` with an empty list of NCBI names.
        """
        # only the first NCBI name of each side takes part in the nestedness check
        parentNCBIname = self._firstNCBIname(parent)
        childNCBIname = self._firstNCBIname(child)

        # check if child is really a child of parent
        taxonomy = NCBI.getTaxonomy()
        exceptPaths = 'unclassified' if excludeUnclassified else None
        parentNode = self._firstNodeByPath(taxonomy, parentNCBIname, exceptPaths)
        childNode = self._firstNodeByPath(taxonomy, childNCBIname, exceptPaths)

        # the parent's path is loop-invariant, compute it once
        parentPath = Taxonomy.nodePath2String(parentNode)
        if not any(Taxonomy.nodePath2String(ancestor) == parentPath for ancestor in childNode.ancestors):
            raise ValueError("Child is not a descendant of parent.")

        super().__init__(parent, child, excludeUnclassified)

    @staticmethod
    def _firstNCBIname(cladeOrNames):
        """
        Return the single NCBI name used for the nestedness check.

        A :class:`Clade` yields the first of its `ncbiNames`, a plain string is used as is, anything else is indexed with ``[0]``.
        """
        if isinstance(cladeOrNames, Clade):
            return cladeOrNames.ncbiNames[0]
        if isinstance(cladeOrNames, str):
            # a plain path string is already the name; previously this case left the name unassigned
            return cladeOrNames
        # must be indexable, else fail
        return cladeOrNames[0]

    @staticmethod
    def _firstNodeByPath(taxonomy, ncbiName, exceptPaths):
        """
        Return the first taxonomy node found for `ncbiName`, or raise ValueError if the search yields nothing.
        """
        nodes = taxonomy.searchNodesByPath(ncbiName, exceptPaths=exceptPaths)
        if nodes is None or len(nodes) == 0:
            raise ValueError("No clade of this path found: " + ncbiName)
        # only consider first element
        return nodes[0]