# Source code for FEV_KEGG.Evolution.Clade
from FEV_KEGG.Graph.SubstanceGraphs import SubstanceEcGraph, SubstanceEnzymeGraph
from FEV_KEGG.Evolution.Taxonomy import NCBI, Taxonomy
from FEV_KEGG.KEGG.Organism import Group
from FEV_KEGG.Evolution.Events import GeneFunctionAddition, GeneFunctionLoss, GeneFunctionDivergence, GeneFunctionConservation, SimpleGeneDuplication,\
NeofunctionalisedECs, NeofunctionalisedEnzymes, Neofunctionalisation, FunctionChange
from FEV_KEGG import settings
from builtins import str
from FEV_KEGG.Drawing import Export
import math
from typing import Dict, Set, Tuple
from FEV_KEGG.Graph.Elements import Enzyme, GeneID, EcNumber
# Module-wide defaults for the optional parameters of the classes in this module.
# Each assignment is followed by a bare string literal describing it.
defaultExcludeUnclassified = True
"""
If *True*, ignore taxons with a path containing the string 'unclassified'.
This can be overridden in each relevant method's `excludeUnclassified` parameter in this module.
"""
defaultExcludeMultifunctionalEnzymes = settings.defaultNoMultifunctional
"""
If *True*, ignore enzymes with more than one EC number.
This can be overridden in each relevant method's `excludeMultifunctionalEnzymes` parameter in this module.
"""
defaultMajorityPercentageCoreMetabolism = 80
"""
Default percentage of organisms in the clade, which have to possess an EC number, for it to be included in the core metabolism of the clade.
See :func:`FEV_KEGG.KEGG.Organism.Group.majorityEcGraph`.
This can be overridden in each relevant method's `majorityPercentageCoreMetabolism` parameter in this module.
"""
# A value of 0 results in a minimum organism count of 0 where this percentage is converted to a count.
defaultMajorityPercentageNeofunctionalisation = 0
"""
Default percentage of organisms in the clade, which have to possess the same "neofunctionalised" EC number, for it to be included in the set of "neofunctionalised" EC numbers of the clade.
See :class:`FEV_KEGG.Evolution.Events.NeofunctionalisedECs`.
This can be overridden in each relevant method's `majorityPercentageNeofunctionalisation` parameter in this module.
"""
# Used as the default `eValue` of the gene-duplication and neofunctionalisation methods in this module.
defaultEValue = settings.defaultEvalue
"""
Default threshold for the statistical expectation value (E-value), below which a sequence alignment is considered significant.
"""
# Used as the default `oneOrganismPerSpecies` of the Clade and CladePair constructors.
defaultOneOrganismPerSpecies = settings.defaultOneOrganismPerSpecies
"""
Default decision whether to use only the first organism for each species in NCBI taxonomy.
"""
class Clade(object):

    def __init__(self, ncbiNames: 'e.g. Enterobacter or Proteobacteria/Gammaproteobacteria. Allows list of names, e.g. ["Gammaproteobacteria", "/Archaea"]', excludeUnclassified = defaultExcludeUnclassified, oneOrganismPerSpecies = defaultOneOrganismPerSpecies):
        """
        A clade in NCBI taxonomy, containing all leaf taxon's KEGG organisms.

        Parameters
        ----------
        ncbiNames : str or Iterable[str]
            String(s) a taxon's path must contain to be included in this clade.
        excludeUnclassified : bool, optional
            If *True*, ignore taxons with a path containing the string 'unclassified'.
        oneOrganismPerSpecies : bool, optional
            If *True*, use only the first organism of each species.

        Attributes
        ----------
        self.ncbiNames : List[str]
            Part of the path of each leaf taxon to be included in this clade. A single string is wrapped in a list, any other iterable is copied into a list.
        self.group
            The :class:`FEV_KEGG.KEGG.Organism.Group` of KEGG organisms created from the found leaf taxons.

        Raises
        ------
        ValueError
            If no clade with `ncbiNames` in its path could be found.

        Warnings
        --------
        It is possible to include organisms of several clades in the same Clade object!
        For example, if you were to search for `ncbiNames` == 'Donaldus Duckus', you would get every organism within '/Bacteria/Donaldus Duckus' **and** '/Archaea/Order/Donaldus Duckus'.
        Use the slash (/) notation to make sure you only get the taxon you want, e.g. 'Proteobacteria/Gammaproteobacteria' or '/Archaea'.
        """
        taxonomy = NCBI.getTaxonomy()

        if isinstance(ncbiNames, str):
            ncbiNames = [ncbiNames]
        else:
            # Materialise once: the names are iterated below and joined again every time a graph is named.
            # A one-shot iterator (e.g. a generator) would otherwise be exhausted after the loop below.
            ncbiNames = list(ncbiNames)
        self.ncbiNames = ncbiNames

        exceptPaths = 'unclassified' if excludeUnclassified else None
        allOrganisms = set()
        for ncbiName in ncbiNames:
            organisms = taxonomy.getOrganismAbbreviationsByPath(ncbiName, exceptPaths=exceptPaths, oneOrganismPerSpecies=oneOrganismPerSpecies)
            if organisms is None or len(organisms) == 0:
                raise ValueError("No clade of this path found: " + ncbiName)
            allOrganisms.update(organisms)

        self.group = Group( allOrganisms )

        # Single-entry cache of the last neofunctionalisation calculation, see _neofunctionalisedEnzymes().
        self._lastNeofunctionalisedEnzymesCache = None
        self._lastNeofunctionalisedEnzymesCacheEValue = None
        self._lastGeneDuplicatedEnzymesMatches = None

    # private helpers

    def _named(self, graph, prefix):
        """
        Set `graph`.name to `prefix` followed by all NCBI names of this clade, separated by spaces, and return `graph`.
        """
        graph.name = prefix + ' '.join(self.ncbiNames)
        return graph

    @staticmethod
    def _colourEdges(fullGraph, highlightedGraph, colour):
        """
        Add a 'colour' attribute to all edges of `fullGraph` which also occur in `highlightedGraph`, and return `fullGraph`.

        `colour` == *True* selects :attr:`Export.Colour.GREEN`, any other value is passed on as the colour itself. Nodes are not coloured.
        """
        colourToUse = Export.Colour.GREEN if colour is True else colour
        Export.addColourAttribute(fullGraph, colourToUse, nodes = False, edges = highlightedGraph.getEdges())
        return fullGraph

    def _minimumOrganismsCount(self, majorityPercentage) -> int:
        """
        Smallest number of organisms making up at least `majorityPercentage` % of this clade's organisms.

        The multiplication is deliberately done before the division. ``100 * (7 / 100)`` evaluates to 7.000000000000001 in floating point, which would be rounded up to 8, while ``100 * 7 / 100`` is exactly 7.0.
        """
        return math.ceil(self.organismsCount * majorityPercentage / 100)

    # metabolism

    def collectiveMetabolism(self, excludeMultifunctionalEnzymes = defaultExcludeMultifunctionalEnzymes, addEcDescriptions = False) -> SubstanceEcGraph:
        """
        The Substance-EC graph representing the collective metabolic network, occurring in any organism of the clade.

        This includes each and every EC number which occurs in any organism of this clade.

        Parameters
        ----------
        excludeMultifunctionalEnzymes : bool, optional
            If *True*, ignore enzymes with more than one EC number.
        addEcDescriptions : bool, optional
            Passed on unchanged to :func:`FEV_KEGG.KEGG.Organism.Group.collectiveEcGraph`, see there for its exact meaning.

        Returns
        -------
        SubstanceEcGraph
            Collective metabolic network of EC numbers, including counts of occurrence in each of the clade's organisms.

        Raises
        ------
        TypeError
            If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
        HTTPError
            If fetching any of the underlying graphs fails.
        URLError
            If connection to KEGG fails.
        """
        graph = self.group.collectiveEcGraph(noMultifunctional = excludeMultifunctionalEnzymes, addCount = True, keepOnHeap = True, addEcDescriptions = addEcDescriptions)
        return self._named(graph, 'Collective metabolism ECs ')

    def collectiveMetabolismEnzymes(self, excludeMultifunctionalEnzymes = defaultExcludeMultifunctionalEnzymes) -> SubstanceEnzymeGraph:
        """
        The Substance-Enzyme graph representing the collective metabolic network, occurring in any organism of the clade.

        This includes each and every enzyme of every organism of this clade.

        Parameters
        ----------
        excludeMultifunctionalEnzymes : bool, optional
            If *True*, ignore enzymes with more than one EC number.

        Returns
        -------
        SubstanceEnzymeGraph
            Collective metabolic network of enzymes.

        Raises
        ------
        TypeError
            If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
        HTTPError
            If fetching any of the underlying graphs fails.
        URLError
            If connection to KEGG fails.
        """
        graph = self.group.collectiveEnzymeGraph(noMultifunctional = excludeMultifunctionalEnzymes, keepOnHeap = True)
        return self._named(graph, 'Collective metabolism enzymes ')

    def coreMetabolism(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, excludeMultifunctionalEnzymes = defaultExcludeMultifunctionalEnzymes) -> SubstanceEcGraph:
        """
        The Substance-EC graph representing the common metabolic network, shared among all organisms of the clade.

        This includes only EC numbers which occur in at least `majorityPercentageCoreMetabolism` % of all organisms of this clade.

        Parameters
        ----------
        majorityPercentageCoreMetabolism : int, optional
            A path (substance -> EC -> product) has to occur in `majorityPercentageCoreMetabolism` % of the clade's organisms to be included.
        excludeMultifunctionalEnzymes : bool, optional
            If *True*, ignore enzymes with more than one EC number.

        Returns
        -------
        SubstanceEcGraph
            Core metabolic network of EC numbers.

        Raises
        ------
        TypeError
            If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
        HTTPError
            If fetching any of the underlying graphs fails.
        URLError
            If connection to KEGG fails.
        """
        graph = self.group.majorityEcGraph(majorityPercentage = majorityPercentageCoreMetabolism, noMultifunctional = excludeMultifunctionalEnzymes, keepOnHeap = True)
        return self._named(graph, 'Core metabolism ECs ')

    def coreMetabolismEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, excludeMultifunctionalEnzymes = defaultExcludeMultifunctionalEnzymes) -> SubstanceEnzymeGraph:
        """
        The Substance-Enzyme graph representing the common metabolic network, shared among all organisms of the clade.

        This includes every Enzyme associated with an EC number occurring in core metabolism (see :func:`coreMetabolism`), no matter from which organism it stems.

        Parameters
        ----------
        majorityPercentageCoreMetabolism : int, optional
            A path (substance -> EC -> product) has to occur in `majorityPercentageCoreMetabolism` % of the clade's organisms to be included.
        excludeMultifunctionalEnzymes : bool, optional
            If *True*, ignore enzymes with more than one EC number.

        Returns
        -------
        SubstanceEnzymeGraph
            Core metabolic network of enzymes.

        Raises
        ------
        TypeError
            If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
        HTTPError
            If fetching any of the underlying graphs fails.
        URLError
            If connection to KEGG fails.
        """
        graph = self.group.collectiveEnzymeGraphByEcMajority(majorityPercentage = majorityPercentageCoreMetabolism, majorityTotal = None, noMultifunctional = excludeMultifunctionalEnzymes)
        return self._named(graph, 'Core metabolism Enzymes ')

    @property
    def organismsCount(self) -> int:
        """
        The number of organisms (leaf taxons) in this clade.

        Returns
        -------
        int
            The number of organisms (leaf taxons) in this clade, as reported by `self.group`.
        """
        return self.group.organismsCount

    # gene duplication

    def geneDuplicatedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False) -> SubstanceEnzymeGraph:
        """
        The substance-Enzyme graph of all gene duplicated enzymes of the core metabolism.

        Gene duplication is determined by :class:`SimpleGeneDuplication`, using :attr:`defaultEValue` as threshold.

        Parameters
        ----------
        majorityPercentageCoreMetabolism : int, optional
            Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of organisms constituting the clade, to be included in the core metabolism.
        colour : bool, optional
            If *True*, colours the gene-duplicated enzyme edges in green. The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.
            Alternatively, you can specify a :class:`Export.Colour`.

        Returns
        -------
        SubstanceEnzymeGraph
            Substance-Enzyme graph containing all gene-duplicated enzymes, and nothing else.
            If `colour` is not *False*, returns the full core metabolism enzyme graph instead, with the edges of gene-duplicated enzymes coloured.

        Raises
        ------
        TypeError
            If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
        HTTPError
            If fetching any of the underlying graphs fails.
        URLError
            If connection to KEGG fails.
        """
        enzymeGraph = self.coreMetabolismEnzymes(majorityPercentageCoreMetabolism)

        # filter core metabolism enzyme graph
        result = SimpleGeneDuplication.filterEnzymes(enzymeGraph, eValue = defaultEValue, ignoreDuplicatesOutsideSet = True, preCalculatedEnzymes = None)

        # colour core metabolism
        if colour is not False:
            result = self._colourEdges(enzymeGraph, result, colour)

        return self._named(result, 'Gene-duplicated core metabolism enzymes ')

    def geneDuplicatedEnzymesDict(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> Dict[Enzyme, Set[GeneID]]:
        """
        All gene duplicated enzymes of the core metabolism, pointing to all their duplicates.

        Parameters
        ----------
        majorityPercentageCoreMetabolism : int, optional
            Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of organisms constituting the clade, to be included in the core metabolism.

        Returns
        -------
        Dict[Enzyme, Set[GeneID]]
            Each gene ID on the right usually has an entry of its own, as an enzyme object, on the left, because they are each other's homologs.

        Raises
        ------
        TypeError
            If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
        HTTPError
            If fetching any of the underlying graphs fails.
        URLError
            If connection to KEGG fails.
        """
        enzymeGraph = self.coreMetabolismEnzymes(majorityPercentageCoreMetabolism)
        return SimpleGeneDuplication.getEnzymes(enzymeGraph, returnMatches = True, ignoreDuplicatesOutsideSet = True, preCalculatedEnzymes = None)

    def geneDuplicatedEnzymePairs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> Set[Tuple[Enzyme, Enzyme]]:
        """
        All gene duplicated enzymes of the core metabolism, paired with each of their duplicates.

        If enzyme A is a duplicate of enzyme B and vice versa, this does not return duplicates, but returns only one pair, with the "smaller" enzyme as the first value. An enzyme is "smaller" if its gene ID string is "smaller".

        Parameters
        ----------
        majorityPercentageCoreMetabolism : int, optional
            Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of organisms constituting the clade, to be included in the core metabolism.

        Returns
        -------
        Set[Tuple[Enzyme, Enzyme]]
            Set of gene-duplicated enzymes, broken down into pairs of enzymes.
            The same enzyme can obviously occur in many pairs, on either side.

        Raises
        ------
        TypeError
            If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
        HTTPError
            If fetching any of the underlying graphs fails.
        URLError
            If connection to KEGG fails.
        """
        enzymes = self.coreMetabolismEnzymes(majorityPercentageCoreMetabolism).getEnzymes()

        # lookup table handed to the model, from gene ID to its enzyme object
        geneIdToEnzyme = {enzyme.geneID: enzyme for enzyme in enzymes}

        return SimpleGeneDuplication.getEnzymePairs(enzymes, ignoreDuplicatesOutsideSet = True, geneIdToEnzyme = geneIdToEnzyme, preCalculatedEnzymes = None)

    # neofunctionalisation

    def _neofunctionalisedEnzymes(self, majorityPercentageCoreMetabolism, eValue = defaultEValue, considerOnlyECs = None):
        """
        Calculate the :class:`NeofunctionalisedEnzymes` of the core metabolism, re-using the last result where possible.

        Only the most recent calculation is cached, and only if `considerOnlyECs` is *None*.
        The cached result is re-used only if both `majorityPercentageCoreMetabolism` and `eValue` equal the ones of the cached calculation, because the result depends on both.

        Parameters
        ----------
        majorityPercentageCoreMetabolism : int
            Majority percentage defining the core metabolism.
        eValue : float, optional
            Threshold for the statistical expectation value (E-value), passed on to :class:`NeofunctionalisedEnzymes`.
        considerOnlyECs : Iterable[EcNumber], optional
            If given, the core metabolism enzyme graph is reduced via its `keepEnzymesByEC` method before testing for neofunctionalisation. Such restricted results are never cached.

        Returns
        -------
        NeofunctionalisedEnzymes
        """
        useCache = considerOnlyECs is None

        # check if the last calculation can be returned
        # getattr/hasattr instead of plain access, because instances created without running this __init__ may lack the attributes
        if useCache:
            cached = getattr(self, '_lastNeofunctionalisedEnzymesCache', None)
            if cached is not None:
                lastMajorityPercentage, lastNeofunctionalisedEnzymes = cached
                sameEValue = hasattr(self, '_lastNeofunctionalisedEnzymesCacheEValue') and self._lastNeofunctionalisedEnzymesCacheEValue == eValue
                if lastMajorityPercentage == majorityPercentageCoreMetabolism and sameEValue:
                    return lastNeofunctionalisedEnzymes
                self._lastNeofunctionalisedEnzymesCache = None

        # calculate
        enzymeGraph = self.coreMetabolismEnzymes(majorityPercentageCoreMetabolism)
        if considerOnlyECs is not None:
            enzymeGraph.keepEnzymesByEC(considerOnlyECs)

        neofunctionalisedEnzymes = NeofunctionalisedEnzymes(enzymeGraph.getEnzymes(), SimpleGeneDuplication, eValue = eValue)

        # cache calculation
        if useCache:
            self._lastNeofunctionalisedEnzymesCache = (majorityPercentageCoreMetabolism, neofunctionalisedEnzymes)
            self._lastNeofunctionalisedEnzymesCacheEValue = eValue

        return neofunctionalisedEnzymes

    def neofunctionalisedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False, eValue = defaultEValue, considerOnlyECs = None) -> SubstanceEnzymeGraph:
        """
        The substance-Enzyme graph of all neofunctionalised enzymes of the core metabolism.

        Parameters
        ----------
        majorityPercentageCoreMetabolism : int, optional
            Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of organisms constituting the clade, to be included in the core metabolism.
        colour : bool, optional
            If *True*, colours the neofunctionalised enzyme edges in green. The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.
            Alternatively, you can specify a :class:`Export.Colour`.
        eValue : float, optional
            Threshold for the statistical expectation value (E-value), below which a sequence alignment is considered significant.
        considerOnlyECs : Iterable[EcNumber], optional
            If given, only enzymes with an EC number in `considerOnlyECs` are tested for neofunctionalisation.

        Returns
        -------
        SubstanceEnzymeGraph
            Substance-Enzyme graph containing all neofunctionalised enzymes, and nothing else.
            If `colour` is not *False*, returns the full core metabolism enzyme graph instead, with the edges of neofunctionalised enzymes coloured.

        Raises
        ------
        TypeError
            If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
        HTTPError
            If fetching any of the underlying graphs fails.
        URLError
            If connection to KEGG fails.
        """
        # get neofunctionalisations
        neofunctionalisedEnzymes = self._neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, eValue, considerOnlyECs)

        # filter core metabolism enzyme graph
        enzymeGraph = self.coreMetabolismEnzymes(majorityPercentageCoreMetabolism)
        result = neofunctionalisedEnzymes.filterGraph(enzymeGraph, minimumEcDifference = None)

        # colour core metabolism
        if colour is not False:
            result = self._colourEdges(enzymeGraph, result, colour)

        return self._named(result, 'Neofunctionalised core metabolism enzymes ')

    def neofunctionalisedECs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation = defaultMajorityPercentageNeofunctionalisation, colour = False, eValue = defaultEValue, considerOnlyECs = None) -> SubstanceEcGraph:
        """
        The substance-EC graph of EC numbers belonging to function changes of neofunctionalised enzymes of the core metabolism.

        Only EC numbers which could have actually taken part in a function change are reported. This is because enzymes can have multiple EC numbers, while only some might be eligible for a function change.
        For example, consider enzyme A (1.2.3.4, 6.5.4.3) and enzyme B (1.2.3.4, 4.5.6.7). 1.2.3.4 can never change its function to itself, which leaves 1.2.3.4 <-> 6.5.4.3, 1.2.3.4 <-> 4.5.6.7, and 4.5.6.7 <-> 6.5.4.3 as possible function changes.
        This obviously requires a function to change to a single other function, without splitting or merging, which might be biologically inaccurate.
        However, this should happen rarely, plus one could exclude all enzymes with multiple functions from the core metabolism in the first place.

        Parameters
        ----------
        majorityPercentageCoreMetabolism : int, optional
            Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of organisms constituting the clade, to be included in the core metabolism.
        majorityPercentageNeofunctionalisation : int, optional
            Every EC number considered for neofunctionalisation has to be associated with a function change of neofunctionalisations whose enzymes involve at least `majorityPercentageNeofunctionalisation` % of the clade's organisms.
            A high `majorityPercentageNeofunctionalisation` disallows us to detect neofunctionalisations which happened a long time ago, with their genes having diverged significantly;
            or only recently, with not all organisms of the child clade having picked up the new function, yet.
        colour : bool, optional
            If *True*, colours the neofunctionalised EC edges in green. The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.
            Alternatively, you can specify a :class:`Export.Colour`.
        eValue : float, optional
            Threshold for the statistical expectation value (E-value), below which a sequence alignment is considered significant.
        considerOnlyECs : Iterable[EcNumber], optional
            If given, only enzymes with an EC number in `considerOnlyECs` are tested for neofunctionalisation.

        Returns
        -------
        SubstanceEcGraph
            The substance-EC graph representing the metabolic network which was probably affected due to neofunctionalisations of the core metabolism of the clade.
            If `colour` is not *False*, returns the full core metabolism EC graph instead, with the edges of neofunctionalised ECs coloured.

        Raises
        ------
        TypeError
            If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
        HTTPError
            If fetching any of the underlying graphs fails.
        URLError
            If connection to KEGG fails.
        """
        # get neofunctionalisations
        neofunctionalisedECs = NeofunctionalisedECs(self._neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, eValue, considerOnlyECs))

        # filter core metabolism EC graph
        coreMetabolism = self.coreMetabolism(majorityPercentageCoreMetabolism)
        minimumOrganismsCount = self._minimumOrganismsCount(majorityPercentageNeofunctionalisation)
        result = neofunctionalisedECs.filterGraph(coreMetabolism, minimumEcDifference = None, minimumOrganismsCount = minimumOrganismsCount)

        # colour core metabolism
        if colour is not False:
            result = self._colourEdges(coreMetabolism, result, colour)

        return self._named(result, 'Neofunctionalised core metabolism ')

    def neofunctionalisations(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, eValue = defaultEValue, considerOnlyECs = None) -> Set[Neofunctionalisation]:
        """
        Get neofunctionalisation events of all enzymes in the core metabolism.

        Parameters
        ----------
        majorityPercentageCoreMetabolism : int, optional
            Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of organisms constituting the clade, to be included in the core metabolism.
        eValue : float, optional
            Threshold for the statistical expectation value (E-value), below which a sequence alignment is considered significant.
        considerOnlyECs : Iterable[EcNumber], optional
            If given, only enzymes with an EC number in `considerOnlyECs` are tested for neofunctionalisation.

        Returns
        -------
        Set[Neofunctionalisation]
            Set of possible neofunctionalisation events.

        Raises
        ------
        TypeError
            If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
        HTTPError
            If fetching any of the underlying graphs fails.
        URLError
            If connection to KEGG fails.
        """
        neofunctionalisedEnzymes = self._neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, eValue, considerOnlyECs)
        return neofunctionalisedEnzymes.getNeofunctionalisations()

    def neofunctionalisationsForFunctionChange(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation = defaultMajorityPercentageNeofunctionalisation, eValue = defaultEValue, considerOnlyECs = None) -> Dict[FunctionChange, Set[Neofunctionalisation]]:
        """
        Get neofunctionalisation events of all enzymes in the core metabolism, grouped by each possible function change event.

        Parameters
        ----------
        majorityPercentageCoreMetabolism : int, optional
            Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of organisms constituting the clade, to be included in the core metabolism.
        majorityPercentageNeofunctionalisation : int, optional
            Every EC number considered for neofunctionalisation has to be associated with a function change of neofunctionalisations whose enzymes involve at least `majorityPercentageNeofunctionalisation` % of the clade's organisms.
            A high `majorityPercentageNeofunctionalisation` disallows us to detect neofunctionalisations which happened a long time ago, with their genes having diverged significantly;
            or only recently, with not all organisms of the child clade having picked up the new function, yet.
        eValue : float, optional
            Threshold for the statistical expectation value (E-value), below which a sequence alignment is considered significant.
        considerOnlyECs : Iterable[EcNumber], optional
            If given, only enzymes with an EC number in `considerOnlyECs` are tested for neofunctionalisation.

        Returns
        -------
        Dict[FunctionChange, Set[Neofunctionalisation]]
            Dictionary of function changes, pointing to a set of neofunctionalisations which might have caused them.
            Since an enzyme of a neofunctionalisation can have multiple EC numbers, all combinations of the two enzymes' EC numbers are formed and treated as separate possible function changes.
            The neofunctionalisation is then saved again for each function change, which obviously leads to duplicated neofunctionalisation objects.

        Raises
        ------
        TypeError
            If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
        HTTPError
            If fetching any of the underlying graphs fails.
        URLError
            If connection to KEGG fails.
        """
        minimumOrganismsCount = self._minimumOrganismsCount(majorityPercentageNeofunctionalisation)
        neofunctionalisedECs = NeofunctionalisedECs(self._neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, eValue, considerOnlyECs))
        return neofunctionalisedECs.getNeofunctionalisationsForFunctionChange(minimumOrganismsCount = minimumOrganismsCount)

    # redundancy of neofunctionalisation

    def redundantECsForContributingNeofunctionalisation(self,
                                                        majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism,
                                                        majorityPercentageNeofunctionalisation = defaultMajorityPercentageNeofunctionalisation,
                                                        eValue = defaultEValue,
                                                        redundancyType: 'RedundancyType' = None,
                                                        considerOnlyECs = None) -> Dict[Neofunctionalisation, Set[EcNumber]]:
        """
        Get neofunctionalisation events of all enzymes in the core metabolism, which contribute to redundancy, pointing to the EC numbers their function changes' EC numbers provide redundancy for.

        Parameters
        ----------
        majorityPercentageCoreMetabolism : int, optional
            Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of organisms constituting the clade, to be included in the core metabolism.
        majorityPercentageNeofunctionalisation : int, optional
            Every EC number considered for neofunctionalisation has to be associated with a function change of neofunctionalisations whose enzymes involve at least `majorityPercentageNeofunctionalisation` % of the clade's organisms.
            A high `majorityPercentageNeofunctionalisation` disallows us to detect neofunctionalisations which happened a long time ago, with their genes having diverged significantly;
            or only recently, with not all organisms of the child clade having picked up the new function, yet.
        eValue : float, optional
            Threshold for the statistical expectation value (E-value), below which a sequence alignment is considered significant.
        redundancyType : RedundancyType, optional
            Definition of redundancy for which to check the neofunctionalisation's contribution. Defaults to `RedundancyType.default`.
        considerOnlyECs : Iterable[EcNumber], optional
            If given, only enzymes with an EC number in `considerOnlyECs` are tested for neofunctionalisation.

        Returns
        -------
        Dict[Neofunctionalisation, Set[EcNumber]]
            Every neofunctionalisation belonging to a function change of which at least one EC number contributes to redundancy.
            Each points to the union of all EC numbers for which the EC numbers of its function change(s) provide redundancy.

        Raises
        ------
        TypeError
            If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
        HTTPError
            If fetching any of the underlying graphs fails.
        URLError
            If connection to KEGG fails.
        """
        # imported locally, as in the original code, instead of at module level
        from FEV_KEGG.Robustness.Topology.Redundancy import Redundancy, RedundancyContribution, RedundancyType

        if redundancyType is None:
            redundancyType = RedundancyType.default

        #- calculate "neofunctionalised" ECs
        # eValue and considerOnlyECs have to be passed by keyword: the third positional parameter of neofunctionalisedECs() is `colour`.
        # Passing them positionally would colour and return the whole core metabolism, and shift considerOnlyECs into eValue.
        neofunctionalisedMetabolismSet = self.neofunctionalisedECs(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, eValue = eValue, considerOnlyECs = considerOnlyECs).getECs()
        neofunctionalisationsForFunctionChange = self.neofunctionalisationsForFunctionChange(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, eValue = eValue, considerOnlyECs = considerOnlyECs)

        #- calculate redundancy
        redundancy = Redundancy( self.coreMetabolism(majorityPercentageCoreMetabolism) )
        redundancyContribution = RedundancyContribution(redundancy, neofunctionalisedMetabolismSet)

        contributedECsForContributingNeofunctionalisedEC = redundancyContribution.getContributedKeysForSpecial(redundancyType)
        contributingNeofunctionalisedECs = set(contributedECsForContributingNeofunctionalisedEC.keys())

        #- REPEAT for each function change consisting of "neofunctionalised" ECs, which also contribute to redundancy
        contributingNeofunctionalisations = dict()
        for functionChange, neofunctionalisations in neofunctionalisationsForFunctionChange.items():

            # skip function changes which do not contribute to redundancy
            if functionChange.ecA not in contributingNeofunctionalisedECs and functionChange.ecB not in contributingNeofunctionalisedECs:
                continue

            # the contributed ECs depend on the function change only, so collect them once for all its neofunctionalisations
            contributedByFunctionChange = set()
            for ec in functionChange.ecPair:
                contributedECs = contributedECsForContributingNeofunctionalisedEC.get(ec, None)
                if contributedECs is not None:
                    contributedByFunctionChange.update(contributedECs)

            #- report enzyme pairs of neofunctionalisations, which caused the EC to be considered "neofunctionalised", and are in return contributing to redundancy
            for neofunctionalisation in neofunctionalisations:
                contributingNeofunctionalisations.setdefault(neofunctionalisation, set()).update(contributedByFunctionChange)

        return contributingNeofunctionalisations
[docs]class CladePair(object):
def __init__(self, parent, child, excludeUnclassified = defaultExcludeUnclassified, oneOrganismPerSpecies = defaultOneOrganismPerSpecies):
    """
    Pair of two clades in NCBI taxonomy, where 'child' is assumed to be younger than 'parent'.

    No check is made that the child taxon really descends from the parent taxon.
    Hence, `parent` and/or `child` may also be given as List[str] of NCBI names, which are handed to the underlying :class:`Clade` objects.
    This is useful for comparing groups of organisms which are, according to NCBI, not related.

    Parameters
    ----------
    parent : str or List[str] or Clade
        Path(s) of the parent clade's taxon, as defined by NCBI taxonomy, e.g. 'Proteobacteria/Gammaproteobacteria'. Or a ready :class:`Clade` object.
    child : str or List[str] or Clade
        Path(s) of the child clade's taxon, as defined by NCBI taxonomy, e.g. 'Enterobacter'. Or a ready :class:`Clade` object.
    excludeUnclassified : bool, optional
        If *True*, ignore taxons with a path containing the string 'unclassified'. Only used for an argument that is not already a :class:`Clade`.
    oneOrganismPerSpecies : bool, optional
        If *True*, use only the first organism of each species. Only used for an argument that is not already a :class:`Clade`.

    Attributes
    ----------
    self.childClade : :class:`Clade`
    self.parentClade : :class:`Clade`
    """
    # a ready-made Clade is adopted unchanged, anything else is wrapped into a new Clade
    if not isinstance(parent, Clade):
        parent = Clade(parent, excludeUnclassified, oneOrganismPerSpecies=oneOrganismPerSpecies)
    self.parentClade = parent

    if not isinstance(child, Clade):
        child = Clade(child, excludeUnclassified, oneOrganismPerSpecies=oneOrganismPerSpecies)
    self.childClade = child
@property
def parentNCBInames(self):
    """
    Names/paths in NCBI taxonomy the parent clade was created from, read from the parent clade's `ncbiNames`.
    """
    parent = self.parentClade
    return parent.ncbiNames
@property
def childNCBInames(self):
    """
    Names/paths in NCBI taxonomy the child clade was created from, read from the child clade's `ncbiNames`.
    """
    child = self.childClade
    return child.ncbiNames
# set-operations on core metabolism
## for EC graphs
def conservedMetabolism(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> SubstanceEcGraph:
    """
    Substance-EC graph of the conserved core metabolism.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of the organisms constituting a clade, to be included in that clade's core metabolism. The same percentage is applied to parent clade and child clade individually.
        The parent clade fully includes the child clade, so an edge occurring in the child's core metabolism counts towards the percentage for the parent's core metabolism.
        Meaning: an EC number missing from the child's core metabolism is unlikely to occur in the parent's core metabolism, unless the percentage is lowered towards 0.

    Returns
    -------
    SubstanceEcGraph
        The metabolic network which stayed the same between the core metabolism of the parent (assumed older) and the core metabolism of the child (assumed younger).

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    olderCore = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    youngerCore = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)

    conserved = GeneFunctionConservation.getGraph(olderCore, youngerCore)
    conserved.name = 'Conserved metabolism {} -> {}'.format(' '.join(self.parentNCBInames), ' '.join(self.childNCBInames))
    return conserved
def addedMetabolism(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> SubstanceEcGraph:
    """
    Substance-EC graph of the added core metabolism.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of the organisms constituting a clade, to be included in that clade's core metabolism. The same percentage is applied to parent clade and child clade individually.
        The parent clade fully includes the child clade, so an edge occurring in the child's core metabolism counts towards the percentage for the parent's core metabolism.
        Meaning: an EC number missing from the child's core metabolism is unlikely to occur in the parent's core metabolism, unless the percentage is lowered towards 0.

    Returns
    -------
    SubstanceEcGraph
        The metabolic network which was added to the core metabolism of the parent (assumed older) on the way to the core metabolism of the child (assumed younger).

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    olderCore = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    youngerCore = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)

    added = GeneFunctionAddition.getGraph(olderCore, youngerCore)
    added.name = 'Added metabolism {} -> {}'.format(' '.join(self.parentNCBInames), ' '.join(self.childNCBInames))
    return added
def lostMetabolism(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> SubstanceEcGraph:
    """
    Substance-EC graph of the lost core metabolism.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of the organisms constituting a clade, to be included in that clade's core metabolism. The same percentage is applied to parent clade and child clade individually.
        The parent clade fully includes the child clade, so an edge occurring in the child's core metabolism counts towards the percentage for the parent's core metabolism.
        Meaning: an EC number missing from the child's core metabolism is unlikely to occur in the parent's core metabolism, unless the percentage is lowered towards 0.

    Returns
    -------
    SubstanceEcGraph
        The metabolic network which got lost from the core metabolism of the parent (assumed older) on the way to the core metabolism of the child (assumed younger).

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    olderCore = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    youngerCore = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)

    lost = GeneFunctionLoss.getGraph(olderCore, youngerCore)
    lost.name = 'Lost metabolism {} -> {}'.format(' '.join(self.parentNCBInames), ' '.join(self.childNCBInames))
    return lost
def divergedMetabolism(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False) -> SubstanceEcGraph:
    """
    Substance-EC graph of the diverged core metabolism.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        Every substance-EC-product edge has to occur in `majorityPercentageCoreMetabolism` % of the organisms constituting a clade, to be included in that clade's core metabolism. The same percentage is applied to parent clade and child clade individually.
        The parent clade fully includes the child clade, so an edge occurring in the child's core metabolism counts towards the percentage for the parent's core metabolism.
        Meaning: an EC number missing from the child's core metabolism is unlikely to occur in the parent's core metabolism, unless the percentage is lowered towards 0.
    colour : bool, optional
        If *True*, the graph is assembled as the union of the lost and the added graph, with lost EC edges coloured in blue and added EC edges in red. The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.
        Only the exact value *True* enables colouring.

    Returns
    -------
    SubstanceEcGraph
        The metabolic network which changed between the core metabolism of the parent (assumed older) and the core metabolism of the child (assumed younger).

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    olderCore = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    youngerCore = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)

    if colour is not True:
        diverged = GeneFunctionDivergence.getGraph(olderCore, youngerCore)
    else:
        # build both halves separately, so their edges can be painted individually after merging
        lostPart = GeneFunctionLoss.getGraph(olderCore, youngerCore)
        edgesLost = lostPart.getEdges()

        addedPart = GeneFunctionAddition.getGraph(olderCore, youngerCore)
        edgesAdded = addedPart.getEdges()

        diverged = lostPart.union(addedPart, addCount = False, updateName = False)

        Export.addColourAttribute(diverged, colour = Export.Colour.BLUE, nodes = False, edges = edgesLost)
        Export.addColourAttribute(diverged, colour = Export.Colour.RED, nodes = False, edges = edgesAdded)

    diverged.name = 'Diverged metabolism {} -> {}'.format(' '.join(self.parentNCBInames), ' '.join(self.childNCBInames))
    return diverged
def unifiedMetabolism(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False) -> SubstanceEcGraph:
    """
    Substance-EC graph of the unified core metabolisms, i.e. the union of the parent's and the child's core metabolism.

    If `colour` is *True*, the lost metabolism of the parent is coloured in blue, the added metabolism of the child in red, and the conserved metabolism of both in pink.
    The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured. Without `colour`, no colour attribute is added.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.
    colour : bool, optional
        If *True*, colours EC edges found only in the parent (lost) in blue, EC edges found only in the child (added) in red, and the shared (conserved) EC edges in pink.
        Only the exact value *True* enables colouring.

    Returns
    -------
    SubstanceEcGraph
        The substance-EC graph representing the combined metabolic networks of both, child and parent. If `colour` == *True*, coloured differently for the lost, added, and conserved edges. Nodes are not coloured.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.

    See Also
    --------
    :mod:`FEV_KEGG.Drawing.Export` : Export the graph into a file, e.g. for visualisation in Cytoscape.
    """
    parentCoreMetabolism = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    childCoreMetabolism = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)

    graph = parentCoreMetabolism.union(childCoreMetabolism, addCount = False, updateName = False)
    graph.name = 'Unified metabolism ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)

    if colour is not True:
        return graph

    # partition the unified edges into lost / added / conserved, then paint each partition
    lostEdges = GeneFunctionLoss.getGraph(parentCoreMetabolism, childCoreMetabolism).getEdges()
    addedEdges = GeneFunctionAddition.getGraph(parentCoreMetabolism, childCoreMetabolism).getEdges()
    conservedEdges = GeneFunctionConservation.getGraph(parentCoreMetabolism, childCoreMetabolism).getEdges()

    Export.addColourAttribute(graph, colour = Export.Colour.BLUE, nodes = False, edges = lostEdges)
    Export.addColourAttribute(graph, colour = Export.Colour.RED, nodes = False, edges = addedEdges)
    Export.addColourAttribute(graph, colour = Export.Colour.PINK, nodes = False, edges = conservedEdges)

    return graph
## for enzyme graphs
def conservedMetabolismEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False):
    """
    Two Substance-Enzyme graphs derived from the conserved core metabolism, see :func:`conservedMetabolism`.

    First, the conserved EC numbers are calculated from both core metabolisms. Then, the enzymes associated with these EC numbers are extracted from the parent's and the child's collective enzyme metabolism individually.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.
    colour : bool, optional
        If *True*, colours the enzyme edges from the parent in blue, and from the child in red. When doing so, a single :class:`SubstanceEnzymeGraph` is returned, not a :class:`Tuple`. The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.
        Only the exact value *True* enables colouring.

    Returns
    -------
    Tuple[SubstanceEnzymeGraph, SubstanceEnzymeGraph] or SubstanceEnzymeGraph
        Tuple of two Substance-Enzyme graphs, restricted to the conserved EC numbers. The first graph is from the parent clade, the second graph from the child clade. In the graph names, the clade a graph belongs to is enclosed in asterisks.
        If `colour` == *True*, returns a single Substance-Enzyme graph, the union of both, coloured blue for parent and red for child.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    olderCore = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    youngerCore = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)
    ecsToKeep = GeneFunctionConservation.getECs(olderCore, youngerCore)

    parentGraph = self.parentClade.collectiveMetabolismEnzymes().keepEnzymesByEC(ecsToKeep)
    childGraph = self.childClade.collectiveMetabolismEnzymes().keepEnzymesByEC(ecsToKeep)

    if colour is not True:
        parentLabel = ' '.join(self.parentNCBInames)
        childLabel = ' '.join(self.childNCBInames)
        parentGraph.name = 'Conserved metabolism enzymes *{}* -> {}'.format(parentLabel, childLabel)
        childGraph.name = 'Conserved metabolism enzymes {} -> *{}*'.format(parentLabel, childLabel)
        return (parentGraph, childGraph)

    # edges are collected before merging, so each side can be painted in its own colour
    edgesOfParent = parentGraph.getEdges()
    edgesOfChild = childGraph.getEdges()

    merged = parentGraph.union(childGraph, addCount = False, updateName = False)

    Export.addColourAttribute(merged, colour = Export.Colour.BLUE, nodes = False, edges = edgesOfParent)
    Export.addColourAttribute(merged, colour = Export.Colour.RED, nodes = False, edges = edgesOfChild)

    merged.name = 'Conserved metabolism enzymes {} -> {}'.format(' '.join(self.parentNCBInames), ' '.join(self.childNCBInames))
    return merged
def addedMetabolismEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> SubstanceEnzymeGraph:
    """
    Substance-Enzyme graph derived from the added core metabolism, see :func:`addedMetabolism`.

    First, the added EC numbers are calculated from both core metabolisms. Then, the enzymes associated with these EC numbers are extracted from the child's collective enzyme metabolism.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`addedMetabolism`.

    Returns
    -------
    SubstanceEnzymeGraph
        Substance-Enzyme graph of enzymes from the child clade, restricted to the added EC numbers.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    olderCore = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    youngerCore = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)
    ecsToKeep = GeneFunctionAddition.getECs(olderCore, youngerCore)

    enzymesOfChild = self.childClade.collectiveMetabolismEnzymes()
    result = enzymesOfChild.keepEnzymesByEC(ecsToKeep)
    result.name = 'Added metabolism enzymes {} -> {}'.format(' '.join(self.parentNCBInames), ' '.join(self.childNCBInames))
    return result
def lostMetabolismEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> SubstanceEnzymeGraph:
    """
    Substance-Enzyme graph derived from the lost core metabolism, see :func:`lostMetabolism`.

    First, the lost EC numbers are calculated from both core metabolisms. Then, the enzymes associated with these lost EC numbers are extracted from the parent's collective enzyme metabolism.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`lostMetabolism`.

    Returns
    -------
    SubstanceEnzymeGraph
        Substance-Enzyme graph of enzymes from the parent clade, restricted to the lost EC numbers.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    olderCore = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    youngerCore = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)
    ecsToKeep = GeneFunctionLoss.getECs(olderCore, youngerCore)

    enzymesOfParent = self.parentClade.collectiveMetabolismEnzymes()
    result = enzymesOfParent.keepEnzymesByEC(ecsToKeep)
    result.name = 'Lost metabolism enzymes {} -> {}'.format(' '.join(self.parentNCBInames), ' '.join(self.childNCBInames))
    return result
def divergedMetabolismEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False):
    """
    Two Substance-Enzyme graphs derived from the diverged core metabolism, see :func:`divergedMetabolism`.

    First, the diverged EC numbers are calculated from both core metabolisms. Then, the enzymes associated with these diverged EC numbers are extracted from the parent's and the child's collective enzyme metabolism individually.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`divergedMetabolism`.
    colour : bool, optional
        If *True*, colours the enzyme edges from the parent in blue, and from the child in red. When doing so, a single :class:`SubstanceEnzymeGraph` is returned, not a :class:`Tuple`. The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.
        Only the exact value *True* enables colouring.

    Returns
    -------
    Tuple[SubstanceEnzymeGraph, SubstanceEnzymeGraph] or SubstanceEnzymeGraph
        Tuple of two Substance-Enzyme graphs, restricted to the diverged EC numbers. The first graph is from the parent clade, the second graph from the child clade. In the graph names, the clade a graph belongs to is enclosed in asterisks.
        If `colour` == *True*, returns a single Substance-Enzyme graph, the union of both, coloured blue for parent and red for child.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    olderCore = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    youngerCore = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)
    ecsToKeep = GeneFunctionDivergence.getECs(olderCore, youngerCore)

    parentGraph = self.parentClade.collectiveMetabolismEnzymes().keepEnzymesByEC(ecsToKeep)
    childGraph = self.childClade.collectiveMetabolismEnzymes().keepEnzymesByEC(ecsToKeep)

    if colour is not True:
        parentLabel = ' '.join(self.parentNCBInames)
        childLabel = ' '.join(self.childNCBInames)
        parentGraph.name = 'Diverged metabolism enzymes *{}* -> {}'.format(parentLabel, childLabel)
        childGraph.name = 'Diverged metabolism enzymes {} -> *{}*'.format(parentLabel, childLabel)
        return (parentGraph, childGraph)

    # edges are collected before merging, so each side can be painted in its own colour
    edgesOfParent = parentGraph.getEdges()
    edgesOfChild = childGraph.getEdges()

    merged = parentGraph.union(childGraph, addCount = False, updateName = False)

    Export.addColourAttribute(merged, colour = Export.Colour.BLUE, nodes = False, edges = edgesOfParent)
    Export.addColourAttribute(merged, colour = Export.Colour.RED, nodes = False, edges = edgesOfChild)

    merged.name = 'Diverged metabolism enzymes {} -> {}'.format(' '.join(self.parentNCBInames), ' '.join(self.childNCBInames))
    return merged
def unifiedMetabolismEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False) -> SubstanceEnzymeGraph:
    """
    Substance-Enzyme graph derived from the unified core metabolisms, i.e. the union of the parent's and the child's core metabolism enzyme graphs.

    If `colour` is *True*, the parent's enzyme edges are coloured in blue, and the child's enzyme edges in red. There is no separate colour for lost, conserved, or added edges.
    The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured. Without `colour`, no colour attribute is added.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.
    colour : bool, optional
        If *True*, colours the parent's enzyme edges in blue, and the child's enzyme edges in red.
        The child's colour is applied after the parent's; an edge occurring in both graphs presumably ends up red, assuming a later colour attribute overwrites an earlier one.
        Only the exact value *True* enables colouring.

    Returns
    -------
    SubstanceEnzymeGraph
        The substance-Enzyme graph representing the combined metabolic networks of both, child and parent. If `colour` == *True*, edges are coloured by the clade they come from. Nodes are not coloured.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    parentGraph = self.parentClade.coreMetabolismEnzymes(majorityPercentageCoreMetabolism)
    childGraph = self.childClade.coreMetabolismEnzymes(majorityPercentageCoreMetabolism)

    graph = parentGraph.union(childGraph, addCount = False, updateName = False)
    graph.name = 'Unified metabolism enzymes ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)

    if colour is not True:
        return graph

    # paint by origin: parent first, child second
    parentEdges = parentGraph.getEdges()
    childEdges = childGraph.getEdges()

    Export.addColourAttribute(graph, colour = Export.Colour.BLUE, nodes = False, edges = parentEdges)
    Export.addColourAttribute(graph, colour = Export.Colour.RED, nodes = False, edges = childEdges)

    return graph
# set-operations on gene-duplicated core metabolism
## for enzymes
### for enzyme graphs
def conservedMetabolismGeneDuplicatedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False):
    """
    Two Substance-Enzyme graphs of gene-duplicated enzymes, derived from the conserved core metabolism.

    First, the conserved metabolism enzymes are calculated, see :func:`conservedMetabolismEnzymes`.
    Then, for parent and child, the gene-duplicated enzymes are calculated. Finally, the gene-duplicated enzymes among the conserved core metabolism enzymes are reported.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.
    colour : bool, optional
        If *True*, colours the enzyme edges from the parent in blue, and from the child in red. Gene-duplicated enzyme edges of the parent are coloured in green, the ones of the child in yellow.
        When doing so, a single :class:`SubstanceEnzymeGraph` is returned, not a :class:`Tuple`. This graph is the coloured conserved enzyme graph, it is not reduced to gene-duplicated enzymes.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured. Only the exact value *True* enables colouring.

    Returns
    -------
    Tuple[SubstanceEnzymeGraph, SubstanceEnzymeGraph] or SubstanceEnzymeGraph
        Tuple of two Substance-Enzyme graphs, restricted to the conserved EC numbers and to gene-duplicated enzymes. The first graph is from the parent clade, the second graph from the child clade.
        If `colour` == *True*, returns a single Substance-Enzyme graph.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # either a (parent, child) tuple or a single coloured graph, depending on `colour`
    conserved = self.conservedMetabolismEnzymes(majorityPercentageCoreMetabolism, colour = colour)

    duplicatedInParent = self.parentClade.geneDuplicatedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    duplicatedInChild = self.childClade.geneDuplicatedEnzymes(majorityPercentageCoreMetabolism, colour = False)

    if colour is not True:
        parentGraph = conserved[0].removeAllEnzymesExcept(duplicatedInParent.getEnzymes())
        childGraph = conserved[1].removeAllEnzymesExcept(duplicatedInChild.getEnzymes())

        parentLabel = ' '.join(self.parentNCBInames)
        childLabel = ' '.join(self.childNCBInames)
        parentGraph.name = 'Conserved metabolism gene-duplicated enzymes *{}* -> {}'.format(parentLabel, childLabel)
        childGraph.name = 'Conserved metabolism gene-duplicated enzymes {} -> *{}*'.format(parentLabel, childLabel)
        return (parentGraph, childGraph)

    # highlight gene-duplicated edges on top of the already coloured graph
    edgesOfParent = duplicatedInParent.getEdges()
    edgesOfChild = duplicatedInChild.getEdges()

    Export.addColourAttribute(conserved, colour = Export.Colour.GREEN, nodes = False, edges = edgesOfParent)
    Export.addColourAttribute(conserved, colour = Export.Colour.YELLOW, nodes = False, edges = edgesOfChild)

    conserved.name = 'Conserved metabolism gene-duplicated enzymes {} -> {}'.format(' '.join(self.parentNCBInames), ' '.join(self.childNCBInames))
    return conserved
def addedMetabolismGeneDuplicatedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> SubstanceEnzymeGraph:
    """
    Substance-Enzyme graph of gene-duplicated enzymes, derived from the added core metabolism.

    First, the added EC numbers are calculated from both core metabolisms. Then, the child's gene-duplicated enzymes are reduced to the enzymes associated with these added EC numbers.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`addedMetabolism`.

    Returns
    -------
    SubstanceEnzymeGraph
        Substance-Enzyme graph of gene-duplicated enzymes from the child clade, restricted to the added EC numbers.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    olderCore = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    youngerCore = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)
    ecsToKeep = GeneFunctionAddition.getECs(olderCore, youngerCore)

    duplicatedInChild = self.childClade.geneDuplicatedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    result = duplicatedInChild.keepEnzymesByEC(ecsToKeep)
    result.name = 'Added metabolism gene-duplicated enzymes {} -> {}'.format(' '.join(self.parentNCBInames), ' '.join(self.childNCBInames))
    return result
def lostMetabolismGeneDuplicatedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> SubstanceEnzymeGraph:
    """
    Substance-Enzyme graph of gene-duplicated enzymes, derived from the lost core metabolism.

    First, the lost EC numbers are calculated from both core metabolisms. Then, the parent's gene-duplicated enzymes are reduced to the enzymes associated with these lost EC numbers.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`lostMetabolism`.

    Returns
    -------
    SubstanceEnzymeGraph
        Substance-Enzyme graph of gene-duplicated enzymes from the parent clade, restricted to the lost EC numbers.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    olderCore = self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism)
    youngerCore = self.childClade.coreMetabolism(majorityPercentageCoreMetabolism)
    ecsToKeep = GeneFunctionLoss.getECs(olderCore, youngerCore)

    duplicatedInParent = self.parentClade.geneDuplicatedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    result = duplicatedInParent.keepEnzymesByEC(ecsToKeep)
    result.name = 'Lost metabolism gene-duplicated enzymes {} -> {}'.format(' '.join(self.parentNCBInames), ' '.join(self.childNCBInames))
    return result
def divergedMetabolismGeneDuplicatedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False):
    """
    Two Substance-Enzyme graphs of gene-duplicated enzymes, derived from the diverged core metabolism.

    First, the diverged metabolism enzymes are calculated, see :func:`divergedMetabolismEnzymes`.
    Then, for parent and child, the gene-duplicated enzymes are calculated. Finally, the gene-duplicated enzymes among the diverged core metabolism enzymes are reported.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`divergedMetabolism`.
    colour : bool, optional
        If *True*, colours the enzyme edges from the parent in blue, and from the child in red. Gene-duplicated enzyme edges of the parent are coloured in green, the ones of the child in yellow.
        When doing so, a single :class:`SubstanceEnzymeGraph` is returned, not a :class:`Tuple`. This graph is the coloured diverged enzyme graph, it is not reduced to gene-duplicated enzymes.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured. Only the exact value *True* enables colouring.

    Returns
    -------
    Tuple[SubstanceEnzymeGraph, SubstanceEnzymeGraph] or SubstanceEnzymeGraph
        Tuple of two Substance-Enzyme graphs, restricted to the diverged EC numbers and to gene-duplicated enzymes. The first graph is from the parent clade, the second graph from the child clade.
        If `colour` == *True*, returns a single Substance-Enzyme graph.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # either a (parent, child) tuple or a single coloured graph, depending on `colour`
    diverged = self.divergedMetabolismEnzymes(majorityPercentageCoreMetabolism, colour = colour)

    duplicatedInParent = self.parentClade.geneDuplicatedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    duplicatedInChild = self.childClade.geneDuplicatedEnzymes(majorityPercentageCoreMetabolism, colour = False)

    if colour is not True:
        parentGraph = diverged[0].removeAllEnzymesExcept(duplicatedInParent.getEnzymes())
        childGraph = diverged[1].removeAllEnzymesExcept(duplicatedInChild.getEnzymes())

        parentLabel = ' '.join(self.parentNCBInames)
        childLabel = ' '.join(self.childNCBInames)
        parentGraph.name = 'Diverged metabolism gene-duplicated enzymes *{}* -> {}'.format(parentLabel, childLabel)
        childGraph.name = 'Diverged metabolism gene-duplicated enzymes {} -> *{}*'.format(parentLabel, childLabel)
        return (parentGraph, childGraph)

    # highlight gene-duplicated edges on top of the already coloured graph
    edgesOfParent = duplicatedInParent.getEdges()
    edgesOfChild = duplicatedInChild.getEdges()

    Export.addColourAttribute(diverged, colour = Export.Colour.GREEN, nodes = False, edges = edgesOfParent)
    Export.addColourAttribute(diverged, colour = Export.Colour.YELLOW, nodes = False, edges = edgesOfChild)

    diverged.name = 'Diverged metabolism gene-duplicated enzymes {} -> {}'.format(' '.join(self.parentNCBInames), ' '.join(self.childNCBInames))
    return diverged
def unifiedMetabolismGeneDuplicatedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False) -> SubstanceEnzymeGraph:
    """
    Substance-Enzyme graph of gene-duplicated enzymes, derived from the unified core metabolisms.

    Without colouring, the result is the union of the parent's and the child's gene-duplicated enzyme graphs.
    With colouring, the result is the unified enzyme metabolism of both clades, in which the gene-duplicated edges are highlighted.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.
    colour : bool, optional
        If *True*, the graph returned by :func:`unifiedMetabolismEnzymes` (called with `colour` = *True*) is used as a basis.
        On top of its colouring, gene-duplicated enzyme edges of the parent are coloured in green, the ones of the child in yellow.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.

    Returns
    -------
    SubstanceEnzymeGraph
        The Substance-Enzyme graph of gene-duplicated enzymes of both, child and parent. If `colour` == *True*, the coloured unified enzyme metabolism instead.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    parentDuplicates = self.parentClade.geneDuplicatedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    childDuplicates = self.childClade.geneDuplicatedEnzymes(majorityPercentageCoreMetabolism, colour = False)

    # uncoloured: plain union of both gene-duplicated graphs
    if colour is False:
        return parentDuplicates.union(childDuplicates, addCount = False, updateName = False)

    # coloured: highlight the gene-duplicated edges inside the unified enzyme metabolism
    unified = self.unifiedMetabolismEnzymes(majorityPercentageCoreMetabolism, colour = True)
    highlights = (
        (Export.Colour.GREEN, parentDuplicates.getEdges()),
        (Export.Colour.YELLOW, childDuplicates.getEdges()),
    )
    for highlightColour, highlightEdges in highlights:
        Export.addColourAttribute(unified, colour = highlightColour, nodes = False, edges = highlightEdges)
    return unified
### for enzyme pairs
def conservedMetabolismGeneDuplicatedEnzymePairs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> Tuple[Set[Tuple[Enzyme, Enzyme]], Set[Tuple[Enzyme, Enzyme]]]:
    """
    Pairs of gene-duplicated enzymes, derived from the conserved core metabolism.

    First, the conserved core metabolism is calculated. Then, the enzymes associated with the conserved EC numbers are extracted from the parent's and the child's metabolism individually.
    Then, for parent and child, the gene-duplicated enzyme pairs are calculated. Finally, the gene-duplicated enzyme pairs where both enzymes are in the conserved core metabolism of the respective clade are reported.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.

    Returns
    -------
    Tuple[Set[Tuple[Enzyme, Enzyme]], Set[Tuple[Enzyme, Enzyme]]]
        Tuple of two sets of gene-duplicated enzyme pairs calculated using the conserved EC numbers found by :func:`conservedMetabolism`. The first set is from the parent clade, the second set from the child clade.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # get conserved metabolism
    # Without colouring, conservedMetabolismEnzymes() yields a pair (parent graph, child graph), not a single graph.
    # Hence, the enzymes have to be read from each of the two graphs separately.
    conservedGraphs = self.conservedMetabolismEnzymes(majorityPercentageCoreMetabolism, colour = False)
    parentConservedEnzymes = conservedGraphs[0].getEnzymes()
    childConservedEnzymes = conservedGraphs[1].getEnzymes()

    # get gene-duplicated enzyme pairs
    parentGeneDuplicated = self.parentClade.geneDuplicatedEnzymePairs(majorityPercentageCoreMetabolism)
    childGeneDuplicated = self.childClade.geneDuplicatedEnzymePairs(majorityPercentageCoreMetabolism)

    # filter gene-duplicated enzyme pairs for the ones with both enzymes in the conserved metabolism of the same clade
    parentGeneDuplicatedConserved = {
        enzymeTuple for enzymeTuple in parentGeneDuplicated
        if enzymeTuple[0] in parentConservedEnzymes and enzymeTuple[1] in parentConservedEnzymes
    }
    childGeneDuplicatedConserved = {
        enzymeTuple for enzymeTuple in childGeneDuplicated
        if enzymeTuple[0] in childConservedEnzymes and enzymeTuple[1] in childConservedEnzymes
    }

    return (parentGeneDuplicatedConserved, childGeneDuplicatedConserved)
def addedMetabolismGeneDuplicatedEnzymePairs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> Set[Tuple[Enzyme, Enzyme]]:
    """
    Pairs of gene-duplicated enzymes, derived from the added core metabolism.

    The enzymes of the added core metabolism are looked up first. Then the gene-duplicated enzyme pairs of the child clade are calculated.
    Only those pairs are reported, in which both enzymes are part of the added core metabolism.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`addedMetabolism`.

    Returns
    -------
    Set[Tuple[Enzyme, Enzyme]]
        Pairs of enzymes from the child clade. Calculated using the added EC numbers found by :func:`addedMetabolism`.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # enzymes belonging to the added metabolism
    addedEnzymes = self.addedMetabolismEnzymes(majorityPercentageCoreMetabolism).getEnzymes()

    # all gene-duplicated pairs of the child clade
    childPairs = self.childClade.geneDuplicatedEnzymePairs(majorityPercentageCoreMetabolism)

    # keep a pair only if both of its enzymes are part of the added metabolism
    return {
        pair for pair in childPairs
        if pair[0] in addedEnzymes and pair[1] in addedEnzymes
    }
def lostMetabolismGeneDuplicatedEnzymePairs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> Set[Tuple[Enzyme, Enzyme]]:
    """
    Pairs of gene-duplicated enzymes, derived from the lost core metabolism.

    First, the enzymes of the lost core metabolism are calculated. Then the gene-duplicated enzyme pairs of the parent clade are calculated.
    Finally, the gene-duplicated enzyme pairs where both enzymes are in the lost core metabolism are reported.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`lostMetabolism`.

    Returns
    -------
    Set[Tuple[Enzyme, Enzyme]]
        Pairs of enzymes from the parent clade. Calculated using the lost EC numbers found by :func:`lostMetabolism`.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # get lost metabolism
    lostMetabolismEnzymes = self.lostMetabolismEnzymes(majorityPercentageCoreMetabolism).getEnzymes()

    # get gene-duplicated enzyme pairs
    # Lost functions only exist in the parent, so the pairs have to come from the parent clade, not from the child clade.
    geneDuplicated = self.parentClade.geneDuplicatedEnzymePairs(majorityPercentageCoreMetabolism)

    # filter gene-duplicated enzyme pairs for the ones with both enzymes in the lost metabolism
    geneDuplicatedLost = {
        enzymeTuple for enzymeTuple in geneDuplicated
        if enzymeTuple[0] in lostMetabolismEnzymes and enzymeTuple[1] in lostMetabolismEnzymes
    }

    return geneDuplicatedLost
def divergedMetabolismGeneDuplicatedEnzymePairs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> Set[Tuple[Enzyme, Enzyme]]:
    """
    Pairs of gene-duplicated enzymes, derived from the diverged core metabolism.

    First, the diverged core metabolism is calculated. Then, the enzymes associated with the diverged EC numbers are extracted from the parent's and the child's metabolism individually.
    Then, for parent and child, the gene-duplicated enzyme pairs are calculated. Finally, the gene-duplicated enzyme pairs where both enzymes are in the diverged core metabolism of the respective clade are reported.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`divergedMetabolism`.

    Returns
    -------
    Set[Tuple[Enzyme, Enzyme]]
        Set of gene-duplicated enzyme pairs of both clades, calculated using the diverged EC numbers found by :func:`divergedMetabolism`.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # get diverged metabolism
    # Without colouring, divergedMetabolismEnzymes() yields a pair (parent graph, child graph), not a single graph.
    # Hence, the enzymes have to be read from each of the two graphs separately.
    divergedGraphs = self.divergedMetabolismEnzymes(majorityPercentageCoreMetabolism, colour = False)
    parentDivergedEnzymes = divergedGraphs[0].getEnzymes()
    childDivergedEnzymes = divergedGraphs[1].getEnzymes()

    # get gene-duplicated enzyme pairs
    parentGeneDuplicated = self.parentClade.geneDuplicatedEnzymePairs(majorityPercentageCoreMetabolism)
    childGeneDuplicated = self.childClade.geneDuplicatedEnzymePairs(majorityPercentageCoreMetabolism)

    # filter gene-duplicated enzyme pairs for the ones with both enzymes in the diverged metabolism of the same clade
    parentGeneDuplicatedDiverged = {
        enzymeTuple for enzymeTuple in parentGeneDuplicated
        if enzymeTuple[0] in parentDivergedEnzymes and enzymeTuple[1] in parentDivergedEnzymes
    }
    childGeneDuplicatedDiverged = {
        enzymeTuple for enzymeTuple in childGeneDuplicated
        if enzymeTuple[0] in childDivergedEnzymes and enzymeTuple[1] in childDivergedEnzymes
    }

    return parentGeneDuplicatedDiverged.union(childGeneDuplicatedDiverged)
def unifiedMetabolismGeneDuplicatedEnzymePairs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> Set[Tuple[Enzyme, Enzyme]]:
    """
    Pairs of gene-duplicated enzymes, derived from the unified core metabolisms.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.

    Returns
    -------
    Set[Tuple[Enzyme, Enzyme]]
        Set of enzyme pairs representing the gene-duplicated enzymes of the combined metabolic networks of both, child and parent.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # pairs of the parent first, then pairs of the child; the result is the union of both
    pairsOfParent = self.parentClade.geneDuplicatedEnzymePairs(majorityPercentageCoreMetabolism)
    pairsOfChild = self.childClade.geneDuplicatedEnzymePairs(majorityPercentageCoreMetabolism)
    unifiedPairs = pairsOfParent.union(pairsOfChild)
    return unifiedPairs
# set-operations on neofunctionalised core metabolism
## for enzyme graphs
def conservedMetabolismNeofunctionalisedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False):
    """
    Two Substance-Enzyme graphs of neofunctionalised enzymes, derived from the conserved core metabolism.

    The conserved enzyme metabolism is fetched from :func:`conservedMetabolismEnzymes`, the neofunctionalised enzymes of parent and child from their respective clade.
    Without colouring, each conserved graph is reduced to the neofunctionalised enzymes of its own clade.
    With colouring, the neofunctionalised edges are highlighted inside the single conserved graph.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.
    colour : bool, optional
        If *True*, the coloured graph of :func:`conservedMetabolismEnzymes` is used as a basis. Neofunctionalised enzyme edges of the parent are coloured in green, the ones of the child in yellow.
        When doing so, a single :class:`SubstanceEnzymeGraph` is returned, not a :class:`Tuple`. The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.

    Returns
    -------
    Tuple[SubstanceEnzymeGraph, SubstanceEnzymeGraph] or SubstanceEnzymeGraph
        Tuple of two Substance-Enzyme graphs calculated using the conserved EC numbers found by :func:`conservedMetabolism`. The first graph is from the parent clade, the second graph from the child clade.
        If `colour` == *True*, returns a single Substance-Enzyme graph.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    conserved = self.conservedMetabolismEnzymes(majorityPercentageCoreMetabolism, colour = colour)
    neoOfParent = self.parentClade.neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    neoOfChild = self.childClade.neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, colour = False)

    title = 'Conserved metabolism neofunctionalised enzymes '

    if colour is True:
        # `conserved` is a single, already coloured graph here; only add the highlights
        edgesOfParent = neoOfParent.getEdges()
        edgesOfChild = neoOfChild.getEdges()
        Export.addColourAttribute(conserved, colour = Export.Colour.GREEN, nodes = False, edges = edgesOfParent)
        Export.addColourAttribute(conserved, colour = Export.Colour.YELLOW, nodes = False, edges = edgesOfChild)
        conserved.name = title + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
        return conserved

    # `conserved` is a pair (parent graph, child graph) here; reduce each to its clade's neofunctionalised enzymes
    reducedParent = conserved[0].removeAllEnzymesExcept(neoOfParent.getEnzymes())
    reducedChild = conserved[1].removeAllEnzymesExcept(neoOfChild.getEnzymes())

    # the asterisks mark the clade a graph belongs to
    reducedParent.name = title + '*' + ' '.join(self.parentNCBInames) + '* -> ' + ' '.join(self.childNCBInames)
    reducedChild.name = title + ' '.join(self.parentNCBInames) + ' -> *' + ' '.join(self.childNCBInames) + '*'
    return (reducedParent, reducedChild)
def addedMetabolismNeofunctionalisedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> SubstanceEnzymeGraph:
    """
    Substance-Enzyme graph of neofunctionalised enzymes, derived from the added core metabolism.

    The neofunctionalised enzymes of the child clade are reduced to those enzymes associated with an added EC number.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`addedMetabolism`.

    Returns
    -------
    SubstanceEnzymeGraph
        Substance-Enzyme graph of enzymes from the child clade. Calculated using the added EC numbers found by :func:`addedMetabolism`.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # EC numbers which were added on the way from parent to child
    ecsAdded = GeneFunctionAddition.getECs(
        self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism),
        self.childClade.coreMetabolism(majorityPercentageCoreMetabolism),
    )

    # neofunctionalised enzymes of the child, reduced to the added EC numbers
    neoOfChild = self.childClade.neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    result = neoOfChild.keepEnzymesByEC(ecsAdded)

    namesOfParent = ' '.join(self.parentNCBInames)
    namesOfChild = ' '.join(self.childNCBInames)
    result.name = 'Added metabolism neofunctionalised enzymes ' + namesOfParent + ' -> ' + namesOfChild
    return result
def lostMetabolismNeofunctionalisedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism) -> SubstanceEnzymeGraph:
    """
    Substance-Enzyme graph of neofunctionalised enzymes, derived from the lost core metabolism.

    The neofunctionalised enzymes of the parent clade are reduced to those enzymes associated with a lost EC number.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`lostMetabolism`.

    Returns
    -------
    SubstanceEnzymeGraph
        Substance-Enzyme graph of enzymes from the parent clade. Calculated using the lost EC numbers found by :func:`lostMetabolism`.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # EC numbers which were lost on the way from parent to child
    ecsLost = GeneFunctionLoss.getECs(
        self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism),
        self.childClade.coreMetabolism(majorityPercentageCoreMetabolism),
    )

    # neofunctionalised enzymes of the parent, reduced to the lost EC numbers
    neoOfParent = self.parentClade.neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    result = neoOfParent.keepEnzymesByEC(ecsLost)

    namesOfParent = ' '.join(self.parentNCBInames)
    namesOfChild = ' '.join(self.childNCBInames)
    result.name = 'Lost metabolism neofunctionalised enzymes ' + namesOfParent + ' -> ' + namesOfChild
    return result
def divergedMetabolismNeofunctionalisedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False):
    """
    Two Substance-Enzyme graphs of neofunctionalised enzymes, derived from the diverged core metabolism.

    The diverged enzyme metabolism is fetched from :func:`divergedMetabolismEnzymes`, the neofunctionalised enzymes of parent and child from their respective clade.
    Without colouring, each diverged graph is reduced to the neofunctionalised enzymes of its own clade.
    With colouring, the neofunctionalised edges are highlighted inside the single diverged graph.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`divergedMetabolism`.
    colour : bool, optional
        If *True*, the coloured graph of :func:`divergedMetabolismEnzymes` is used as a basis. Neofunctionalised enzyme edges of the parent are coloured in green, the ones of the child in yellow.
        When doing so, a single :class:`SubstanceEnzymeGraph` is returned, not a :class:`Tuple`. The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.

    Returns
    -------
    Tuple[SubstanceEnzymeGraph, SubstanceEnzymeGraph] or SubstanceEnzymeGraph
        Tuple of two Substance-Enzyme graphs calculated using the diverged EC numbers found by :func:`divergedMetabolism`. The first graph is from the parent clade, the second graph from the child clade.
        If `colour` == *True*, returns a single Substance-Enzyme graph.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    diverged = self.divergedMetabolismEnzymes(majorityPercentageCoreMetabolism, colour = colour)
    neoOfParent = self.parentClade.neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    neoOfChild = self.childClade.neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, colour = False)

    title = 'Diverged metabolism neofunctionalised enzymes '

    if colour is True:
        # `diverged` is a single, already coloured graph here; only add the highlights
        edgesOfParent = neoOfParent.getEdges()
        edgesOfChild = neoOfChild.getEdges()
        Export.addColourAttribute(diverged, colour = Export.Colour.GREEN, nodes = False, edges = edgesOfParent)
        Export.addColourAttribute(diverged, colour = Export.Colour.YELLOW, nodes = False, edges = edgesOfChild)
        diverged.name = title + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
        return diverged

    # `diverged` is a pair (parent graph, child graph) here; reduce each to its clade's neofunctionalised enzymes
    reducedParent = diverged[0].removeAllEnzymesExcept(neoOfParent.getEnzymes())
    reducedChild = diverged[1].removeAllEnzymesExcept(neoOfChild.getEnzymes())

    # the asterisks mark the clade a graph belongs to
    reducedParent.name = title + '*' + ' '.join(self.parentNCBInames) + '* -> ' + ' '.join(self.childNCBInames)
    reducedChild.name = title + ' '.join(self.parentNCBInames) + ' -> *' + ' '.join(self.childNCBInames) + '*'
    return (reducedParent, reducedChild)
def unifiedMetabolismNeofunctionalisedEnzymes(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, colour = False) -> SubstanceEnzymeGraph:
    """
    Substance-Enzyme graph of neofunctionalised enzymes, derived from the unified core metabolisms.

    Without colouring, the result is the union of the parent's and the child's neofunctionalised enzyme graphs.
    With colouring, the result is the unified enzyme metabolism of both clades, in which the neofunctionalised edges are highlighted.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.
    colour : bool, optional
        If *True*, the graph returned by :func:`unifiedMetabolismEnzymes` (called with `colour` = *True*) is used as a basis.
        On top of its colouring, neofunctionalised enzyme edges of the parent are coloured in green, the ones of the child in yellow.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.

    Returns
    -------
    SubstanceEnzymeGraph
        The Substance-Enzyme graph of neofunctionalised enzymes of both, child and parent. If `colour` == *True*, the coloured unified enzyme metabolism instead.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    parentNeofunctionalised = self.parentClade.neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, colour = False)
    childNeofunctionalised = self.childClade.neofunctionalisedEnzymes(majorityPercentageCoreMetabolism, colour = False)

    if colour is False:
        # the union does not update the name, it is set explicitly below
        graph = parentNeofunctionalised.union(childNeofunctionalised, addCount = False, updateName = False)
    else:
        graph = self.unifiedMetabolismEnzymes(majorityPercentageCoreMetabolism, colour = True)
        parentEdges = parentNeofunctionalised.getEdges()
        childEdges = childNeofunctionalised.getEdges()
        Export.addColourAttribute(graph, colour = Export.Colour.GREEN, nodes = False, edges = parentEdges)
        Export.addColourAttribute(graph, colour = Export.Colour.YELLOW, nodes = False, edges = childEdges)

    # This is the unified metabolism, it must not be labelled as the diverged one.
    graph.name = 'Unified metabolism neofunctionalised enzymes ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)

    return graph
## for EC graphs
def conservedMetabolismNeofunctionalisedECs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation = defaultMajorityPercentageNeofunctionalisation, colour = False):
    """
    Two Substance-EC graphs of "neofunctionalised" EC numbers, derived from the conserved core metabolism.

    The conserved metabolism is fetched from :func:`conservedMetabolism`, the "neofunctionalised" EC numbers of parent and child from their respective clade.
    Without colouring, the conserved metabolism is reduced to the "neofunctionalised" EC numbers of each clade.
    With colouring, the "neofunctionalised" edges are highlighted inside the conserved metabolism.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.
    majorityPercentageNeofunctionalisation : int, optional
        Passed on to the `neofunctionalisedECs` method of both clades. See :class:`FEV_KEGG.KEGG.Evolution.Events.NeofunctionalisedECs`.
    colour : bool, optional
        If *True*, "neofunctionalised" EC edges of the parent are coloured in green, the ones of the child in yellow.
        When doing so, a single :class:`SubstanceEcGraph` is returned, not a :class:`Tuple`. The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.

    Returns
    -------
    Tuple[SubstanceEcGraph, SubstanceEcGraph] or SubstanceEcGraph
        Tuple of two Substance-EC graphs calculated using the conserved EC numbers found by :func:`conservedMetabolism`. The first graph is from the parent clade, the second graph from the child clade.
        If `colour` == *True*, returns a single Substance-EC graph.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # NOTE(review): fetched identically for both branches, although one branch treats it as a single graph
    # and the other one indexes it like a pair -- confirm against :func:`conservedMetabolism`.
    conserved = self.conservedMetabolism(majorityPercentageCoreMetabolism)
    neoOfParent = self.parentClade.neofunctionalisedECs(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, colour = False)
    neoOfChild = self.childClade.neofunctionalisedECs(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, colour = False)

    title = 'Conserved metabolism neofunctionalised ECs '

    if colour is True:
        edgesOfParent = neoOfParent.getEdges()
        edgesOfChild = neoOfChild.getEdges()
        Export.addColourAttribute(conserved, colour = Export.Colour.GREEN, nodes = False, edges = edgesOfParent)
        Export.addColourAttribute(conserved, colour = Export.Colour.YELLOW, nodes = False, edges = edgesOfChild)
        conserved.name = title + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
        return conserved

    reducedParent = conserved[0].removeAllECsExcept(neoOfParent.getECs())
    reducedChild = conserved[1].removeAllECsExcept(neoOfChild.getECs())

    # the asterisks mark the clade a graph belongs to
    reducedParent.name = title + '*' + ' '.join(self.parentNCBInames) + '* -> ' + ' '.join(self.childNCBInames)
    reducedChild.name = title + ' '.join(self.parentNCBInames) + ' -> *' + ' '.join(self.childNCBInames) + '*'
    return (reducedParent, reducedChild)
def addedMetabolismNeofunctionalisedECs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation = defaultMajorityPercentageNeofunctionalisation) -> SubstanceEcGraph:
    """
    Substance-EC graph of "neofunctionalised" EC numbers, derived from the added core metabolism.

    The "neofunctionalised" EC numbers of the child clade are reduced to the added EC numbers.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`addedMetabolism`.
    majorityPercentageNeofunctionalisation : int, optional
        Passed on to the `neofunctionalisedECs` method of the child clade. See :class:`FEV_KEGG.KEGG.Evolution.Events.NeofunctionalisedECs`.

    Returns
    -------
    SubstanceEcGraph
        Substance-EC graph of ECs from the child clade. Calculated using the added EC numbers found by :func:`addedMetabolism`.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # EC numbers which were added on the way from parent to child
    ecsAdded = GeneFunctionAddition.getECs(
        self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism),
        self.childClade.coreMetabolism(majorityPercentageCoreMetabolism),
    )

    # "neofunctionalised" ECs of the child, reduced to the added EC numbers
    neoOfChild = self.childClade.neofunctionalisedECs(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, colour = False)
    result = neoOfChild.removeAllECsExcept(ecsAdded)

    namesOfParent = ' '.join(self.parentNCBInames)
    namesOfChild = ' '.join(self.childNCBInames)
    result.name = 'Added metabolism neofunctionalised ECs ' + namesOfParent + ' -> ' + namesOfChild
    return result
def lostMetabolismNeofunctionalisedECs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation = defaultMajorityPercentageNeofunctionalisation) -> SubstanceEcGraph:
    """
    Substance-EC graph of "neofunctionalised" EC numbers, derived from the lost core metabolism.

    The "neofunctionalised" EC numbers of the parent clade are reduced to the lost EC numbers.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`lostMetabolism`.
    majorityPercentageNeofunctionalisation : int, optional
        Passed on to the `neofunctionalisedECs` method of the parent clade. See :class:`FEV_KEGG.KEGG.Evolution.Events.NeofunctionalisedECs`.

    Returns
    -------
    SubstanceEcGraph
        Substance-EC graph of ECs from the parent clade. Calculated using the lost EC numbers found by :func:`lostMetabolism`.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    # EC numbers which were lost on the way from parent to child
    ecsLost = GeneFunctionLoss.getECs(
        self.parentClade.coreMetabolism(majorityPercentageCoreMetabolism),
        self.childClade.coreMetabolism(majorityPercentageCoreMetabolism),
    )

    # "neofunctionalised" ECs of the parent, reduced to the lost EC numbers
    neoOfParent = self.parentClade.neofunctionalisedECs(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, colour = False)
    result = neoOfParent.removeAllECsExcept(ecsLost)

    namesOfParent = ' '.join(self.parentNCBInames)
    namesOfChild = ' '.join(self.childNCBInames)
    result.name = 'Lost metabolism neofunctionalised ECs ' + namesOfParent + ' -> ' + namesOfChild
    return result
def divergedMetabolismNeofunctionalisedECs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation = defaultMajorityPercentageNeofunctionalisation, colour = False):
    """
    Two Substance-EC graphs of "neofunctionalised" EC numbers, derived from the diverged core metabolism.

    The diverged metabolism is fetched from :func:`divergedMetabolism`, the "neofunctionalised" EC numbers of parent and child from their respective clade.
    Without colouring, the diverged metabolism is reduced to the "neofunctionalised" EC numbers of each clade.
    With colouring, the "neofunctionalised" edges are highlighted inside the coloured diverged metabolism.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`divergedMetabolism`.
    majorityPercentageNeofunctionalisation : int, optional
        Passed on to the `neofunctionalisedECs` method of both clades. See :class:`FEV_KEGG.KEGG.Evolution.Events.NeofunctionalisedECs`.
    colour : bool, optional
        If *True*, the coloured graph of :func:`divergedMetabolism` is used as a basis. "Neofunctionalised" EC edges of the parent are coloured in green, the ones of the child in yellow.
        When doing so, a single :class:`SubstanceEcGraph` is returned, not a :class:`Tuple`. The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.

    Returns
    -------
    Tuple[SubstanceEcGraph, SubstanceEcGraph] or SubstanceEcGraph
        Tuple of two Substance-EC graphs calculated using the diverged EC numbers found by :func:`divergedMetabolism`. The first graph is from the parent clade, the second graph from the child clade.
        If `colour` == *True*, returns a single Substance-EC graph.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    diverged = self.divergedMetabolism(majorityPercentageCoreMetabolism, colour = colour)
    neoOfParent = self.parentClade.neofunctionalisedECs(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, colour = False)
    neoOfChild = self.childClade.neofunctionalisedECs(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, colour = False)

    title = 'Diverged metabolism neofunctionalised ECs '

    if colour is True:
        edgesOfParent = neoOfParent.getEdges()
        edgesOfChild = neoOfChild.getEdges()
        Export.addColourAttribute(diverged, colour = Export.Colour.GREEN, nodes = False, edges = edgesOfParent)
        Export.addColourAttribute(diverged, colour = Export.Colour.YELLOW, nodes = False, edges = edgesOfChild)
        diverged.name = title + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)
        return diverged

    # NOTE(review): indexing assumes the uncoloured result of divergedMetabolism() is a pair (parent, child) -- confirm.
    reducedParent = diverged[0].removeAllECsExcept(neoOfParent.getECs())
    reducedChild = diverged[1].removeAllECsExcept(neoOfChild.getECs())

    # the asterisks mark the clade a graph belongs to
    reducedParent.name = title + '*' + ' '.join(self.parentNCBInames) + '* -> ' + ' '.join(self.childNCBInames)
    reducedChild.name = title + ' '.join(self.parentNCBInames) + ' -> *' + ' '.join(self.childNCBInames) + '*'
    return (reducedParent, reducedChild)
def unifiedMetabolismNeofunctionalisedECs(self, majorityPercentageCoreMetabolism = defaultMajorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation = defaultMajorityPercentageNeofunctionalisation, colour = False) -> SubstanceEcGraph:
    """
    Substance-EC graph of "neofunctionalised" EC numbers, derived from the unified core metabolisms.

    Without colouring, the result is the union of the parent's and the child's "neofunctionalised" EC graphs.
    With colouring, the result is the unified metabolism of both clades, in which the "neofunctionalised" edges are highlighted.

    Parameters
    ----------
    majorityPercentageCoreMetabolism : int, optional
        See :func:`conservedMetabolism`.
    majorityPercentageNeofunctionalisation : int, optional
        Passed on to the `neofunctionalisedECs` method of both clades. See :class:`FEV_KEGG.KEGG.Evolution.Events.NeofunctionalisedECs`.
    colour : bool, optional
        If *True*, the graph returned by :func:`unifiedMetabolism` (called with `colour` = *True*) is used as a basis.
        On top of its colouring, "neofunctionalised" EC edges of the parent are coloured in green, the ones of the child in yellow.
        The colouring is realised by adding a 'colour' attribute to each edge. Nodes are not coloured.

    Returns
    -------
    SubstanceEcGraph
        The Substance-EC graph of "neofunctionalised" EC numbers of both, child and parent. If `colour` == *True*, the coloured unified metabolism instead.

    Raises
    ------
    TypeError
        If you failed to enable :attr:`FEV_KEGG.settings.automaticallyStartProcessPool` or to provide a :attr:`FEV_KEGG.Util.Parallelism.processPool`. See :func:`FEV_KEGG.KEGG.Organism.Group._getGraphsParallelly`.
    HTTPError
        If fetching any of the underlying graphs fails.
    URLError
        If connection to KEGG fails.
    """
    parentNeofunctionalised = self.parentClade.neofunctionalisedECs(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, colour = False)
    childNeofunctionalised = self.childClade.neofunctionalisedECs(majorityPercentageCoreMetabolism, majorityPercentageNeofunctionalisation, colour = False)

    if colour is False:
        # the union does not update the name, it is set explicitly below
        graph = parentNeofunctionalised.union(childNeofunctionalised, addCount = False, updateName = False)
    else:
        graph = self.unifiedMetabolism(majorityPercentageCoreMetabolism, colour = True)
        parentEdges = parentNeofunctionalised.getEdges()
        childEdges = childNeofunctionalised.getEdges()
        Export.addColourAttribute(graph, colour = Export.Colour.GREEN, nodes = False, edges = parentEdges)
        Export.addColourAttribute(graph, colour = Export.Colour.YELLOW, nodes = False, edges = childEdges)

    # This is the unified metabolism, it must not be labelled as the diverged one.
    graph.name = 'Unified metabolism neofunctionalised ECs ' + ' '.join(self.parentNCBInames) + ' -> ' + ' '.join(self.childNCBInames)

    return graph
class NestedCladePair(CladePair):

    def __init__(self, parent, child, excludeUnclassified = defaultExcludeUnclassified):
        """
        Two clades in NCBI taxonomy, 'child' is assumed younger and must be nested somewhere inside 'parent'.

        This only checks nestedness for the first node found in taxonomy, by the first parent's/child's NCBI name, respectively. The latter being relevant if you pass a :class:`Clade`, which has a list of NCBI names, or a list of NCBI names itself.
        If a plain string is passed, the string itself is used as the NCBI name for the nestedness check.

        Parameters
        ----------
        parent : str or List[str] or Clade
            Path(s) of the parent clade's taxon, as defined by NCBI taxonomy, e.g. 'Proteobacteria/Gammaproteobacteria'. Or a ready :class:`Clade` object.
        child : str or List[str] or Clade
            Path(s) of the child clade's taxon, as defined by NCBI taxonomy, e.g. 'Enterobacter'. Or a ready :class:`Clade` object.
        excludeUnclassified : bool, optional
            If *True*, ignore taxons with a path containing the string 'unclassified'.

        Attributes
        ----------
        self.childClade : :class:`Clade`
        self.parentClade : :class:`Clade`

        Raises
        ------
        ValueError
            If parent or child are unknown taxons. Or if the child taxon is not actually a child of the parent taxon.
        """
        # reduce each argument to the single NCBI name used for the nestedness check
        parentNCBIname = self._firstNCBIname(parent)
        childNCBIname = self._firstNCBIname(child)

        # resolve both names to their first matching taxonomy node
        taxonomy = NCBI.getTaxonomy()
        exceptPaths = 'unclassified' if excludeUnclassified else None
        parentNode = self._firstNodeByPath(taxonomy, parentNCBIname, exceptPaths)
        childNode = self._firstNodeByPath(taxonomy, childNCBIname, exceptPaths)

        # check if child is really a child of parent: one of the child's ancestors must have the parent's path
        parentPath = Taxonomy.nodePath2String(parentNode)
        if not any(Taxonomy.nodePath2String(ancestor) == parentPath for ancestor in childNode.ancestors):
            raise ValueError("Child is not a descendant of parent.")

        super().__init__(parent, child, excludeUnclassified)

    @staticmethod
    def _firstNCBIname(clade):
        """
        Get the first NCBI name of `clade`, which may be a :class:`Clade`, a single name, or a sequence of names.
        """
        if isinstance(clade, Clade):
            return clade.ncbiNames[0]
        if isinstance(clade, str):
            # a single path is its own (and only) NCBI name; previously this case left the name unassigned
            return clade
        # must be indexable, else fail
        return clade[0]

    @staticmethod
    def _firstNodeByPath(taxonomy, ncbiName, exceptPaths):
        """
        Get the first taxonomy node found for `ncbiName`, raising ValueError if the search yields nothing.
        """
        nodes = taxonomy.searchNodesByPath(ncbiName, exceptPaths=exceptPaths)
        if nodes is None or len(nodes) == 0:
            raise ValueError("No clade of this path found: " + ncbiName)
        # only consider first element
        return nodes[0]