Source code for FEV_KEGG.lib.Biopython.KEGG.KGML.KGML_parser

# Copyright 2013 by Leighton Pritchard.  All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.

"""Classes and functions to parse a KGML pathway map.

The KGML pathway map is parsed into the object structure defined in
KGML_Pathway.py in this module.

Classes:
 - KGMLParser - Parses KGML file

Functions:
 - read - Returns a single Pathway object, using KGMLParser internally

"""

from xml.etree import ElementTree

from io import StringIO

from FEV_KEGG.lib.Biopython.KEGG.KGML.KGML_pathway import Component, Entry, Graphics
from FEV_KEGG.lib.Biopython.KEGG.KGML.KGML_pathway import Pathway, Reaction, Relation


[docs]def read(handle): """Parse a single KEGG Pathway from given file handle. Returns a single Pathway object. There should be one and only one pathway in each file, but there may well be pathological examples out there. """ pathways = parse(handle) try: pathway = next(pathways) except StopIteration: raise ValueError("No pathways found in handle") from None try: next(pathways) raise ValueError("More than one pathway found in handle") except StopIteration: pass return pathway
[docs]def parse(handle): """Return an iterator over Pathway elements. Arguments: - handle - file handle to a KGML file for parsing, or a KGML string This is a generator for the return of multiple Pathway objects. """ # Check handle try: handle.read(0) except AttributeError: try: handle = StringIO(handle) except TypeError: raise TypeError( "An XML-containing handle or an XML string must be provided" ) from None # Parse XML and return each Pathway for event, elem in ElementTree.iterparse(handle, events=("start", "end")): if event == "end" and elem.tag == "pathway": yield KGMLParser(elem).parse() elem.clear()
[docs]class KGMLParser: """Parses a KGML XML Pathway entry into a Pathway object. Example: Read and parse large metabolism file >>> from FEV_KEGG.lib.Biopython.KEGG.KGML.KGML_parser import read >>> pathway = read(open('KEGG/ko01100.xml', 'r')) >>> print(len(pathway.entries)) 3628 >>> print(len(pathway.reactions)) 1672 >>> print(len(pathway.maps)) 149 >>> pathway = read(open('KEGG/ko00010.xml', 'r')) >>> print(pathway) #doctest: +NORMALIZE_WHITESPACE Pathway: Glycolysis / Gluconeogenesis KEGG ID: path:ko00010 Image file: http://www.kegg.jp/kegg/pathway/ko/ko00010.png Organism: ko Entries: 99 Entry types: ortholog: 61 compound: 31 map: 7 """ def __init__(self, elem): """Initialize the class.""" self.entry = elem
[docs] def parse(self): """Parse the input elements.""" def _parse_pathway(attrib): for k, v in attrib.items(): self.pathway.__setattr__(k, v) def _parse_entry(element): new_entry = Entry() for k, v in element.attrib.items(): new_entry.__setattr__(k, v) for subelement in element: if subelement.tag == "graphics": _parse_graphics(subelement, new_entry) elif subelement.tag == "component": _parse_component(subelement, new_entry) self.pathway.add_entry(new_entry) def _parse_graphics(element, entry): new_graphics = Graphics(entry) for k, v in element.attrib.items(): new_graphics.__setattr__(k, v) entry.add_graphics(new_graphics) def _parse_component(element, entry): new_component = Component(entry) for k, v in element.attrib.items(): new_component.__setattr__(k, v) entry.add_component(new_component) def _parse_reaction(element): new_reaction = Reaction() for k, v in element.attrib.items(): new_reaction.__setattr__(k, v) for subelement in element: if subelement.tag == "substrate": new_reaction.add_substrate(int(subelement.attrib["id"])) elif subelement.tag == "product": new_reaction.add_product(int(subelement.attrib["id"])) self.pathway.add_reaction(new_reaction) def _parse_relation(element): new_relation = Relation() new_relation.entry1 = int(element.attrib["entry1"]) new_relation.entry2 = int(element.attrib["entry2"]) new_relation.type = element.attrib["type"] for subtype in element: name, value = subtype.attrib["name"], subtype.attrib["value"] if name in ("compound", "hidden compound"): new_relation.subtypes.append((name, int(value))) else: new_relation.subtypes.append((name, value)) self.pathway.add_relation(new_relation) # ========== # Initialize Pathway self.pathway = Pathway() # Get information about the pathway itself _parse_pathway(self.entry.attrib) for element in self.entry: if element.tag == "entry": _parse_entry(element) elif element.tag == "reaction": _parse_reaction(element) elif element.tag == "relation": _parse_relation(element) # # Parsing of some elements not implemented - no examples yet # else: # # This should warn us of any unimplemented tags # import warnings # from Bio import BiopythonParserWarning # # warnings.warn( # "Warning: tag %s not implemented in parser" % element.tag, # BiopythonParserWarning, # ) return self.pathway
# if __name__ == "__main__": # from Bio._utils import run_doctest # # run_doctest(verbose=0)