# ontology_parser.rb, revision 2715ba04b8716707c2d24d9ce495425eb9a5c189
# Streaming (SAX) parser that reports selected XML elements through callbacks.
#
# Events passed to the callbacks, keyed by Symbol:
#   :root, :ontology, :ontology_end, :import, :symbol,
#   :axiom, :imported_axiom, :link
module OntologyParser

  # Raised by Listener#error when the SAX parser reports a problem.
  #
  # NOTE(review): this derives from Exception, so a bare `rescue` (which only
  # handles StandardError) will not catch it. Left unchanged so that existing
  # callers keep seeing the same class hierarchy; consider StandardError.
  class ParseException < Exception; end

  # Parses the given input and executes the matching callback for each
  # recognised element.
  #
  # input     - handed unchanged to Nokogiri::XML::SAX::Parser#parse
  # callbacks - object indexed by event name (see Listener#callback); an
  #             event with no entry is silently skipped
  #
  # Returns whatever Nokogiri::XML::SAX::Parser#parse returns.
  def self.parse(input, callbacks)
    # Create a new parser around a fresh listener
    parser = Nokogiri::XML::SAX::Parser.new(Listener.new(callbacks))

    # Feed the parser some XML
    parser.parse(input)
  end

  # Listener for the SAX parser. Keeps track of the element currently being
  # read and fires the callbacks once enough data has been collected.
  class Listener < Nokogiri::XML::SAX::Document
    # Element names this listener reacts to
    MAP       = 'map'
    ROOT      = 'DGraph'
    ONTOLOGY  = 'DGNode'
    SYMBOL    = 'Symbol'
    AXIOM     = 'Axiom'
    IMPAXIOMS = 'ImpAxioms'
    AXIOMS    = 'Axioms'
    LINK      = 'DGLink'
    TEXT      = 'Text'
    TYPE      = 'Type'
    MORPHISM  = 'GMorphism'
    IMPORT    = 'Reference'

    # callbacks - object indexed by event name; each entry must respond to
    #             #call and accept one argument
    def initialize(callbacks)
      @callbacks        = callbacks
      @path             = []    # stack of currently open element names
      @current_ontology = nil
      @current_symbol   = nil
      @current_axiom    = nil
      @current_link     = nil
      @in_imp_axioms    = false
      @in_axioms        = false # was previously left uninitialised
    end

    # An opening tag.
    #
    # name       - element name
    # attributes - attribute list in a form accepted by Hash[]
    def start_element(name, attributes = [])
      @path << name
      case name
      when ROOT
        callback(:root, attributes_hash(attributes))
      when ONTOLOGY
        # Remember the attributes so that :ontology_end receives them too
        # (it used to be called with nil because nothing was ever stored).
        @current_ontology = attributes_hash(attributes)
        callback(:ontology, @current_ontology)
      when IMPORT
        callback(:import, attributes_hash(attributes))
      when SYMBOL
        @current_symbol = attributes_hash(attributes)
        @current_symbol['text'] = ''
        # A symbol inside a link's <map> belongs to that link's mapping
        if @current_link && @current_link['map']
          @current_link['map'] << @current_symbol
        end
        @current_axiom['symbol_hashes'] << @current_symbol if @current_axiom
      when IMPAXIOMS
        @in_imp_axioms = true
      when AXIOMS
        @in_axioms = true
      when AXIOM
        @current_axiom = attributes_hash(attributes)
        @current_axiom['symbols']       = []
        @current_axiom['symbol_hashes'] = []
        @current_axiom['text']          = ''
      when LINK
        @current_link = attributes_hash(attributes)
      when MORPHISM
        @current_link['morphism'] = attributes_hash(attributes)['name'] if @current_link
      when MAP
        # Guarded like MORPHISM: a <map> outside of a link is ignored
        # instead of raising NoMethodError on nil.
        @current_link['map'] = [] if @current_link
      when TYPE
        # A new <Type> element replaces any earlier value; its text is
        # collected (possibly in several chunks) by #characters.
        @current_link.delete('type') if @current_link
      end
    end

    # A text node. May be invoked several times for a single run of text,
    # so every branch appends rather than assigns.
    def characters(text)
      case @path.last
      when SYMBOL
        @current_symbol['text'] << text if @current_symbol
      when TEXT
        @current_axiom['text'] << text if @current_axiom
      when TYPE
        (@current_link['type'] ||= '') << text if @current_link
      end
    end

    # A closing tag.
    def end_element(name)
      @path.pop

      case name
      when ONTOLOGY
        callback(:ontology_end, @current_ontology)
        @current_ontology = nil
      when SYMBOL
        # Symbols directly below a <Hidden> element are neither reported
        # nor added to the axiom's symbol list.
        return if @path.last == 'Hidden'

        if @current_axiom
          # add to current axiom
          @current_axiom['symbols'] << @current_symbol['text']
        else
          # return the current symbol
          callback(:symbol, @current_symbol)
        end
        @current_symbol = nil
      when IMPAXIOMS
        @in_imp_axioms = false
      when AXIOMS
        @in_axioms = false
      when AXIOM
        # do not execute callbacks unless the axiom was inside
        # an <Axioms> element or an <ImpAxioms> element
        if @in_imp_axioms
          callback(:imported_axiom, @current_axiom)
        elsif @in_axioms
          callback(:axiom, @current_axiom)
        end
        @current_axiom = nil
      when LINK
        # return the current link
        callback(:link, @current_link)
        @current_link = nil
      end
    end

    # Error handler for parsing problems.
    #
    # Raises ParseException carrying the parser's message.
    def error(string)
      # Interpolation also copes with a non-String message
      raise ParseException, "cannot parse: #{string}"
    end

    private

    # Invokes the callback registered under +name+ with +args+, if any.
    def callback(name, args)
      block = @callbacks[name]
      block.call(args) if block
    end

    # Converts the attribute list handed over by the parser into a Hash.
    def attributes_hash(attributes)
      Hash[attributes]
    end

  end

end