# ontology_parser.rb revision 2cecff8c40ff62af965403b167f8143e1969d8ef
# Event-driven (SAX) parser for ontology XML documents.
#
# The document is read with Nokogiri's SAX parser. While reading, the
# Listener invokes user supplied callbacks for the elements it knows about.
module OntologyParser

  # Raised by Listener#error when the SAX parser reports a problem.
  # Derives from StandardError so that a plain `rescue` is able to handle it.
  class ParseException < StandardError; end

  # Parses the given input and executes the callbacks for each known element.
  #
  # input     - the XML to parse; handed unchanged to
  #             Nokogiri::XML::SAX::Parser#parse.
  # callbacks - an object responding to #[] (e.g. a Hash) that maps event
  #             names to objects responding to #call. Events without an
  #             entry are skipped silently. Every callback receives exactly
  #             one argument. Events emitted by the Listener:
  #               :root, :ontology, :import - Hash of the element attributes
  #               :ontology_end             - Hash of the closed DGNode's
  #                                           attributes
  #               :symbol                   - symbol Hash (attributes + 'text')
  #               :axiom, :imported_axiom   - axiom Hash (attributes, 'text',
  #                                           'symbols', 'symbol_hashes')
  #               :theorem                  - theorem Hash (same keys as axiom)
  #               :link                     - link Hash (attributes and, when
  #                                           present, 'type', 'morphism',
  #                                           'map')
  #
  # Returns whatever Nokogiri::XML::SAX::Parser#parse returns.
  # Raises ParseException when the parser reports an error.
  def self.parse(input, callbacks)
    # Create a new parser that reports to our listener
    parser = Nokogiri::XML::SAX::Parser.new(Listener.new(callbacks))

    # Feed the parser some XML
    parser.parse(input)
  end

  # Listener for the SAX Parser
  class Listener < Nokogiri::XML::SAX::Document
    # Names of the XML elements this listener reacts to.
    MAP       = "map"
    ROOT      = 'DGraph'
    ONTOLOGY  = 'DGNode'
    SYMBOL    = 'Symbol'
    AXIOM     = 'Axiom'
    THEOREM   = 'Theorem'
    IMPAXIOMS = 'ImpAxioms'
    AXIOMS    = 'Axioms'
    THEOREMS  = 'Theorems'
    LINK      = 'DGLink'
    TEXT      = 'Text'
    TYPE      = 'Type'
    MORPHISM  = 'GMorphism'
    IMPORT    = 'Reference'
    # Symbols whose direct parent is this element are never reported.
    HIDDEN    = 'Hidden'

    # callbacks - maps event names to callables, see OntologyParser.parse.
    def initialize(callbacks)
      @callbacks        = callbacks
      @path             = []     # names of the currently open elements
      @current_ontology = nil
      @current_symbol   = nil
      @current_axiom    = nil
      @current_theorem  = nil
      @current_link     = nil
      @in_imp_axioms    = false
      @in_axioms        = false
      @in_theorems      = false
      @type_text        = nil    # text collected for the open Type element
    end

    # An opening tag.
    #
    # name       - the element name.
    # attributes - the element attributes as a list of [name, value] pairs.
    def start_element(name, attributes = [])
      @path << name
      case name
      when ROOT
        callback(:root, attributes_hash(attributes))
      when ONTOLOGY
        # remembered so that :ontology_end can report which node was closed
        @current_ontology = attributes_hash(attributes)
        callback(:ontology, @current_ontology)
      when IMPORT
        callback(:import, attributes_hash(attributes))
      when SYMBOL
        @current_symbol = attributes_hash(attributes)
        @current_symbol['text'] = ''
        if @current_link && @current_link['map']
          @current_link['map'] << @current_symbol
        end
        # The hash is stored before its text is known. The text is appended
        # in place by #characters, so the stored reference sees it later.
        if @current_axiom
          @current_axiom['symbol_hashes'] << @current_symbol
        elsif @current_theorem
          @current_theorem['symbol_hashes'] << @current_symbol
        end
      when IMPAXIOMS
        @in_imp_axioms = true
      when AXIOMS
        @in_axioms = true
      when AXIOM
        @current_axiom = new_sentence(attributes)
      when THEOREMS
        @in_theorems = true
      when THEOREM
        @current_theorem = new_sentence(attributes)
      when LINK
        @current_link = attributes_hash(attributes)
      when MORPHISM
        @current_link['morphism'] = attributes_hash(attributes)['name'] if @current_link
      when MAP
        # ignore a map element that is not inside a link
        @current_link['map'] = [] if @current_link
      when TYPE
        # the first text chunk of this element starts a fresh value
        @type_text = nil
      end
    end

    # A text node. The SAX parser may deliver the text of a single node in
    # several consecutive calls, therefore all values are accumulated.
    def characters(text)
      case @path.last
      when SYMBOL
        @current_symbol['text'] << text if @current_symbol
      when TEXT
        @current_axiom['text'] << text if @current_axiom
        @current_theorem['text'] << text if @current_theorem
      when TYPE # there is no other use of TYPE in this code
        if @current_link
          @type_text = @type_text ? @type_text + text : text
          @current_link['type'] = @type_text
        end
      end
    end

    # A closing tag.
    def end_element(name)
      # after this, @path.last is the parent of the element being closed
      @path.pop

      case name
      when ONTOLOGY
        callback(:ontology_end, @current_ontology)
        @current_ontology = nil
      when SYMBOL
        finish_symbol unless @path.last == HIDDEN
        @current_symbol = nil
      when IMPAXIOMS
        @in_imp_axioms = false
      when AXIOMS
        @in_axioms = false
      when AXIOM
        # do not execute callbacks
        # unless the axiom was inside an
        # <Axioms> element or an <ImpAxioms>
        # element
        if @in_imp_axioms
          callback(:imported_axiom, @current_axiom)
        elsif @in_axioms
          callback(:axiom, @current_axiom)
        end
        @current_axiom = nil
      when THEOREMS
        @in_theorems = false
      when THEOREM
        # theorems are only reported inside a <Theorems> element
        callback(:theorem, @current_theorem) if @in_theorems
        @current_theorem = nil
      when LINK
        # return the current link
        callback(:link, @current_link)
        @current_link = nil
      end
    end

    # Error handler for parsing problems.
    #
    # Raises ParseException carrying the given message.
    def error(string)
      raise ParseException, "cannot parse: #{string}"
    end

    private

    # Invokes the callback registered for `name`, if there is one.
    def callback(name, args)
      block = @callbacks[name]
      block.call(args) if block
    end

    # Converts the list of [name, value] attribute pairs into a Hash.
    def attributes_hash(attributes)
      Hash[attributes]
    end

    # Builds the Hash that collects the data of an axiom or a theorem.
    def new_sentence(attributes)
      sentence = attributes_hash(attributes)
      sentence['symbols']       = []
      sentence['symbol_hashes'] = []
      sentence['text']          = ''
      sentence
    end

    # Delivers a completed symbol: its text is added to the enclosing axiom
    # or theorem; otherwise the symbol is reported through the :symbol
    # callback, unless it belongs to the map of a link.
    def finish_symbol
      if @current_axiom
        @current_axiom['symbols'] << @current_symbol['text']
      elsif @current_theorem
        @current_theorem['symbols'] << @current_symbol['text']
      else
        in_mapping_link = @current_link && @current_link['map']
        callback(:symbol, @current_symbol) unless in_mapping_link
      end
    end
  end
end
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen