# File lists (names suggest publishing/deployment; their consumers are not all
# visible in this part of the file).
PUB_SYNCFILES = registry-expanded.turtle registry.rdf registry.turtle README
DEPLOY_FILES = registry.rdf registry-expanded.rdf $(DEPLOY_DIR)/logics/CommonLogic/index
# Expanded dataset in N-Triples form.
EXPANDED_DATASET = registry-expanded.ntriples
# N3 rule sets handed to cwm when a dataset is expanded.
DOL_INFERENCES = $(LIB)/dol-inferences-beyond-owl.n3
OWL_INFERENCES = $(LIB)/some-owl-inferences.n3
STANDARD_VOCAB = ../syntax/standardization.owl
# Some things Eyeball reports missing, even though they actually exist.
# This comment sits on its own line on purpose: make keeps the whitespace that
# precedes a trailing '#' comment as part of the variable's value.
EYEBALL_FIXES = eyeball-fixes.rdf
# Recursive ('=') assignment is kept because several of the referenced
# variables are defined outside the lines visible here.
EYEBALL_DEFAULT_ARGS = -assume $(DOL_RDF_VOCAB) -assume $(STANDARD_VOCAB) -assume $(TODO_VOCAB) -assume $(DCTERMS) -assume $(SKOS) -assume $(OWL) -assume $(EYEBALL_FIXES)
# Aggregate check target; it names no file, so it is declared phony.
# Eyeball must be last, as it usually fails.
# NOTE(review): prerequisite order is only honoured in a serial build; under
# `make -j` both prerequisites may be started concurrently.
.PHONY: test
test: registry-with-tbox.rdf registry.eyeball
# validate RDF with Eyeball
# Registry TBox (the OWL ontology that defines the vocabulary of the graph) and
# ABox (the actual graph) combined, for the purpose of validating whether the
# ABox is consistent with respect to the TBox. Validation is done manually; it
# is known to work with HermiT in Protégé 4.1.
# NOTE(review): no recipe for this target is visible here; as written, this
# line only declares the prerequisite.
registry-with-tbox.rdf: registry-expanded.ntriples
# Expand a core dataset (%.turtle) into its expanded dataset
# (%-expanded.ntriples). Steps as documented:
# 1. apply the N3 ruleset to expand the core dataset to the expanded dataset; in detail:
#      cwm --n3 $<                       # parse the input as N3 (a superset of Turtle)
#      --rdf $(BASE)/syntax/dol-rdf.owl  # parse the ontology as RDF/XML
#      --n3 $(DOL_INFERENCES)            # load DOL-specific inference rules (particularly those
#                                        # that can't be represented in OWL, but only in FOL)
#      $(OWL_INFERENCES)                 # load general OWL (and RDFS and RDF) inference rules (just a
#                                        # relevant subset of the actual rules, implemented ad hoc
#                                        # according to our needs)
#      --think                           # apply the inference rules until they lead to no more expansions
#      --ntriples                        # create N-Triples output for easy linewise post-processing
#      $(CWM_DEFAULT_ARGS)
# 2. remove leading whitespace
# 3. filter out any triples whose subjects are not from the namespace of this
#    dataset. This includes blank nodes, which are just used for editorial
#    comments so far.
# 4. filter out triples containing blank nodes in any component. Many blank
#    nodes are not relevant for the dataset (e.g. artifacts from the RDF
#    serialization of the OWL ontology, editorial comments, etc.), and those
#    that are relevant are not supported by our approach.
# 5. filter out triples with certain annotation properties (here: editorial comments)
#
# NOTE(review): the recipe below has no whitespace-stripping stage (step 2) and
# no positive namespace filter (step 3); its first grep only drops lines whose
# subject starts with <http://purl.net/dol/1.0/rdf#. Confirm whether stages
# were lost from this pipeline.
#
# The pipeline result is redirected into the target so that the rule actually
# produces $@ and the command no longer ends in a dangling line continuation.
%-expanded.ntriples: %.turtle $(DOL_INFERENCES) $(OWL_INFERENCES)
	cwm --n3 $< --rdf $(BASE)/syntax/dol-rdf.owl --n3 $(DOL_INFERENCES) $(OWL_INFERENCES) --think --ntriples $(CWM_DEFAULT_ARGS) \
		| grep -v '^<http://purl\.net/dol/1\.0/rdf#' \
		| grep -vE '^<[^>]+>[^<]+(_:|<http://example\.org/todo#)' \
		> $@
# convert expanded N-Triples datasets to Turtle
# NOTE(review): the rule header that owns this recipe line is not visible here,
# and the command ends in a line continuation whose next pipeline stage is not
# visible either. Each line also begins with a commit-hash/author annotation
# that is not make syntax; confirm against the pristine file.
# The awk program prints the first field of every input line with the first
# <...> pair replaced by its contents, i.e. the angle brackets are removed.
# gensub() is a GNU awk extension, so this presumably requires gawk.
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# Output all distinct subject URIs (and blank node IDs) that occur in the dataset.
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# Omit any namespaces we are not interested in deploying.
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski awk '{print gensub("<([^>]+)>", "\\1", 1, $$1)}' $< \
# NOTE(review): the rule header that owns these recipe lines is not visible
# here, and the body of the `while` loop (including its `done`) is missing.
# Each line also begins with a commit-hash/author annotation that is not make
# syntax; confirm against the pristine file.
# For every input line that starts with http://purl.net/dol/ followed by a
# path, the perl one-liner prints the directory part of that path (zero or
# more "segment/" groups, without the final path component); the loop then
# reads those directory paths one per line.
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# Make sure that all directories exist in whose paths we have resources.
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# We use one directory path as a representative for generating all of them.
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski perl -lne 'print $$1 if m@^http://purl\.net/dol/((?:[^/]+/)*)[^/]+@' $< \
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski | while read directory ; do \
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# Use the URI of one resource as a representative for generating all split files
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# For each resource, …
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# 1. read all triples having this resource as a subject
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# 2. output them to a self-contained RDF/XML file named after the resource
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski$(DEPLOY_DIR)/logics/CommonLogic/index: resource-uris $(DEPLOY_DIR)/logics
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski out_file=$(DEPLOY_DIR)/$${main_resource#$(REGISTRY_NS)} ; \
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski if [[ -d $$out_file ]]; then out_file=$${out_file}/index; fi ; \
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski grep "$${resource_pattern}" $(EXPANDED_DATASET) \
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski while read resource ; do \
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski done < $< ; \