BASE = ..
SYNCDIRS =
EYEBALL_FIXES = eyeball-fixes.rdf # some things Eyeball reports as missing, even though they actually exist
EYEBALL_DEFAULT_ARGS = -assume $(DOL_RDF_VOCAB) -assume $(STANDARD_VOCAB) -assume $(TODO_VOCAB) -assume $(DCTERMS) -assume $(SKOS) -assume $(OWL) -assume $(EYEBALL_FIXES)
all: $(DEPLOY_FILES)
# Eyeball must be last, as it usually fails
# validate RDF with Eyeball
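# A hypothetical, self-contained sketch of such a validation step; the target
# name "eyeball-check" and the use of $(DEPLOY_FILES) as the files to check are
# assumptions, not the project's actual recipe. Eyeball is a Jena command-line
# tool, so its jars must be on the Java classpath.
.PHONY: eyeball-check
eyeball-check: $(DEPLOY_FILES)
	for f in $^ ; do \
	  java jena.eyeball -check $$f $(EYEBALL_DEFAULT_ARGS) ; \
	done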
# Registry TBox (the OWL ontology that defines the vocabulary of the graph) and ABox (the actual graph) combined, for manually validating whether the ABox is consistent w.r.t. the TBox (known to work with HermiT in Protégé 4.1).
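# A hypothetical sketch of producing such a combined file with cwm; the file
# names registry-combined.owl and registry.ttl are assumptions, only the TBox
# path is the same one used in the expansion rule below.
registry-combined.owl: registry.ttl $(BASE)/syntax/dol-rdf.owl
	cwm --rdf $(BASE)/syntax/dol-rdf.owl --n3 $< --rdf > $@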
# 1. apply the N3 ruleset to expand the core dataset to the expanded dataset; in detail:
# cwm --n3 $< # parse the input as N3 (a superset of Turtle)
# --rdf $(BASE)/syntax/dol-rdf.owl # parse the ontology as RDF/XML
# --n3 $(DOL_INFERENCES) # load DOL-specific inference rules (particularly those that can't be represented in OWL, but only in FOL)
# $(OWL_INFERENCES) # load general OWL (and RDFS and RDF) inference rules (just a relevant subset of the actual rules, implemented ad hoc according to our needs)
# --think # apply the inference rules until they lead to no more expansions
# --ntriples # create N-Triples output for easy linewise post-processing
# $(CWM_DEFAULT_ARGS)
# 2. remove leading whitespace
# 3. filter out any triples whose subjects are not from the namespace of this dataset; this also drops blank-node subjects, which are so far only used for editorial comments
# 4. filter out triples containing blank nodes in any component. Many blank nodes are not relevant for the dataset (e.g. artifacts from the RDF serialization of the OWL ontology, editorial comments, etc.), and those that are relevant are not supported by our approach.
# 5. filter out triples with certain annotation properties (here: editorial comments); a sketch of steps 2, 4 and 5 follows after this recipe
cwm --n3 $< --rdf $(BASE)/syntax/dol-rdf.owl --n3 $(DOL_INFERENCES) $(OWL_INFERENCES) --think --ntriples $(CWM_DEFAULT_ARGS) \
| grep '^<http://purl\.net/dol/' \
| grep -v '^<http://purl\.net/dol/1\.0/rdf#' \
> $@
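# An illustrative sketch of the remaining post-processing stages (steps 2, 4
# and 5 above) as a separate filter rule; the %.filtered.nt pattern and the
# concrete annotation property URI are assumptions used only for illustration,
# and the blank-node filter is deliberately crude (it drops every line
# containing "_:").
%.filtered.nt: %.nt
	sed -e 's/^[[:space:]]*//' $< \
	| grep -v '_:' \
	| grep -v '<http://www.w3.org/2000/01/rdf-schema#comment>' \
	> $@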
# convert expanded N-Triples datasets to Turtle
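# A hypothetical sketch of such a conversion using the Raptor "rapper" tool;
# the %.ttl/%.nt pattern is only an illustration (the actual rule and its tool
# choice may differ), and any N-Triples-to-Turtle converter would do.
%.ttl: %.nt
	rapper -i ntriples -o turtle $< > $@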
# Output all distinct subject URIs (and blank node IDs) that occur in the dataset.
# Omit any namespaces we are not interested in deploying.
awk '{print gensub("<([^>]+)>", "\\1", 1, $$1)}' $< \
| sort \
| uniq \
> $@
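# An illustrative, self-contained variant of the rule above with an explicit
# namespace filter, as announced in the comment; the target name subjects.txt,
# the prerequisite registry-expanded.nt and the excluded OWL namespace are
# assumptions made for the sake of the example.
subjects.txt: registry-expanded.nt
	awk '{print gensub("<([^>]+)>", "\\1", 1, $$1)}' $< \
	| grep -v '^http://www\.w3\.org/2002/07/owl#' \
	| sort -u \
	> $@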
# Make sure that all directories in whose paths we have resources actually exist.
# We use one directory path as a representative for generating all of them.
| sort \
| uniq \
| while read directory ; do \
echo $$directory ; \
mkdir -p "$$directory" ; \
done
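# A hypothetical, self-contained sketch of the same idea; the phony target
# name and the input file directories.txt (one directory path per line) are
# assumptions. The original instead uses one directory path as a
# representative target, as described above.
.PHONY: ensure-directories
ensure-directories: directories.txt
	sort -u $< \
	| while read directory ; do \
	    mkdir -p "$$directory" ; \
	  done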
# Use the URI of one resource as a representative for generating all split files
# For each resource, …
# 1. read all triples having this resource as a subject
resource_pattern=$$1 ; \
main_resource=$$2 ; \
> $$out_file ; \
} ; \
while read resource ; do \
echo $$resource ; \
done < $< ; \
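# An illustrative, self-contained sketch of the splitting step; the file names
# registry-expanded.nt and subjects.txt, the output directory "split" and the
# file-name mangling are assumptions. For every subject URI listed in
# subjects.txt, collect the triples having it as their subject into a file of
# its own.
.PHONY: split-files
split-files: registry-expanded.nt subjects.txt
	mkdir -p split
	while read resource ; do \
	  out_file="split/$$(echo "$$resource" | sed -e 's|[/:#]|_|g').nt" ; \
	  grep "^<$$resource>" $< > "$$out_file" ; \
	done < subjects.txt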