IRI.hs revision 08444813af2fade39f88fc4bd7c6465452915668
697e63e30aa3c309a1ef1f9357745111f8dfc5a9Christian Maeder{-# LANGUAGE CPP, DeriveDataTypeable #-}
697e63e30aa3c309a1ef1f9357745111f8dfc5a9Christian MaederModule : $Header$
697e63e30aa3c309a1ef1f9357745111f8dfc5a9Christian MaederCopyright : (c) DFKI GmbH 2012
98890889ffb2e8f6f722b00e265a211f13b5a861Corneliu-Claudiu ProdescuLicense : GPLv2 or higher, see LICENSE.txt
697e63e30aa3c309a1ef1f9357745111f8dfc5a9Christian MaederMaintainer : Eugen Kuksa <eugenk@informatik.uni-bremen.de>
697e63e30aa3c309a1ef1f9357745111f8dfc5a9Christian MaederStability : provisional
697e63e30aa3c309a1ef1f9357745111f8dfc5a9Christian MaederPortability : portable
697e63e30aa3c309a1ef1f9357745111f8dfc5a9Christian MaederThis module defines functions for handling IRIs. It is substantially the
697e63e30aa3c309a1ef1f9357745111f8dfc5a9Christian Maedersame as the Network.URI module by Graham Klyne, but is extended to IRI
697e63e30aa3c309a1ef1f9357745111f8dfc5a9Christian Maedersupport [2] and even Manchester-Syntax-IRI [3], [4] and CURIE [5].
33f5512f0538c5ec4141205a8440ff6ba9e96139Christian MaederFour methods are provided for parsing different
33f5512f0538c5ec4141205a8440ff6ba9e96139Christian Maederkinds of IRI string (as noted in [1], [2]):
33f5512f0538c5ec4141205a8440ff6ba9e96139Christian Maeder'parseIRIReference',
33f5512f0538c5ec4141205a8440ff6ba9e96139Christian Maeder'parseRelativeReference' and
33f5512f0538c5ec4141205a8440ff6ba9e96139Christian Maeder'parseAbsoluteIRI'.
33f5512f0538c5ec4141205a8440ff6ba9e96139Christian MaederAn additional method is provided for parsing an abbreviated IRI according to
33f5512f0538c5ec4141205a8440ff6ba9e96139Christian Maeder[3], [4]: 'parseIRIManchester' and according to [5]: 'parseIRICurie'
33f5512f0538c5ec4141205a8440ff6ba9e96139Christian MaederFurther, four methods are provided for classifying different
002961cfb5c53204887101239d2a47c83d596585Christian Maederkinds of IRI string (as noted in [1], [2]):
33f5512f0538c5ec4141205a8440ff6ba9e96139Christian Maeder'isIRIReference',
33f5512f0538c5ec4141205a8440ff6ba9e96139Christian Maeder'isRelativeReference' and
33f5512f0538c5ec4141205a8440ff6ba9e96139Christian Maeder'isAbsoluteIRI'.
db6729e623b4053149084ccf4b35e5308ac7e359Christian MaederAdditionally, classification of full, abbreviated and simple IRI is provided
33f5512f0538c5ec4141205a8440ff6ba9e96139Christian Maederby 'isIRIManchester' and 'isIRICurie'.
697e63e30aa3c309a1ef1f9357745111f8dfc5a9Christian MaederThe abbreviated syntaxes [3], [4], [5] provide three different kinds of IRI: full,
697e63e30aa3c309a1ef1f9357745111f8dfc5a9Christian Maederabbreviated, simple, expandedAbbreviated and expandedSimple. An existing element
92dc581bf568c9e225aa9d0570ab0a4b6ebdab69Christian Maederof type IRI can be classified in one of those kinds with 'iriType'.
697e63e30aa3c309a1ef1f9357745111f8dfc5a9Christian MaederMost of the code has been copied from the Network.URI implementation,
b1f2971b105e6da3f4722315e0a0e2abef96e66fcmaederbut it is extended to IRI, Manchester-syntax and CURIE.
8acac20a235839e60ea2d43709fce47de1c68bc1Christian Maeder(3) <http://www.w3.org/TR/2009/NOTE-owl2-manchester-syntax-20091027/>
0789323dfca89bae8f710da5bba20220b9af2feaChristian Maeder(4) <http://www.w3.org/TR/2008/REC-rdf-sparql-query-20080115/>
720eeee7c9d8442093c8d05bed743193eee906e0Christian Maeder -- * The IRI type
720eeee7c9d8442093c8d05bed743193eee906e0Christian Maeder , IRIAuth (..)
5dc46f6d0fdd8747d730f9e79a93978145ed43bbChristian Maeder , IRIType (..)
0789323dfca89bae8f710da5bba20220b9af2feaChristian Maeder -- * Conversion
0789323dfca89bae8f710da5bba20220b9af2feaChristian Maeder , simpleIdToIRI
5dc46f6d0fdd8747d730f9e79a93978145ed43bbChristian Maeder , parseIRIReference
5dc46f6d0fdd8747d730f9e79a93978145ed43bbChristian Maeder , parseRelativeReference
5dc46f6d0fdd8747d730f9e79a93978145ed43bbChristian Maeder , parseAbsoluteIRI
720eeee7c9d8442093c8d05bed743193eee906e0Christian Maeder , parseIRICurie
11c3a215d5cf043181e83929f1ce214df65cb587Christian Maeder , parseIRIManchester
92dc581bf568c9e225aa9d0570ab0a4b6ebdab69Christian Maeder -- * Test for strings containing various kinds of IRI
e49fd57c63845c7806860a9736ad09f6d44dbaedChristian Maeder , isIRIReference
db6729e623b4053149084ccf4b35e5308ac7e359Christian Maeder , isRelativeReference
db6729e623b4053149084ccf4b35e5308ac7e359Christian Maeder , isAbsoluteIRI
697e63e30aa3c309a1ef1f9357745111f8dfc5a9Christian Maeder , isIRIManchester
002961cfb5c53204887101239d2a47c83d596585Christian Maeder , isIPv6address
002961cfb5c53204887101239d2a47c83d596585Christian Maeder , isIPv4address
e49fd57c63845c7806860a9736ad09f6d44dbaedChristian Maeder -- * Relative IRIs
b7bba589fb78fe61379de93d531556c00da36cd9Christian Maeder , nonStrictRelativeTo
b7bba589fb78fe61379de93d531556c00da36cd9Christian Maeder , relativeFrom
002961cfb5c53204887101239d2a47c83d596585Christian Maeder -- * Operations on IRI strings
b1f2971b105e6da3f4722315e0a0e2abef96e66fcmaeder {- | Support for putting strings into IRI-friendly
697e63e30aa3c309a1ef1f9357745111f8dfc5a9Christian Maeder escaped format and getting them back again. -}
9f7cd2db42cbc88253af8034f8d1fb83e1ecd4cdChristian Maeder , iriToString
01ddc4cad68fa84b4e9dd41089ad876329bae5b0Christian Maeder , iriToStringUnsecure
9f7cd2db42cbc88253af8034f8d1fb83e1ecd4cdChristian Maeder , iriToStringShort
1596a4d2cc01bff500afdd3789a43ec93210e81fChristian Maeder , iriToStringShortUnsecure
1596a4d2cc01bff500afdd3789a43ec93210e81fChristian Maeder , isReserved, isUnreserved
002961cfb5c53204887101239d2a47c83d596585Christian Maeder , isAllowedInIRI, isUnescapedInIRI
429df04296fa571432f62cbfad6855e1420e0fd6Christian Maeder , escapeIRIChar
11c3a215d5cf043181e83929f1ce214df65cb587Christian Maeder , escapeIRIString
01ddc4cad68fa84b4e9dd41089ad876329bae5b0Christian Maeder , unEscapeString
fbc1e851413f39999a00a0d3be0edf75bbf42007Ewaryst Schulz -- * Parser combinators, special additions to export list
fbc1e851413f39999a00a0d3be0edf75bbf42007Ewaryst Schulz , iriReference
b410420153cc9ac37fb4ebb86699cba7fa19bc35Christian Maeder , irelativeRef
697e63e30aa3c309a1ef1f9357745111f8dfc5a9Christian Maeder , absoluteIRI
db6729e623b4053149084ccf4b35e5308ac7e359Christian Maeder , iriManchester
e49fd57c63845c7806860a9736ad09f6d44dbaedChristian Maeder -- * IRI Normalization functions
54a535fb81b928ac8f99a11bdcfa8998533204a5Christian Maeder , expandCurie
b410420153cc9ac37fb4ebb86699cba7fa19bc35Christian Maeder , normalizeCase
b410420153cc9ac37fb4ebb86699cba7fa19bc35Christian Maeder , normalizeEscape
b410420153cc9ac37fb4ebb86699cba7fa19bc35Christian Maeder , normalizePathSegments
db6729e623b4053149084ccf4b35e5308ac7e359Christian Maeder ( GenParser, ParseError
db6729e623b4053149084ccf4b35e5308ac7e359Christian Maeder , parse, (<|>), (<?>), try
697e63e30aa3c309a1ef1f9357745111f8dfc5a9Christian Maeder , option, many, many1, count, notFollowedBy
e49fd57c63845c7806860a9736ad09f6d44dbaedChristian Maeder , char, satisfy, oneOf, string, digit, eof
a43c1a7fa08c12524415386aa13a566cc9e53a4fChristian Maederimport Control.Monad (MonadPlus (..))
697e63e30aa3c309a1ef1f9357745111f8dfc5a9Christian Maederimport Data.Char (ord, chr, isHexDigit, toLower, toUpper, digitToInt)
e49fd57c63845c7806860a9736ad09f6d44dbaedChristian Maederimport Numeric (showIntAtBase)
697e63e30aa3c309a1ef1f9357745111f8dfc5a9Christian Maederimport Data.Typeable (Typeable)
e2e5830e2562de2f9a7daa31704fca25285180f0Ewaryst Schulzimport Data.Map (Map, findWithDefault)
8acac20a235839e60ea2d43709fce47de1c68bc1Christian Maeder-- * The IRI datatype
, iriAuthority :: Maybe IRIAuth -- ^ @\/\/anonymous\@www.haskell.org:42@
, iriRegName :: String -- ^ @www.haskell.org@
-- | Parses a CURIE <http://www.w3.org/TR/rdfa-core/#s_curies>
-- http://www.w3.org/TR/2009/REC-xml-names-20091208/#NT-NCName
[[[Above was a comment originally in GHC Network/URI.hs:
alphaChar = satisfy isAlphaChar -- or: Parsec.letter ?
digitChar = satisfy isDigitChar -- or: Parsec.digit ?
hexDigitChar = satisfy isHexDigitChar -- or: Parsec.hexDigit ?
> "http://example.com/Root/sub1/name2#frag"
> `relativeFrom` "http://example.com/Root/sub2/name2#frag"
> == "../sub1/name2#frag"
(i.e. the result always ends with '/')