IRI.hs revision ea8e98e298f33f9362293f392c8fb192722b8904
57221209d11b05aa0373cc3892d5df89ba96ebf9Christian Maeder{-# LANGUAGE CPP, DeriveDataTypeable #-}
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly--------------------------------------------------------------------------------
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly-- |
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly-- Module : Common.IRI
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly-- Copyright : (c) 2004, Graham Klyne
98890889ffb2e8f6f722b00e265a211f13b5a861Corneliu-Claudiu Prodescu-- License : BSD-style (see end of this file)
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly--
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly-- Maintainer : Eugen Kuksa <eugenk@informatik.uni-bremen.de>
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly-- Stability : provisional
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly-- Portability : portable
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly--
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- This module defines functions for handling IRIs. It is substantially the
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly-- same as the Network.URI module by Graham Klyne, but is extended to IRI
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly-- support [2] and even Manchester-Syntax-IRI [3], [4].
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly--
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly-- Four methods are provided for parsing different
57221209d11b05aa0373cc3892d5df89ba96ebf9Christian Maeder-- kinds of IRI string (as noted in [1], [2]):
66bc8d6e69cde43f1ccbeb76104cf7b8038acd6cChristian Maeder-- 'parseIRI',
66bc8d6e69cde43f1ccbeb76104cf7b8038acd6cChristian Maeder-- 'parseIRIReference',
e90dc723887d541f809007ae81c9bb73ced9592eChristian Maeder-- 'parseRelativeReference' and
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder-- 'parseAbsoluteIRI'.
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly--
56899f6457976a2ee20f6a23f088cb5655b15715Liam O'Reilly-- An addotional method is provided for parsing an abbreviated IRI according to [3], [4]:
66bc8d6e69cde43f1ccbeb76104cf7b8038acd6cChristian Maeder-- 'parseIRIManchester'
a00461fcf7432205a79a0f12dbe6c1ebc58bc000Christian Maeder--
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- Further, four methods are provided for classifying different
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- kinds of IRI string (as noted in [1], [2]):
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly-- 'isIRI',
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder-- 'isIRIReference',
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder-- 'isRelativeReference' and
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder-- 'isAbsoluteIRI'.
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly--
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder-- Additionally, classification of full, abbreviated and simple IRI is provided
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder-- by
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- 'isIRIManchester'.
c0833539c8cf577dd3f2497792fbdd818442744cChristian Maeder--
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- The Manchester-syntax [3], [4] provdies three different kinds of IRI: full,
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- abbreviated and simple. An existing element of type IRI can be classified in
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder-- one of those kinds with
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- 'iriType'.
fa373bc327620e08861294716b4454be8d25669fChristian Maeder--
036ecbd8f721096321f47cf6a354a9d1bf3d032fChristian Maeder-- Most of the code has been copied from the Network.URI implementation,
fa373bc327620e08861294716b4454be8d25669fChristian Maeder-- but it is extended to IRI and Manchester-syntax.
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder--
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly--
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly-- References
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly--
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly-- (1) <http://www.ietf.org/rfc/rfc3986.txt>
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly--
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- (2) <http://www.ietf.org/rfc/rfc3987.txt>
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly--
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder-- (3) <http://www.w3.org/TR/2009/NOTE-owl2-manchester-syntax-20091027/>
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder--
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder-- (4) <http://www.w3.org/TR/2008/REC-rdf-sparql-query-20080115/>
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder--
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder--------------------------------------------------------------------------------
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder
648fe1220044aac847acbdfbc4155af5556063ebChristian Maedermodule Common.IRI
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder (
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder -- * The IRI type
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder IRI(..)
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , IRIAuth(..)
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , IRIType(..)
4314e26a12954cb1c9be4dea10aa8103edac5bbbChristian Maeder , nullIRI
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder , iriType
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly -- * Conversion
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly , simpleIdToIRI
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder -- * Parsing
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , parseIRI
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , parseIRIReference
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , parseRelativeReference
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder , parseAbsoluteIRI
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , parseCurie
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly , parseIRICurie
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly , parseIRIManchester
fa373bc327620e08861294716b4454be8d25669fChristian Maeder
fa373bc327620e08861294716b4454be8d25669fChristian Maeder -- * Test for strings containing various kinds of IRI
fa373bc327620e08861294716b4454be8d25669fChristian Maeder , isIRI
fa373bc327620e08861294716b4454be8d25669fChristian Maeder , isIRIReference
fa373bc327620e08861294716b4454be8d25669fChristian Maeder , isRelativeReference
fa373bc327620e08861294716b4454be8d25669fChristian Maeder , isAbsoluteIRI
fa373bc327620e08861294716b4454be8d25669fChristian Maeder , isCurie
fa373bc327620e08861294716b4454be8d25669fChristian Maeder , isIRICurie
fa373bc327620e08861294716b4454be8d25669fChristian Maeder , isIRIManchester
fa373bc327620e08861294716b4454be8d25669fChristian Maeder , isIPv6address
fa373bc327620e08861294716b4454be8d25669fChristian Maeder , isIPv4address
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder -- * Relative IRIs
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder , relativeTo
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder , nonStrictRelativeTo
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder , relativeFrom
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly -- * Operations on IRI strings
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder -- | Support for putting strings into IRI-friendly
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder -- escaped format and getting them back again.
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder , iriToString
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , iriToStringUnsecure
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , isReserved, isUnreserved
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , isAllowedInIRI, isUnescapedInIRI
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , escapeIRIChar
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , escapeIRIString
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , unEscapeString
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder -- * Parser combinators, special additions to export list
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , iri
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , iriReference
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , irelativeRef
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , absoluteIRI
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder , iriCurie
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder , curie
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder , iriManchester
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder -- * IRI Normalization functions
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder , normalizeCase
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder , normalizeEscape
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder , normalizePathSegments
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder ) where
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maederimport Text.ParserCombinators.Parsec
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder ( GenParser, ParseError
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , parse, (<|>), (<?>), try
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder , option, many, many1, count, notFollowedBy
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , char, satisfy, oneOf, string, digit, eof
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , unexpected
4314e26a12954cb1c9be4dea10aa8103edac5bbbChristian Maeder )
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder
648fe1220044aac847acbdfbc4155af5556063ebChristian Maederimport Control.Monad (MonadPlus(..))
648fe1220044aac847acbdfbc4155af5556063ebChristian Maederimport Data.Char (ord, chr, isHexDigit, toLower, toUpper, digitToInt)
648fe1220044aac847acbdfbc4155af5556063ebChristian Maederimport Numeric (showIntAtBase)
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder
648fe1220044aac847acbdfbc4155af5556063ebChristian Maederimport Data.Typeable (Typeable)
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder
4314e26a12954cb1c9be4dea10aa8103edac5bbbChristian Maeder
648fe1220044aac847acbdfbc4155af5556063ebChristian Maederimport Common.Id
648fe1220044aac847acbdfbc4155af5556063ebChristian Maederimport Common.Lexer
648fe1220044aac847acbdfbc4155af5556063ebChristian Maederimport Common.Doc (text)
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reillyimport Common.DocUtils
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maederimport ATerm.Lib
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder------------------------------------------------------------
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder-- The IRI datatype
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder------------------------------------------------------------
e0f1794e365dd347e97b37d7d22b2fce27296fa1Christian Maeder
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder-- |Represents a general universal resource identifier using
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder-- its component parts.
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder--
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder-- For example, for the (full) IRI
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder--
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder-- > foo://anonymous@www.haskell.org:42/ghc?query#frag
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder--
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder-- or the abbreviated IRI
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder--
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder-- > prefix:abbrevPath
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder--
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder-- or the simple IRI
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder--
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder-- > abbrevPath
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder--
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder-- the components are:
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder--
648fe1220044aac847acbdfbc4155af5556063ebChristian Maederdata IRI = IRI
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder { iriScheme :: String -- ^ @foo:@
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , iriAuthority :: Maybe IRIAuth -- ^ @\/\/anonymous\@www.haskell.org:42@
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , iriPath :: String -- ^ local part @\/ghc@
bcd914850de931848b86d7728192a149f9c0108bChristian Maeder , iriQuery :: String -- ^ @?query@
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly , iriFragment :: String -- ^ @#frag@
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly , prefixName :: String -- ^ @prefix@
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly , abbrevPath :: String -- ^ @abbrevPath@
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly , iriPos :: Range
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly -- ^ prefix name part from "prefixName:path"
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly } deriving (Eq, Typeable, Ord)
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly-- |Type for authority value within a IRI
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reillydata IRIAuth = IRIAuth
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly { iriUserInfo :: String -- ^ @anonymous\@@
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly , iriRegName :: String -- ^ @www.haskell.org@
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly , iriPort :: String -- ^ @:42@
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly } deriving (Eq, Typeable, Ord, Show)
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reillydata IRIType = Full | Abbreviated | Simple
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly deriving (Eq, Show, Typeable, Ord)
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly-- |Blank IRI
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'ReillynullIRI :: IRI
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'ReillynullIRI = IRI
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly { iriScheme = ""
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly , iriAuthority = Nothing
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly , iriPath = ""
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly , iriQuery = ""
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly , iriFragment = ""
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly , prefixName = ""
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly , abbrevPath = ""
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly , iriPos = nullRange
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly }
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly-- |Returns Type of an IRI
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'ReillyiriType :: IRI -> IRIType
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'ReillyiriType i =
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly if (not.null) $ iriPath i then Full else
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly if null $ prefixName i then Simple else Abbreviated
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly-- IRI as instance of Show. Note that for secirity reasons, the default
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly-- behaviour is to suppress any iuserinfo field (see RFC3986, section 7.5).
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly-- This can be overridden by using iriToString directly with first
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly-- argument @id@ (noting that this returns a ShowS value rather than a string).
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly--
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly-- [[[Another design would be to embed the iuserinfo mapping function in
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder-- the IRIAuth value, with the default value suppressing iuserinfo formatting,
fa373bc327620e08861294716b4454be8d25669fChristian Maeder-- but providing a function to return a new IRI value with iuserinfo
fa373bc327620e08861294716b4454be8d25669fChristian Maeder-- data exposed by show.]]]
fa373bc327620e08861294716b4454be8d25669fChristian Maeder--
fa373bc327620e08861294716b4454be8d25669fChristian Maederinstance Show IRI where
fa373bc327620e08861294716b4454be8d25669fChristian Maeder showsPrec _ i = iriToString defaultUserInfoMap i
fa373bc327620e08861294716b4454be8d25669fChristian Maeder
fa373bc327620e08861294716b4454be8d25669fChristian Maederinstance GetRange IRI where
fa373bc327620e08861294716b4454be8d25669fChristian Maeder getRange = iriPos
fa373bc327620e08861294716b4454be8d25669fChristian Maeder
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maederinstance Pretty IRI where
fa373bc327620e08861294716b4454be8d25669fChristian Maeder pretty = text . show
fa373bc327620e08861294716b4454be8d25669fChristian Maeder
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian MaederdefaultUserInfoMap :: String -> String
fa373bc327620e08861294716b4454be8d25669fChristian MaederdefaultUserInfoMap uinf = user++newpass
fa373bc327620e08861294716b4454be8d25669fChristian Maeder where
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder (user,pass) = break (==':') uinf
fa373bc327620e08861294716b4454be8d25669fChristian Maeder newpass = if null pass || (pass == "@")
fa373bc327620e08861294716b4454be8d25669fChristian Maeder || (pass == ":@")
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder then pass
fa373bc327620e08861294716b4454be8d25669fChristian Maeder else ":...@"
fa373bc327620e08861294716b4454be8d25669fChristian Maeder
fa373bc327620e08861294716b4454be8d25669fChristian MaedeririToStringUnsecure :: IRI -> String
fa373bc327620e08861294716b4454be8d25669fChristian MaedeririToStringUnsecure i = (iriToString id i) ""
fa373bc327620e08861294716b4454be8d25669fChristian Maeder
fa373bc327620e08861294716b4454be8d25669fChristian Maeder-- |Converts a Simple_ID to an IRI
fa373bc327620e08861294716b4454be8d25669fChristian MaedersimpleIdToIRI :: SIMPLE_ID -> IRI
fa373bc327620e08861294716b4454be8d25669fChristian MaedersimpleIdToIRI sid = nullIRI { abbrevPath = tokStr sid
fa373bc327620e08861294716b4454be8d25669fChristian Maeder , iriPos = tokPos sid
fa373bc327620e08861294716b4454be8d25669fChristian Maeder }
fa373bc327620e08861294716b4454be8d25669fChristian Maeder
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder------------------------------------------------------------
fa373bc327620e08861294716b4454be8d25669fChristian Maeder-- Parse a IRI
fa373bc327620e08861294716b4454be8d25669fChristian Maeder------------------------------------------------------------
fa373bc327620e08861294716b4454be8d25669fChristian Maeder
fa373bc327620e08861294716b4454be8d25669fChristian Maeder-- |Turn a string containing an RFC3987 IRI into an 'IRI'.
fa373bc327620e08861294716b4454be8d25669fChristian Maeder-- Returns 'Nothing' if the string is not a valid IRI;
fa373bc327620e08861294716b4454be8d25669fChristian Maeder-- (an absolute IRI with optional fragment identifier).
fa373bc327620e08861294716b4454be8d25669fChristian Maeder--
fa373bc327620e08861294716b4454be8d25669fChristian MaederparseIRI :: String -> Maybe IRI
fa373bc327620e08861294716b4454be8d25669fChristian MaederparseIRI = parseIRIAny iri
fa373bc327620e08861294716b4454be8d25669fChristian Maeder
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder-- |Parse a IRI reference to an 'IRI' value.
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder-- Returns 'Nothing' if the string is not a valid IRI reference.
fa373bc327620e08861294716b4454be8d25669fChristian Maeder-- (an absolute or relative IRI with optional fragment identifier).
fa373bc327620e08861294716b4454be8d25669fChristian Maeder--
fa373bc327620e08861294716b4454be8d25669fChristian MaederparseIRIReference :: String -> Maybe IRI
fa373bc327620e08861294716b4454be8d25669fChristian MaederparseIRIReference = parseIRIAny iriReference
56899f6457976a2ee20f6a23f088cb5655b15715Liam O'Reilly
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder-- |Parse a relative IRI to an 'IRI' value.
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder-- Returns 'Nothing' if the string is not a valid relative IRI.
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder-- (a relative IRI with optional fragment identifier).
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder--
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian MaederparseRelativeReference :: String -> Maybe IRI
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian MaederparseRelativeReference = parseIRIAny irelativeRef
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder-- |Parse an absolute IRI to an 'IRI' value.
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder-- Returns 'Nothing' if the string is not a valid absolute IRI.
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder-- (an absolute IRI without a fragment identifier).
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder--
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian MaederparseAbsoluteIRI :: String -> Maybe IRI
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian MaederparseAbsoluteIRI = parseIRIAny absoluteIRI
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder-- |Turn a string containing a CURIE into an 'IRI'
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian MaederparseCurie :: String -> Maybe IRI
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian MaederparseCurie = parseIRIAny curie
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder-- |Turn a string containing an IRI or a CURIE into an 'IRI'.
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder-- Returns 'Nothing' if the string is not a valid IRI;
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- (an absolute IRI enclosed in '<' and '>' with optional fragment identifier
66bc8d6e69cde43f1ccbeb76104cf7b8038acd6cChristian Maeder-- or a CURIE).
66bc8d6e69cde43f1ccbeb76104cf7b8038acd6cChristian Maeder--
66bc8d6e69cde43f1ccbeb76104cf7b8038acd6cChristian MaederparseIRICurie :: String -> Maybe IRI
66bc8d6e69cde43f1ccbeb76104cf7b8038acd6cChristian MaederparseIRICurie = parseIRIAny iriCurie
66bc8d6e69cde43f1ccbeb76104cf7b8038acd6cChristian Maeder
66bc8d6e69cde43f1ccbeb76104cf7b8038acd6cChristian Maeder-- |Turn a string containing an IRI (by Manchester-syntax) into an 'IRI'.
66bc8d6e69cde43f1ccbeb76104cf7b8038acd6cChristian Maeder-- Returns 'Nothing' if the string is not a valid IRI;
ace03c3051e5c5144e43ae78cae73f6a29dde6d5Christian Maeder-- (an absolute IRI enclosed in '<' and '>' with optional fragment identifier,
ace03c3051e5c5144e43ae78cae73f6a29dde6d5Christian Maeder-- an abbreviated IRI or a simple IRI).
ace03c3051e5c5144e43ae78cae73f6a29dde6d5Christian Maeder--
ace03c3051e5c5144e43ae78cae73f6a29dde6d5Christian MaederparseIRIManchester :: String -> Maybe IRI
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian MaederparseIRIManchester = parseIRIAny iriManchester
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder-- |Test if string contains a valid IRI
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder-- (an absolute IRI with optional fragment identifier).
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder--
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyisIRI :: String -> Bool
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyisIRI = isValidParse iri
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- |Test if string contains a valid IRI reference
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- (an absolute or relative IRI with optional fragment identifier).
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder--
8e9c3881fb6e710b1e08bf5ac8ff9d393df2e74eChristian MaederisIRIReference :: String -> Bool
8e9c3881fb6e710b1e08bf5ac8ff9d393df2e74eChristian MaederisIRIReference = isValidParse iriReference
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder-- |Test if string contains a valid relative IRI
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder-- (a relative IRI with optional fragment identifier).
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly--
a00461fcf7432205a79a0f12dbe6c1ebc58bc000Christian MaederisRelativeReference :: String -> Bool
a00461fcf7432205a79a0f12dbe6c1ebc58bc000Christian MaederisRelativeReference = isValidParse irelativeRef
a00461fcf7432205a79a0f12dbe6c1ebc58bc000Christian Maeder
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder-- |Test if string contains a valid absolute IRI
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder-- (an absolute IRI without a fragment identifier).
a00461fcf7432205a79a0f12dbe6c1ebc58bc000Christian Maeder--
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian MaederisAbsoluteIRI :: String -> Bool
a00461fcf7432205a79a0f12dbe6c1ebc58bc000Christian MaederisAbsoluteIRI = isValidParse absoluteIRI
a00461fcf7432205a79a0f12dbe6c1ebc58bc000Christian Maeder
a00461fcf7432205a79a0f12dbe6c1ebc58bc000Christian Maeder-- |Test if string contains a valid IRI or CURIE
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder-- (an absolute IRI enclosed in '<' and '>' with optional fragment identifier
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder-- or a CURIE).
a00461fcf7432205a79a0f12dbe6c1ebc58bc000Christian Maeder--
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian MaederisIRICurie :: String -> Bool
a00461fcf7432205a79a0f12dbe6c1ebc58bc000Christian MaederisIRICurie = isValidParse iriCurie
bcd914850de931848b86d7728192a149f9c0108bChristian Maeder
a00461fcf7432205a79a0f12dbe6c1ebc58bc000Christian Maeder-- |Test if string contains a valid CURIE
a00461fcf7432205a79a0f12dbe6c1ebc58bc000Christian Maeder--
a00461fcf7432205a79a0f12dbe6c1ebc58bc000Christian MaederisCurie :: String -> Bool
a00461fcf7432205a79a0f12dbe6c1ebc58bc000Christian MaederisCurie = isValidParse curie
a00461fcf7432205a79a0f12dbe6c1ebc58bc000Christian Maeder
a00461fcf7432205a79a0f12dbe6c1ebc58bc000Christian Maeder-- |Test if string contains a valid IRI by Manchester-syntax
a00461fcf7432205a79a0f12dbe6c1ebc58bc000Christian Maeder-- (an absolute IRI enclosed in '<' and '>' with optional fragment identifier,
a00461fcf7432205a79a0f12dbe6c1ebc58bc000Christian Maeder-- an abbreviated IRI or a simple IRI).
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly--
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyisIRIManchester :: String -> Bool
bcd914850de931848b86d7728192a149f9c0108bChristian MaederisIRIManchester = isValidParse iriManchester
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
57221209d11b05aa0373cc3892d5df89ba96ebf9Christian Maeder-- |Test if string contains a valid IPv6 address
57221209d11b05aa0373cc3892d5df89ba96ebf9Christian Maeder--
bcd914850de931848b86d7728192a149f9c0108bChristian MaederisIPv6address :: String -> Bool
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyisIPv6address = isValidParse ipv6address
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder-- |Test if string contains a valid IPv4 address
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder--
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian MaederisIPv4address :: String -> Bool
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyisIPv4address = isValidParse ipv4address
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- Helper function for turning a string into a IRI
bcd914850de931848b86d7728192a149f9c0108bChristian Maeder--
bcd914850de931848b86d7728192a149f9c0108bChristian MaederparseIRIAny :: IRIParserDirect IRI -> String -> Maybe IRI
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyparseIRIAny parser iristr = case parseAll parser "" iristr of
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly Left _ -> Nothing
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly Right u -> Just u
57221209d11b05aa0373cc3892d5df89ba96ebf9Christian Maeder
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- Helper function to test a string match to a parser
57221209d11b05aa0373cc3892d5df89ba96ebf9Christian Maeder--
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyisValidParse :: IRIParserDirect a -> String -> Bool
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyisValidParse parser iristr = case parseAll parser "" iristr of
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly Left _ -> False
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly Right _ -> True
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyparseAll :: IRIParserDirect a -> String -> String -> Either ParseError a
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyparseAll parser filename iristr = parse newparser filename iristr
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly where
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly newparser =
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly do { res <- parser
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly ; eof
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly ; return res
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly }
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly------------------------------------------------------------
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- IRI parser body based on Parsec elements and combinators
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly------------------------------------------------------------
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- Parser parser type.
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- Currently
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reillytype IRIParserDirect a = GenParser Char () a
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reillytype IRIParser st a = GenParser Char st a
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- RFC3986, section 2.1
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly--
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- Parse and return a 'pct-encoded' sequence
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly--
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reillyescaped :: IRIParser st String
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reillyescaped =
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly do { char '%'
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly ; h1 <- hexDigitChar
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly ; h2 <- hexDigitChar
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly ; return $ ['%',h1,h2]
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly }
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- RFC3986, section 2.2
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly--
57221209d11b05aa0373cc3892d5df89ba96ebf9Christian Maeder-- |Returns 'True' if the character is a \"reserved\" character in a
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- IRI. To include a literal instance of one of these characters in a
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- component of a IRI, it must be escaped.
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly--
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyisReserved :: Char -> Bool
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyisReserved c = isGenDelims c || isSubDelims c
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly
dd7da1b5fedc05b92ba023ebd803e6f4a662503bChristian MaederisGenDelims :: Char -> Bool
dd7da1b5fedc05b92ba023ebd803e6f4a662503bChristian MaederisGenDelims c = c `elem` ":/?#[]@"
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyisSubDelims :: Char -> Bool
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'ReillyisSubDelims c = c `elem` "!$&'()*+,;="
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillysubDelims :: IRIParser st String
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillysubDelims = do { c <- satisfy isSubDelims ; return [c] }
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder-- RFC3986, section 2.3
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly--
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- |Returns 'True' if the character is an \"unreserved\" character in
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- a IRI. These characters do not need to be escaped in a IRI. The
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- only characters allowed in a IRI are either \"reserved\",
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- \"unreserved\", or an escape sequence (@%@ followed by two hex digits).
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly--
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyisUnreserved :: Char -> Bool
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyisUnreserved c = isAlphaNumChar c || (c `elem` "-_.~") || (isUcsChar c)
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyiunreservedChar :: IRIParser st String
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyiunreservedChar = do { c <- satisfy isUnreserved ; return [c] }
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
648fe1220044aac847acbdfbc4155af5556063ebChristian MaedeririWithPos :: IRIParser st IRI -> IRIParser st IRI
648fe1220044aac847acbdfbc4155af5556063ebChristian MaedeririWithPos parser = do
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder p <- getPos
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly i <- parser
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder q <- getPos
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder return $ i {iriPos = appRange (Range [p,q]) $ iriPos i}
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly{- BEGIN CURIE -}
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder-- |Parses an absolute IRI enclosed in '<', '>' or a CURIE
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian MaedeririCurie :: IRIParser st IRI
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyiriCurie = do
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly char '<'
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly i <- iri
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly char '>'
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly skipSmart
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder return i
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder <|> curie
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- |Parses an absolute or relative IRI enclosed in '<', '>' or a CURIE
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- see @iriReference@
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyiriReferenceCurie :: IRIParser st IRI
bcd914850de931848b86d7728192a149f9c0108bChristian MaedeririReferenceCurie = do
bcd914850de931848b86d7728192a149f9c0108bChristian Maeder char '<'
bcd914850de931848b86d7728192a149f9c0108bChristian Maeder i <- iri <|> irelativeRef
bcd914850de931848b86d7728192a149f9c0108bChristian Maeder char '>'
bcd914850de931848b86d7728192a149f9c0108bChristian Maeder skipSmart
bcd914850de931848b86d7728192a149f9c0108bChristian Maeder return i
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder <|> curie
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly-- |Parses a CURIE
57221209d11b05aa0373cc3892d5df89ba96ebf9Christian Maeder-- http://www.w3.org/TR/rdfa-core/#s_curies
648fe1220044aac847acbdfbc4155af5556063ebChristian Maedercurie :: IRIParser st IRI
648fe1220044aac847acbdfbc4155af5556063ebChristian Maedercurie = iriWithPos $ do
57221209d11b05aa0373cc3892d5df89ba96ebf9Christian Maeder c <- string ":"
bcd914850de931848b86d7728192a149f9c0108bChristian Maeder i <- reference
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly skipSmart
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder return $ i { prefixName = c }
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder <|> do
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly pn <- try (do
57221209d11b05aa0373cc3892d5df89ba96ebf9Christian Maeder n <- ncname
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder c <- string ":"
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder return $ n++c
57221209d11b05aa0373cc3892d5df89ba96ebf9Christian Maeder )
bcd914850de931848b86d7728192a149f9c0108bChristian Maeder i <- reference
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly skipSmart
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly return $ i { prefixName = pn }
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly <|> do
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder r <- reference
05cc55892e6c93bdd7b9c3f100ab1bb65fe6a21eLiam O'Reilly skipSmart
05cc55892e6c93bdd7b9c3f100ab1bb65fe6a21eLiam O'Reilly return r
05cc55892e6c93bdd7b9c3f100ab1bb65fe6a21eLiam O'Reilly
05cc55892e6c93bdd7b9c3f100ab1bb65fe6a21eLiam O'Reillyreference :: IRIParser st IRI
05cc55892e6c93bdd7b9c3f100ab1bb65fe6a21eLiam O'Reillyreference = iriWithPos $ do
(_, up) <- ihierPartNoAuth
uq <- option "" ( do { char '?' ; uiquery } )
uf <- option "" ( do { char '#' ; uifragment } )
return $ IRI
{ iriScheme = ""
, iriAuthority = Nothing
, iriPath = ""
, iriQuery = uq
, iriFragment = uf
, prefixName = ""
, abbrevPath = up
, iriPos = nullRange
}
-- http://www.w3.org/TR/2009/REC-xml-names-20091208/#NT-NCName
ncname :: GenParser Char st String
ncname = do
c <- nameStartChar
s <- many nameChar
return $ c:s
nameStartChar :: GenParser Char st Char
nameStartChar = satisfy nameStartCharP
nameChar :: GenParser Char st Char
nameChar = satisfy nameCharP
-- NOTE: Usually ':' is allowed. Here, only ncname uses nameStartChar, however.
-- Thus we disallow ':'
nameStartCharP :: Char -> Bool
nameStartCharP c =
let n = ord c in
(c == '_') || -- usually: (c `elem` ":_") ||
isAlphaChar c ||
(0x00C0 <= n && n <= 0x00D6) ||
(0x00D8 <= n && n <= 0x00F6) ||
(0x00F8 <= n && n <= 0x02FF) ||
(0x0370 <= n && n <= 0x037D) ||
(0x037F <= n && n <= 0x1FFF) ||
(0x200C <= n && n <= 0x200D) ||
(0x2070 <= n && n <= 0x218F) ||
(0x2C00 <= n && n <= 0x2FEF) ||
(0x3001 <= n && n <= 0xD7FF) ||
(0xF900 <= n && n <= 0xFDCF) ||
(0xFDF0 <= n && n <= 0xFFFD) ||
(0x10000 <= n && n <= 0xEFFFF)
nameCharP :: Char -> Bool
nameCharP c =
let n = ord c in
nameStartCharP c ||
isDigitChar c ||
c `elem` "-." ||
n == 0xB7 ||
(0x0300 <= n && n <= 0x036F) ||
(0x203F <= n && n <= 0x2040)
{- END CURIE -}
{- BEGIN SPARQL -}
-- http://www.w3.org/TR/2008/REC-rdf-sparql-query-20080115/
-- Section 4.1
pn_chars_baseP :: Char -> Bool
pn_chars_baseP c =
let n = ord c in
isAlphaChar c ||
(0x00C0 <= n && n <= 0x00D6) ||
(0x00D8 <= n && n <= 0x00F6) ||
(0x00F8 <= n && n <= 0x02FF) ||
(0x0370 <= n && n <= 0x037D) ||
(0x037F <= n && n <= 0x1FFF) ||
(0x200C <= n && n <= 0x200D) ||
(0x2070 <= n && n <= 0x218F) ||
(0x2C00 <= n && n <= 0x2FEF) ||
(0x00D8 <= n && n <= 0x00F6) ||
(0x3001 <= n && n <= 0xD7FF) ||
(0xF900 <= n && n <= 0xFDCF) ||
(0xFDF0 <= n && n <= 0xFFFD) ||
(0x10000 <= n && n <= 0xEFFFF)
pn_chars_base :: GenParser Char st Char
pn_chars_base = satisfy pn_chars_baseP
pn_chars_u :: GenParser Char st Char
pn_chars_u = satisfy pn_chars_uP
pn_chars :: GenParser Char st Char
pn_chars = satisfy pn_charsP
pn_chars_uP :: Char -> Bool
pn_chars_uP c = (pn_chars_baseP c) || (c == '_')
pn_charsP :: Char -> Bool
pn_charsP c =
let n = ord c in
c == '-' ||
pn_chars_uP c ||
isDigitChar c ||
(n == 0x00B7) ||
(0x0300 <= n && n <= 0x036F) ||
(0x203F <= n && n <= 0x2040)
-- http://www.w3.org/TR/2009/NOTE-owl2-manchester-syntax-20091027/
-- section 2.1
-- fullIRI := an IRI as defined in [RFC 3987], enclosed in a pair of < (U+3C) and > (U+3E) characters
-- prefixName := a finite sequence of characters matching the PNAME_NS production of [SPARQL] and not matching any of the keyword terminals of the syntax
-- abbreviatedIRI := a finite sequence of characters matching the PNAME_LN production of [SPARQL]
-- simpleIRI := a finite sequence of characters matching the PN_LOCAL production of [SPARQL] and not matching any of the keyword terminals of the syntax
-- IRI := fullIRI | abbreviatedIRI | simpleIRI
iriManchester :: IRIParser st IRI
iriManchester = iriWithPos $ do
char '<'
i <- iri <|> irelativeRef
char '>'
return i
<|> do
(PName_Ln prefix loc) <- try pname_ln
return $ IRI
{ iriScheme = ""
, iriAuthority = Nothing
, iriPath = ""
, iriQuery = ""
, iriFragment = ""
, prefixName = prefix
, abbrevPath = loc
, iriPos = nullRange
}
<|> do
loc <- pn_local
return $ IRI
{ iriScheme = ""
, iriAuthority = Nothing
, iriPath = ""
, iriQuery = ""
, iriFragment = ""
, prefixName = ""
, abbrevPath = loc
, iriPos = nullRange
}
data PNAME_LN = PName_Ln PNAME_NS PN_LOCAL deriving (Show, Eq, Ord)
type PNAME_NS = String
type PN_PREFIX = String
type PN_LOCAL = String
pname_ln :: GenParser Char st PNAME_LN
pname_ln = do
ns <- pname_ns
loc <- pn_local
return $ PName_Ln ns loc
pname_ns :: GenParser Char st PNAME_NS
pname_ns = do
char ':'
return ":"
<|> do
prefix <- pn_prefix
char ':'
return $ prefix ++ ":"
pn_prefix :: GenParser Char st PN_PREFIX
pn_prefix = do
c1 <- pn_chars_base
t <- (
do
s1 <- many (pn_chars <|> char '.')
if null s1 then return Nothing else case last s1 of
'.' -> fail "Last character in prefix must not be '.'"
_ -> return $ Just s1
<|> return Nothing
)
case t of
Just str -> return $ c1:str
Nothing -> return [c1]
pn_local :: GenParser Char st PN_LOCAL
pn_local = do
c1 <- (pn_chars_u <|> digit)
t <- (
do
s1 <- many (pn_chars <|> char '.')
if null s1 then return Nothing else case last s1 of
'.' -> fail "Last character in prefix must not be '.'"
_ -> return $ Just s1
<|> return Nothing
)
case t of
Just str -> return $ c1:str
Nothing -> return [c1]
{- END SPARQL -}
-- RFC3987, section 2.2
--
-- IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]
--
-- ihier-part = "//" iauthority ipath-abempty
-- / ipath-absolute
-- / ipath-rootless
-- / ipath-empty
iri :: IRIParser st IRI
iri = iriWithPos $
do { us <- try uscheme
; (ua,up) <- ihierPart
; uq <- option "" ( do { char '?' ; uiquery } )
; uf <- option "" ( do { char '#' ; uifragment } )
; return $ IRI
{ iriScheme = us
, iriAuthority = ua
, iriPath = up
, iriQuery = uq
, iriFragment = uf
, prefixName = ""
, abbrevPath = ""
, iriPos = nullRange
}
}
ihierPart :: IRIParser st ((Maybe IRIAuth),String)
ihierPart =
do { try (string "//")
; ua <- uiauthority
; up <- ipathAbEmpty
; return (ua,up)
}
<|> ihierPartNoAuth
ihierPartNoAuth :: IRIParser st ((Maybe IRIAuth),String)
ihierPartNoAuth =
do { up <- ipathAbs
; return (Nothing, up)
}
<|> do { up <- ipathRootLess
; return (Nothing, up)
}
<|> do { return (Nothing, "")
}
-- RFC3986, section 3.1
uscheme :: IRIParser st String
uscheme =
do { s <- oneThenMany alphaChar (satisfy isSchemeChar)
; char ':'
; return $ s++":"
}
-- RFC3987, section 2.2
uiauthority :: IRIParser st (Maybe IRIAuth)
uiauthority =
do { uu <- option "" (try iuserinfo)
; uh <- ihost
; up <- option "" port
; return $ Just $ IRIAuth
{ iriUserInfo = uu
, iriRegName = uh
, iriPort = up
}
}
-- RFC3987, section 2.2
iuserinfo :: IRIParser st String
iuserinfo =
do { uu <- many (uchar ";:&=+$,")
; char '@'
; return (concat uu ++"@")
}
-- RFC3987, section 2.2
ihost :: IRIParser st String
ihost = ipLiteral <|> try ipv4address <|> iregName
ipLiteral :: IRIParser st String
ipLiteral =
do { char '['
; ua <- ( ipv6address <|> ipvFuture )
; char ']'
; return $ "[" ++ ua ++ "]"
}
<?> "IP address literal"
ipvFuture :: IRIParser st String
ipvFuture =
do { char 'v'
; h <- hexDigitChar
; char '.'
; a <- many1 (satisfy isIpvFutureChar)
; return $ 'c':h:'.':a
}
isIpvFutureChar :: Char -> Bool
isIpvFutureChar c = isUnreserved c || isSubDelims c || (c==';')
ipv6address :: IRIParser st String
ipv6address =
try ( do
{ a2 <- count 6 h4c
; a3 <- ls32
; return $ concat a2 ++ a3
} )
<|> try ( do
{ string "::"
; a2 <- count 5 h4c
; a3 <- ls32
; return $ "::" ++ concat a2 ++ a3
} )
<|> try ( do
{ a1 <- opt_n_h4c_h4 0
; string "::"
; a2 <- count 4 h4c
; a3 <- ls32
; return $ a1 ++ "::" ++ concat a2 ++ a3
} )
<|> try ( do
{ a1 <- opt_n_h4c_h4 1
; string "::"
; a2 <- count 3 h4c
; a3 <- ls32
; return $ a1 ++ "::" ++ concat a2 ++ a3
} )
<|> try ( do
{ a1 <- opt_n_h4c_h4 2
; string "::"
; a2 <- count 2 h4c
; a3 <- ls32
; return $ a1 ++ "::" ++ concat a2 ++ a3
} )
<|> try ( do
{ a1 <- opt_n_h4c_h4 3
; string "::"
; a2 <- h4c
; a3 <- ls32
; return $ a1 ++ "::" ++ a2 ++ a3
} )
<|> try ( do
{ a1 <- opt_n_h4c_h4 4
; string "::"
; a3 <- ls32
; return $ a1 ++ "::" ++ a3
} )
<|> try ( do
{ a1 <- opt_n_h4c_h4 5
; string "::"
; a3 <- h4
; return $ a1 ++ "::" ++ a3
} )
<|> try ( do
{ a1 <- opt_n_h4c_h4 6
; string "::"
; return $ a1 ++ "::"
} )
<?> "IPv6 address"
opt_n_h4c_h4 :: Int -> IRIParser st String
opt_n_h4c_h4 n = option "" $
do { a1 <- countMinMax 0 n h4c
; a2 <- h4
; return $ concat a1 ++ a2
}
ls32 :: IRIParser st String
ls32 = try ( do
{ a1 <- h4c
; a2 <- h4
; return (a1++a2)
} )
<|> ipv4address
h4c :: IRIParser st String
h4c = try $
do { a1 <- h4
; char ':'
; notFollowedBy (char ':')
; return $ a1 ++ ":"
}
h4 :: IRIParser st String
h4 = countMinMax 1 4 hexDigitChar
ipv4address :: IRIParser st String
ipv4address =
do { a1 <- decOctet ; char '.'
; a2 <- decOctet ; char '.'
; a3 <- decOctet ; char '.'
; a4 <- decOctet
; return $ a1++"."++a2++"."++a3++"."++a4
}
decOctet :: IRIParser st String
decOctet =
do { a1 <- countMinMax 1 3 digitChar
; if (read a1 :: Int) > 255 then
fail "Decimal octet value too large"
else
return a1
}
iregName :: IRIParser st String
iregName =
do { ss <- countMinMax 0 255 ( iunreservedChar <|> escaped <|> subDelims )
; return $ concat ss
}
<?> "Registered name"
-- RFC3986, section 3.2.3
port :: IRIParser st String
port =
do { char ':'
; p <- many digitChar
; return (':':p)
}
--
-- RFC3987, section 2.2
--
-- ipath = ipath-abempty ; begins with "/" or is empty
-- / ipath-absolute ; begins with "/" but not "//"
-- / ipath-noscheme ; begins with a non-colon isegment
-- / ipath-rootless ; begins with a isegment
-- / ipath-empty ; zero characters
--
-- ipath-abempty = *( "/" iisegment )
-- ipath-absolute = "/" [ iisegment-nz *( "/" iisegment ) ]
-- ipath-noscheme = iisegment-nz-nc *( "/" iisegment )
-- ipath-rootless = iisegment-nz *( "/" iisegment )
-- ipath-empty = 0<iipchar>
--
-- iisegment = *iipchar
-- iisegment-nz = 1*iipchar
-- iisegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims
-- / "@" )
-- ; non-zero-length isegment without any colon ":"
--
-- iipchar = iunreserved / pct-encoded / sub-delims / ":"
-- / "@"
ipathAbEmpty :: IRIParser st String
ipathAbEmpty =
do { ss <- many slashIsegment
; return $ concat ss
}
ipathAbs :: IRIParser st String
ipathAbs =
do { char '/'
; ss <- option "" ipathRootLess
; return $ '/':ss
}
ipathRootLess :: IRIParser st String
ipathRootLess =
do { s1 <- isegmentNz
; ss <- many slashIsegment
; return $ concat (s1:ss)
}
ipathNoScheme :: IRIParser st String
ipathNoScheme =
do { s1 <- isegmentNzc
; ss <- many slashIsegment
; return $ concat (s1:ss)
}
slashIsegment :: IRIParser st String
slashIsegment =
do { char '/'
; s <- isegment
; return ('/':s)
}
isegment :: IRIParser st String
isegment =
do { ps <- many ipchar
; return $ concat ps
}
isegmentNz :: IRIParser st String
isegmentNz =
do { ps <- many1 ipchar
; return $ concat ps
}
isegmentNzc :: IRIParser st String
isegmentNzc =
do { ps <- many1 (uchar "@")
; return $ concat ps
}
ipchar :: IRIParser st String
ipchar = uchar ":@"
-- helper function for ipchar and friends
uchar :: String -> IRIParser st String
uchar extras =
iunreservedChar
<|> escaped
<|> subDelims
<|> do { c <- oneOf extras ; return [c] }
-- RFC3987, section 2.2
uiquery :: IRIParser st String
uiquery =
do { ss <- many iqueryPart
; return $ '?':concat ss
}
iqueryPart :: IRIParser st String
iqueryPart = (many1 iprivate) <|> (uchar $ ":@" ++ "/?")
-- RFC3987, section 2.2
uifragment :: IRIParser st String
uifragment =
do { ss <- many $ uchar (":@"++"/?")
; return $ '#':concat ss
}
-- Reference, Relative and Absolute IRI forms
--
-- RFC3987, section 2.2
iriReference :: IRIParser st IRI
iriReference = iri <|> irelativeRef
-- RFC3987, section 2.2
--
-- irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ]
--
-- irelative-part = "//" iauthority ipath-abempty
-- / ipath-absolute
irelativeRef :: IRIParser st IRI
irelativeRef = iriWithPos $
do { notMatching uscheme
; (ua,up) <- irelativePart
; uq <- option "" ( do { char '?' ; uiquery } )
; uf <- option "" ( do { char '#' ; uifragment } )
; return $ IRI
{ iriScheme = ""
, iriAuthority = ua
, iriPath = up
, iriQuery = uq
, iriFragment = uf
, prefixName = ""
, abbrevPath = ""
, iriPos = nullRange
}
}
irelativePart :: IRIParser st ((Maybe IRIAuth),String)
irelativePart =
do { try (string "//")
; ua <- uiauthority
; up <- ipathAbEmpty
; return (ua,up)
}
<|> do { up <- ipathAbs
; return (Nothing,up)
}
<|> do { up <- ipathNoScheme
; return (Nothing,up)
}
<|> do { return (Nothing,"")
}
-- RFC3987, section 2.2
absoluteIRI :: IRIParser st IRI
absoluteIRI = iriWithPos $
do { us <- uscheme
-- ; ua <- option Nothing ( do { try (string "//") ; uiauthority } )
-- ; up <- upath
; (ua,up) <- ihierPart
; uq <- option "" ( do { char '?' ; uiquery } )
; return $ IRI
{ iriScheme = us
, iriAuthority = ua
, iriPath = up
, iriQuery = uq
, iriFragment = ""
, prefixName = ""
, abbrevPath = ""
, iriPos = nullRange
}
}
-- Imports from RFC 2234
-- NOTE: can't use isAlphaNum etc. because these deal with ISO 8859
-- (and possibly Unicode!) chars.
-- [[[Above was a comment originally in GHC Network/IRI.hs:
-- when IRIs are introduced then most codepoints above 128(?) should
-- be treated as unreserved, and higher codepoints for letters should
-- certainly be allowed.
-- ]]]
isAlphaChar :: Char -> Bool
isAlphaChar c = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
isDigitChar :: Char -> Bool
isDigitChar c = (c >= '0' && c <= '9')
isAlphaNumChar :: Char -> Bool
isAlphaNumChar c = isAlphaChar c || isDigitChar c
isUcsChar :: Char -> Bool
isUcsChar c =
let n = ord c
in (0xA0 <= n && n <= 0xD7FF) ||
(0x20000 <= n && n <= 0x2FFFD) ||
(0x30000 <= n && n <= 0x3FFFD) ||
(0x40000 <= n && n <= 0x4FFFD) ||
(0x50000 <= n && n <= 0x5FFFD) ||
(0x60000 <= n && n <= 0x6FFFD) ||
(0x70000 <= n && n <= 0x7FFFD) ||
(0x80000 <= n && n <= 0x8FFFD) ||
(0x90000 <= n && n <= 0x9FFFD) ||
(0xA0000 <= n && n <= 0xAFFFD) ||
(0xB0000 <= n && n <= 0xBFFFD) ||
(0xC0000 <= n && n <= 0xCFFFD) ||
(0xD0000 <= n && n <= 0xDFFFD) ||
(0xE0000 <= n && n <= 0xEFFFD)
isIprivate :: Char -> Bool
isIprivate c =
let n = ord c
in (0xE000 <= n && n <= 0xF8FF) ||
(0xF000 <= n && n <= 0xFFFD) ||
(0x100000 <= n && n <= 0x10FFFD)
isHexDigitChar :: Char -> Bool
isHexDigitChar c = isHexDigit c
isSchemeChar :: Char -> Bool
isSchemeChar c = (isAlphaNumChar c) || (c `elem` "+-.")
alphaChar :: IRIParser st Char
alphaChar = satisfy isAlphaChar -- or: Parsec.letter ?
digitChar :: IRIParser st Char
digitChar = satisfy isDigitChar -- or: Parsec.digit ?
hexDigitChar :: IRIParser st Char
hexDigitChar = satisfy isHexDigitChar -- or: Parsec.hexDigit ?
iprivate :: IRIParser st Char
iprivate = satisfy isIprivate
-- Additional parser combinators for common patterns
oneThenMany :: GenParser t s a -> GenParser t s a -> GenParser t s [a]
oneThenMany p1 pr =
do { a1 <- p1
; ar <- many pr
; return (a1:ar)
}
countMinMax :: Int -> Int -> GenParser t s a -> GenParser t s [a]
countMinMax m n p | m > 0 =
do { a1 <- p
; ar <- countMinMax (m-1) (n-1) p
; return (a1:ar)
}
countMinMax _ n _ | n <= 0 = return []
countMinMax _ n p = option [] $
do { a1 <- p
; ar <- countMinMax 0 (n-1) p
; return (a1:ar)
}
notMatching :: Show a => GenParser tok st a -> GenParser tok st ()
notMatching p = do { a <- try p ; unexpected (show a) } <|> return ()
------------------------------------------------------------
-- Reconstruct a IRI string
------------------------------------------------------------
--
-- |Turn an 'IRI' into a string.
--
-- Uses a supplied function to map the iuserinfo part of the IRI.
--
-- The Show instance for IRI uses a mapping that hides any password
-- that may be present in the IRI. Use this function with argument @id@
-- to preserve the password in the formatted output.
--
iriToString :: (String->String) -> IRI -> ShowS
iriToString iuserinfomap i@(IRI { iriScheme=scheme
, iriAuthority=authority
, iriPath=path
, iriQuery=query
, iriFragment=fragment
, prefixName=pname
, abbrevPath=aPath
}) = case iriType i of
Simple -> (aPath++)
Abbreviated -> (pname++) . (aPath++)
Full -> (scheme++) . (iriAuthToString iuserinfomap authority)
. (path++) . (query++) . (fragment++)
iriAuthToString :: (String->String) -> (Maybe IRIAuth) -> ShowS
iriAuthToString _ Nothing = id -- shows ""
iriAuthToString iuserinfomap
(Just IRIAuth { iriUserInfo = uinfo
, iriRegName = regname
, iriPort = port
} ) =
("//"++) . (if null uinfo then id else ((iuserinfomap uinfo)++))
. (regname++)
. (port++)
------------------------------------------------------------
-- Character classes
------------------------------------------------------------
-- | Returns 'True' if the character is allowed in a IRI.
--
isAllowedInIRI :: Char -> Bool
isAllowedInIRI c = isReserved c || isUnreserved c || c == '%' -- escape char
-- | Returns 'True' if the character is allowed unescaped in a IRI.
--
isUnescapedInIRI :: Char -> Bool
isUnescapedInIRI c = isReserved c || isUnreserved c
------------------------------------------------------------
-- Escape sequence handling
------------------------------------------------------------
-- |Escape character if supplied predicate is not satisfied,
-- otherwise return character as singleton string.
--
escapeIRIChar :: (Char->Bool) -> Char -> String
escapeIRIChar p c
| p c = [c]
| otherwise = '%' : myShowHex (ord c) ""
where
myShowHex :: Int -> ShowS
myShowHex n r = case showIntAtBase 16 (toChrHex) n r of
[] -> "00"
[c] -> ['0',c]
cs -> cs
toChrHex d
| d < 10 = chr (ord '0' + fromIntegral d)
| otherwise = chr (ord 'A' + fromIntegral (d - 10))
-- |Can be used to make a string valid for use in a IRI.
--
escapeIRIString
:: (Char->Bool) -- ^ a predicate which returns 'False'
-- if the character should be escaped
-> String -- ^ the string to process
-> String -- ^ the resulting IRI string
escapeIRIString p s = concatMap (escapeIRIChar p) s
-- |Turns all instances of escaped characters in the string back
-- into literal characters.
--
unEscapeString :: String -> String
unEscapeString [] = ""
unEscapeString ('%':x1:x2:s) | isHexDigit x1 && isHexDigit x2 =
chr (digitToInt x1 * 16 + digitToInt x2) : unEscapeString s
unEscapeString (c:s) = c : unEscapeString s
------------------------------------------------------------
-- Resolving a relative IRI relative to a base IRI
------------------------------------------------------------
-- |Returns a new 'IRI' which represents the value of the
-- first 'IRI' interpreted as relative to the second 'IRI'.
-- For example:
--
-- > "foo" `relativeTo` "http://bar.org/" = "http://bar.org/foo"
-- > "http:foo" `nonStrictRelativeTo` "http://bar.org/" = "http://bar.org/foo"
--
-- Algorithm from RFC3986 [3], section 5.2.2
--
nonStrictRelativeTo :: IRI -> IRI -> Maybe IRI
nonStrictRelativeTo ref base = relativeTo ref' base
where
ref' = if iriScheme ref == iriScheme base
then ref { iriScheme="" }
else ref
isDefined :: ( MonadPlus m, Eq (m a) ) => m a -> Bool
isDefined a = a /= mzero
-- |Compute an absolute 'IRI' for a supplied IRI
-- relative to a given base.
relativeTo :: IRI -> IRI -> Maybe IRI
relativeTo ref base
| isDefined ( iriScheme ref ) =
just_isegments ref
| isDefined ( iriAuthority ref ) =
just_isegments ref { iriScheme = iriScheme base }
| isDefined ( iriPath ref ) =
if (head (iriPath ref) == '/') then
just_isegments ref
{ iriScheme = iriScheme base
, iriAuthority = iriAuthority base
}
else
just_isegments ref
{ iriScheme = iriScheme base
, iriAuthority = iriAuthority base
, iriPath = mergePaths base ref
}
| isDefined ( iriQuery ref ) =
just_isegments ref
{ iriScheme = iriScheme base
, iriAuthority = iriAuthority base
, iriPath = iriPath base
}
| otherwise =
just_isegments ref
{ iriScheme = iriScheme base
, iriAuthority = iriAuthority base
, iriPath = iriPath base
, iriQuery = iriQuery base
}
where
just_isegments u =
Just $ u { iriPath = removeDotSegments (iriPath u) }
mergePaths b r
| isDefined (iriAuthority b) && null pb = '/':pr
| otherwise = dropLast pb ++ pr
where
pb = iriPath b
pr = iriPath r
dropLast = fst . splitLast -- reverse . dropWhile (/='/') . reverse
-- Remove dot isegments, but protect leading '/' character
removeDotSegments :: String -> String
removeDotSegments ('/':ps) = '/':elimDots ps []
removeDotSegments ps = elimDots ps []
-- Second arg accumulates isegments processed so far in reverse order
elimDots :: String -> [String] -> String
elimDots [] [] = ""
elimDots [] rs = concat (reverse rs)
elimDots ( '.':'/':ps) rs = elimDots ps rs
elimDots ( '.':[] ) rs = elimDots [] rs
elimDots ( '.':'.':'/':ps) rs = elimDots ps (dropHead rs)
elimDots ( '.':'.':[] ) rs = elimDots [] (dropHead rs)
elimDots ps rs = elimDots ps1 (r:rs)
where
(r,ps1) = nextSegment ps
-- Return tail of non-null list, otherwise return null list
dropHead :: [a] -> [a]
dropHead [] = []
dropHead (_:rs) = rs
-- Returns the next isegment and the rest of the path from a path string.
-- Each isegment ends with the next '/' or the end of string.
--
nextSegment :: String -> (String,String)
nextSegment ps =
case break (=='/') ps of
(r,'/':ps1) -> (r++"/",ps1)
(r,_) -> (r,[])
-- Split last (name) isegment from path, returning (path,name)
splitLast :: String -> (String,String)
splitLast path = (reverse revpath,reverse revname)
where
(revname,revpath) = break (=='/') $ reverse path
------------------------------------------------------------
-- Finding a IRI relative to a base IRI
------------------------------------------------------------
-- |Returns a new 'IRI' which represents the relative location of
-- the first 'IRI' with respect to the second 'IRI'. Thus, the
-- values supplied are expected to be absolute IRIs, and the result
-- returned may be a relative IRI.
--
-- Example:
--
-- > "http://example.com/Root/sub1/name2#frag"
-- > `relativeFrom` "http://example.com/Root/sub2/name2#frag"
-- > == "../sub1/name2#frag"
--
-- There is no single correct implementation of this function,
-- but any acceptable implementation must satisfy the following:
--
-- > (uabs `relativeFrom` ubase) `relativeTo` ubase == uabs
--
-- For any valid absolute IRI.
-- (cf. <http://lists.w3.org/Archives/Public/iri/2003Jan/0008.html>
-- <http://lists.w3.org/Archives/Public/iri/2003Jan/0005.html>)
--
relativeFrom :: IRI -> IRI -> IRI
relativeFrom uabs base
| diff iriScheme uabs base = uabs
| diff iriAuthority uabs base = uabs { iriScheme = "" }
| diff iriPath uabs base = uabs
{ iriScheme = ""
, iriAuthority = Nothing
, iriPath = relPathFrom (removeBodyDotSegments $ iriPath uabs)
(removeBodyDotSegments $ iriPath base)
}
| diff iriQuery uabs base = uabs
{ iriScheme = ""
, iriAuthority = Nothing
, iriPath = ""
}
| otherwise = uabs -- Always carry fragment from uabs
{ iriScheme = ""
, iriAuthority = Nothing
, iriPath = ""
, iriQuery = ""
}
where
diff :: Eq b => (a -> b) -> a -> a -> Bool
diff sel u1 u2 = sel u1 /= sel u2
-- Remove dot isegments except the final isegment
removeBodyDotSegments p = removeDotSegments p1 ++ p2
where
(p1,p2) = splitLast p
relPathFrom :: String -> String -> String
relPathFrom [] _ = "/"
relPathFrom pabs [] = pabs
relPathFrom pabs base = -- Construct a relative path isegments
if sa1 == sb1 -- if the paths share a leading isegment
then if (sa1 == "/") -- other than a leading '/'
then if (sa2 == sb2)
then relPathFrom1 ra2 rb2
else pabs
else relPathFrom1 ra1 rb1
else pabs
where
(sa1,ra1) = nextSegment pabs
(sb1,rb1) = nextSegment base
(sa2,ra2) = nextSegment ra1
(sb2,rb2) = nextSegment rb1
-- relPathFrom1 strips off trailing names from the supplied paths,
-- and calls difPathFrom to find the relative path from base to
-- target
relPathFrom1 :: String -> String -> String
relPathFrom1 pabs base = relName
where
(sa,na) = splitLast pabs
(sb,nb) = splitLast base
rp = relSegsFrom sa sb
relName = if null rp then
if (na == nb) then ""
else if protect na then "./"++na
else na
else
rp++na
-- Precede name with some path if it is null or contains a ':'
protect na = null na || ':' `elem` na
-- relSegsFrom discards any common leading isegments from both paths,
-- then invokes difSegsFrom to calculate a relative path from the end
-- of the base path to the end of the target path.
-- The final name is handled separately, so this deals only with
-- "directory" segtments.
--
relSegsFrom :: String -> String -> String
relSegsFrom [] [] = "" -- paths are identical
relSegsFrom sabs base =
if sa1 == sb1
then relSegsFrom ra1 rb1
else difSegsFrom sabs base
where
(sa1,ra1) = nextSegment sabs
(sb1,rb1) = nextSegment base
-- difSegsFrom calculates a path difference from base to target,
-- not including the final name at the end of the path
-- (i.e. results always ends with '/')
--
-- This function operates under the invariant that the supplied
-- value of sabs is the desired path relative to the beginning of
-- base. Thus, when base is empty, the desired path has been found.
--
difSegsFrom :: String -> String -> String
difSegsFrom sabs "" = sabs
difSegsFrom sabs base = difSegsFrom ("../"++sabs) (snd $ nextSegment base)
------------------------------------------------------------
-- Other normalization functions
------------------------------------------------------------
-- |Case normalization; cf. RFC3986 section 6.2.2.1
-- NOTE: authority case normalization is not performed
--
normalizeCase :: String -> String
normalizeCase iristr = ncScheme iristr
where
ncScheme (':':cs) = ':':ncEscape cs
ncScheme (c:cs) | isSchemeChar c = toLower c:ncScheme cs
ncScheme _ = ncEscape iristr -- no scheme present
ncEscape ('%':h1:h2:cs) = '%':toUpper h1:toUpper h2:ncEscape cs
ncEscape (c:cs) = c:ncEscape cs
ncEscape [] = []
-- |Encoding normalization; cf. RFC3986 section 6.2.2.2
--
normalizeEscape :: String -> String
normalizeEscape ('%':h1:h2:cs)
| isHexDigit h1 && isHexDigit h2 && isUnreserved escval =
escval:normalizeEscape cs
where
escval = chr (digitToInt h1*16+digitToInt h2)
normalizeEscape (c:cs) = c:normalizeEscape cs
normalizeEscape [] = []
-- |Path isegment normalization; cf. RFC3986 section 6.2.2.4
--
normalizePathSegments :: String -> String
normalizePathSegments iristr = normstr jiri
where
jiri = parseIRI iristr
normstr Nothing = iristr
normstr (Just u) = show (normiri u)
normiri u = u { iriPath = removeDotSegments (iriPath u) }
-- FIX: where do the instances fit the best?
instance ShATermConvertible IRI where
toShATermAux att0 u = do
(att1, is) <- toShATerm' att0 ((iriToString id u) "")
return $ addATerm (ShAAppl "IRI" [is] []) att1
fromShATermAux ix att0 =
case getShATerm ix att0 of
x@(ShAAppl "IRI" [is] _) ->
case fromShATerm' is att0 of
(att1, is') ->
case parseIRIReference is' of --TODO apply most tolerating parser
Nothing ->
fromShATermError "IRI" x
Just i ->
(att1, i)
i -> fromShATermError "IRI" i
--------------------------------------------------------------------------------
--
-- Copyright (c) 2004, G. KLYNE. All rights reserved.
-- Distributed as free software under the following license.
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions
-- are met:
--
-- - Redistributions of source code must retain the above copyright notice,
-- this list of conditions and the following disclaimer.
--
-- - Redistributions in binary form must reproduce the above copyright
-- notice, this list of conditions and the following disclaimer in the
-- documentation and/or other materials provided with the distribution.
--
-- - Neither name of the copyright holders nor the names of its
-- contributors may be used to endorse or promote products derived from
-- this software without specific prior written permission.
--
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE CONTRIBUTORS
-- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-- HOLDERS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
-- OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
-- TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
-- USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--
--------------------------------------------------------------------------------