IRI.hs revision ea8e98e298f33f9362293f392c8fb192722b8904
57221209d11b05aa0373cc3892d5df89ba96ebf9Christian Maeder{-# LANGUAGE CPP, DeriveDataTypeable #-}
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly--------------------------------------------------------------------------------
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly-- Copyright : (c) 2004, Graham Klyne
98890889ffb2e8f6f722b00e265a211f13b5a861Corneliu-Claudiu Prodescu-- License : BSD-style (see end of this file)
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly-- Maintainer : Eugen Kuksa <eugenk@informatik.uni-bremen.de>
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly-- Stability : provisional
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly-- Portability : portable
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- This module defines functions for handling IRIs. It is substantially the
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly-- same as the Network.URI module by Graham Klyne, but is extended to IRI
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly-- support [2] and even Manchester-Syntax-IRI [3], [4].
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly-- Four methods are provided for parsing different
57221209d11b05aa0373cc3892d5df89ba96ebf9Christian Maeder-- kinds of IRI string (as noted in [1], [2]):
66bc8d6e69cde43f1ccbeb76104cf7b8038acd6cChristian Maeder-- 'parseIRI',
66bc8d6e69cde43f1ccbeb76104cf7b8038acd6cChristian Maeder-- 'parseIRIReference',
e90dc723887d541f809007ae81c9bb73ced9592eChristian Maeder-- 'parseRelativeReference' and
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder-- 'parseAbsoluteIRI'.
-- An additional method is provided for parsing an abbreviated IRI according to [3], [4]:
66bc8d6e69cde43f1ccbeb76104cf7b8038acd6cChristian Maeder-- 'parseIRIManchester'
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- Further, four methods are provided for classifying different
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- kinds of IRI string (as noted in [1], [2]):
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder-- 'isIRIReference',
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder-- 'isRelativeReference' and
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder-- 'isAbsoluteIRI'.
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder-- Additionally, classification of full, abbreviated and simple IRI is provided
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- 'isIRIManchester'.
-- The Manchester-syntax [3], [4] provides three different kinds of IRI: full,
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- abbreviated and simple. An existing element of type IRI can be classified in
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder-- one of those kinds with
036ecbd8f721096321f47cf6a354a9d1bf3d032fChristian Maeder-- Most of the code has been copied from the Network.URI implementation,
fa373bc327620e08861294716b4454be8d25669fChristian Maeder-- but it is extended to IRI and Manchester-syntax.
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder-- (3) <http://www.w3.org/TR/2009/NOTE-owl2-manchester-syntax-20091027/>
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder-- (4) <http://www.w3.org/TR/2008/REC-rdf-sparql-query-20080115/>
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder--------------------------------------------------------------------------------
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder -- * The IRI type
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , IRIAuth(..)
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , IRIType(..)
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly -- * Conversion
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly , simpleIdToIRI
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , parseIRIReference
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , parseRelativeReference
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder , parseAbsoluteIRI
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly , parseIRICurie
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly , parseIRIManchester
fa373bc327620e08861294716b4454be8d25669fChristian Maeder -- * Test for strings containing various kinds of IRI
fa373bc327620e08861294716b4454be8d25669fChristian Maeder , isIRIReference
fa373bc327620e08861294716b4454be8d25669fChristian Maeder , isRelativeReference
fa373bc327620e08861294716b4454be8d25669fChristian Maeder , isAbsoluteIRI
fa373bc327620e08861294716b4454be8d25669fChristian Maeder , isIRIManchester
fa373bc327620e08861294716b4454be8d25669fChristian Maeder , isIPv6address
fa373bc327620e08861294716b4454be8d25669fChristian Maeder , isIPv4address
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder -- * Relative IRIs
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder , nonStrictRelativeTo
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder , relativeFrom
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly -- * Operations on IRI strings
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder -- | Support for putting strings into IRI-friendly
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder -- escaped format and getting them back again.
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder , iriToString
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , iriToStringUnsecure
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , isReserved, isUnreserved
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , isAllowedInIRI, isUnescapedInIRI
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , escapeIRIChar
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , escapeIRIString
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , unEscapeString
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder -- * Parser combinators, special additions to export list
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , iriReference
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , irelativeRef
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , absoluteIRI
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder , iriManchester
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder -- * IRI Normalization functions
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder , normalizeCase
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder , normalizeEscape
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder , normalizePathSegments
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder ( GenParser, ParseError
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , parse, (<|>), (<?>), try
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder , option, many, many1, count, notFollowedBy
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder , char, satisfy, oneOf, string, digit, eof
648fe1220044aac847acbdfbc4155af5556063ebChristian Maederimport Control.Monad (MonadPlus(..))
648fe1220044aac847acbdfbc4155af5556063ebChristian Maederimport Data.Char (ord, chr, isHexDigit, toLower, toUpper, digitToInt)
648fe1220044aac847acbdfbc4155af5556063ebChristian Maederimport Numeric (showIntAtBase)
648fe1220044aac847acbdfbc4155af5556063ebChristian Maederimport Data.Typeable (Typeable)
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder------------------------------------------------------------
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder-- The IRI datatype
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder------------------------------------------------------------
-- |Represents a general universal resource identifier using
-- its component parts.
--
-- For example, for the (full) IRI
--
-- > foo://anonymous@www.haskell.org:42/ghc?query#frag
--
-- or the abbreviated IRI
--
-- > prefix:abbrevPath
--
-- or the simple IRI
--
-- > abbrevPath
--
-- the components are:
data IRI = IRI
    { iriScheme :: String           -- ^ @foo:@
    , iriAuthority :: Maybe IRIAuth -- ^ @\/\/anonymous\@www.haskell.org:42@
    , iriPath :: String             -- ^ local part @\/ghc@
    , iriQuery :: String            -- ^ @?query@
    , iriFragment :: String         -- ^ @#frag@
    , prefixName :: String
      -- ^ @prefix@: the prefix name part from @prefixName:path@
    , abbrevPath :: String          -- ^ @abbrevPath@
    , iriPos :: Range
      -- ^ range attached to the IRI; this is what 'getRange' returns
    } deriving (Eq, Typeable, Ord)
-- |Type for the authority value within an IRI.
data IRIAuth = IRIAuth
    { iriUserInfo :: String -- ^ @anonymous\@@
    , iriRegName :: String  -- ^ @www.haskell.org@
    , iriPort :: String     -- ^ @:42@
    } deriving (Eq, Typeable, Ord, Show)
-- |The three kinds of IRI distinguished by the Manchester syntax.
data IRIType
    = Full        -- ^ a full IRI
    | Abbreviated -- ^ an abbreviated IRI (@prefix:abbrevPath@)
    | Simple      -- ^ a simple IRI (@abbrevPath@ only)
    deriving (Eq, Show, Typeable, Ord)
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'ReillynullIRI :: IRI
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly { iriScheme = ""
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly , iriAuthority = Nothing
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly , iriPath = ""
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly , iriQuery = ""
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly , iriFragment = ""
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly , prefixName = ""
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly , abbrevPath = ""
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly , iriPos = nullRange
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly-- |Returns Type of an IRI
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'ReillyiriType :: IRI -> IRIType
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly if (not.null) $ iriPath i then Full else
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly if null $ prefixName i then Simple else Abbreviated
-- IRI as instance of Show. Note that for security reasons, the default
-- behaviour is to suppress any iuserinfo field (see RFC3986, section 7.5).
-- This can be overridden by using iriToString directly with first
-- argument @id@ (noting that this returns a ShowS value rather than a string).
--
-- [[[Another design would be to embed the iuserinfo mapping function in
-- the IRIAuth value, with the default value suppressing iuserinfo formatting,
-- but providing a function to return a new IRI value with iuserinfo
-- data exposed by show.]]]
instance Show IRI where
    -- The precedence argument is ignored; the user-info part is rendered
    -- through 'defaultUserInfoMap'.
    showsPrec _ = iriToString defaultUserInfoMap
-- The range of an IRI is the one stored in its 'iriPos' field.
instance GetRange IRI where
    getRange = iriPos
-- An IRI is pretty-printed as the text of its 'show' rendering.
instance Pretty IRI where
    pretty = text . show
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian MaederdefaultUserInfoMap :: String -> String
fa373bc327620e08861294716b4454be8d25669fChristian MaederdefaultUserInfoMap uinf = user++newpass
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder (user,pass) = break (==':') uinf
fa373bc327620e08861294716b4454be8d25669fChristian Maeder newpass = if null pass || (pass == "@")
fa373bc327620e08861294716b4454be8d25669fChristian Maeder || (pass == ":@")
-- |Renders an IRI as a 'String' with 'id' as the user-info mapping, i.e.
-- the user-info part is passed to 'iriToString' unchanged (the 'Show'
-- instance uses 'defaultUserInfoMap' instead).
iriToStringUnsecure :: IRI -> String
iriToStringUnsecure i = iriToString id i ""
fa373bc327620e08861294716b4454be8d25669fChristian Maeder-- |Converts a Simple_ID to an IRI
fa373bc327620e08861294716b4454be8d25669fChristian MaedersimpleIdToIRI :: SIMPLE_ID -> IRI
fa373bc327620e08861294716b4454be8d25669fChristian MaedersimpleIdToIRI sid = nullIRI { abbrevPath = tokStr sid
fa373bc327620e08861294716b4454be8d25669fChristian Maeder , iriPos = tokPos sid
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder------------------------------------------------------------
fa373bc327620e08861294716b4454be8d25669fChristian Maeder-- Parse a IRI
fa373bc327620e08861294716b4454be8d25669fChristian Maeder------------------------------------------------------------
-- |Turn a string containing an RFC3987 IRI into an 'IRI'.
-- Returns 'Nothing' if the string is not a valid IRI;
-- (an absolute IRI with optional fragment identifier).
parseIRI :: String -> Maybe IRI
parseIRI = parseIRIAny iri
-- |Parse an IRI reference to an 'IRI' value.
-- Returns 'Nothing' if the string is not a valid IRI reference.
-- (an absolute or relative IRI with optional fragment identifier).
parseIRIReference :: String -> Maybe IRI
parseIRIReference = parseIRIAny iriReference
-- |Parse a relative IRI to an 'IRI' value.
-- Returns 'Nothing' if the string is not a valid relative IRI.
-- (a relative IRI with optional fragment identifier).
parseRelativeReference :: String -> Maybe IRI
parseRelativeReference = parseIRIAny irelativeRef
-- |Parse an absolute IRI to an 'IRI' value.
-- Returns 'Nothing' if the string is not a valid absolute IRI.
-- (an absolute IRI without a fragment identifier).
parseAbsoluteIRI :: String -> Maybe IRI
parseAbsoluteIRI = parseIRIAny absoluteIRI
-- |Turn a string containing a CURIE into an 'IRI'.
-- Returns 'Nothing' if the 'curie' parser rejects the string.
parseCurie :: String -> Maybe IRI
parseCurie = parseIRIAny curie
-- |Turn a string containing an IRI or a CURIE into an 'IRI'.
-- Returns 'Nothing' if the string is not a valid IRI;
-- (an absolute IRI enclosed in '<' and '>' with optional fragment identifier
-- or a CURIE).
parseIRICurie :: String -> Maybe IRI
parseIRICurie = parseIRIAny iriCurie
-- |Turn a string containing an IRI (by Manchester-syntax) into an 'IRI'.
-- Returns 'Nothing' if the string is not a valid IRI;
-- (an absolute IRI enclosed in '<' and '>' with optional fragment identifier,
-- an abbreviated IRI or a simple IRI).
parseIRIManchester :: String -> Maybe IRI
parseIRIManchester = parseIRIAny iriManchester
-- |Test if string contains a valid IRI
-- (an absolute IRI with optional fragment identifier).
isIRI :: String -> Bool
isIRI = isValidParse iri
-- |Test if string contains a valid IRI reference
-- (an absolute or relative IRI with optional fragment identifier).
isIRIReference :: String -> Bool
isIRIReference = isValidParse iriReference
-- |Test if string contains a valid relative IRI
-- (a relative IRI with optional fragment identifier).
isRelativeReference :: String -> Bool
isRelativeReference = isValidParse irelativeRef
-- |Test if string contains a valid absolute IRI
-- (an absolute IRI without a fragment identifier).
isAbsoluteIRI :: String -> Bool
isAbsoluteIRI = isValidParse absoluteIRI
-- |Test if string contains a valid IRI or CURIE
-- (an absolute IRI enclosed in '<' and '>' with optional fragment identifier
-- or a CURIE).
isIRICurie :: String -> Bool
isIRICurie = isValidParse iriCurie
-- |Test if string contains a valid CURIE
-- (i.e. whether the 'curie' parser accepts it).
isCurie :: String -> Bool
isCurie = isValidParse curie
-- |Test if string contains a valid IRI by Manchester-syntax
-- (an absolute IRI enclosed in '<' and '>' with optional fragment identifier,
-- an abbreviated IRI or a simple IRI).
isIRIManchester :: String -> Bool
isIRIManchester = isValidParse iriManchester
-- |Test if string contains a valid IPv6 address
-- (i.e. whether the 'ipv6address' parser accepts it).
isIPv6address :: String -> Bool
isIPv6address = isValidParse ipv6address
-- |Test if string contains a valid IPv4 address
-- (i.e. whether the 'ipv4address' parser accepts it).
isIPv4address :: String -> Bool
isIPv4address = isValidParse ipv4address
-- Helper function for turning a string into an IRI.
-- Runs @parser@ on @iristr@ through 'parseAll' (with an empty source name);
-- a parse error is discarded and reported as 'Nothing'.
parseIRIAny :: IRIParserDirect IRI -> String -> Maybe IRI
parseIRIAny parser iristr =
    either (const Nothing) Just (parseAll parser "" iristr)
-- Helper function to test a string match to a parser.
-- Runs @parser@ on @iristr@ through 'parseAll' (with an empty source name)
-- and reports only whether it succeeded; the parsed value is ignored.
isValidParse :: IRIParserDirect a -> String -> Bool
isValidParse parser iristr =
    either (const False) (const True) (parseAll parser "" iristr)
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyparseAll :: IRIParserDirect a -> String -> String -> Either ParseError a
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyparseAll parser filename iristr = parse newparser filename iristr
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly do { res <- parser
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly------------------------------------------------------------
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- IRI parser body based on Parsec elements and combinators
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly------------------------------------------------------------
-- Parser types.

-- | Character parser with no user state (@()@); this is the parser type
-- accepted by 'parseIRIAny', 'isValidParse' and 'parseAll'.
type IRIParserDirect a = GenParser Char () a

-- | Character parser over an arbitrary user state @st@.
type IRIParser st a = GenParser Char st a
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- RFC3986, section 2.1
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- Parse and return a 'pct-encoded' sequence
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reillyescaped :: IRIParser st String
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly do { char '%'
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly ; h1 <- hexDigitChar
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly ; h2 <- hexDigitChar
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly ; return $ ['%',h1,h2]
-- RFC3986, section 2.2
--
-- |Returns 'True' if the character is a \"reserved\" character in a
-- IRI. To include a literal instance of one of these characters in a
-- component of a IRI, it must be escaped.
--
-- A character is reserved when it is a gen-delim ('isGenDelims') or a
-- sub-delim ('isSubDelims').
isReserved :: Char -> Bool
isReserved c = isGenDelims c || isSubDelims c
-- |Returns 'True' if the character is one of the \"gen-delims\"
-- @: \/ ? # [ ] \@@.
isGenDelims :: Char -> Bool
isGenDelims = (`elem` ":/?#[]@")
-- |Returns 'True' if the character is one of the \"sub-delims\"
-- @! $ & ' ( ) * + , ; =@.
isSubDelims :: Char -> Bool
isSubDelims = (`elem` "!$&'()*+,;=")
-- Parses a single character accepted by 'isSubDelims' and returns it as a
-- one-character string.
subDelims :: IRIParser st String
subDelims = fmap (: []) (satisfy isSubDelims)
-- RFC3986, section 2.3
--
-- |Returns 'True' if the character is an \"unreserved\" character in
-- a IRI. These characters do not need to be escaped in a IRI. The
-- only characters allowed in a IRI are either \"reserved\",
-- \"unreserved\", or an escape sequence (@%@ followed by two hex digits).
--
-- Accepted are characters satisfying 'isAlphaNumChar', the characters
-- @- _ . ~@, and characters satisfying 'isUcsChar'.
isUnreserved :: Char -> Bool
isUnreserved c = isAlphaNumChar c || c `elem` "-_.~" || isUcsChar c
-- Parses a single character accepted by 'isUnreserved' and returns it as a
-- one-character string.
iunreservedChar :: IRIParser st String
iunreservedChar = fmap (: []) (satisfy isUnreserved)
648fe1220044aac847acbdfbc4155af5556063ebChristian MaedeririWithPos :: IRIParser st IRI -> IRIParser st IRI
648fe1220044aac847acbdfbc4155af5556063ebChristian MaedeririWithPos parser = do
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder return $ i {iriPos = appRange (Range [p,q]) $ iriPos i}
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly{- BEGIN CURIE -}
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder-- |Parses an absolute IRI enclosed in '<', '>' or a CURIE
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian MaedeririCurie :: IRIParser st IRI
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- |Parses an absolute or relative IRI enclosed in '<', '>' or a CURIE
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly-- see @iriReference@
33bdce26495121cdbce30331ef90a1969126a840Liam O'ReillyiriReferenceCurie :: IRIParser st IRI
bcd914850de931848b86d7728192a149f9c0108bChristian MaedeririReferenceCurie = do
bcd914850de931848b86d7728192a149f9c0108bChristian Maeder i <- iri <|> irelativeRef
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly-- |Parses a CURIE
648fe1220044aac847acbdfbc4155af5556063ebChristian Maedercurie :: IRIParser st IRI
648fe1220044aac847acbdfbc4155af5556063ebChristian Maedercurie = iriWithPos $ do
57221209d11b05aa0373cc3892d5df89ba96ebf9Christian Maeder c <- string ":"
bcd914850de931848b86d7728192a149f9c0108bChristian Maeder i <- reference
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder return $ i { prefixName = c }
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly pn <- try (do
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder c <- string ":"
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder return $ n++c
bcd914850de931848b86d7728192a149f9c0108bChristian Maeder i <- reference
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly return $ i { prefixName = pn }
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder r <- reference
05cc55892e6c93bdd7b9c3f100ab1bb65fe6a21eLiam O'Reillyreference :: IRIParser st IRI
05cc55892e6c93bdd7b9c3f100ab1bb65fe6a21eLiam O'Reillyreference = iriWithPos $ do
-- http://www.w3.org/TR/2009/REC-xml-names-20091208/#NT-NCName
-- fullIRI := an IRI as defined in [RFC 3987], enclosed in a pair of < (U+3C) and > (U+3E) characters
-- prefixName := a finite sequence of characters matching the PNAME_NS production of [SPARQL] and not matching any of the keyword terminals of the syntax
-- simpleIRI := a finite sequence of characters matching the PN_LOCAL production of [SPARQL] and not matching any of the keyword terminals of the syntax
-- [[[Above was a comment originally in GHC Network/IRI.hs:
-- Single-character parsers: each accepts exactly one character satisfying
-- the named predicate (isAlphaChar, isDigitChar, isHexDigitChar).
-- NOTE(review): no type signatures are visible next to these bindings here;
-- confirm they exist in the full file, since these are simple pattern
-- bindings and would otherwise be subject to the monomorphism restriction.
alphaChar = satisfy isAlphaChar -- or: Parsec.letter ?
digitChar = satisfy isDigitChar -- or: Parsec.digit ?
hexDigitChar = satisfy isHexDigitChar -- or: Parsec.hexDigit ?
-- > "http://example.com/Root/sub1/name2#frag"
-- > `relativeFrom` "http://example.com/Root/sub2/name2#frag"
-- > == "../sub1/name2#frag"
-- (i.e. results always ends with '/')
-- documentation and/or other materials provided with the distribution.