mirror of
https://github.com/jgm/pandoc.git
synced 2025-04-22 04:42:51 +03:00
235 lines
7.8 KiB
Haskell
235 lines
7.8 KiB
Haskell
{-# LANGUAGE FlexibleInstances #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
{- |
|
|
Module : Text.Pandoc.XML.Light.Output
|
|
Copyright : Copyright (C) 2007 Galois, Inc., 2021-2024 John MacFarlane
|
|
License : GNU GPL, version 2 or above
|
|
|
|
|
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
|
Stability : alpha
|
|
Portability : portable
|
|
|
|
This code is based on code from xml-light, released under the BSD3 license.
|
|
We use a text Builder instead of ShowS.
|
|
-}
|
|
module Text.Pandoc.XML.Light.Output
|
|
( -- * Replacement for xml-light's Text.XML.Output
|
|
ppTopElement
|
|
, ppElement
|
|
, ppContent
|
|
, ppcElement
|
|
, ppcContent
|
|
, showTopElement
|
|
, showElement
|
|
, showContent
|
|
, useShortEmptyTags
|
|
, defaultConfigPP
|
|
, ConfigPP(..)
|
|
) where
|
|
|
|
import Data.Text (Text)
|
|
import qualified Data.Text as T
|
|
import qualified Data.Text.Lazy as TL
|
|
import Data.Text.Lazy.Builder (Builder, singleton, fromText, toLazyText)
|
|
import Text.Pandoc.XML.Light.Types
|
|
|
|
--
|
|
-- duplicates functinos from Text.XML.Output
|
|
--
|
|
|
|
-- | The XML 1.0 header
|
|
xmlHeader :: Text
|
|
xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
data ConfigPP = ConfigPP
|
|
{ shortEmptyTag :: QName -> Bool
|
|
, prettify :: Bool
|
|
}
|
|
|
|
-- | Default pretty orinting configuration.
|
|
-- * Always use abbreviate empty tags.
|
|
defaultConfigPP :: ConfigPP
|
|
defaultConfigPP = ConfigPP { shortEmptyTag = const True
|
|
, prettify = False
|
|
}
|
|
|
|
-- | The predicate specifies for which empty tags we should use XML's
|
|
-- abbreviated notation <TAG />. This is useful if we are working with
|
|
-- some XML-ish standards (such as certain versions of HTML) where some
|
|
-- empty tags should always be displayed in the <TAG></TAG> form.
|
|
useShortEmptyTags :: (QName -> Bool) -> ConfigPP -> ConfigPP
|
|
useShortEmptyTags p c = c { shortEmptyTag = p }
|
|
|
|
|
|
-- | Specify if we should use extra white-space to make document more readable.
|
|
-- WARNING: This adds additional white-space to text elements,
|
|
-- and so it may change the meaning of the document.
|
|
useExtraWhiteSpace :: Bool -> ConfigPP -> ConfigPP
|
|
useExtraWhiteSpace p c = c { prettify = p }
|
|
|
|
-- | A configuration that tries to make things pretty
|
|
-- (possibly at the cost of changing the semantics a bit
|
|
-- through adding white space.)
|
|
prettyConfigPP :: ConfigPP
|
|
prettyConfigPP = useExtraWhiteSpace True defaultConfigPP
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
|
|
-- | Pretty printing renders XML documents faithfully,
|
|
-- with the exception that whitespace may be added\/removed
|
|
-- in non-verbatim character data.
|
|
ppTopElement :: Element -> Text
|
|
ppTopElement = ppcTopElement prettyConfigPP
|
|
|
|
-- | Pretty printing elements
|
|
ppElement :: Element -> Text
|
|
ppElement = ppcElement prettyConfigPP
|
|
|
|
-- | Pretty printing content
|
|
ppContent :: Content -> Text
|
|
ppContent = ppcContent prettyConfigPP
|
|
|
|
-- | Pretty printing renders XML documents faithfully,
|
|
-- with the exception that whitespace may be added\/removed
|
|
-- in non-verbatim character data.
|
|
ppcTopElement :: ConfigPP -> Element -> Text
|
|
ppcTopElement c e = T.unlines [xmlHeader,ppcElement c e]
|
|
|
|
-- | Pretty printing elements
|
|
ppcElement :: ConfigPP -> Element -> Text
|
|
ppcElement c = TL.toStrict . toLazyText . ppElementS c mempty
|
|
|
|
-- | Pretty printing content
|
|
ppcContent :: ConfigPP -> Content -> Text
|
|
ppcContent c = TL.toStrict . toLazyText . ppContentS c mempty
|
|
|
|
ppcCData :: ConfigPP -> CData -> Text
|
|
ppcCData c = TL.toStrict . toLazyText . ppCDataS c mempty
|
|
|
|
type Indent = Builder
|
|
|
|
-- | Pretty printing content using ShowT
|
|
ppContentS :: ConfigPP -> Indent -> Content -> Builder
|
|
ppContentS c i x = case x of
|
|
Elem e -> ppElementS c i e
|
|
Text t -> ppCDataS c i t
|
|
CRef r -> showCRefS r
|
|
|
|
ppElementS :: ConfigPP -> Indent -> Element -> Builder
|
|
ppElementS c i e = i <> tagStart (elName e) (elAttribs e) <>
|
|
(case elContent e of
|
|
[] | "?" `T.isPrefixOf` qName name -> fromText " ?>"
|
|
| shortEmptyTag c name -> fromText " />"
|
|
[Text t] -> singleton '>' <> ppCDataS c mempty t <> tagEnd name
|
|
cs -> singleton '>' <> nl <>
|
|
mconcat (map ((<> nl) . ppContentS c (sp <> i)) cs) <>
|
|
i <> tagEnd name
|
|
where (nl,sp) = if prettify c then ("\n"," ") else ("","")
|
|
)
|
|
where name = elName e
|
|
|
|
ppCDataS :: ConfigPP -> Indent -> CData -> Builder
|
|
ppCDataS c i t = i <> if cdVerbatim t /= CDataText || not (prettify c)
|
|
then showCDataS t
|
|
else foldr cons mempty (T.unpack (showCData t))
|
|
where cons :: Char -> Builder -> Builder
|
|
cons '\n' ys = singleton '\n' <> i <> ys
|
|
cons y ys = singleton y <> ys
|
|
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- | Adds the <?xml?> header.
|
|
showTopElement :: Element -> Text
|
|
showTopElement c = xmlHeader <> showElement c
|
|
|
|
showContent :: Content -> Text
|
|
showContent = ppcContent defaultConfigPP
|
|
|
|
showElement :: Element -> Text
|
|
showElement = ppcElement defaultConfigPP
|
|
|
|
showCData :: CData -> Text
|
|
showCData = ppcCData defaultConfigPP
|
|
|
|
-- Note: crefs should not contain '&', ';', etc.
|
|
showCRefS :: Text -> Builder
|
|
showCRefS r = singleton '&' <> fromText r <> singleton ';'
|
|
|
|
-- | Convert a text element to characters.
|
|
showCDataS :: CData -> Builder
|
|
showCDataS cd =
|
|
case cdVerbatim cd of
|
|
CDataText -> escStr (cdData cd)
|
|
CDataVerbatim -> fromText "<![CDATA[" <> escCData (cdData cd) <>
|
|
fromText "]]>"
|
|
CDataRaw -> fromText (cdData cd)
|
|
|
|
--------------------------------------------------------------------------------
|
|
escCData :: Text -> Builder
|
|
escCData t
|
|
| "]]>" `T.isPrefixOf` t =
|
|
fromText "]]]]><![CDATA[>" <> fromText (T.drop 3 t)
|
|
escCData t
|
|
= case T.uncons t of
|
|
Nothing -> mempty
|
|
Just (c,t') -> singleton c <> escCData t'
|
|
|
|
escChar :: Char -> Builder
|
|
escChar c = case c of
|
|
'<' -> fromText "<"
|
|
'>' -> fromText ">"
|
|
'&' -> fromText "&"
|
|
'"' -> fromText """
|
|
-- we use ' instead of ' because IE apparently has difficulties
|
|
-- rendering ' in xhtml.
|
|
-- Reported by Rohan Drape <rohan.drape@gmail.com>.
|
|
'\'' -> fromText "'"
|
|
_ -> singleton c
|
|
|
|
{- original xml-light version:
|
|
-- NOTE: We escape '\r' explicitly because otherwise they get lost
|
|
-- when parsed back in because of then end-of-line normalization rules.
|
|
_ | isPrint c || c == '\n' -> singleton c
|
|
| otherwise -> showText "&#" . showsT oc . singleton ';'
|
|
where oc = ord c
|
|
-}
|
|
|
|
escStr :: Text -> Builder
|
|
escStr cs = if T.any needsEscape cs
|
|
then mconcat (map escChar (T.unpack cs))
|
|
else fromText cs
|
|
where
|
|
needsEscape '<' = True
|
|
needsEscape '>' = True
|
|
needsEscape '&' = True
|
|
needsEscape '"' = True
|
|
needsEscape '\'' = True
|
|
needsEscape _ = False
|
|
|
|
tagEnd :: QName -> Builder
|
|
tagEnd qn = fromText "</" <> showQName qn <> singleton '>'
|
|
|
|
tagStart :: QName -> [Attr] -> Builder
|
|
tagStart qn as = singleton '<' <> showQName qn <> as_str
|
|
where as_str = if null as
|
|
then mempty
|
|
else mconcat (map showAttr as)
|
|
|
|
showAttr :: Attr -> Builder
|
|
showAttr (Attr qn v) = singleton ' ' <> showQName qn <>
|
|
singleton '=' <>
|
|
singleton '"' <> escStr v <> singleton '"'
|
|
|
|
showQName :: QName -> Builder
|
|
showQName q =
|
|
case qPrefix q of
|
|
Nothing -> fromText (qName q)
|
|
Just p -> fromText p <> singleton ':' <> fromText (qName q)
|