Here's some Haskell code that does the job. I won't claim that it will parse any HTML, but it should be adequate. Also, my code does not check for legal HTML. That's your job. I did a little testing, but there may be bugs in it. If you find any or have any comments/suggestions just post a reply.
Main.hs
Code:
module Main (
) where
import Prelude hiding ((.))
import System.Environment (getArgs)
import ApplicativeParsec
import Control.Monad.State
import Data.List (intercalate)
import Data.Char (toLower, toUpper)
-----------------------------------------------------------
main :: IO ()
main = do
args <- getArgs
case args of
[] -> putStrLn "use: htmlToDom inputFile [outputFile]"
file : dest -> let
out = case dest of
[] -> putStrLn
file' : _ -> writeFile file'
in readFile file >>= out . either show show . parse (spaces *> tag) ""
-----------------------------------------------------------
infixr 9 .
(.) :: (Functor f) => (a -> b) -> f a -> f b
(.) = fmap
mapFst :: (a -> b) -> (a, c) -> (b, c)
mapFst f (x, y) = (f x, y)
compose :: [a -> a] -> (a -> a)
compose = foldr (.) id
-----------------------------------------------------------
type Name = String
type Attribute = (Name, String)
type Text = String
data Tag = Tag Name [Attribute] [Either Tag Text]
instance Show Tag where
showsPrec _ tag = snd $ evalState (showsTag tag) 0
where
ss = showString
showsTag (Tag name attrs cs) = do
num <- get
modify (+1)
(cIdents, innerDefs) <- unzip . mapM showsChild cs
let ident = '$' : show num
decl = ss "var " . ss ident . ss " = document.createElement(" . shows name . ss ");\n"
appendChildren = compose $ map (
\cIdent -> let
arg = either ss (\txt -> ss "document.createTextNode(" . shows txt . ss ")") cIdent
in ss ident . ss ".appendChild(" . arg . ss ");\n"
) cIdents
setAttrs = compose $ map (
\(name, val) -> ss ident . ss ".setAttribute(" . shows name . ss ", " . shows val . ss ");\n"
) attrs
return (ident, compose innerDefs . decl . setAttrs . appendChildren)
showsChild (Left tag) = mapFst Left . showsTag tag
showsChild (Right txt) = return (Right txt, id)
-----------------------------------------------------------
ws :: [Char]
ws = " \v\f\t\r\n"
optionalWs :: CharParser st String
optionalWs = option "" $ (: []) . oneOf ws
ichar :: Char -> CharParser st Char
ichar c = oneOf [toUpper c, toLower c]
istring :: String -> CharParser st String
istring = mapM ichar
brackets :: CharParser st a -> CharParser st a
brackets p = char '<' *> p <* char '>'
ident :: CharParser st Name
ident = many1 alphaNum <* spaces
tag :: CharParser () Tag
tag = do
(tagName, attrs, slash) <- brackets $ liftM3 (,,) ident attrs $ not . null . option "" (string "/")
let mkTag = return . Tag tagName attrs
if slash
then mkTag []
else (children >>= mkTag) <* istring tagName <* char '>'
where
children = manyTill (Left . tag <|> Right . text) $ try $ string "</"
text :: CharParser st Text
text = do
space1 <- optionalWs
spaces
text <- intercalate " " . sepBy (many (noneOf $ '<' : ws)) (many1 space)
space2 <- optionalWs
spaces
return $ space1 ++ text ++ space2
attr :: CharParser st Attribute
attr = do
name <- many1 alphaNum
char '='
quote <- oneOf "'\""
value <- manyTill anyChar $ char quote
return (name, value)
attrs :: CharParser st [Attribute]
attrs = sepEndBy (try attr) (many1 space)
ApplicativeParsec.hs
Code:
module ApplicativeParsec (
module Control.Applicative
, module Text.ParserCombinators.Parsec
) where
import Control.Applicative
import Control.Monad (MonadPlus(..), ap)
import Text.ParserCombinators.Parsec hiding (many, optional, (<|>))
instance Applicative (GenParser s a) where
pure = return
(<*>) = ap
instance Alternative (GenParser s a) where
empty = mzero
(<|>) = mplus
Bookmarks