Difference between revisions of "HXT/Practical/Ebay1"

From HaskellWiki
< HXT‎ | Practical
Jump to: navigation, search
m
Line 3: Line 3:
 
module Main where
 
module Main where
 
 
import Text.XML.HXT.Arrow hiding (deep)
+
import Text.XML.HXT.Core hiding (deep)
 
import Data.List (nub,sort,isPrefixOf,transpose,groupBy)
 
import Data.List (nub,sort,isPrefixOf,transpose,groupBy)
   
Line 32: Line 32:
 
dst = "feedback.report.html"
 
dst = "feedback.report.html"
   
unicoding= (a_encoding, unicodeString)
 
  +
main = runX ( readDocument [ withParseHTML yes
nowarnings = (a_issue_warnings,v_0)
 
  +
, withInputEncoding unicodeString
 
  +
, withWarnings no
main = runX ( readDocument [(a_parse_html, v_1),unicoding,nowarnings] src
 
  +
] src
 
>>> root [] [deep getFeedbackAndValue]
 
>>> root [] [deep getFeedbackAndValue]
>>> writeDocument [(a_indent,v_1),unicoding,nowarnings] dst
+
>>> writeDocument
  +
[ withIndent yes
  +
, withOutputEncoding unicodeString
  +
] dst
 
)
 
)
 
</hask>
 
</hask>

Revision as of 17:54, 24 January 2011

{-# LANGUAGE Arrows, NoMonomorphismRestriction, ParallelListComp #-} module Main where import Text.XML.HXT.Core hiding (deep) import Data.List (nub,sort,isPrefixOf,transpose,groupBy) deep f = f `orElse` (getChildren >>> deep f) -- deep redefinition to allow a broader signature split "" = [] split xs = a : split (drop 1 b) where (a,b) = break (=='/') xs through = (getChildren >>>) . foldr1 (/>). map hasName . split -- contains = (getChildren >>>). foldr1 (</). (map hasName) mkReport = mkelem "p" [] . map constA {- The datas we are munging is unstructured - Every feedback is spanned on two contigous rows of a big table - We cannot catch the all data in a match, so we use listA to have the two single-row lists - and then zip them to rebuild the data. -} getFeedbackAndValue = hasName "table" >>> hasAttrValue "class" (=="fbOuter") /> hasName "tbody" >>> proc table -> do feedbacks <- listA (through "tr/td/img") -< table values <- listA (through "tr/td" /> hasText (isPrefixOf "EUR")) -< table catA (map mkReport $ transpose [values,feedbacks]) -<< () src = "feedback.example.html" dst = "feedback.report.html" main = runX ( readDocument [ withParseHTML yes , withInputEncoding unicodeString , withWarnings no ] src >>> root [] [deep getFeedbackAndValue] >>> writeDocument [ withIndent yes , withOutputEncoding unicodeString ] dst )

Note that the use of groupBy in defining 'split' abuses the implementation details of 'groupBy' which are not guaranteed by its definition in the Haskell 98 standard report.