||----------------------------------------------------------------------||
||                                                                      ||
||	Section 7.5: Creating an index					||
||                                                                      ||
||	(c) Simon Thompson, 1995.					||
||                                                                      ||
||----------------------------------------------------------------------||


||----------------------------------------------------------------------|| 
||	 The goal of this section; defining an index automatically.	||
||----------------------------------------------------------------------|| 

|| makeIndex takes a whole document and returns a list of index entries,
|| each entry pairing a list of numbers with a word.  Only the type is
|| specified here; the defining equation appears further down the script.
makeIndex :: doc -> [ ([num],word) ]

||----------------------------------------------------------------------|| 
||	Type synonyms used.						||
||----------------------------------------------------------------------|| 

doc  == string          || a whole document, held as a single string
line == string          || one line taken from a document
word == string          || one word taken from a line

||----------------------------------------------------------------------|| 
||	The function is a composition of the following components.	||
||----------------------------------------------------------------------|| 

|| The index is built in stages; each local name below holds the result of
|| one stage, and the stages are listed from the last one applied back to
|| the first.
makeIndex text
  = shorten merged                       ||   [([num],word)] -> [([num],word)]
    where
    merged   = amalgamate singles        ||   [([num],word)] -> [([num],word)]
    singles  = makeLists sorted          ||   [(num,word)]   -> [([num],word)]
    sorted   = sortLs numbered           ||   [(num,word)]   -> [(num,word)]
    numbered = allNumWords lineNums      ||   [(num,line)]   -> [(num,word)]
    lineNums = numLines linels           ||   [line]         -> [(num,line)]
    linels   = splitup text              ||   doc            -> [line]

||----------------------------------------------------------------------|| 
||	Split a document into a list of lines.				||
||									||
||	Its definition is an exercise.					||
||----------------------------------------------------------------------|| 

|| Type specification only: no defining equation for splitup appears in
|| this part of the script, since writing it is set as an exercise.
splitup :: doc -> [line]

||----------------------------------------------------------------------|| 
||	Pair each line with its line number.				||
||----------------------------------------------------------------------|| 

numLines :: [line] -> [ ( num , line ) ]

|| Pair every line with its position in the list, counting from 1.
|| The numbers come from the unbounded list [1..]; zip2 stops as soon as
|| the lines run out, so there is no need to measure the length of the
|| list first (which would traverse it a second time and would rule out
|| lazily produced input).  An empty list of lines gives an empty result.
numLines linels
  = zip2 [1..] linels

||----------------------------------------------------------------------|| 
||	Split the line component into words.				||
||----------------------------------------------------------------------|| 

numWords :: ( num , line ) -> [ ( num , word ) ]

|| Given a numbered line, split the line into words with splitWords and
|| tag every resulting word with that same line number.
numWords (number , lin)
  = [ (number , wd) | wd <- splitWords lin ]

||----------------------------------------------------------------------|| 
||	Include the definition of splitWords				||
||----------------------------------------------------------------------|| 

%include "../Chapter4/Section4-7" -word -line

||----------------------------------------------------------------------|| 
||	Split every line component into words, and join the results.	||
||----------------------------------------------------------------------|| 

allNumWords :: [ ( num , line ) ] -> [ ( num , word ) ]

|| Apply numWords to each numbered line in turn and join the resulting
|| lists, in order, into one list of (number, word) pairs.
allNumWords numbered
  = concat [ numWords nl | nl <- numbered ]

||----------------------------------------------------------------------|| 
||	Example text.							||
||----------------------------------------------------------------------|| 

|| A small sample document: three lines separated by newline characters,
|| in which "cat" and "dog" each occur on two different lines.
exampleText = "cat dog\nbat dog\ncat"

||----------------------------------------------------------------------|| 
||	Comparing word , line number pairs.				||
||	First look at the word ordering; two pairs with the same	||
||	word component have their numeric parts compared.		||
||----------------------------------------------------------------------|| 

compare :: ( num , word ) -> ( num , word ) -> bool

|| Strict "comes before" test on (number, word) pairs.  The words decide
|| the outcome unless they are equal, in which case the numbers decide.
|| Two identical pairs are therefore NOT related in either direction.
compare ( n1 , w1 ) ( n2 , w2 )
  = True     , if w1 < w2
  = n1 < n2  , if w1 = w2
  = False    , otherwise

||----------------------------------------------------------------------|| 
||	Sort the list of word , line number pairs using the ordering	||
||	given by compare.						||
||----------------------------------------------------------------------|| 

sortLs :: [ ( num , word ) ] -> [ ( num , word ) ]

|| Quicksort driven by compare: the head of the list is the pivot, the
|| remaining pairs are divided into those which come before the pivot and
|| those which come after it, and each part is sorted recursively.
|| Because compare is a strict ordering, a pair equal to the pivot falls
|| into neither part, so repeated pairs appear only once in the result.
sortLs [] = []
sortLs (pivot:others)
  = sortLs lower ++ (pivot : sortLs higher)
    where
    lower      = filter precedes others
    higher     = filter (compare pivot) others
    precedes b = compare b pivot

||----------------------------------------------------------------------|| 
||	Make the number part of a pair into a list with one element.	||
||----------------------------------------------------------------------|| 

makeLists ::  [ (num,word) ] -> [ ([num],word) ]

|| Wrap the number of every pair in a one-element list, leaving the word
|| and the order of the pairs unchanged.
makeLists pairs
  = [ ( [n] , st ) | ( n , st ) <- pairs ]

||----------------------------------------------------------------------|| 
||	Amalgamate entries containing the same word together.		||
||----------------------------------------------------------------------|| 

|| Merge runs of adjacent entries which carry the same word into a single
|| entry whose number list is the concatenation of the lists in the run.
|| When the first two entries agree, the merged entry is put back at the
|| front of the list and examined again, so a run of any length collapses
|| into one entry.  Lists with fewer than two entries are returned as is.
amalgamate [] = []
amalgamate [entry] = [entry]
amalgamate ((ns,wd):(ns',wd'):others)
  = amalgamate ((ns++ns',wd):others)          , if wd = wd'
  = (ns,wd) : amalgamate ((ns',wd'):others)   , otherwise

|| Worked example: "bat" differs from the "cat" that follows it, so the
|| first entry is kept unchanged; the two adjacent "cat" entries are then
|| merged.  The value is  [ ([2],"bat") , ([1,3],"cat") ].
examAmalgamate = amalgamate [ ([2],"bat") , ([1],"cat") , ([3],"cat") ]

||----------------------------------------------------------------------|| 
||	Remove words of less than five letters.				||
||----------------------------------------------------------------------|| 

|| Keep only the entries whose word has at least five characters; the
|| entries which survive stay in their original order.
shorten entries
  = [ (nl,wd) | (nl,wd) <- entries ; #wd >= 5 ]

||----------------------------------------------------------------------|| 
||	Erroneous definition of amalgamate.				||
||----------------------------------------------------------------------|| 

|| Deliberately faulty variant, kept for comparison with amalgamate.
|| In the `otherwise' case the merged entry is placed straight into the
|| result and the recursion carries on with `rest' alone, so the merged
|| entry is never compared with whatever follows it: a third adjacent
|| entry for the same word is left as a separate entry.
|| Only this one equation is given here, so an argument with fewer than
|| two entries matches no equation of amalgamate'.
amalgamate' ((l1,w1):(l2,w2):rest)
  = (l1,w1) : amalgamate' ((l2,w2):rest)  , if w1 ~= w2
  = (l1++l2,w1) : amalgamate' rest        , otherwise

||----------------------------------------------------------------------|| 
||	Type declaration of exercise function.				||
||----------------------------------------------------------------------|| 

|| Type specification only: printIndex takes a list of index entries (a
|| list of numbers paired with a word) and returns a string.  No defining
|| equation is given in this part of the script; it is left as an exercise.
printIndex :: [ ([num],word) ] -> string



