Erlang Programming Exercise: 3-9
In exercise three-nine you are to read some text into a raw document (list of lines) and then into a document (list of words). With this data you generate an index for the words to which lines they were on:
{ "Erlang", [1,1,2,4,5,6,6,98,100,102,102] }
After you have the word to line index, pretty print the index such that the example above would result in:
"Erlang 1-2,4-6,98,100,102"
I decided to start using more of the library functions in order to become more familiar with them. I used the lists/foldr function more than anything else since it lets you visit every element in a list and maintain an accumulator. It was particularly useful in situations where I didn't want to do any pattern matching on the argument, like when I broke the lines into words. It was not useful when I tried to generate the tuples needed for the pretty printed line numbers, this could have been done using a switch statement but I found the code more elegant using the recursive pattern matching.
Instead of copy/pasting a chunk of text into erl every time I wanted to test it, I made the indexer load a file from disk. I also aligned the line numbers, having them all start on the same column made a big difference visually. Here's an example run:
1> threenine:index("./threenine.erl").
0 70
1 2, 30, 32, 37, 41, 57, 80
2 64
binary 77
bs 71
case 5, 22, 37
cleannumbers 46, 51-60
close 8, 14
concat 87-88
...
updatedlinenumbers 46-47
updatedwordindex 41-42
updatewordindexlinenumbers 17, 44
when 57
Here is the code:
-module(threenine).
-export([index/1]).
index(Filename) ->
case file:open(Filename, read) of
{ok, IoDevice} ->
indexFile(IoDevice),
file:close(IoDevice);
{error, Reason} -> {error, Reason}
end.
indexFile(IoDevice) ->
RawDocument = loadRawDocument([], IoDevice),
file:close(IoDevice),
Document = loadDocument(RawDocument),
WordIndex1 = indexWords(Document),
WordIndex2 = updateWordIndexLineNumbers(WordIndex1),
prettyPrintWordIndex(WordIndex2),
ok.
loadRawDocument(Lines, IoDevice) ->
case file:read_line(IoDevice) of
{ok, Line} -> loadRawDocument([Line|Lines], IoDevice);
eof -> lists:reverse(Lines)
end.
loadDocument(RawDocument) ->
MakeDoc = fun(Line, {LineNo, Document}) ->
Tokens = string:tokens(Line, "\r\n\t {,}->:;[]().|\\=/~\"'+_"),
{LineNo+1, [{LineNo, Tokens}|Document]}
end,
{_LineNumber, Document} = lists:foldl(MakeDoc, {1, []}, RawDocument),
lists:reverse(Document).
indexWordsFun(Word, {LineNo, WordIndex}) ->
LowerWord = string:to_lower(Word),
LineNumbersList = case lists:keyfind(LowerWord, 1, WordIndex) of
{LowerWord, LineNumbers} -> LineNumbers;
false -> []
end,
UpdatedWordIndex = lists:keystore(LowerWord, 1, WordIndex, {LowerWord, [LineNo|LineNumbersList]}),
{LineNo, UpdatedWordIndex}.
updateWordIndexLineNumbers(WordIndex) ->
Update = fun({Word, LineNumbers}, NewWordIndex) ->
UpdatedLineNumbers = cleanNumbers(LineNumbers, []),
[{Word, UpdatedLineNumbers}|NewWordIndex]
end,
lists:foldr(Update, [], WordIndex).
cleanNumbers([], CleanNumbers) ->
CleanNumbers;
cleanNumbers([Number|LineNumbers], []) ->
cleanNumbers(LineNumbers, [{Number, Number}]);
cleanNumbers([Number|LineNumbers], [{_Start, Number}|_Tail]=NewNumbers) ->
cleanNumbers(LineNumbers, NewNumbers);
cleanNumbers([Number|LineNumbers], [{Start, End}|Tail]) when Number == End+1 ->
cleanNumbers(LineNumbers, [{Start, Number}|Tail]);
cleanNumbers([Number|LineNumbers], NewNumbers) ->
cleanNumbers(LineNumbers, [{Number, Number}|NewNumbers]).
indexWords(Document) ->
IndexWords = fun({LineNo, Words}, WordIndex) ->
{LineNo, WordIndex2} = lists:foldr(fun indexWordsFun/2, {LineNo, WordIndex}, Words),
WordIndex2
end,
lists:foldr(IndexWords, [], Document).
makePrintFormat(WordIndex) ->
MaxLength = lists:foldr(fun({Word,_Lines}, Max) -> erlang:max(string:len(Word), Max) end, 0, WordIndex),
io_lib:format("~~-~Bs ~~s~~n", [MaxLength]).
prettyPrintWordIndex(WordIndex) ->
PrintFormat = makePrintFormat(WordIndex),
PrintWord = fun({Word, LineNumbers}, Format) ->
PrettyLineNumbers = makePrettyLineNumbers(LineNumbers),
io:format(list_to_binary(PrintFormat), [Word, PrettyLineNumbers]),
Format
end,
lists:foldr(PrintWord, PrintFormat, lists:reverse(lists:keysort(1,WordIndex))).
getPrettyLineNumbers(Result, []) ->
Result;
getPrettyLineNumbers(Result, [{Number,Number}|LineNumbers]) ->
getPrettyLineNumbers([integer_to_list(Number)|Result], LineNumbers);
getPrettyLineNumbers(Result, [{Start,End}|LineNumbers]) ->
Range1=string:concat(integer_to_list(Start), "-"),
Range2=string:concat(Range1, integer_to_list(End)),
getPrettyLineNumbers([Range2|Result], LineNumbers).
makePrettyLineNumbers(LineNumbers) ->
RangedLineNumbers = getPrettyLineNumbers([], LineNumbers),
string:join(RangedLineNumbers, ", ").
Cheers,
-Halzy