Benjamin Halsted // [bgh] todo, add something clever here.

7Apr/100

Erlang Programming Exercise: 3-9

In exercise three-nine you are to read some text into a raw document (list of lines) and then into a document (list of words). With this data you generate an index for the words to which lines they were on:

{ "Erlang", [1,1,2,4,5,6,6,98,100,102,102] }

After you have the word to line index, pretty print the index such that the example above would result in:

"Erlang 1-2,4-6,98,100,102"

I decided to start using more of the library functions in order to become more familiar with them. I used the lists/foldr function more than anything else since it lets you visit every element in a list and maintain an accumulator. It was particularly useful in situations where I didn't want to do any pattern matching on the argument, like when I broke the lines into words. It was not useful when I tried to generate the tuples needed for the pretty printed line numbers, this could have been done using a switch statement but I found the code more elegant using the recursive pattern matching.

Instead of copy/pasting a chunk of text into erl every time I wanted to test it, I made the indexer load a file from disk. I also aligned the line numbers, having them all start on the same column made a big difference visually. Here's an example run:

1> threenine:index("./threenine.erl").
0                          70
1                          2, 30, 32, 37, 41, 57, 80
2                          64
binary                     77
bs                         71
case                       5, 22, 37
cleannumbers               46, 51-60
close                      8, 14
concat                     87-88
...
updatedlinenumbers         46-47
updatedwordindex           41-42
updatewordindexlinenumbers 17, 44
when                       57

Here is the code:

-module(threenine).
-export([index/1]).

index(Filename) ->
	case file:open(Filename, read) of
		{ok, IoDevice} ->
			indexFile(IoDevice),
			file:close(IoDevice);
		{error, Reason} -> {error, Reason}
	end.

indexFile(IoDevice) ->
	RawDocument = loadRawDocument([], IoDevice),
	file:close(IoDevice),
	Document = loadDocument(RawDocument),
	WordIndex1 = indexWords(Document),
	WordIndex2 = updateWordIndexLineNumbers(WordIndex1),
	prettyPrintWordIndex(WordIndex2),
	ok.

loadRawDocument(Lines, IoDevice) ->
	case file:read_line(IoDevice) of
		{ok, Line} -> loadRawDocument([Line|Lines], IoDevice);
		eof -> lists:reverse(Lines)
	end.

loadDocument(RawDocument) ->
	MakeDoc = fun(Line, {LineNo, Document}) ->
		Tokens = string:tokens(Line, "\r\n\t {,}->:;[]().|\\=/~\"'+_"),
		{LineNo+1, [{LineNo, Tokens}|Document]}
	end,
	{_LineNumber, Document} = lists:foldl(MakeDoc, {1, []}, RawDocument),
	lists:reverse(Document).

indexWordsFun(Word, {LineNo, WordIndex}) ->
	LowerWord = string:to_lower(Word),
	LineNumbersList = case lists:keyfind(LowerWord, 1, WordIndex) of
		{LowerWord, LineNumbers} -> LineNumbers;
		false -> []
	end,
	UpdatedWordIndex = lists:keystore(LowerWord, 1, WordIndex, {LowerWord, [LineNo|LineNumbersList]}),
	{LineNo, UpdatedWordIndex}.

updateWordIndexLineNumbers(WordIndex) ->
	Update = fun({Word, LineNumbers}, NewWordIndex) ->
		UpdatedLineNumbers = cleanNumbers(LineNumbers, []),
		[{Word, UpdatedLineNumbers}|NewWordIndex]
	end,
	lists:foldr(Update, [], WordIndex).

cleanNumbers([], CleanNumbers) ->
	CleanNumbers;
cleanNumbers([Number|LineNumbers], []) ->
	cleanNumbers(LineNumbers, [{Number, Number}]);
cleanNumbers([Number|LineNumbers], [{_Start, Number}|_Tail]=NewNumbers) ->
	cleanNumbers(LineNumbers, NewNumbers);
cleanNumbers([Number|LineNumbers], [{Start, End}|Tail]) when Number == End+1 ->
	cleanNumbers(LineNumbers, [{Start, Number}|Tail]);
cleanNumbers([Number|LineNumbers], NewNumbers) ->
	cleanNumbers(LineNumbers, [{Number, Number}|NewNumbers]).

indexWords(Document) ->
	IndexWords = fun({LineNo, Words}, WordIndex) ->
		{LineNo, WordIndex2} = lists:foldr(fun indexWordsFun/2, {LineNo, WordIndex}, Words),
		WordIndex2
	end,
	lists:foldr(IndexWords, [], Document).

makePrintFormat(WordIndex) ->
	MaxLength = lists:foldr(fun({Word,_Lines}, Max) -> erlang:max(string:len(Word), Max) end, 0, WordIndex),
	io_lib:format("~~-~Bs ~~s~~n", [MaxLength]).

prettyPrintWordIndex(WordIndex) ->
	PrintFormat = makePrintFormat(WordIndex),
	PrintWord = fun({Word, LineNumbers}, Format) ->
		PrettyLineNumbers = makePrettyLineNumbers(LineNumbers),
		io:format(list_to_binary(PrintFormat), [Word, PrettyLineNumbers]),
		Format
	end,
	lists:foldr(PrintWord, PrintFormat, lists:reverse(lists:keysort(1,WordIndex))).

getPrettyLineNumbers(Result, []) ->
	Result;
getPrettyLineNumbers(Result, [{Number,Number}|LineNumbers]) ->
	getPrettyLineNumbers([integer_to_list(Number)|Result], LineNumbers);
getPrettyLineNumbers(Result, [{Start,End}|LineNumbers]) ->
	Range1=string:concat(integer_to_list(Start), "-"),
	Range2=string:concat(Range1, integer_to_list(End)),
	getPrettyLineNumbers([Range2|Result], LineNumbers).

makePrettyLineNumbers(LineNumbers) ->
	RangedLineNumbers = getPrettyLineNumbers([], LineNumbers),
	string:join(RangedLineNumbers, ", ").

Cheers,
-Halzy

Share and Enjoy:
  • Digg
  • del.icio.us
  • Facebook
  • Reddit
  • Twitter
Comments (0) Trackbacks (0)

No comments yet.


Leave a comment


No trackbacks yet.