package nl.uva.sc.parser;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import nl.uva.sc.parser.subscriber.BookParserSubscriber;
public class BookParser {
private final File mBookFile;
private final List<BookParserSubscriber> mSubscriber = new ArrayList<>();
/**
* Create a book parser
*
* @param bookFile
* The book to parse
*/
public BookParser(final File bookFile) {
mBookFile = bookFile;
}
/**
* Starts the parser with standard UTF-8 encoding
*
* @throws IOException
* If the file is invalid or it cannot be parsed with UTF-8 encoding
*/
public void parse() throws IOException {
parse("UTF-8");
}
/**
* Starts the parser
*
* @throws IOException
* If the file is invalid or it cannot be parsed given the character encoding
*/
public void parse(final String encoding) throws IOException {
BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(
mBookFile), encoding));
String line = null;
while ((line = reader.readLine()) != null) {
line = line.replaceAll("[^a-zA-Z\\s]", " ");
line = line.replaceAll("\\s+", " ");
String[] words = line.split(" ");
for (String word : words) {
if (word.isEmpty() || word.length() == 1) continue;
notifyListener(word.toLowerCase());
}
}
reader.close();
}
/**
* Notify all listeners about the last parsed word
*
* @param word
*/
private void notifyListener(final String word) {
for (BookParserSubscriber subscriber : mSubscriber) {
subscriber.nextWord(word);
}
}
/**
* Subscribe to the word listener list
*
* @param subscriber
* @return True if successful subscribed
*/
public boolean subscribe(final BookParserSubscriber subscriber) {
return mSubscriber.add(subscriber);
}
/**
* Unsubscribe the given subscriber from the listener list
*
* @param subscriber
* @return True if successful unsubscribed
*/
public boolean unsubscribe(final BookParserSubscriber subscriber) {
return mSubscriber.remove(subscriber);
}
}