import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.FileAlreadyExistsException;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.StringTokenizer;
/**
 * Finds the most frequent phrases in a pipe-delimited text file and writes
 * them, together with their occurrence counts, to an output file.
 *
 * <p>
 * Instances are handed out by {@link #getInstance(String)}; the actual work
 * is done by {@link #processMostFrequentPhrases(String, int)}.
 *
 * <p>
 * NOTE(review): the {@code free} flag is set to {@code true} by the
 * constructor and nothing in this class ever sets it to {@code false}.
 * As a consequence {@link #getInstance(String)} always creates a fresh
 * instance and {@link #releaseInstance()} always throws. This behaviour is
 * kept unchanged here because existing callers may rely on being able to
 * call {@code getInstance} repeatedly without releasing.
 *
 * @author luismr
 */
public class ReadPhrase {

    /** Most recently created instance (replaced on every successful {@link #getInstance(String)}). */
    private static ReadPhrase instance;

    /** {@code true} when the instance may be replaced by {@link #getInstance(String)}. */
    private boolean free;

    /** {@code true} while {@link #processMostFrequentPhrases(String, int)} is running. */
    private boolean busy;

    /** File the phrases are read from. */
    private File input;

    /** File the result is written to; set by {@link #processMostFrequentPhrases(String, int)}. */
    private File output;

    /** Phrase text by key, where the key is whatever {@code StringUtils.getMd5Hash} returns. */
    private Map<String, String> phrases;

    /** Occurrence count by the same key as {@link #phrases}. */
    private Map<String, Integer> counter;

    /** Selected keys with their counts, in descending count order. */
    private Map<String, Integer> result;

    /**
     * Returns an instance bound to the given input file.
     *
     * <p>
     * A new instance is created when none exists yet or when the current one
     * reports {@link #isFree()}; otherwise the call is rejected.
     *
     * @param inputFilename
     *            absolute path of the input file
     * @return a {@code ReadPhrase} instance for {@code inputFilename}
     * @throws FileNotFoundException
     *             if the input file does not exist
     * @throws IllegalArgumentException
     *             if {@code inputFilename} is {@code null}
     * @throws IllegalStateException
     *             if the current instance is not free
     */
    public static ReadPhrase getInstance(final String inputFilename)
            throws FileNotFoundException {
        if (instance != null && !instance.isFree()) {
            throw new IllegalStateException(
                    "Instance is not free to process another file");
        }
        // If the constructor throws, the previous instance (if any) is kept.
        instance = new ReadPhrase(inputFilename);
        return instance;
    }

    /**
     * Creates an instance bound to an existing input file.
     *
     * @param inputFilename
     *            absolute path of the input file
     * @throws FileNotFoundException
     *             if the input file does not exist
     * @throws IllegalArgumentException
     *             if {@code inputFilename} is {@code null}
     */
    private ReadPhrase(final String inputFilename)
            throws FileNotFoundException {
        if (inputFilename == null) {
            throw new IllegalArgumentException("inputFilename == null");
        }
        input = new File(inputFilename);
        if (!input.exists()) {
            throw new FileNotFoundException("inputFilename does not exists! ["
                    + inputFilename + "]");
        }
        setFree(true);
        setBusy(false);
    }

    /**
     * Reads the input file, selects the {@code n} most frequent phrases and
     * writes them to {@code outputFilename}.
     *
     * <p>
     * Phrases whose count equals the count of the n-th selected phrase are
     * written as well, so the output may contain more than {@code n} lines.
     * The method is {@code synchronized} on this instance.
     *
     * @param outputFilename
     *            absolute path of the output file; must not exist yet
     * @param n
     *            number of most frequent phrases, at least 1
     * @throws FileAlreadyExistsException
     *             if the output file already exists
     * @throws IllegalArgumentException
     *             if {@code outputFilename} is {@code null} or blank, or if
     *             {@code n < 1}
     * @throws IllegalStateException
     *             if the instance is busy
     * @throws IllegalAccessError
     *             if reading the input or writing the output fails
     */
    public synchronized void processMostFrequentPhrases(
            final String outputFilename, final int n)
            throws FileAlreadyExistsException {
        if (isBusy()) {
            throw new IllegalStateException(
                    "instance is busy or not valid to process a file");
        }
        if (outputFilename == null) {
            throw new IllegalArgumentException("outputFilename == null");
        }
        // Strings must be compared by content; "==" only compares references
        // and let whitespace-only names slip through.
        if (outputFilename.trim().isEmpty()) {
            throw new IllegalArgumentException("outputFilename is empty");
        }
        if (n < 1) {
            throw new IllegalArgumentException(
                    "n most frequent phrases must be greater then 0");
        }

        output = new File(outputFilename);
        if (output.exists()) {
            throw new FileAlreadyExistsException(outputFilename);
        }

        setBusy(true);
        try {
            readPhrases();
            processPhrases(n);
            writePhrasesToOutput();
        } finally {
            // Always clear the flag, otherwise one failed run would leave
            // the instance permanently busy.
            setBusy(false);
        }
    }

    /**
     * Marks the instance as free for another job.
     *
     * @throws IllegalStateException
     *             if the instance is already free or is busy
     */
    public void releaseInstance() {
        if (isFree() || isBusy()) {
            throw new IllegalStateException(
                    "instance is already free or is busy");
        }
        setFree(true);
        setBusy(false);
    }

    /**
     * Tells whether the instance is free.
     *
     * @return {@code true} if the free flag is set
     */
    public boolean isFree() {
        return free;
    }

    /**
     * Sets the free flag.
     *
     * @param free
     *            new value of the flag
     */
    private void setFree(final boolean free) {
        this.free = free;
    }

    /**
     * Tells whether the instance is currently processing a file.
     *
     * @return {@code true} if the busy flag is set
     */
    public boolean isBusy() {
        return busy;
    }

    /**
     * Sets the busy flag.
     *
     * @param busy
     *            new value of the flag
     */
    private void setBusy(final boolean busy) {
        this.busy = busy;
    }

    /**
     * Reads the input file line by line, splits every line on {@code '|'}
     * and counts how often each phrase occurs.
     *
     * <p>
     * Phrases are keyed by the value of {@code StringUtils.getMd5Hash}; two
     * phrases are counted together exactly when that helper returns equal
     * keys for them.
     *
     * @throws IllegalAccessError
     *             if the input cannot be opened or read
     */
    private void readPhrases() {
        phrases = new HashMap<String, String>();
        counter = new HashMap<String, Integer>();

        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(input));
            String line;
            while ((line = reader.readLine()) != null) {
                StringTokenizer tokenizer = new StringTokenizer(line, "|");
                while (tokenizer.hasMoreTokens()) {
                    String phrase = tokenizer.nextToken();
                    String key = StringUtils.getMd5Hash(phrase);
                    if (phrases.containsKey(key)) {
                        counter.put(key, counter.get(key) + 1);
                    } else {
                        phrases.put(key, phrase);
                        counter.put(key, 1);
                    }
                }
            }
        } catch (IOException e) {
            throw ioFailure("Error reading input [" + input.getName() + "]", e);
        } finally {
            // The reader is null when the file could not be opened; closing
            // is best effort because all data has already been consumed.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    System.err.println(e.getMessage());
                }
            }
        }
    }

    /**
     * Selects the {@code n} most frequent phrases from {@link #counter}
     * into {@link #result}, in descending count order.
     *
     * <p>
     * Entries tied with the last selected count are included too.
     *
     * @param n
     *            number of most frequent phrases
     */
    private void processPhrases(final int n) {
        result = new LinkedHashMap<String, Integer>();

        List<Entry<String, Integer>> sorted =
                new LinkedList<Entry<String, Integer>>(counter.entrySet());
        Collections.sort(sorted, new Comparator<Entry<String, Integer>>() {
            @Override
            public int compare(final Entry<String, Integer> o1,
                    final Entry<String, Integer> o2) {
                // Reversed operands: highest count first.
                return o2.getValue().compareTo(o1.getValue());
            }
        });

        int taken = 0;
        int lastCount = 0;
        for (Entry<String, Integer> entry : sorted) {
            // Once n entries are taken, continue only while counts tie with
            // the last one that was kept.
            if (taken >= n && entry.getValue() < lastCount) {
                break;
            }
            result.put(entry.getKey(), entry.getValue());
            lastCount = entry.getValue();
            taken++;
        }
    }

    /**
     * Writes one line per selected phrase to the output file, formatted as
     * {@code (count)}, three tabs, a space and the phrase.
     *
     * @throws IllegalAccessError
     *             if the output cannot be opened, written or closed
     */
    private void writePhrasesToOutput() {
        // try-with-resources: close() flushes the buffer, so a failure while
        // closing means lost data and must be reported, not swallowed.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(output))) {
            for (Entry<String, Integer> entry : result.entrySet()) {
                writer.write(String.format("(%d)\t\t\t %s\n",
                        entry.getValue(), phrases.get(entry.getKey())));
            }
        } catch (IOException ioe) {
            throw ioFailure(ioe.getMessage(), ioe);
        }
    }

    /**
     * Builds the error thrown for I/O failures. The error type is kept for
     * compatibility with existing callers; the original exception is
     * attached as cause so the stack trace is not lost.
     *
     * @param message
     *            detail message of the error
     * @param cause
     *            the underlying I/O exception
     * @return the error to throw
     */
    private static IllegalAccessError ioFailure(final String message,
            final IOException cause) {
        IllegalAccessError error = new IllegalAccessError(message);
        error.initCause(cause);
        return error;
    }
}