//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.jobs.interactions.io;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.function.BiConsumer;
import java.util.stream.StreamSupport;
import com.opencsv.CSVReader;
import net.sf.extjwnl.data.POS;
import uk.gov.dstl.baleen.jobs.interactions.data.InteractionDefinition;
import uk.gov.dstl.baleen.jobs.interactions.data.Word;
/**
 * Reads interactions from CSV.
 * <p>
 * This reads interaction data as written by the {@link CsvInteractionWriter}.
 * <p>
 * Each row is read positionally: type, subtype, source, target, lemma, part of speech, followed by
 * any number of alternative words. Rows with fewer than two columns, the header row, and rows
 * whose part of speech label is not recognised are skipped.
 */
public class CsvInteractionReader {

  /** Column index of the interaction type. */
  private static final int TYPE_COLUMN = 0;

  /** Column index of the interaction subtype. */
  private static final int SUBTYPE_COLUMN = 1;

  /** Column index of the source. */
  private static final int SOURCE_COLUMN = 2;

  /** Column index of the target. */
  private static final int TARGET_COLUMN = 3;

  /** Column index of the lemma. */
  private static final int LEMMA_COLUMN = 4;

  /** Column index of the part of speech label. */
  private static final int POS_COLUMN = 5;

  /** Column index of the first alternative word; every later column is also an alternative. */
  private static final int FIRST_ALTERNATIVE_COLUMN = 6;

  private final String inputFilename;

  /**
   * Instantiates a new CSV interaction reader.
   *
   * @param inputFilename
   *            the input filename
   */
  public CsvInteractionReader(String inputFilename) {
    this.inputFilename = inputFilename;
  }

  /**
   * Read the CSV file and send interactions to the consumer.
   *
   * @param consumer
   *            the consumer (first param is the InteractionDefinition and the second the list of
   *            alternative words)
   * @throws IOException
   *             Signals that an I/O exception has occurred.
   */
  public void read(BiConsumer<InteractionDefinition, Collection<String>> consumer) throws IOException {
    try (CSVReader reader = new CSVReader(new FileReader(inputFilename))) {
      StreamSupport.stream(reader.spliterator(), false).forEach(r -> processRecord(r, consumer));
    }
  }

  /**
   * Convert a single CSV row into an interaction definition and pass it to the consumer.
   * <p>
   * Rows that are too short, the header row, and rows without a recognised part of speech are
   * silently ignored.
   *
   * @param r
   *            the columns of the row
   * @param consumer
   *            receives the definition and its (possibly empty) list of alternative words
   */
  private void processRecord(String[] r, BiConsumer<InteractionDefinition, Collection<String>> consumer) {
    if (r.length < 2) {
      return;
    }

    String type = r[TYPE_COLUMN];
    String subType = r[SUBTYPE_COLUMN];

    if ("Type".equalsIgnoreCase(type) && "Subtype".equalsIgnoreCase(subType)) {
      // Header, ignore
      return;
    }

    String source = getOrEmpty(r, SOURCE_COLUMN);
    String target = getOrEmpty(r, TARGET_COLUMN);
    String lemma = getOrEmpty(r, LEMMA_COLUMN);

    // Locale.ROOT keeps the lower-casing independent of the JVM's default locale (for example
    // the Turkish dotless i), so the label lookup behaves the same everywhere.
    POS pos = POS.getPOSForLabel(getOrEmpty(r, POS_COLUMN).toLowerCase(Locale.ROOT));
    if (pos == null) {
      // Can't include words without a POS
      return;
    }

    InteractionDefinition i = new InteractionDefinition(type, subType, new Word(lemma, pos), source,
        target);

    // Clamp the initial capacity: a short row would otherwise give a negative value, which
    // ArrayList rejects with an IllegalArgumentException.
    List<String> alternatives = new ArrayList<>(Math.max(0, r.length - FIRST_ALTERNATIVE_COLUMN));
    for (int j = FIRST_ALTERNATIVE_COLUMN; j < r.length; j++) {
      String alternative = r[j].trim();
      if (!alternative.isEmpty()) {
        alternatives.add(alternative);
      }
    }

    consumer.accept(i, alternatives);
  }

  /**
   * Get the value at the index, or an empty string if the row has no such column.
   *
   * @param array
   *            the columns of the row
   * @param index
   *            the column index wanted
   * @return the column value, or "" when the index is outside the array
   */
  private String getOrEmpty(String[] array, int index) {
    // Valid indices are 0 .. length - 1, so index == length must also yield the empty string.
    if (index < 0 || index >= array.length) {
      return "";
    }
    return array[index];
  }
}