package edu.stanford.nlp.ie.machinereading.structure;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
/**
*
* @author Andrey Gusev
* @author Mason Smith
* @author Mihai
*
*/
/**
 * A collection of {@link ExtractionSentence}s used as a dataset for
 * information-extraction experiments. The sentence order is significant:
 * {@link #shuffle()} uses a fixed seed so experiments are replicable.
 */
public class ExtractionDataSet implements Serializable {
  private static final long serialVersionUID = 201150461234284548L;

  /** The sentences in this dataset, in insertion (or shuffled) order. */
  private final List<ExtractionSentence> sentences;

  /** Creates an empty dataset. */
  public ExtractionDataSet() {
    sentences = new ArrayList<>();
  }

  /**
   * Copy c'tor that performs deep copy of the sentences in the original dataset.
   * The {@link ExtractionSentence} copy constructor creates new
   * entity/relation/event lists, but the ExtractionObjects themselves are
   * shared, not deep copied.
   *
   * @param original the dataset whose sentences are copied
   */
  public ExtractionDataSet(ExtractionDataSet original) {
    // presize: we know exactly how many sentences will be copied
    sentences = new ArrayList<>(original.sentenceCount());
    for (ExtractionSentence sent : original.getSentences()) {
      sentences.add(new ExtractionSentence(sent));
    }
  }

  /**
   * Returns the sentence at the given index.
   *
   * @param i zero-based sentence index
   * @throws IndexOutOfBoundsException if {@code i} is out of range
   */
  public ExtractionSentence getSentence(int i) { return sentences.get(i); }

  /** Returns the number of sentences in this dataset. */
  public int sentenceCount() { return sentences.size(); }

  /** Appends a single sentence to this dataset. */
  public void addSentence(ExtractionSentence sentence) {
    this.sentences.add(sentence);
  }

  /**
   * Appends all given sentences to this dataset. Each element is added via
   * {@link #addSentence(ExtractionSentence)} so subclass overrides still apply.
   */
  public void addSentences(List<ExtractionSentence> sentences) {
    for (ExtractionSentence sent : sentences) {
      addSentence(sent);
    }
  }

  /**
   * Returns a read-only view of the sentences; callers must use
   * {@link #addSentence} to modify the dataset.
   */
  public List<ExtractionSentence> getSentences() {
    return Collections.unmodifiableList(this.sentences);
  }

  /** Shuffles the sentences in place. We use a constant seed for replicability of experiments. */
  public void shuffle() {
    Collections.shuffle(sentences, new Random(0));
  }
}