package hu.u_szeged.kpe.readers;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.TreeSet;
public class DocumentSet extends TreeSet<DocumentData> {
private static final long serialVersionUID = -5225565570903384980L;
private String baseDir;
private KpeReader reader;
public static int adaptationType;
private static boolean isGenuineDAon;
public DocumentSet(KpeReader r) {
this(-1, "", r);
}
public DocumentSet(int adaptation, KpeReader r) {
this(adaptation, "", r);
}
public DocumentSet(String bd, KpeReader r) {
this(-1, bd, r);
}
public DocumentSet(int adaptation, String bd, KpeReader r) {
adaptationType = adaptation;
baseDir = bd;
reader = r;
}
public void setBaseDir(String bd) {
baseDir = bd;
}
public KpeReader getReader() {
return reader;
}
public Collection<DocumentData> determineDocumentSet(int foldNum, int totalFolds, boolean forTraining) {
if (adaptationType == -1 || !forTraining) {
return determineDocumentSet(foldNum, totalFolds, forTraining, null);
} else {
try {
throw new Exception("Test document set would be needed to carry out domain adaptation properly.");
} catch (Exception e) {
e.printStackTrace();
}
return null;
}
}
public List<DocumentData> determineDocumentSet(int foldNum, int totalFolds, boolean forTraining, DocumentSet otherData) {
if (otherData != null) {
isGenuineDAon = !baseDir.equals(otherData.baseDir);
}
double foldSize = (otherData != null && !baseDir.equals(otherData.baseDir) && adaptationType > -1 && forTraining ? otherData
.size() : size())
* Math.pow(totalFolds, -1);
List<DocumentData> docs = new ArrayList<DocumentData>(this);
if (totalFolds == 1) {
return docs;
}
int fromIndex = (int) Math.floor((foldNum - 1) * foldSize), toIndex;
if (foldNum == totalFolds) {
toIndex = adaptationType > -1 && forTraining ? otherData.size() : size();
} else {
toIndex = (int) Math.floor(foldNum * foldSize);
}
if (forTraining) {
if (otherData != null && !baseDir.equals(otherData.baseDir) && adaptationType > -1) {
List<DocumentData> targetSubDomain = new ArrayList<DocumentData>(otherData).subList(fromIndex, toIndex);
docs.addAll(targetSubDomain);
} else {
List<DocumentData> docsToRemove = new ArrayList<DocumentData>(docs).subList(fromIndex, toIndex);
docs.removeAll(docsToRemove);
}
return docs;
} else {
if (isGenuineDAon && adaptationType > -1) {
List<DocumentData> docsToRemove = new ArrayList<DocumentData>(docs.subList(fromIndex, toIndex));
docs.removeAll(docsToRemove);
return docs;
} else {
return docs.subList(fromIndex, toIndex);
}
}
}
}