package importexport.importing;
import gui.settings.Settings;
import importexport.util.InvalidFileException;
import importexport.util.Utility;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.concurrent.LinkedBlockingQueue;
import au.com.bytecode.opencsv.CSVReader;
import db.Database;
import db.DatabaseAccessException;
import db.DatabaseConfiguration;
/**
* Abstract base class for importers which read a dataset from an arbitrary file together with the
* belonging .ssd file.
*/
public abstract class Importer {
    /**
     * Number of objects which should be pushed together.
     */
    public static final int BATCH_SIZE = DatabaseConfiguration.TRANSACTIONSIZE;
    /**
     * Index (among the non-empty tokens of a subspace definition line) where the definition of the
     * subspace base begins.
     */
    private static final int START_BASE_DEF = 5;
    /**
     * Index (among the non-empty tokens of a subspace definition line) where the dimension of the
     * subspace is mentioned.
     */
    private static final int POS_SS_SIZE = 4;
    /**
     * Minimum number of cells a data row needs: column 1 holds the subspace index, column 2 the value.
     */
    private static final int MIN_DATA_COLUMNS = 3;
    /**
     * Database where the data will be stored.
     */
    private final Database database;
    /**
     * Extracts all information from a file which is needed for storing the data correctly.
     */
    private final FileInfoExtractor infoExtractor;

    /**
     * Constructs an Importer.
     *
     * @param db
     *            place where imported data will be stored.
     * @param extractor
     *            needed for extracting information from the dataset.
     * @throws IllegalArgumentException
     *             if one of the arguments is {@code null}.
     */
    public Importer(final Database db, final FileInfoExtractor extractor) {
        if (db == null) {
            throw new IllegalArgumentException("db must not be null");
        }
        if (extractor == null) {
            throw new IllegalArgumentException("extractor must not be null");
        }
        this.database = db;
        this.infoExtractor = extractor;
    }

    /**
     * Imports an arbitrary file and a belonging .ssd-file.
     *
     * @param input
     *            Reference of the file which should be imported.
     * @param algoOut
     *            Reference of the .ssd file which is made by a data mining algorithm.
     *
     * @throws IOException
     *             thrown if something else goes wrong.
     * @throws DatabaseAccessException
     *             thrown if something went wrong with the Database connection.
     * @throws InvalidFileException
     *             thrown if the file to import isn't valid.
     */
    public abstract void importFile(File input, File algoOut) throws IOException, DatabaseAccessException,
            InvalidFileException;

    /**
     * Parses a .ssd-file: pushes every subspace definition found before the {@code @data} marker to the
     * database and collects the values of the data segment which follows the marker.
     *
     * @param algoOut
     *            Output file of a data mining algorithm containing the detected subspaces.
     * @param startOutliernessRef
     *            reference stored as first element of the first pushed subspace; it is incremented by
     *            one for every further subspace.
     *
     * @return an array with one slot per pushed subspace. A data row adds the float of its column 2 to
     *         the queue at the index given by its column 1; slots without any data row stay
     *         {@code null}.
     *
     * @throws FileNotFoundException
     *             thrown if {@code algoOut} is {@code null} or does not exist.
     * @throws InvalidFileException
     *             thrown if the data segment is missing or a line is structurally malformed.
     * @throws IOException
     *             thrown if reading the file fails.
     * @throws DatabaseAccessException
     *             thrown if something went wrong with the Database connection.
     */
    protected final LinkedBlockingQueue<Float>[] parseAlgoOut(final File algoOut, int startOutliernessRef)
            throws IOException, DatabaseAccessException {
        if (algoOut == null || !algoOut.exists()) {
            throw new FileNotFoundException(Settings.getInstance().getResourceBundle().getString("noFile"));
        }
        final BufferedReader reader = new BufferedReader(new FileReader(algoOut));
        try {
            final int noSubspaces = pushSubspaceDefinitions(reader, startOutliernessRef);
            return readDataSegment(reader, noSubspaces);
        } finally {
            // Release the file handle even if parsing or the database push fails.
            reader.close();
        }
    }

    /**
     * Reads the header of a .ssd-file up to (and including) the {@code @data} marker and pushes every
     * non-empty line as a subspace to the database.
     *
     * @param reader
     *            reader positioned at the start of the file.
     * @param startOutliernessRef
     *            reference stored as first element of the first pushed subspace.
     * @return the number of pushed subspaces.
     * @throws IOException
     *             thrown if reading fails or a definition line is malformed.
     * @throws DatabaseAccessException
     *             thrown if something went wrong with the Database connection.
     */
    private int pushSubspaceDefinitions(final BufferedReader reader, final int startOutliernessRef)
            throws IOException, DatabaseAccessException {
        int curOutlierRef = startOutliernessRef;
        int noSubspaces = 0;
        // idx counts every header line, including empty ones, so the pushed ids can have gaps.
        // NOTE(review): the result array is sized by noSubspaces, not by the highest idx - confirm
        // that .ssd headers never contain empty lines between definitions.
        int idx = 1;
        String line = reader.readLine();
        while (line != null && !line.equals("@data")) {
            if (!line.isEmpty()) {
                final int[] features = parseLine(line);
                // Element 0 carries the outlierness reference, the feature ids follow.
                final int[] subspace = new int[features.length + 1];
                subspace[0] = curOutlierRef++;
                System.arraycopy(features, 0, subspace, 1, features.length);
                database.pushSubspace(idx, subspace, "Subspace " + idx);
                noSubspaces++;
            }
            line = reader.readLine();
            idx++;
        }
        return noSubspaces;
    }

    /**
     * Reads the data segment following the {@code @data} marker.
     *
     * @param reader
     *            reader positioned directly behind the {@code @data} line.
     * @param noSubspaces
     *            number of slots of the returned array.
     * @return the collected values, grouped by the subspace index found in column 1 of each row.
     * @throws IOException
     *             thrown if reading fails or the data segment is missing or malformed.
     */
    @SuppressWarnings(value = { "unchecked" })
    private LinkedBlockingQueue<Float>[] readDataSegment(final BufferedReader reader, final int noSubspaces)
            throws IOException {
        final String firstLine = reader.readLine();
        if (firstLine == null) {
            // No @data marker or nothing behind it.
            throw new InvalidFileException();
        }
        char delimiter;
        try {
            delimiter = Utility.filterDelimiterFromString(firstLine);
        } catch (NullPointerException npe) {
            // NOTE(review): kept from the original code; presumably the helper signals an
            // undetectable delimiter this way - verify against Utility.
            throw new InvalidFileException();
        }
        // NOTE(review): an earlier comment said the array position equals "idx - 1" of a subspace, but
        // the slot has always been addressed with the raw value of column 1 - confirm the file format.
        final LinkedBlockingQueue<Float>[] queue = new LinkedBlockingQueue[noSubspaces];
        final CSVReader csvReader = new CSVReader(reader, delimiter);
        // The first data line was already consumed for the delimiter detection, so it is split by
        // hand. The delimiter is quoted because String.split interprets its argument as a regex.
        String[] row = firstLine.split(java.util.regex.Pattern.quote(String.valueOf(delimiter)));
        while (row != null) {
            if (!isBlankRow(row)) {
                if (row.length < MIN_DATA_COLUMNS) {
                    throw new InvalidFileException();
                }
                final int subspace = Integer.parseInt(row[1].trim());
                if (subspace < 0 || subspace >= queue.length) {
                    throw new InvalidFileException();
                }
                if (queue[subspace] == null) {
                    queue[subspace] = new LinkedBlockingQueue<Float>();
                }
                queue[subspace].offer(Float.parseFloat(row[2]));
            }
            row = csvReader.readNext();
        }
        return queue;
    }

    /**
     * Tells whether a data row carries no content: it has at most two cells and all of them are
     * {@code null} or empty.
     *
     * @param row
     *            cells of one data row.
     * @return {@code true} if the row can be skipped.
     */
    private static boolean isBlankRow(final String[] row) {
        if (row.length > 2) {
            return false;
        }
        for (final String cell : row) {
            if (cell != null && !cell.isEmpty()) {
                return false;
            }
        }
        return true;
    }

    /**
     * Parses a subspace definition line of a .ssd-file. The line is split at commas, square brackets
     * and whitespace; empty tokens are ignored.
     *
     * @param line
     *            of a ssd file
     * @return an int[] whose length is the dimension stated in the line and which holds the listed
     *         feature ids, each increased by one. Slots without a listed id stay 0.
     * @throws InvalidFileException
     *             thrown if the line has no dimension token, a negative dimension or more feature ids
     *             than the stated dimension.
     */
    private int[] parseLine(final String line) throws InvalidFileException {
        final String[] rawTokens = line.split("(\\,|\\[|\\]|\\s+)");
        // Compact the non-empty tokens to the front of a scratch array.
        final String[] tokens = new String[rawTokens.length];
        int tokenCount = 0;
        for (final String token : rawTokens) {
            if (!token.isEmpty()) {
                tokens[tokenCount++] = token;
            }
        }
        if (tokenCount <= POS_SS_SIZE) {
            throw new InvalidFileException();
        }
        final int dimension = Integer.parseInt(tokens[POS_SS_SIZE]);
        if (dimension < 0) {
            throw new InvalidFileException();
        }
        final int[] res = new int[dimension];
        if (dimension > 0) {
            final int baseCount = tokenCount - START_BASE_DEF;
            if (baseCount > dimension) {
                throw new InvalidFileException();
            }
            for (int k = 0; k < baseCount; ++k) {
                // Stored ids are the ids of the file shifted by one.
                res[k] = Integer.parseInt(tokens[START_BASE_DEF + k]) + 1;
            }
        }
        return res;
    }

    /**
     * Returns referenced Database object.
     *
     * @return the held Database object.
     */
    protected final Database getDB() {
        return this.database;
    }

    /**
     * Returns referenced FileInfoExtractor object.
     *
     * @return the held FileInfoExtractor object.
     */
    protected final FileInfoExtractor getFileInfoExtractor() {
        return this.infoExtractor;
    }
}