package importexport.importing;
import importexport.util.CSVFileInfo;
import importexport.util.InvalidFileException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.concurrent.LinkedBlockingQueue;
import au.com.bytecode.opencsv.CSVReader;
import db.Database;
import db.DatabaseAccessException;
/**
*{@inheritDoc}.
*/
public class CSVBasedImporter extends Importer {
/**
* Constructs a new instance of Importer which is specialized for csv-based file formats.
*
* @param db
* Database where imported should be stored.
* @param extractor
* Extractor specialized for csv based formats which is extracting necessary informatinos for importing a
* file.
*/
public CSVBasedImporter(final Database db, final CSVFileInfoExtractor extractor) {
super(db, extractor);
}
/**
* Imports an csv-based file and a belonging .ssd-file.
*
* @param input
* Reference of the file which should be imported.
* @param algoOut
* Reference of the .ssd file which is made by a Datamining algorithm.
*
* @throws IOException
* threw if something other is going wrong.
* @throws DatabaseAccessException
* threw if something went wrong with the Database connection.
* @throws InvalidFileException
* threw if importing file isn't valid.
*/
@Override
public void importFile(final File input, final File algoOut) throws IOException, DatabaseAccessException,
InvalidFileException {
if (input == null || algoOut == null || !input.exists() || !algoOut.exists()) {
throw new FileNotFoundException();
}
CSVFileInfo info = (CSVFileInfo) this.getFileInfoExtractor().extractFileInfo(input);
CSVReader cr = new CSVReader(new FileReader(input), info.getDelimiter(), '\'',
info.getFirstLineOfDataSegment() - 1);
if (info.getFeatures().length < 2) {
throw new InvalidFileException();
}
//amount of queues is equal to the amount of defined subspaces.
LinkedBlockingQueue<Float>[] queues = this.parseAlgoOut(algoOut, info.noOfFeatures());
//number of all features (natural features + outlierness values.
int amountFeatures = queues.length + info.noOfFeatures();
int infoNoOfFeatures = info.noOfFeatures();
//because the class array is no real feature.
if (info.getFeatures()[info.getFeatures().length - 1].matches("class(.)*")) {
--amountFeatures;
--infoNoOfFeatures;
}
//amount of all features which are detected in ssd.
int amountObjects = queues[0].size();
String[] allFeatures = new String[amountFeatures];
boolean[] featureTypes = new boolean[amountFeatures];
int i = 0;
for (; i < infoNoOfFeatures; ++i) {
allFeatures[i] = info.getFeatures()[i];
featureTypes[i] = false;
}
for (int k = 0; i < amountFeatures; ++k, ++i) {
allFeatures[i] = "Outlierness" + (k + 1);
featureTypes[i] = true;
}
getDB().initFeatures(allFeatures, featureTypes);
String[] line = null;
//buffervector of build objects which will be stored next.
float[][] batch = new float[BATCH_SIZE][];
//current built object.
float[] actObj = null;
int actBatchSize = 0;
for (int k = 0; k < amountObjects; ++k) {
int pos = 0;
actObj = new float[amountFeatures];
line = cr.readNext();
if (line.length == 0 || (line.length == 1 & (line[0] == null || line[0].isEmpty()))
|| line.length == 2 & (line[0] == null || line[0].isEmpty()) & (line[1] == null || line[1].isEmpty())) {
--k;
continue;
}
for (; pos < infoNoOfFeatures; ++pos) {
try {
actObj[pos] = Float.parseFloat(line[pos]);
} catch (NumberFormatException e) {
actObj[pos] = Float.NaN;
}
}
for (int m = 0; m < queues.length; ++m, ++pos) {
actObj[pos] = queues[m].poll();
}
batch[actBatchSize++] = actObj;
if (actBatchSize >= BATCH_SIZE) {
getDB().pushObject(batch);
actBatchSize = 0;
batch = new float[BATCH_SIZE][];
System.gc();
}
}
// removing null vectors from last batch matrix.
int noVectors = 0;
while (noVectors < BATCH_SIZE && batch[noVectors] != null) {
noVectors++;
}
float[][] batchOle = new float[noVectors][amountFeatures];
for (int k = 0; k < noVectors; ++k) {
batchOle[k] = batch[k];
}
// push last batch matrix.
this.getDB().pushObject(batchOle);
// update db min/max values
this.getDB().updateFeaturesMinMax();
cr.close();
}
}