package matrix.implementations.binary;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.DataFormatException;
import javax.naming.NamingException;
import matrix.general.VerifyCsv;
import matrix.general.VerifyCsvException;
import matrix.implementations.binary.etc.ElementLengthException;
import org.apache.commons.fileupload.FileUploadException;
import org.apache.log4j.Logger;
import org.molgenis.data.Data;
import org.molgenis.framework.db.CsvToDatabase.IntegerWrapper;
import org.molgenis.framework.db.Database;
import org.molgenis.framework.db.DatabaseException;
import org.molgenis.framework.db.jdbc.JDBCDatabase;
import org.molgenis.framework.db.jpa.JpaDatabase;
import org.molgenis.util.CsvFileReader;
import org.molgenis.util.Tuple;
import decorators.NameConvention;
import filehandling.generic.PerformUpload;
/**
 * Converts CSV matrix files into the binary matrix file format.
 * <p>
 * The constructors that take a {@link Database} convert each input file to a
 * temporary binary file and pass it to {@code PerformUpload.doUpload} with
 * type 'BinaryDataMatrix'. The no-argument constructor only creates an
 * instance for the standalone {@link #CsvToBin(String[])} conversion.
 */
public class BinaryDataMatrixWriter
{
    /**
     * Largest length that can be stored in the single length byte used
     * throughout the file format. NOTE(review): this assumes the reader
     * interprets the byte as unsigned -- confirm against the reader.
     */
    private static final int MAX_BYTE_LENGTH = 255;

    private final Logger logger = Logger.getLogger(getClass().getSimpleName());

    /**
     * Character written as the first byte of every file and used as the
     * placeholder for missing (null or empty) text elements.
     */
    private final String nullChar = "\5";

    /**
     * Empty constructor to create an instance that is able to run standalone
     * functions such as 'CsvToBin'.
     */
    public BinaryDataMatrixWriter()
    {
        // nothing to initialise
    }

    /**
     * Wrapper constructor: converts a single input file to a binary matrix
     * and uploads it.
     *
     * @param data
     *            description of the matrix stored in inputFile
     * @param inputFile
     *            CSV file holding the matrix
     * @param db
     *            database handed to the upload
     * @throws Exception
     *             if verification, conversion or upload fails
     */
    public BinaryDataMatrixWriter(Data data, File inputFile, Database db) throws Exception
    {
        HashMap<Data, File> dataFileMap = new HashMap<Data, File>();
        dataFileMap.put(data, inputFile);
        importAll(dataFileMap, db);
    }

    /**
     * Wrapper constructor: uses inputDir as a directory of input files. Don't
     * use unless you know exactly what you're doing: for every Data object
     * the directory must contain a file named after the escaped data name
     * plus '.txt'.
     *
     * @param dataList
     *            descriptions of the matrices to import
     * @param inputDir
     *            directory that holds one '.txt' file per Data object
     * @param db
     *            database handed to the upload
     * @throws FileNotFoundException
     *             if inputDir cannot be listed (missing or not a directory)
     * @throws Exception
     *             if verification, conversion or upload fails
     */
    public BinaryDataMatrixWriter(List<Data> dataList, File inputDir, Database db) throws Exception
    {
        // listFiles() returns null instead of throwing when the path is not a
        // listable directory; report that explicitly rather than with an NPE.
        File[] listed = inputDir.listFiles();
        if (listed == null)
        {
            throw new FileNotFoundException("Input directory '" + inputDir.getAbsolutePath()
                    + "' does not exist or is not a readable directory");
        }
        List<File> inputFiles = new ArrayList<File>(listed.length);
        for (File input : listed)
        {
            inputFiles.add(input);
        }
        HashMap<Data, File> dataFileMap = new HashMap<Data, File>();
        for (Data data : dataList)
        {
            // a missing file is stored as null and rejected by importAll
            File inputFile = getInputFileForName(NameConvention.escapeFileName(data.getName()) + ".txt", inputFiles);
            dataFileMap.put(data, inputFile);
        }
        importAll(dataFileMap, db);
    }

    /**
     * Core constructor. Prepare and verify a list of files and import them.
     *
     * @param dataFileMap
     *            matrix descriptions mapped to the CSV file holding their data
     * @param db
     *            database handed to the upload
     * @throws Exception
     *             if an input file is missing, or verification, conversion or
     *             upload fails
     */
    public BinaryDataMatrixWriter(HashMap<Data, File> dataFileMap, Database db) throws Exception
    {
        importAll(dataFileMap, db);
    }

    /**
     * Verifies, converts and uploads every entry of the map. Shared by all
     * importing constructors so none of them has to create a second,
     * throw-away writer instance to do the work.
     *
     * @param dataFileMap
     *            matrix descriptions mapped to their CSV source file
     * @param db
     *            database handed to the upload
     * @throws FileUploadException
     *             if a source file is null or does not exist
     * @throws Exception
     *             if verification, conversion or upload fails
     */
    private void importAll(Map<Data, File> dataFileMap, Database db) throws Exception
    {
        for (Map.Entry<Data, File> entry : dataFileMap.entrySet())
        {
            Data data = entry.getKey();
            File src = entry.getValue();
            if (src == null || !src.exists())
            {
                throw new FileUploadException("File input for BinaryMatrixWriter does not exists.");
            }
            // index 0 is passed on as the row count, index 1 as the column count
            int[] rowAndColLength = VerifyCsv.verify(src, data.getValueType());

            // make the binary file
            File dest = new File(System.getProperty("java.io.tmpdir") + File.separator + "tmp_binmatrix_"
                    + System.nanoTime());
            File binFile = makeBinaryBackend(data, src, dest, rowAndColLength[0], rowAndColLength[1]);

            // upload as a MolgenisFile, type 'BinaryDataMatrix'
            HashMap<String, String> extraFields = new HashMap<String, String>();
            extraFields.put("data_" + Data.ID, data.getId().toString());
            extraFields.put("data_" + Data.NAME, data.getName());
            PerformUpload.doUpload(db, true, data.getName() + ".bin", "BinaryDataMatrix", binFile, extraFields, false);
        }
    }

    /**
     * Standalone conversion of a CSV matrix file into a binary matrix file.
     * The result is written to a relative path made of the source file name
     * with its '.txt' suffix replaced by '.bin'. Progress is printed to
     * standard output.
     *
     * @param args
     *            exactly six values: data name, investigation name, row type,
     *            column type, value type ('Text' or 'Decimal') and source
     *            file name
     * @throws DataFormatException
     *             if the number of arguments is not six
     * @throws VerifyCsvException
     *             if the source file does not exist or does not end with
     *             '.txt'
     * @throws NamingException
     *             if the value type is neither 'Text' nor 'Decimal'
     * @throws Exception
     *             if name validation, CSV verification or conversion fails
     */
    public void CsvToBin(String[] args) throws Exception
    {
        if (args.length != 6)
        {
            throw new DataFormatException(
                    "You must supply 6 arguments: data name, investigation name, row type, column type, value type, and source file name.");
        }

        // get args
        String dataName = args[0];
        String invName = args[1];
        String rowType = args[2];
        String colType = args[3];
        String valType = args[4];
        String fileString = args[5];

        // print args
        System.out.println("CsvToBin called with arguments:");
        System.out.println("data name = " + dataName);
        System.out.println("investigation name = " + invName);
        System.out.println("row type = " + rowType);
        System.out.println("column type = " + colType);
        System.out.println("value type = " + valType);
        System.out.println("source file = " + fileString);

        // check if source file exists and ends with '.txt'
        File src = new File(fileString);
        if (!src.exists())
        {
            throw new VerifyCsvException("Source file '" + fileString + "' not found at location '"
                    + src.getAbsolutePath() + "'");
        }
        if (!src.getName().endsWith(".txt"))
        {
            throw new VerifyCsvException("Source file name '" + fileString
                    + "' does not end with '.txt', are you sure it is a CSV matrix?");
        }
        System.out.println("Source file exists and ends with '.txt'..");

        // create Data object, validate the names and valuetype
        Data d = new Data();
        d.setName(dataName);
        d.setInvestigation_Name(invName);
        d.setTargetType(rowType);
        d.setFeatureType(colType);
        d.setValueType(valType);
        // FIXME: strict should only be applied when application is an XGAP
        NameConvention.validateEntityNameStrict(dataName);
        NameConvention.validateEntityNameStrict(invName);
        System.out.println("'Data' object created..");
        if (!valType.equals("Text") && !valType.equals("Decimal"))
        {
            throw new NamingException("Value type '" + valType + "' not reckognized. Use 'Text' or 'Decimal'.");
        }
        System.out.println("Valuetype OK..");

        // verify the CSV file to be a correct matrix and get the dimensions
        int[] dims = VerifyCsv.verify(src, valType);
        System.out.println("CSV input file verified..");

        // convert to binary; the name is known to end with the 4 characters '.txt'
        String srcName = src.getName();
        File dest = new File(srcName.substring(0, srcName.length() - 4) + ".bin");
        System.out.println("Starting conversion..");
        makeBinaryBackend(d, src, dest, dims[0], dims[1]);
        System.out.println("..done!");
    }

    /**
     * Convert an input file into a binary matrix.
     * <p>
     * Layout written, in order: the null character; name, investigation
     * name, feature type and target type of the Data object, each preceded
     * by a one-byte length; a boolean that is true for 'Decimal' data; the
     * column count and row count as ints; one length byte per column name
     * and per row name; the column names and row names; for 'Text' data the
     * fixed element length byte (0 meaning variable length, followed by one
     * length byte per element); finally the elements themselves.
     * <p>
     * The output stream is always closed. When the conversion fails, the
     * partially written destination file is removed so that a retry is not
     * blocked by the 'already exists' check.
     *
     * @param data
     *            description of the matrix
     * @param src
     *            CSV file holding the matrix
     * @param dest
     *            file to create; must not exist yet
     * @param totalRows
     *            number of rows in the matrix
     * @param totalCols
     *            number of columns in the matrix
     * @return dest, after it has been completely written and closed
     * @throws IOException
     *             if dest already exists, a length does not fit in one byte,
     *             or writing fails
     * @throws Exception
     *             if reading the source file fails
     */
    private File makeBinaryBackend(Data data, File src, File dest, int totalRows, int totalCols) throws Exception
    {
        if (dest.exists())
        {
            throw new IOException("Destination file '" + dest.getName() + "' already exists");
        }

        // buffered: elements are written one at a time, which would otherwise
        // hit the file system for every single value
        final DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(dest)));
        boolean completed = false;
        try
        {
            // 0) write nullCharacter
            dos.writeBytes(this.nullChar);

            // 1) properties belonging to the 'Data' object
            writeLengthPrefixed(dos, data.getName(), "data name");
            writeLengthPrefixed(dos, data.getInvestigation_Name(), "investigation name");
            writeLengthPrefixed(dos, data.getFeatureType(), "feature type");
            writeLengthPrefixed(dos, data.getTargetType(), "target type");
            dos.writeBoolean(data.getValueType().equals("Decimal"));
            dos.writeInt(totalCols);
            dos.writeInt(totalRows);

            // 2) matrix content specific properties
            CsvFileReader csvFile = new CsvFileReader(src);
            List<String> colNames = csvFile.colnames();
            List<String> rowNames = csvFile.rownames();
            // hack for xgap matrix datatype
            if (colNames.get(0).equals(""))
            {
                colNames.remove(0);
            }
            // all name lengths come first, the names themselves follow
            for (int i = 0; i < totalCols; i++)
            {
                dos.writeByte(checkedByteLength(colNames.get(i).length(), "column name"));
            }
            for (int i = 0; i < totalRows; i++)
            {
                dos.writeByte(checkedByteLength(rowNames.get(i).length(), "row name"));
            }
            for (int i = 0; i < totalCols; i++)
            {
                dos.writeBytes(colNames.get(i));
            }
            for (int i = 0; i < totalRows; i++)
            {
                dos.writeBytes(rowNames.get(i));
            }

            final boolean isText = data.getValueType().equals("Text");

            // information about text DataMatrix elements
            int textLength = -1;
            if (isText)
            {
                textLength = elementLength(src);
                dos.writeByte(checkedByteLength(textLength, "fixed text element length"));
                logger.info("text DataMatrix element length: " + textLength);
                if (textLength == 0)
                {
                    logger.info("length zero, making variable length array");
                    // determine lengths and write to binary
                    byte[] textElementLengths = getTextDataElementLengths(src, totalCols * totalRows);
                    dos.write(textElementLengths);
                }
            }

            logger.info("Writing elements..");
            // writing the actual elements
            if (isText)
            {
                writeBinaryTextElements(dos, src, textLength);
            }
            else
            {
                writeBinaryDecimalElements(dos, src);
            }

            // flush before declaring success so buffered data is known to be on disk
            dos.flush();
            completed = true;
        }
        finally
        {
            if (completed)
            {
                dos.close();
            }
            else
            {
                discardIncompleteFile(dos, dest);
            }
        }
        return dest;
    }

    /**
     * Closes the stream of a failed conversion and removes the partial
     * output. Problems are only logged so that they cannot mask the
     * exception that caused the conversion to fail.
     */
    private void discardIncompleteFile(DataOutputStream dos, File dest)
    {
        try
        {
            dos.close();
        }
        catch (IOException closeFailure)
        {
            logger.warn("Could not close incomplete binary file '" + dest.getAbsolutePath() + "': "
                    + closeFailure.getMessage());
        }
        if (!dest.delete())
        {
            logger.warn("Could not remove incomplete binary file '" + dest.getAbsolutePath() + "'");
        }
    }

    /**
     * Writes a one-byte length followed by the low byte of every character
     * of the value.
     */
    private static void writeLengthPrefixed(DataOutputStream dos, String value, String kind) throws IOException
    {
        dos.writeByte(checkedByteLength(value.length(), kind));
        dos.writeBytes(value);
    }

    /**
     * Guards a length that is about to be stored in a single byte.
     * {@code writeByte} and a cast to {@code byte} silently keep only the
     * low eight bits, which would produce a file whose lengths no longer
     * match its content.
     *
     * @param length
     *            length to store
     * @param kind
     *            what the length belongs to, used in the error message
     * @return length, unchanged
     * @throws IOException
     *             if length is negative or larger than MAX_BYTE_LENGTH
     */
    private static int checkedByteLength(int length, String kind) throws IOException
    {
        if (length < 0 || length > MAX_BYTE_LENGTH)
        {
            throw new IOException("Length " + length + " of " + kind
                    + " does not fit in the single length byte of the binary matrix format (maximum "
                    + MAX_BYTE_LENGTH + ")");
        }
        return length;
    }

    /**
     * Wraps a failure in a ParseException while keeping the original
     * exception reachable as its cause.
     */
    private static ParseException toParseException(Exception cause)
    {
        ParseException wrapped = new ParseException(cause.getMessage(), 0);
        wrapped.initCause(cause);
        return wrapped;
    }

    /**
     * Writes all text elements of the input file, skipping the first column
     * of every line. Null and empty elements are written as a run of null
     * characters: one character for variable length data (textLength 0),
     * textLength characters for fixed length data, so that fixed length
     * files keep a constant element width.
     *
     * @param dos
     *            stream to write to
     * @param inputFile
     *            CSV file holding the matrix
     * @param textLength
     *            fixed element length, or 0 for variable length
     * @return elapsed time in milliseconds
     * @throws ParseException
     *             if reading or writing fails; the failure is kept as cause
     */
    private long writeBinaryTextElements(final DataOutputStream dos, File inputFile, int textLength)
            throws FileNotFoundException, ParseException
    {
        long start = System.currentTimeMillis();

        // a non-positive count other than 0 yields an empty placeholder
        int placeholderSize = textLength == 0 ? 1 : textLength;
        StringBuilder placeholder = new StringBuilder();
        for (int i = 0; i < placeholderSize; i++)
        {
            placeholder.append(nullChar);
        }
        final String naString = placeholder.toString();

        try
        {
            for (Tuple line : new CsvFileReader(inputFile))
            {
                for (int columnIndex = 1; columnIndex < line.size(); columnIndex++)
                {
                    String value = line.getString(columnIndex);
                    // FIXME: null: because of a parsed missing value indicator..
                    if (value == null || value.equals(""))
                    {
                        dos.writeBytes(naString);
                    }
                    else
                    {
                        dos.writeBytes(value);
                    }
                }
            }
        }
        catch (Exception e)
        {
            throw toParseException(e);
        }
        return System.currentTimeMillis() - start;
    }

    /**
     * Writes all decimal elements of the input file as doubles, skipping the
     * first column of every line. Missing values are written as
     * Double.MAX_VALUE.
     *
     * @param dos
     *            stream to write to
     * @param inputFile
     *            CSV file holding the matrix
     * @return elapsed time in milliseconds
     * @throws ParseException
     *             if reading or writing fails; the failure is kept as cause
     */
    private long writeBinaryDecimalElements(final DataOutputStream dos, File inputFile) throws FileNotFoundException,
            ParseException
    {
        long start = System.currentTimeMillis();
        try
        {
            for (Tuple line : new CsvFileReader(inputFile))
            {
                for (int columnIndex = 1; columnIndex < line.size(); columnIndex++)
                {
                    Double value = line.getDouble(columnIndex);
                    dos.writeDouble(value == null ? Double.MAX_VALUE : value.doubleValue());
                }
            }
        }
        catch (Exception e)
        {
            throw toParseException(e);
        }
        return System.currentTimeMillis() - start;
    }

    /**
     * Collects the length of every text element, in reading order, skipping
     * the first column of every line. Null and empty elements count as
     * length 1 because they are written as a single null character.
     *
     * @param inputFile
     *            CSV file holding the matrix
     * @param totalElements
     *            number of elements expected in the file
     * @return one length byte per element
     * @throws ParseException
     *             if reading fails, the file holds more elements than
     *             expected, or an element is too long for one length byte;
     *             the failure is kept as cause
     */
    private byte[] getTextDataElementLengths(File inputFile, int totalElements) throws FileNotFoundException,
            ParseException
    {
        final byte[] textElementLengths = new byte[totalElements];
        try
        {
            int index = 0;
            for (Tuple line : new CsvFileReader(inputFile))
            {
                for (int columnIndex = 1; columnIndex < line.size(); columnIndex++)
                {
                    String value = line.getString(columnIndex);
                    int length = (value == null || value.equals("")) ? 1 : value.length();
                    textElementLengths[index] = (byte) checkedByteLength(length, "text element number "
                            + (index + 1));
                    index++;
                }
            }
        }
        catch (Exception e)
        {
            throw toParseException(e);
        }
        return textElementLengths;
    }

    /**
     * Check if all TEXT elements in a matrix are of equal length. The first
     * element with a non-zero length sets the reference length (null counts
     * as length 0); every element after that must have the same length in
     * order for this length to be returned.
     *
     * @param inputFile
     *            CSV file holding the matrix
     * @return the common element length, or 0 when the elements differ in
     *         length or no element with a non-zero length was found
     * @throws ParseException
     *             if reading fails; the failure is kept as cause
     */
    private int elementLength(File inputFile) throws FileNotFoundException, ParseException
    {
        int fixedLength = 0;
        try
        {
            for (Tuple line : new CsvFileReader(inputFile))
            {
                for (int columnIndex = 1; columnIndex < line.size(); columnIndex++)
                {
                    String value = line.getString(columnIndex);
                    int length = value != null ? value.length() : 0;
                    if (fixedLength == 0)
                    {
                        // no reference length yet
                        fixedLength = length;
                        logger.info("First element, size: " + fixedLength);
                    }
                    else if (fixedLength != length)
                    {
                        logger.info("Element " + value + " is not of length " + fixedLength);
                        logger.info("Element of unequal size found, using variable length elements");
                        return 0;
                    }
                }
            }
        }
        catch (Exception ex)
        {
            throw toParseException(ex);
        }
        return fixedLength;
    }

    /**
     * Finds the file with exactly the given name.
     *
     * @param name
     *            file name to look for
     * @param inputFiles
     *            candidate files
     * @return the first file whose name equals name, or null if there is
     *         none
     */
    private File getInputFileForName(String name, List<File> inputFiles)
    {
        logger.info("getting file for name: " + name);
        for (File f : inputFiles)
        {
            logger.info("file: " + f.getAbsolutePath() + " (" + f.getName() + ")");
            if (f.getName().equals(name))
            {
                logger.info("FOUND!");
                return f;
            }
        }
        logger.info("NOT FOUND");
        return null;
    }
}