/*
* Encog(tm) Core v3.4 - Java Version
* http://www.heatonresearch.com/encog/
* https://github.com/encog/encog-java-core
* Copyright 2008-2016 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.ml.data.buffer.codec;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Enumeration;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;
import org.encog.Encog;
import org.encog.ml.data.buffer.BufferedDataError;
import org.encog.parse.tags.read.ReadXML;
import org.encog.parse.tags.write.WriteXML;
import org.encog.util.csv.CSVFormat;
import org.encog.util.file.ResourceInputStream;
import org.encog.util.logging.EncogLogging;
/**
 * A CODEC that can read/write Microsoft Excel (*.XLSX) files.
 *
 * <p>
 * Reading opens the XLSX container as a zip file and walks the XML of
 * {@code xl/worksheets/sheet1.xml}, taking the numeric content of each
 * {@code <v>} element found inside a {@code <row>}. Writing copies every
 * entry of the bundled {@code org/encog/data/blank.xlsx} template except the
 * worksheet, buffers the worksheet XML in memory while rows are written and
 * adds it to the archive when {@link #close()} is called.
 * </p>
 */
public class ExcelCODEC implements DataSetCODEC {

    /**
     * The zip entry, inside the XLSX container, that holds the worksheet.
     */
    private static final String SHEET_ENTRY = "xl/worksheets/sheet1.xml";

    /**
     * Size of the scratch buffer used to copy template entries.
     */
    private static final int COPY_BUFFER_SIZE = 4096;

    /**
     * The Excel file.
     */
    private final File file;

    /**
     * The Excel file that we are reading.
     */
    private ZipFile readZipFile;

    /**
     * The worksheet zip entry located by {@link #prepareRead()}.
     */
    private ZipEntry entry;

    /**
     * XML that is currently being parsed.
     */
    private ReadXML xmlIn;

    /**
     * The number of inputs.
     */
    private int inputCount;

    /**
     * The number of ideals.
     */
    private int idealCount;

    /**
     * The file stream to write to.
     */
    private FileOutputStream fos;

    /**
     * The zip stream to write to.
     */
    private ZipOutputStream zos;

    /**
     * A byte buffer to hold the output during an export to XLSX.
     */
    private ByteArrayOutputStream buffer;

    /**
     * The XML output.
     */
    private WriteXML xmlOut;

    /**
     * The current (1-based) row, during an export.
     */
    private int row;

    /**
     * Constructor to create Excel from binary.
     *
     * @param theFile
     *            The Excel file to create.
     */
    public ExcelCODEC(final File theFile) {
        this.file = theFile;
    }

    /**
     * Create a CODEC to load data from Excel to binary.
     *
     * @param theFile
     *            The Excel file to load.
     * @param theInputCount
     *            The number of input columns.
     * @param theIdealCount
     *            The number of ideal columns.
     */
    public ExcelCODEC(final File theFile, final int theInputCount,
            final int theIdealCount) {
        this.file = theFile;
        this.inputCount = theInputCount;
        this.idealCount = theIdealCount;
    }

    /**
     * Release whatever was opened by {@link #prepareRead()} or
     * {@link #prepareWrite(int, int, int)}. When writing, this is also the
     * point at which the buffered worksheet XML is completed and added to the
     * archive. Every resource is attempted even if an earlier one fails.
     *
     * @throws BufferedDataError
     *             If an I/O error occurs while finishing or closing.
     */
    @Override
    public void close() {
        try {
            closeReadZipFile();
        } finally {
            try {
                finishWorkbook();
            } finally {
                closeFileStream();
            }
        }
    }

    /**
     * Close the zip file used for reading, if one is open.
     */
    private void closeReadZipFile() {
        if (this.readZipFile == null) {
            return;
        }
        try {
            this.readZipFile.close();
        } catch (final IOException e) {
            throw new BufferedDataError(e);
        } finally {
            this.readZipFile = null;
        }
    }

    /**
     * Complete the worksheet XML, store it as the worksheet entry and close
     * the zip output stream, if one is open.
     */
    private void finishWorkbook() {
        if (this.zos == null) {
            return;
        }
        try {
            // xmlOut is null when prepareWrite failed before the worksheet
            // was started; in that case there is nothing to emit.
            if (this.xmlOut != null) {
                // Closes the "sheetData" tag opened by prepareWrite.
                this.xmlOut.endTag();
                this.xmlOut.addAttribute("left", "0.7");
                this.xmlOut.addAttribute("right", "0.7");
                this.xmlOut.addAttribute("top", "0.75");
                this.xmlOut.addAttribute("bottom", "0.75");
                this.xmlOut.addAttribute("header", "0.3");
                this.xmlOut.addAttribute("footer", "0.3");
                this.xmlOut.beginTag("pageMargins");
                this.xmlOut.endTag();
                // Closes the "worksheet" tag opened by prepareWrite.
                this.xmlOut.endTag();
                this.xmlOut.endDocument();

                final byte[] b = this.buffer.toByteArray();
                final ZipEntry theEntry = new ZipEntry(SHEET_ENTRY);
                theEntry.setSize(b.length);
                theEntry.setCompressedSize(-1);
                theEntry.setMethod(ZipEntry.DEFLATED);
                this.zos.putNextEntry(theEntry);
                this.zos.write(b);
                this.zos.closeEntry();
            }
            this.zos.close();
        } catch (final IOException e) {
            throw new BufferedDataError(e);
        } finally {
            this.zos = null;
            this.xmlOut = null;
            this.buffer = null;
        }
    }

    /**
     * Close the underlying file output stream, if one is open.
     */
    private void closeFileStream() {
        if (this.fos == null) {
            return;
        }
        try {
            this.fos.close();
        } catch (final IOException e) {
            throw new BufferedDataError(e);
        } finally {
            this.fos = null;
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int getIdealSize() {
        return this.idealCount;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int getInputSize() {
        return this.inputCount;
    }

    /**
     * Open the Excel file and position an XML reader on its worksheet.
     *
     * @throws BufferedDataError
     *             If the file is not a valid zip container, does not contain
     *             the worksheet entry, or cannot be read.
     */
    @Override
    public void prepareRead() {
        try {
            this.readZipFile = new ZipFile(this.file);
            final Enumeration<? extends ZipEntry> entries = this.readZipFile
                    .entries();
            this.entry = null;
            while (entries.hasMoreElements()) {
                final ZipEntry e = entries.nextElement();
                if (SHEET_ENTRY.equals(e.getName())) {
                    this.entry = e;
                }
            }

            if (this.entry == null) {
                this.readZipFile.close();
                this.readZipFile = null;
                throw new BufferedDataError("Could not find worksheet.");
            }

            final InputStream is = this.readZipFile.getInputStream(this.entry);
            this.xmlIn = new ReadXML(is);
        } catch (final ZipException e) {
            abandonReadZipFile();
            throw new BufferedDataError("Not a valid Excel file.");
        } catch (final IOException e) {
            abandonReadZipFile();
            throw new BufferedDataError(e);
        }
    }

    /**
     * Close the read zip file after a failed {@link #prepareRead()} so the
     * file handle is not leaked. A secondary close failure is only logged,
     * because the caller is about to receive the primary error.
     */
    private void abandonReadZipFile() {
        if (this.readZipFile != null) {
            try {
                this.readZipFile.close();
            } catch (final IOException ignored) {
                EncogLogging.log(ignored);
            }
            this.readZipFile = null;
        }
    }

    /**
     * Create the output file, copy the template workbook into it (all entries
     * except the worksheet) and start the in-memory worksheet XML up to and
     * including the opening {@code sheetData} tag.
     *
     * @param recordCount
     *            The number of records that will be written; used for the
     *            worksheet dimension.
     * @param inputSize
     *            The number of input columns.
     * @param idealSize
     *            The number of ideal columns.
     * @throws BufferedDataError
     *             If an I/O error occurs.
     */
    @Override
    public void prepareWrite(final int recordCount, final int inputSize,
            final int idealSize) {
        this.inputCount = inputSize;
        this.idealCount = idealSize;

        ZipInputStream zis = null;

        try {
            this.fos = new FileOutputStream(this.file);
            this.zos = new ZipOutputStream(this.fos);

            final InputStream is = ResourceInputStream
                    .openResourceInputStream("org/encog/data/blank.xlsx");
            zis = new ZipInputStream(is);
            copyTemplateEntries(zis);
            zis.close();
            zis = null;

            this.buffer = new ByteArrayOutputStream();
            this.xmlOut = new WriteXML(this.buffer);
            this.xmlOut.beginDocument();
            this.xmlOut
                    .addAttribute("xmlns",
                            "http://schemas.openxmlformats.org/spreadsheetml/2006/main");
            this.xmlOut
                    .addAttribute("xmlns:r",
                            "http://schemas.openxmlformats.org/officeDocument/2006/relationships");
            this.xmlOut.beginTag("worksheet");

            // Bottom-right corner of the used range: last column + last row.
            final String lastCell = toColumn(this.inputCount + this.idealCount)
                    + recordCount;
            this.xmlOut.addAttribute("ref", "A1:" + lastCell);
            this.xmlOut.beginTag("dimension");
            this.xmlOut.endTag();

            this.xmlOut.beginTag("sheetViews");
            this.xmlOut.addAttribute("tabSelected", "1");
            this.xmlOut.addAttribute("workbookViewId", "0");
            this.xmlOut.beginTag("sheetView");
            this.xmlOut.endTag();
            this.xmlOut.endTag();

            this.xmlOut.addAttribute("defaultRowHeight", "15");
            // The SpreadsheetML element is "sheetFormatPr" (format
            // properties), not "sheetFormatPtr".
            this.xmlOut.beginTag("sheetFormatPr");
            this.xmlOut.endTag();

            this.row = 1;
            // Left open on purpose: write() appends rows, close() ends it.
            this.xmlOut.beginTag("sheetData");
        } catch (final IOException ex) {
            throw new BufferedDataError(ex);
        } finally {
            if (zis != null) {
                try {
                    zis.close();
                } catch (final IOException e) {
                    EncogLogging.log(e);
                }
            }
        }
    }

    /**
     * Copy every entry of the template archive, other than the worksheet
     * itself, into the output archive.
     *
     * @param zis
     *            The template archive, positioned before its first entry.
     * @throws IOException
     *             If the template cannot be read or the output cannot be
     *             written.
     */
    private void copyTemplateEntries(final ZipInputStream zis)
            throws IOException {
        final byte[] chunk = new byte[COPY_BUFFER_SIZE];
        ZipEntry theEntry;
        while ((theEntry = zis.getNextEntry()) != null) {
            if (SHEET_ENTRY.equals(theEntry.getName())) {
                // The worksheet is generated by this class, not copied.
                continue;
            }
            // Only the name is carried over; sizes and CRC are recomputed by
            // the output stream, so entries whose size is unknown while
            // streaming are handled as well.
            this.zos.putNextEntry(new ZipEntry(theEntry.getName()));
            int count;
            while ((count = zis.read(chunk)) != -1) {
                this.zos.write(chunk, 0, count);
            }
            this.zos.closeEntry();
        }
    }

    /**
     * Read the next row of the worksheet.
     *
     * @param input
     *            Receives the input values of the row.
     * @param ideal
     *            Receives the ideal values of the row.
     * @param significance
     *            Element 0 is set to 1.0 for every row that is read; this
     *            CODEC does not read a significance from the file.
     * @return True if a row was read, false if the end of the worksheet XML
     *         was reached.
     */
    @Override
    public boolean read(final double[] input, final double[] ideal, double[] significance) {
        int ch;

        while ((ch = this.xmlIn.read()) != -1) {
            // The current tag is only inspected when read() returns 0.
            if (ch == 0 && this.xmlIn.is("row", true)) {
                readRow(this.xmlIn, input, ideal);
                significance[0] = 1.0;
                return true;
            }
        }
        return false;
    }

    /**
     * Read the cell values of the current row. Values are assigned in
     * document order: first into {@code input}, then into {@code ideal}. Any
     * cells beyond {@code input.length + ideal.length} are ignored.
     *
     * @param reader
     *            The XML reader, positioned just after the opening
     *            {@code row} tag.
     * @param input
     *            Receives the input values.
     * @param ideal
     *            Receives the ideal values.
     */
    private void readRow(final ReadXML reader,
            final double[] input,
            final double[] ideal) {
        int ch;
        int index = 0;

        while ((ch = reader.read()) != -1) {
            if (ch != 0) {
                continue;
            }
            if (reader.is("v", true)) {
                final String str = reader.readTextToTag();
                final double d = CSVFormat.ENGLISH.parse(str);
                if (index < input.length) {
                    input[index] = d;
                } else if (index - input.length < ideal.length) {
                    ideal[index - input.length] = d;
                }
                index++;
            } else if (reader.is("row", false)) {
                break;
            }
        }
    }

    /**
     * Convert a 1-based column number to Excel column letters.
     *
     * @param index
     *            The 1-based column number (1 = A, 26 = Z, 27 = AA).
     * @return The column letters, i.e. A or AA; an empty string when the
     *         number is less than 1.
     */
    private static String toColumn(final int index) {
        final StringBuilder result = new StringBuilder();
        int remaining = index;
        // Column letters are bijective base 26: there is no zero digit, so
        // one is subtracted before each digit is extracted.
        while (remaining > 0) {
            final int digit = (remaining - 1) % 26;
            result.insert(0, (char) ('A' + digit));
            remaining = (remaining - 1) / 26;
        }
        return result.toString();
    }

    /**
     * Append one row to the worksheet being written.
     *
     * @param input
     *            The input values; the first {@code inputSize} elements
     *            given to {@link #prepareWrite(int, int, int)} are written.
     * @param ideal
     *            The ideal values; the first {@code idealSize} elements are
     *            written after the inputs.
     * @param significance
     *            Not written by this CODEC.
     */
    @Override
    public void write(final double[] input, final double[] ideal,
            final double significance) {
        final int currentRow = this.row++;

        this.xmlOut.addAttribute("spans", "1:"
                + (this.inputCount + this.idealCount));
        this.xmlOut.addAttribute("r", "" + currentRow);
        this.xmlOut.beginTag("row");

        int column = 1;
        for (int i = 0; i < this.inputCount; i++) {
            writeCell(column++, currentRow, input[i]);
        }
        for (int i = 0; i < this.idealCount; i++) {
            writeCell(column++, currentRow, ideal[i]);
        }

        this.xmlOut.endTag();
    }

    /**
     * Write a single numeric cell.
     *
     * @param column
     *            The 1-based column number.
     * @param rowNumber
     *            The 1-based row number.
     * @param value
     *            The value to store.
     */
    private void writeCell(final int column, final int rowNumber,
            final double value) {
        // A cell reference is column letters followed by the row, e.g. "B3".
        this.xmlOut.addAttribute("r", toColumn(column) + rowNumber);
        this.xmlOut.beginTag("c");
        this.xmlOut.beginTag("v");
        this.xmlOut.addText(CSVFormat.EG_FORMAT.format(value,
                Encog.DEFAULT_PRECISION));
        this.xmlOut.endTag();
        this.xmlOut.endTag();
    }
}