/*
* Encog(tm) Core v3.4 - Java Version
* http://www.heatonresearch.com/encog/
* https://github.com/encog/encog-java-core
* Copyright 2008-2016 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.app.analyst.csv.normalize;
import java.io.File;
import org.encog.EncogError;
import org.encog.app.analyst.EncogAnalyst;
import org.encog.app.analyst.csv.TimeSeriesUtil;
import org.encog.app.analyst.csv.basic.BasicFile;
import org.encog.app.analyst.script.normalize.AnalystField;
import org.encog.app.analyst.util.CSVHeaders;
import org.encog.ml.data.basic.BasicMLData;
import org.encog.ml.data.buffer.BufferedMLDataSet;
import org.encog.util.csv.CSVFormat;
import org.encog.util.csv.ReadCSV;
import org.encog.util.logging.EncogLogging;
/**
 * Normalize a CSV file using the statistics held by an {@link EncogAnalyst}
 * and write the normalized rows to a {@link BufferedMLDataSet} (EGB) file.
 */
public class AnalystNormalizeToEGB extends BasicFile {

    /**
     * The analyst to use.
     */
    private EncogAnalyst analyst;

    /**
     * Used to process time series.
     */
    private TimeSeriesUtil series;

    /**
     * The headers.
     */
    private CSVHeaders analystHeaders;

    /**
     * Analyze the file. This records the input file settings, reads the CSV
     * headers, initializes every normalized field of the analyst script and
     * prepares the time-series helper. It must be called before
     * {@link #normalize(File)}.
     *
     * @param inputFilename The input file.
     * @param expectInputHeaders True, if input headers are present.
     * @param inputFormat The format.
     * @param theAnalyst The analyst to use.
     */
    public void analyze(final File inputFilename,
            final boolean expectInputHeaders, final CSVFormat inputFormat,
            final EncogAnalyst theAnalyst) {
        this.setInputFilename(inputFilename);
        this.setInputFormat(inputFormat);
        this.setExpectInputHeaders(expectInputHeaders);
        this.analyst = theAnalyst;
        this.setAnalyzed(true);

        this.analystHeaders = new CSVHeaders(inputFilename, expectInputHeaders,
                inputFormat);

        for (final AnalystField field : analyst.getScript().getNormalize()
                .getNormalizedFields()) {
            field.init();
        }

        this.series = new TimeSeriesUtil(analyst, true,
                this.analystHeaders.getHeaders());
    }

    /**
     * Normalize the input file. Write to the specified file.
     *
     * Each CSV row is converted to an array of normalized values; the first
     * {@code inputCount} values become the input data and the following
     * {@code idealCount} values become the ideal data. Rows for which no
     * output array is produced (a {@code null} result) are skipped.
     *
     * @param file
     *            The file to write to.
     * @throws EncogError
     *             If {@code analyze} has not been called yet.
     */
    public void normalize(final File file) {
        if (this.analyst == null) {
            throw new EncogError(
                    "Can't normalize yet, file has not been analyzed.");
        }

        final int inputCount = analyst.getScript().getNormalize()
                .calculateInputColumns();
        final int idealCount = analyst.getScript().getNormalize()
                .calculateOutputColumns();

        // These two holders are reused for every row handed to the buffer.
        final BasicMLData inputData = new BasicMLData(inputCount);
        final BasicMLData idealData = new BasicMLData(idealCount);

        ReadCSV csv = null;
        final BufferedMLDataSet buffer = new BufferedMLDataSet(file);
        buffer.beginLoad(inputCount, idealCount);

        try {
            csv = new ReadCSV(getInputFilename().toString(),
                    isExpectInputHeaders(), getFormat());

            resetStatus();
            final int outputLength = this.analyst.determineTotalColumns();

            // write file contents
            while (csv.next() && !shouldStop()) {
                updateStatus(false);

                double[] output = AnalystNormalizeCSV.extractFields(
                        this.analyst, this.analystHeaders, csv, outputLength,
                        false);

                if (this.series.getTotalDepth() > 1) {
                    output = this.series.process(output);
                }

                // A null output means there is nothing to write for this row
                // (presumably a skipped row, or a time-series window that is
                // not yet full -- confirm against the helpers). The check has
                // to happen BEFORE the array is read; previously it came after
                // the copy loops, so a null row caused a NullPointerException.
                if (output == null) {
                    continue;
                }

                // copy the input columns, then the ideal columns
                int idx = 0;
                for (int i = 0; i < inputData.size(); i++) {
                    inputData.setData(i, output[idx++]);
                }
                for (int i = 0; i < idealData.size(); i++) {
                    idealData.setData(i, output[idx++]);
                }

                buffer.add(inputData, idealData);
            }
        } finally {
            reportDone(false);

            // Cleanup is best-effort: failures are logged, not rethrown, so
            // they cannot mask an exception raised in the try block.
            if (csv != null) {
                try {
                    csv.close();
                } catch (final Exception ex) {
                    EncogLogging.log(ex);
                }
            }

            try {
                buffer.endLoad();
            } catch (final Exception ex) {
                EncogLogging.log(ex);
            }
        }
    }

    /**
     * Set the source file. This is useful if you want to use pre-existing stats
     * to normalize something and skip the analyze step.
     *
     * @param file
     *            The file to use.
     * @param headers
     *            True, if headers are to be expected.
     * @param format
     *            The format of the CSV file.
     */
    public void setSourceFile(final File file, final boolean headers,
            final CSVFormat format) {
        setInputFilename(file);
        setExpectInputHeaders(headers);
        setInputFormat(format);
    }
}