/*
* Encog(tm) Core v3.4 - Java Version
* http://www.heatonresearch.com/encog/
* https://github.com/encog/encog-java-core
* Copyright 2008-2016 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.app.analyst.analyze;
import java.util.List;
import org.encog.app.analyst.AnalystError;
import org.encog.app.analyst.AnalystFileFormat;
import org.encog.app.analyst.EncogAnalyst;
import org.encog.app.analyst.script.AnalystClassItem;
import org.encog.app.analyst.script.AnalystScript;
import org.encog.app.analyst.script.DataField;
import org.encog.app.analyst.script.prop.ScriptProperties;
import org.encog.app.analyst.util.CSVHeaders;
import org.encog.app.analyst.util.ConvertStringConst;
import org.encog.util.csv.CSVFormat;
import org.encog.util.csv.ReadCSV;
/**
* This class is used to perform an analysis of a CSV file. This will help Encog
* to determine how the fields should be normalized.
*
*/
public class PerformAnalysis {
/**
* The file name to analyze.
*/
private final String filename;
/**
* True, if headers are present.
*/
private final boolean headers;
/**
* The format of this file.
*/
private final AnalystFileFormat format;
/**
* The fields to analyze.
*/
private AnalyzedField[] fields;
/**
* The script to use.
*/
private final AnalystScript script;
/**
* Construct the analysis object.
*
* @param theScript
* The script to use.
* @param theFilename
* The name of the file to analyze.
* @param theHeaders
* True if headers are present.
* @param theFormat
* The format of the file being analyzed.
*/
public PerformAnalysis(final AnalystScript theScript,
final String theFilename, final boolean theHeaders,
final AnalystFileFormat theFormat) {
this.filename = theFilename;
this.headers = theHeaders;
this.format = theFormat;
this.script = theScript;
}
/**
* Generate the header fields.
*
* @param csv
* The CSV file to use.
*/
private void generateFields(final ReadCSV csv) {
if (this.headers) {
generateFieldsFromHeaders(csv);
} else {
generateFieldsFromCount(csv);
}
}
/**
* Generate the fields using counts, no headers provided.
*
* @param csv
* The CSV file to use.
*/
private void generateFieldsFromCount(final ReadCSV csv) {
this.fields = new AnalyzedField[csv.getColumnCount()];
for (int i = 0; i < this.fields.length; i++) {
this.fields[i] = new AnalyzedField(this.script, "field:" + (i + 1));
}
}
/**
* Generate the fields using header values.
*
* @param csv
* The CSV file to use.
*/
private void generateFieldsFromHeaders(final ReadCSV csv) {
final CSVHeaders h = new CSVHeaders(csv.getColumnNames());
this.fields = new AnalyzedField[csv.getColumnCount()];
for (int i = 0; i < this.fields.length; i++) {
if (i >= csv.getColumnNames().size()) {
throw new AnalystError(
"CSV header count does not match column count");
}
this.fields[i] = new AnalyzedField(this.script, h.getHeader(i));
}
}
/**
* Perform the analysis.
* @param target The Encog analyst object to analyze.
*/
public void process(final EncogAnalyst target) {
int count = 0;
final CSVFormat csvFormat = ConvertStringConst
.convertToCSVFormat(this.format);
ReadCSV csv = new ReadCSV(this.filename, this.headers, csvFormat);
// pass one, calculate the min/max
while (csv.next()) {
if (this.fields == null) {
generateFields(csv);
}
for (int i = 0; i < csv.getColumnCount(); i++) {
this.fields[i].analyze1(csv.get(i));
}
count++;
}
if( count==0 ) {
throw new AnalystError("Can't analyze file, it is empty.");
}
for (final AnalyzedField field : this.fields) {
field.completePass1();
}
csv.close();
// pass two, standard deviation
csv = new ReadCSV(this.filename, this.headers, csvFormat);
while (csv.next()) {
for (int i = 0; i < csv.getColumnCount(); i++) {
this.fields[i].analyze2(csv.get(i));
}
}
for (final AnalyzedField field : this.fields) {
field.completePass2();
}
csv.close();
String str = this.script.getProperties().getPropertyString(
ScriptProperties.SETUP_CONFIG_ALLOWED_CLASSES);
if (str == null) {
str = "";
}
final boolean allowInt = str.contains("int");
final boolean allowReal = str.contains("real")
|| str.contains("double");
final boolean allowString = str.contains("string");
// remove any classes that did not qualify
for (final AnalyzedField field : this.fields) {
if (field.isClass()) {
if (!allowInt && field.isInteger()) {
field.setClass(false);
}
if (!allowString && (!field.isInteger() && !field.isReal())) {
field.setClass(false);
}
if (!allowReal && field.isReal() && !field.isInteger()) {
field.setClass(false);
}
}
}
// merge with existing
if ((target.getScript().getFields() != null)
&& (this.fields.length
== target.getScript().getFields().length)) {
for (int i = 0; i < this.fields.length; i++) {
// copy the old field name
this.fields[i].setName(target.getScript().getFields()[i]
.getName());
this.fields[i].setSource(target.getScript().getFields()[i].getSource());
if (this.fields[i].isClass()) {
final List<AnalystClassItem> t = this.fields[i]
.getAnalyzedClassMembers();
final List<AnalystClassItem> s = target.getScript()
.getFields()[i].getClassMembers();
if (s.size() == t.size()) {
for (int j = 0; j < s.size(); j++) {
if (t.get(j).getCode().equals(s.get(j).getCode())) {
t.get(j).setName(s.get(j).getName());
}
}
}
}
}
}
// now copy the fields
final DataField[] df = new DataField[this.fields.length];
for (int i = 0; i < df.length; i++) {
df[i] = this.fields[i].finalizeField();
}
target.getScript().setFields(df);
}
/** {@inheritDoc} */
@Override
public String toString() {
final StringBuilder result = new StringBuilder("[");
result.append(getClass().getSimpleName());
result.append(" filename=");
result.append(this.filename);
result.append(", headers=");
result.append(this.headers);
result.append("]");
return result.toString();
}
}