/*
* Encog(tm) Core v3.4 - Java Version
* http://www.heatonresearch.com/encog/
* https://github.com/encog/encog-java-core
* Copyright 2008-2016 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.app.analyst.script;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.encog.app.analyst.AnalystError;
import org.encog.app.analyst.script.ml.ScriptOpcode;
import org.encog.app.analyst.script.normalize.AnalystField;
import org.encog.app.analyst.script.process.ProcessField;
import org.encog.app.analyst.script.prop.PropertyConstraints;
import org.encog.app.analyst.script.prop.PropertyEntry;
import org.encog.app.analyst.script.segregate.AnalystSegregateTarget;
import org.encog.app.analyst.script.task.AnalystTask;
import org.encog.persist.EncogFileSection;
import org.encog.persist.EncogReadHelper;
import org.encog.util.arrayutil.NormalizationAction;
import org.encog.util.csv.CSVFormat;
/**
 * Used to load an Encog Analyst script.
 *
 * The script is read one section at a time. Each recognized
 * section/subsection pair is handed to a handler that copies the section's
 * contents into the {@link AnalystScript} given to the constructor. Sections
 * that are not recognized are silently skipped.
 */
public class ScriptLoad {

    /**
     * Column 1.
     */
    public static final int COLUMN_ONE = 1;

    /**
     * Column 2.
     */
    public static final int COLUMN_TWO = 2;

    /**
     * Column 3.
     */
    public static final int COLUMN_THREE = 3;

    /**
     * Column 4.
     */
    public static final int COLUMN_FOUR = 4;

    /**
     * Column 5.
     */
    public static final int COLUMN_FIVE = 5;

    /**
     * Minimum number of columns in a DATA:STATS row (columns 0 through 8 are
     * always read; column 9, the source, is optional).
     */
    private static final int MIN_STATS_COLUMNS = 9;

    /**
     * Minimum number of columns in a NORMALIZE:RANGE row (columns 0 through 5
     * are always read).
     */
    private static final int MIN_RANGE_COLUMNS = 6;

    /**
     * Minimum number of columns in the two-column tables (SEGREGATE:FILES,
     * PROCESS:FIELDS and ML:OPCODES).
     */
    private static final int MIN_PAIR_COLUMNS = 2;

    /**
     * Sections whose CONFIG subsection is loaded as generic properties.
     */
    private static final String[] CONFIG_SECTIONS = { "SETUP", "DATA",
            "NORMALIZE", "CLUSTER", "SERIES", "RANDOMIZE", "SEGREGATE",
            "GENERATE", "ML", "BALANCE", "CODE", "PROCESS" };

    /**
     * The script being loaded.
     */
    private final AnalystScript script;

    /**
     * Construct a script loader.
     *
     * @param theScript
     *            The script to load into.
     */
    public ScriptLoad(final AnalystScript theScript) {
        this.script = theScript;
    }

    /**
     * Split a table row into columns and make sure enough columns are
     * present, so that callers can index the result without risking an
     * IndexOutOfBoundsException on a malformed line.
     *
     * @param line
     *            The raw line from the section.
     * @param minColumns
     *            The smallest acceptable number of columns.
     * @param description
     *            What the row describes; used in the error message.
     * @return The columns of the line.
     * @throws AnalystError
     *             If the line has fewer than minColumns columns.
     */
    private static List<String> splitRow(final String line,
            final int minColumns, final String description) {
        final List<String> cols = EncogFileSection.splitColumns(line);
        if (cols.size() < minColumns) {
            throw new AnalystError("Invalid " + description + ": " + line);
        }
        return cols;
    }

    /**
     * Parse an integer column used as a boolean flag.
     *
     * @param text
     *            The column text; must be a valid integer.
     * @return True if the integer is greater than zero.
     */
    private static boolean parseFlag(final String text) {
        return Integer.parseInt(text) > 0;
    }

    /**
     * Translate the action column of a NORMALIZE:RANGE row.
     *
     * @param action
     *            The action name as written in the script (case sensitive).
     * @return The matching normalization action.
     * @throws AnalystError
     *             If the action name is not one of the known values.
     */
    private static NormalizationAction parseAction(final String action) {
        if (action.equals("range")) {
            return NormalizationAction.Normalize;
        } else if (action.equals("ignore")) {
            return NormalizationAction.Ignore;
        } else if (action.equals("pass")) {
            return NormalizationAction.PassThrough;
        } else if (action.equals("equilateral")) {
            return NormalizationAction.Equilateral;
        } else if (action.equals("single")) {
            return NormalizationAction.SingleField;
        } else if (action.equals("oneof")) {
            return NormalizationAction.OneOf;
        }
        throw new AnalystError("Unknown field type:" + action);
    }

    /**
     * Handle loading the data classes.
     *
     * The first line of the section is skipped (presumably the column
     * heading row). Every other line supplies field, code, name and count.
     * The items are grouped per field, sorted, and then replace the class
     * members of each script field that is flagged as a class.
     *
     * @param section
     *            The section being loaded.
     * @throws AnalystError
     *             If a line has fewer than four columns, or names a field
     *             the script does not know.
     */
    private void handleDataClasses(final EncogFileSection section) {
        final Map<String, List<AnalystClassItem>> itemsByField
            = new HashMap<String, List<AnalystClassItem>>();

        boolean header = true;
        for (final String line : section.getLines()) {
            if (header) {
                header = false;
                continue;
            }

            final List<String> cols = splitRow(line, COLUMN_FOUR,
                    "data class");
            final String field = cols.get(0);
            final String code = cols.get(1);
            final String name = cols.get(2);
            final int count = Integer.parseInt(cols.get(COLUMN_THREE));

            if (this.script.findDataField(field) == null) {
                // Report the field that failed the lookup, not the class
                // item's name.
                throw new AnalystError(
                        "Attempting to add class to unknown field: " + field);
            }

            List<AnalystClassItem> classItems = itemsByField.get(field);
            if (classItems == null) {
                classItems = new ArrayList<AnalystClassItem>();
                itemsByField.put(field, classItems);
            }
            classItems.add(new AnalystClassItem(code, name, count));
        }

        for (final DataField field : this.script.getFields()) {
            if (!field.isClass()) {
                continue;
            }
            final List<AnalystClassItem> classList = itemsByField.get(field
                    .getName());
            // Fields without any rows in this section keep their members.
            if (classList != null) {
                Collections.sort(classList);
                field.getClassMembers().clear();
                field.getClassMembers().addAll(classList);
            }
        }
    }

    /**
     * Handle loading data stats.
     *
     * The first line of the section is skipped (presumably the column
     * heading row). Every other line becomes one {@link DataField}, and the
     * resulting array replaces the script's fields.
     *
     * @param section
     *            The section being loaded.
     * @throws AnalystError
     *             If a line has fewer than nine columns.
     */
    private void handleDataStats(final EncogFileSection section) {
        final List<DataField> dfs = new ArrayList<DataField>();

        boolean header = true;
        for (final String line : section.getLines()) {
            if (header) {
                header = false;
                continue;
            }

            final List<String> cols = splitRow(line, MIN_STATS_COLUMNS,
                    "data stat");
            final String name = cols.get(0);
            final boolean isclass = parseFlag(cols.get(1));
            final boolean iscomplete = parseFlag(cols.get(2));
            final boolean isint = parseFlag(cols.get(3));
            final boolean isreal = parseFlag(cols.get(4));
            final double amax = CSVFormat.EG_FORMAT.parse(cols.get(5));
            final double amin = CSVFormat.EG_FORMAT.parse(cols.get(6));
            final double mean = CSVFormat.EG_FORMAT.parse(cols.get(7));
            final double sdev = CSVFormat.EG_FORMAT.parse(cols.get(8));

            // source was added in Encog 3.2, so it might not be there
            String source = "";
            if (cols.size() > 9) {
                source = cols.get(9);
            }

            final DataField df = new DataField(name);
            df.setClass(isclass);
            df.setComplete(iscomplete);
            df.setInteger(isint);
            df.setReal(isreal);
            df.setMax(amax);
            df.setMin(amin);
            df.setMean(mean);
            df.setStandardDeviation(sdev);
            df.setSource(source);
            dfs.add(df);
        }

        this.script.setFields(dfs.toArray(new DataField[dfs.size()]));
    }

    /**
     * Handle loading the filenames.
     *
     * Any filenames already stored in the script properties are cleared and
     * replaced by the name/value pairs of the section.
     *
     * @param section
     *            The section being loaded.
     */
    private void handleFilenames(final EncogFileSection section) {
        final Map<String, String> prop = section.parseParams();
        this.script.getProperties().clearFilenames();
        for (final Entry<String, String> e : prop.entrySet()) {
            this.script.getProperties().setFilename(e.getKey(), e.getValue());
        }
    }

    /**
     * Handle normalization ranges.
     *
     * The script's normalized fields are cleared, the first line of the
     * section is skipped (presumably the column heading row), and every
     * other line is added as one {@link AnalystField}.
     *
     * @param section
     *            The section being loaded.
     * @throws AnalystError
     *             If a line has fewer than six columns, or its io type or
     *             action is not recognized.
     */
    private void handleNormalizeRange(final EncogFileSection section) {
        this.script.getNormalize().getNormalizedFields().clear();

        boolean header = true;
        for (final String line : section.getLines()) {
            if (header) {
                header = false;
                continue;
            }

            final List<String> cols = splitRow(line, MIN_RANGE_COLUMNS,
                    "normalize range");
            final String name = cols.get(0);
            final String io = cols.get(1);
            final int timeSlice = Integer.parseInt(cols.get(2));
            final String action = cols.get(3);
            final double high = CSVFormat.EG_FORMAT.parse(cols.get(4));
            final double low = CSVFormat.EG_FORMAT.parse(cols.get(5));

            final boolean isOutput;
            if (io.equalsIgnoreCase("input")) {
                isOutput = false;
            } else if (io.equalsIgnoreCase("output")) {
                isOutput = true;
            } else {
                throw new AnalystError("Unknown io type:" + io);
            }

            final NormalizationAction des = parseAction(action);

            final AnalystField nf = new AnalystField(name, des, high, low);
            nf.setTimeSlice(timeSlice);
            nf.setOutput(isOutput);
            this.script.getNormalize().getNormalizedFields().add(nf);
        }
    }

    /**
     * Handle loading segregation info.
     *
     * The first line of the section is skipped (presumably the column
     * heading row). Every other line supplies a filename and an integer
     * percent; the resulting targets replace the script's segregate targets.
     *
     * @param section
     *            The section being loaded.
     * @throws AnalystError
     *             If a line has fewer than two columns.
     */
    private void handleSegregateFiles(final EncogFileSection section) {
        final List<AnalystSegregateTarget> nfs
            = new ArrayList<AnalystSegregateTarget>();

        boolean header = true;
        for (final String line : section.getLines()) {
            if (header) {
                header = false;
                continue;
            }

            final List<String> cols = splitRow(line, MIN_PAIR_COLUMNS,
                    "segregate file");
            final String filename = cols.get(0);
            final int percent = Integer.parseInt(cols.get(1));
            nfs.add(new AnalystSegregateTarget(filename, percent));
        }

        this.script.getSegregate().setSegregateTargets(
                nfs.toArray(new AnalystSegregateTarget[nfs.size()]));
    }

    /**
     * Handle loading a task.
     *
     * The task is named after the subsection and receives every line of the
     * section unchanged (no line is skipped here).
     *
     * @param section
     *            The section.
     */
    private void handleTask(final EncogFileSection section) {
        final AnalystTask task = new AnalystTask(section.getSubSectionName());
        for (final String line : section.getLines()) {
            task.getLines().add(line);
        }
        this.script.addTask(task);
    }

    /**
     * Load an Encog script.
     *
     * Sections are read until the reader reports no more, each one being
     * dispatched as it is read; the script's init method is then called. The
     * reader is closed whether or not loading succeeds.
     *
     * @param stream
     *            The stream to load from.
     */
    public void load(final InputStream stream) {
        EncogReadHelper reader = null;

        try {
            EncogFileSection section = null;
            reader = new EncogReadHelper(stream);

            while ((section = reader.readNextSection()) != null) {
                processSubSection(section);
            }

            // init the script
            this.script.init();
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }

    /**
     * Load a generic subsection.
     *
     * Every name/value pair of the section is validated and stored as a
     * script property under the key SECTION:SUBSECTION_name, where the
     * section and subsection names are upper-cased. A missing (null) value
     * is stored as the empty string.
     *
     * @param section
     *            The section to load from.
     * @throws AnalystError
     *             If a property name is not known for this section.
     */
    private void loadSubSection(final EncogFileSection section) {
        final Map<String, String> prop = section.parseParams();
        final String sectionName = section.getSectionName();
        final String subSectionName = section.getSubSectionName();
        final String keyPrefix = sectionName.toUpperCase() + ":"
                + subSectionName.toUpperCase() + "_";

        for (final Entry<String, String> e : prop.entrySet()) {
            final String name = e.getKey();
            String value = e.getValue();
            if (value == null) {
                value = "";
            }

            validateProperty(sectionName, subSectionName, name, value);
            this.script.getProperties().setProperty(keyPrefix + name, value);
        }
    }

    /**
     * Test a section/subsection pair against an expected pair. The section
     * name is compared exactly; the subsection name ignores case.
     *
     * @param sectionName
     *            The actual section name.
     * @param subSectionName
     *            The actual subsection name.
     * @param expectedSection
     *            The section name to match.
     * @param expectedSubSection
     *            The subsection name to match.
     * @return True if both names match.
     */
    private static boolean matches(final String sectionName,
            final String subSectionName, final String expectedSection,
            final String expectedSubSection) {
        return sectionName.equals(expectedSection)
                && subSectionName.equalsIgnoreCase(expectedSubSection);
    }

    /**
     * Decide whether a section/subsection pair holds plain name/value
     * properties: the CONFIG subsection of any section listed in
     * CONFIG_SECTIONS, plus HEADER:DATASOURCE and ML:TRAIN.
     *
     * @param sectionName
     *            The section name.
     * @param subSectionName
     *            The subsection name.
     * @return True if the pair should be loaded as generic properties.
     */
    private static boolean isPropertySection(final String sectionName,
            final String subSectionName) {
        for (final String configSection : CONFIG_SECTIONS) {
            if (matches(sectionName, subSectionName, configSection, "CONFIG")) {
                return true;
            }
        }
        return matches(sectionName, subSectionName, "HEADER", "DATASOURCE")
                || matches(sectionName, subSectionName, "ML", "TRAIN");
    }

    /**
     * Process one of the subsections.
     *
     * Dispatches on the section name (case sensitive) and subsection name
     * (case insensitive). Pairs that match nothing are ignored.
     *
     * @param section
     *            The section.
     */
    private void processSubSection(final EncogFileSection section) {
        final String currentSection = section.getSectionName();
        final String currentSubsection = section.getSubSectionName();

        if (isPropertySection(currentSection, currentSubsection)) {
            loadSubSection(section);
        } else if (matches(currentSection, currentSubsection, "SETUP",
                "FILENAMES")) {
            handleFilenames(section);
        } else if (matches(currentSection, currentSubsection, "DATA",
                "STATS")) {
            handleDataStats(section);
        } else if (matches(currentSection, currentSubsection, "DATA",
                "CLASSES")) {
            handleDataClasses(section);
        } else if (matches(currentSection, currentSubsection, "NORMALIZE",
                "RANGE")) {
            handleNormalizeRange(section);
        } else if (matches(currentSection, currentSubsection, "SEGREGATE",
                "FILES")) {
            handleSegregateFiles(section);
        } else if (matches(currentSection, currentSubsection, "ML",
                "OPCODES")) {
            loadOpcodes(section);
        } else if (matches(currentSection, currentSubsection, "PROCESS",
                "FIELDS")) {
            handleProcessFields(section);
        } else if (currentSection.equals("TASKS")
                && (currentSubsection.length() > 0)) {
            // Any non-empty subsection under TASKS names a task.
            handleTask(section);
        }
    }

    /**
     * Validate a property.
     *
     * @param section
     *            The section.
     * @param subSection
     *            The sub section.
     * @param name
     *            The name of the property.
     * @param value
     *            The new value for the property.
     * @throws AnalystError
     *             If no constraint entry exists for the property.
     */
    private void validateProperty(final String section,
            final String subSection, final String name, final String value) {
        final PropertyEntry entry = PropertyConstraints.getInstance().getEntry(
                section, subSection, name);
        if (entry == null) {
            throw new AnalystError("Unknown property: "
                    + PropertyEntry.dotForm(section, subSection, name));
        }
        entry.validate(section, subSection, name, value);
    }

    /**
     * Handle loading the process fields.
     *
     * The script's process fields are cleared, the first line of the section
     * is skipped (presumably the column heading row), and every other line
     * supplies a name and a command for one {@link ProcessField}.
     *
     * @param section
     *            The section being loaded.
     * @throws AnalystError
     *             If a line has fewer than two columns.
     */
    private void handleProcessFields(final EncogFileSection section) {
        final List<ProcessField> fields = this.script.getProcess().getFields();
        fields.clear();

        boolean header = true;
        for (final String line : section.getLines()) {
            if (header) {
                header = false;
                continue;
            }

            final List<String> cols = splitRow(line, MIN_PAIR_COLUMNS,
                    "process field");
            final String name = cols.get(0);
            final String command = cols.get(1);
            fields.add(new ProcessField(name, command));
        }
    }

    /**
     * Handle loading the opcodes.
     *
     * The first line of the section is skipped (presumably the column
     * heading row). Every other line supplies an opcode name and an integer
     * child count. Opcodes are appended to the script's opcode list, which
     * is not cleared first.
     *
     * @param section
     *            The section being loaded.
     * @throws AnalystError
     *             If a line has fewer than two columns.
     */
    private void loadOpcodes(final EncogFileSection section) {
        boolean header = true;
        for (final String line : section.getLines()) {
            if (header) {
                header = false;
                continue;
            }

            final List<String> cols = splitRow(line, MIN_PAIR_COLUMNS,
                    "opcode");
            final String name = cols.get(0);
            final int childCount = Integer.parseInt(cols.get(1));
            this.script.getOpcodes().add(new ScriptOpcode(name, childCount));
        }
    }
}