/* * Encog(tm) Core v3.4 - Java Version * http://www.heatonresearch.com/encog/ * https://github.com/encog/encog-java-core * Copyright 2008-2016 Heaton Research, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * For more information on Heaton Research copyrights, licenses * and trademarks visit: * http://www.heatonresearch.com/copyright */ package org.encog.app.analyst.analyze; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import org.encog.Encog; import org.encog.app.analyst.script.AnalystClassItem; import org.encog.app.analyst.script.AnalystScript; import org.encog.app.analyst.script.DataField; import org.encog.app.analyst.script.prop.ScriptProperties; import org.encog.util.csv.CSVFormat; /** * This class represents a field that the Encog Analyst is in the process of * analyzing. This class is used to track statistical information on the field * that will help the Encog analyst determine what type of field this is, and * how to normalize it. * */ public class AnalyzedField extends DataField { /** * Tge sum of all values of this field. */ private double total; /** * The number of instances of this field. */ private int instances; /** * The total for standard deviation calculation. */ private double devTotal; /** * A mapping between the class names that the class items. */ private final Map<String, AnalystClassItem> classMap = new HashMap<String, AnalystClassItem>(); /** * The analyst script that the results are saved to. */ private final AnalystScript script; /** * The numeric format. */ private CSVFormat fmt; /** * Construct an analyzed field. * @param theScript The script being analyzed. * @param name The name of the field. */ public AnalyzedField(final AnalystScript theScript, final String name) { super(name); this.instances = 0; this.script = theScript; this.fmt = this.script.determineFormat(); } /** * Perform a pass one analysis of this field. * @param v The current value. */ public void analyze1(final String v) { boolean accountedFor = false; String str = v.trim(); if (str.trim().length() == 0 || str.equals("?")) { setComplete(false); return; } this.instances++; if (isReal()) { if (this.fmt.isValid(str)) { final double d = this.fmt.parse(str); setMax(Math.max(d, getMax())); setMin(Math.min(d, getMin())); this.total += d; accountedFor = true; } else { setReal(false); if (!isInteger()) { setMax(0); setMin(0); setStandardDeviation(0); } } } if (isInteger()) { try { final int i = Integer.parseInt(str); setMax(Math.max(i, getMax())); setMin(Math.min(i, getMin())); if (!accountedFor) { this.total += i; } } catch (final NumberFormatException ex) { setInteger(false); if (!isReal()) { setMax(0); setMin(0); setStandardDeviation(0); } } } if (isClass()) { AnalystClassItem item; // is this a new class? if (!this.classMap.containsKey(str)) { item = new AnalystClassItem(str, str, 1); this.classMap.put(str, item); // do we have too many different classes? final int max = this.script.getProperties().getPropertyInt( ScriptProperties.SETUP_CONFIG_MAX_CLASS_COUNT); if (this.classMap.size() > max) { setClass(false); } } else { item = this.classMap.get(str); item.increaseCount(); } } } /** * Perform a pass two analysis of this field. * @param str The current value. */ public void analyze2(final String str) { if (str.trim().length() == 0) { return; } if (isReal() || isInteger()) { if (!str.equals("") && !str.equals("?")) { final double d = this.fmt.parse(str); this.devTotal += Math.pow((d - getMean()), 2); } } } /** * Complete pass 1. */ public void completePass1() { this.devTotal = 0; if (this.instances == 0) { setMean(0); } else { setMean(this.total / this.instances); } } /** * Complete pass 2. */ public void completePass2() { setStandardDeviation(Math.sqrt(this.devTotal / this.instances)); } /** * Finalize the field, and create a DataField. * @return The new DataField. */ public DataField finalizeField() { final DataField result = new DataField(getName()); // if max and min are the same, we are dealing with a zero-sized range, // which will cause other issues. This is caused by ever number in the // column having exactly (or nearly exactly) the same value. Provide a // small range around that value so that every value in this column normalizes // to the midpoint of the desired normalization range, typically 0 or 0.5. if( Math.abs(getMax()-getMin())<Encog.DEFAULT_DOUBLE_EQUAL ) { result.setMin(getMin()-0.0001); result.setMax(getMin()+0.0001); } else { result.setMin(getMin()); result.setMax(getMax()); } result.setName(getName()); result.setMean(getMean()); result.setStandardDeviation(getStandardDeviation()); result.setInteger(isInteger()); result.setReal(isReal()); result.setClass(isClass()); result.setComplete(isComplete()); result.setSource(getSource()); result.getClassMembers().clear(); if (result.isClass()) { final List<AnalystClassItem> list = getAnalyzedClassMembers(); result.getClassMembers().addAll(list); } return result; } /** * Get the class members. * @return The class members. */ public List<AnalystClassItem> getAnalyzedClassMembers() { final List<String> sorted = new ArrayList<String>(); sorted.addAll(this.classMap.keySet()); Collections.sort(sorted); final List<AnalystClassItem> result = new ArrayList<AnalystClassItem>(); for (final String str : sorted) { result.add(this.classMap.get(str)); } return result; } /** {@inheritDoc} */ @Override public String toString() { final StringBuilder result = new StringBuilder("["); result.append(getClass().getSimpleName()); result.append(" total="); result.append(this.total); result.append(", instances="); result.append(this.instances); result.append("]"); return result.toString(); } }