/* * Encog(tm) Core v2.5 - Java Version * http://www.heatonresearch.com/encog/ * http://code.google.com/p/encog-java/ * Copyright 2008-2010 Heaton Research, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * For more information on Heaton Research copyrights, licenses * and trademarks visit: * http://www.heatonresearch.com/copyright */ package org.encog.neural.data.xml; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.encog.neural.NeuralNetworkError; import org.encog.neural.data.NeuralData; import org.encog.neural.data.NeuralDataError; import org.encog.neural.data.NeuralDataPair; import org.encog.neural.data.NeuralDataSet; import org.encog.neural.data.basic.BasicNeuralData; import org.encog.neural.data.basic.BasicNeuralDataPair; import org.encog.parse.tags.read.ReadXML; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * A data source that reads XML files. This class is not memory based, so very * large XML files can be used, without problem. * * The XML data is assumed to look something like below. The names of the tags * can be configured using the various properties on this object. * * <DataSet> <pair> <input><value>0</value><value>0</value></input> <ideal><value>0</value></ideal> * </pair> <pair> <input><value>1</value><value>0</value></input> <ideal><value>1</value></ideal> * </pair> <pair> <input><value>0</value><value>1</value></input> <ideal><value>1</value></ideal> * </pair> <pair> <input><value>1</value><value>1</value></input> <ideal><value>0</value></ideal> * </pair> </DataSet> */ public class XMLNeuralDataSet implements NeuralDataSet { /** * An iterator designed to read from XML files. * * @author jheaton */ public class XMLNeuralIterator implements Iterator<NeuralDataPair> { /** * The XML file being read. */ private InputStream file; /** * A reader for the XML file. */ private ReadXML reader; /** * The data pair just read. */ private NeuralDataPair nextPair; /** * Construct an iterator to read the XML data. */ public XMLNeuralIterator() { try { this.file = new FileInputStream(getFilename()); this.reader = new ReadXML(this.file); } catch (final IOException e) { if (XMLNeuralDataSet.this.logger.isErrorEnabled()) { XMLNeuralDataSet.this.logger.error("Exception", e); } throw new NeuralNetworkError(e); } } /** * Is there any more data to read? * * @return True if there is more data to read. */ public boolean hasNext() { if (this.nextPair != null) { return true; } return obtainNext(); } /** * Internal function called by several functions to display an error * that indicates that the XML is not valid. */ private void invalidError() { final String str = "Could not parse XML, " + "inconsistant tag structure."; if (XMLNeuralDataSet.this.logger.isErrorEnabled()) { XMLNeuralDataSet.this.logger.error(str); } throw new NeuralNetworkError(str); } /** * Read the next training set item. * * @return The next training set item. */ public NeuralDataPair next() { NeuralDataPair result = this.nextPair; if (result == null) { if (!obtainNext()) { return null; } result = this.nextPair; } this.nextPair = null; return result; } /** * Internal function to obtain the next training set item. * * @return True if one was found. */ private boolean obtainNext() { if (!this.reader.findTag(getPairXML(), true)) { return false; } final NeuralData input = new BasicNeuralData( XMLNeuralDataSet.this.inputSize); final NeuralData ideal = new BasicNeuralData( XMLNeuralDataSet.this.idealSize); if (!this.reader.findTag(getInputXML(), true)) { invalidError(); } for (int i = 0; i < XMLNeuralDataSet.this.inputSize; i++) { if (!this.reader.findTag(getValueXML(), true)) { invalidError(); } final String str = this.reader.readTextToTag(); input.setData(i, Double.parseDouble(str)); } if (XMLNeuralDataSet.this.idealSize > 0) { if (!this.reader.findTag(getIdealXML(), true)) { invalidError(); } for (int i = 0; i < XMLNeuralDataSet.this.idealSize; i++) { if (!this.reader.findTag(getValueXML(), true)) { invalidError(); } final String str = this.reader.readTextToTag(); ideal.setData(i, Double.parseDouble(str)); } } if (ideal != null) { this.nextPair = new BasicNeuralDataPair(input, ideal); } else { this.nextPair = new BasicNeuralDataPair(input); } return true; } /** * Remove this iterator. */ public void remove() { try { this.file.close(); XMLNeuralDataSet.this.iterators.remove(this); } catch (final IOException e) { if (XMLNeuralDataSet.this.logger.isErrorEnabled()) { XMLNeuralDataSet.this.logger.error("Error", e); } throw new NeuralNetworkError(e); } } } /** * The serial id. */ private static final long serialVersionUID = -5960796361565902008L; /** * Error Message: adds are not supported. */ public static final String ADD_NOT_SUPPORTED = "Adds are not supported with this dataset, it is read only."; /** * The logging object. */ private final Logger logger = LoggerFactory.getLogger(this.getClass()); /** * The file name to read. */ private final String filename; /** * The XML that indicates that a pair is about to start. */ private final String pairXML; /** * The XML that indicates that input data is about to start. */ private final String inputXML; /** * XMl that indicates that ideal data is about to start. */ private final String idealXML; /** * XML that indicates that a numeric value is about to start. */ private final String valueXML; /** * The input data size. */ private final int inputSize; /** * The ideal data size. */ private final int idealSize; /** * A collection of iterators that have been created. */ private final List<XMLNeuralIterator> iterators = new ArrayList<XMLNeuralIterator>(); /** * Construct an XML neural data set. * * @param filename * The filename to read. * @param inputSize * The input size. * @param idealSize * The ideal size. Zero for unsupervised. * @param pairXML * The XML that starts a pair. * @param inputXML * The XML that starts input. * @param idealXML * The XML that starts ideal. * @param valueXML * The XML that starts values. */ public XMLNeuralDataSet(final String filename, final int inputSize, final int idealSize, final String pairXML, final String inputXML, final String idealXML, final String valueXML) { this.filename = filename; this.pairXML = pairXML; this.inputXML = inputXML; this.idealXML = idealXML; this.valueXML = valueXML; this.idealSize = idealSize; this.inputSize = inputSize; } /** * Adds are not supported, this is a read only data set. * * @param data1 * Not used. */ public void add(final NeuralData data1) { throw new NeuralDataError(XMLNeuralDataSet.ADD_NOT_SUPPORTED); } /** * Adds are not supported, this is a read only data set. * * @param inputData * Not used. * @param idealData * Not used. */ public void add(final NeuralData inputData, final NeuralData idealData) { throw new NeuralDataError(XMLNeuralDataSet.ADD_NOT_SUPPORTED); } /** * Adds are not supported, this is a read only data set. * * @param inputData * Not used. */ public void add(final NeuralDataPair inputData) { throw new NeuralDataError(XMLNeuralDataSet.ADD_NOT_SUPPORTED); } /** * Close the XML data source. */ public void close() { for (int i = 0; i < this.iterators.size(); i++) { final XMLNeuralIterator iterator = this.iterators.get(i); iterator.remove(); } } /** * @return The XML filename. */ public String getFilename() { return this.filename; } /** * @return The ideal size. */ public int getIdealSize() { return this.idealSize; } /** * @return The XML tag for ideal. */ public String getIdealXML() { return this.idealXML; } /** * @return The input size. */ public int getInputSize() { return this.inputSize; } /** * @return The XML tag for input. */ public String getInputXML() { return this.inputXML; } /** * @return The XML tag for pairs. */ public String getPairXML() { return this.pairXML; } /** * @return The XML tag for values. */ public String getValueXML() { return this.valueXML; } /** * @return An iterator for this data. */ public Iterator<NeuralDataPair> iterator() { final XMLNeuralIterator result = new XMLNeuralIterator(); this.iterators.add(result); return result; } /** * @return True if this training data is supervised. */ @Override public boolean isSupervised() { return this.idealSize>0; } }