/*
 * Copyright [2013-2015] PayPal Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package ml.shifu.shifu.core.dtrain.dataset;

import java.io.InputStream;
import java.io.OutputStream;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang.StringUtils;
import org.encog.engine.network.activation.ActivationFunction;
import org.encog.neural.flat.FlatNetwork;
import org.encog.neural.networks.BasicNetwork;
import org.encog.neural.networks.PersistBasicNetwork;
import org.encog.persist.EncogFileSection;
import org.encog.persist.EncogPersistor;
import org.encog.persist.EncogReadHelper;
import org.encog.persist.EncogWriteHelper;
import org.encog.persist.PersistConst;
import org.encog.persist.PersistError;
import org.encog.util.csv.CSVFormat;

/**
 * Supports {@link BasicFloatNetwork} serialization and de-serialization in the Encog text format.
 *
 * <p>
 * The implementation is adapted from {@link PersistBasicNetwork}. Compared with a plain copy it:
 * <ul>
 * <li>reports 'BasicFloatNetwork' from {@link #getPersistClassString()};</li>
 * <li>resolves the activation name {@code ActivationReLU} to the Shifu class
 * {@code ml.shifu.shifu.core.dtrain.nn.ActivationReLU};</li>
 * <li>reads and writes an extra {@code SUBSET} sub-section that carries the feature subset of the network.</li>
 * </ul>
 *
 * <p>
 * Because the methods of {@link PersistBasicNetwork} are final, the code has to be copied here instead of being
 * reused through inheritance.
 */
public class PersistBasicFloatNetwork implements EncogPersistor {

    /** Name of the only top-level section this persistor reads and writes. */
    private static final String SECTION_BASIC = "BASIC";

    /** Sub-section holding the free-form network properties. */
    private static final String SUB_SECTION_PARAMS = "PARAMS";

    /** Sub-section holding the flat network structure and weights. */
    private static final String SUB_SECTION_NETWORK = "NETWORK";

    /** Sub-section holding one activation function per line. */
    private static final String SUB_SECTION_ACTIVATION = "ACTIVATION";

    /** Sub-section holding the feature subset. */
    private static final String SUB_SECTION_SUBSET = "SUBSET";

    /** Property key, inside the SUBSET sub-section, of the comma separated feature indexes. */
    private static final String SUBSET_FEATURES_KEY = "SUBSETFEATURES";

    /** Activation name that is resolved outside of the Encog activation package. */
    private static final String RELU_SIMPLE_NAME = "ActivationReLU";

    /** Fully qualified class used for {@link #RELU_SIMPLE_NAME}. */
    private static final String RELU_CLASS_NAME = "ml.shifu.shifu.core.dtrain.nn.ActivationReLU";

    /** Package prefix used to resolve every other activation name. */
    private static final String ENCOG_ACTIVATION_PACKAGE = "org.encog.engine.network.activation.";

    /**
     * {@inheritDoc}
     */
    @Override
    public final int getFileVersion() {
        return 1;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public final String getPersistClassString() {
        return "BasicFloatNetwork";
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * Sections other than {@code BASIC} and unknown sub-sections are ignored. The {@code ACTIVATION} sub-section
     * must come after {@code NETWORK} because the number of activation functions is taken from the layer counts.
     *
     * @param is
     *            the stream to read the network from
     * @return the de-serialized {@link BasicFloatNetwork}
     * @throws PersistError
     *             if an activation function cannot be instantiated or the ACTIVATION sub-section is inconsistent
     *             with the NETWORK sub-section
     */
    @Override
    public final Object read(final InputStream is) {
        final BasicFloatNetwork result = new BasicFloatNetwork();
        final FlatNetwork flat = new FlatNetwork();
        final EncogReadHelper in = new EncogReadHelper(is);

        EncogFileSection section;
        while((section = in.readNextSection()) != null) {
            if(!SECTION_BASIC.equals(section.getSectionName())) {
                continue;
            }

            final String subSection = section.getSubSectionName();
            if(SUB_SECTION_PARAMS.equals(subSection)) {
                result.getProperties().putAll(section.parseParams());
            } else if(SUB_SECTION_NETWORK.equals(subSection)) {
                readNetworkSection(section, flat);
            } else if(SUB_SECTION_ACTIVATION.equals(subSection)) {
                readActivationSection(section, flat);
            } else if(SUB_SECTION_SUBSET.equals(subSection)) {
                final Map<String, String> params = section.parseParams();
                result.setFeatureSet(parseFeatureSet(params.get(SUBSET_FEATURES_KEY)));
            }
        }

        result.getStructure().setFlat(flat);
        return result;
    }

    /**
     * Populates the flat network from the properties of the NETWORK sub-section.
     */
    private static void readNetworkSection(final EncogFileSection section, final FlatNetwork flat) {
        final Map<String, String> params = section.parseParams();

        flat.setBeginTraining(EncogFileSection.parseInt(params, BasicNetwork.TAG_BEGIN_TRAINING));
        flat.setConnectionLimit(EncogFileSection.parseDouble(params, BasicNetwork.TAG_CONNECTION_LIMIT));
        flat.setContextTargetOffset(EncogFileSection.parseIntArray(params, BasicNetwork.TAG_CONTEXT_TARGET_OFFSET));
        flat.setContextTargetSize(EncogFileSection.parseIntArray(params, BasicNetwork.TAG_CONTEXT_TARGET_SIZE));
        flat.setEndTraining(EncogFileSection.parseInt(params, BasicNetwork.TAG_END_TRAINING));
        flat.setHasContext(EncogFileSection.parseBoolean(params, BasicNetwork.TAG_HAS_CONTEXT));
        flat.setInputCount(EncogFileSection.parseInt(params, PersistConst.INPUT_COUNT));
        flat.setLayerCounts(EncogFileSection.parseIntArray(params, BasicNetwork.TAG_LAYER_COUNTS));
        flat.setLayerFeedCounts(EncogFileSection.parseIntArray(params, BasicNetwork.TAG_LAYER_FEED_COUNTS));
        flat.setLayerContextCount(EncogFileSection.parseIntArray(params, BasicNetwork.TAG_LAYER_CONTEXT_COUNT));
        flat.setLayerIndex(EncogFileSection.parseIntArray(params, BasicNetwork.TAG_LAYER_INDEX));
        flat.setLayerOutput(EncogFileSection.parseDoubleArray(params, PersistConst.OUTPUT));
        // Layer sums are not persisted; they are re-created with the same length as the layer output.
        flat.setLayerSums(new double[flat.getLayerOutput().length]);
        flat.setOutputCount(EncogFileSection.parseInt(params, PersistConst.OUTPUT_COUNT));
        flat.setWeightIndex(EncogFileSection.parseIntArray(params, BasicNetwork.TAG_WEIGHT_INDEX));
        flat.setWeights(EncogFileSection.parseDoubleArray(params, PersistConst.WEIGHTS));
        flat.setBiasActivation(EncogFileSection.parseDoubleArray(params, BasicNetwork.TAG_BIAS_ACTIVATION));
    }

    /**
     * Builds the activation functions of the flat network from the lines of the ACTIVATION sub-section.
     *
     * @throws PersistError
     *             if the layer counts are not set yet, if there are more activation lines than layers, or if an
     *             activation function cannot be instantiated
     */
    private static void readActivationSection(final EncogFileSection section, final FlatNetwork flat) {
        final int[] layerCounts = flat.getLayerCounts();
        if(layerCounts == null) {
            // Fail with a meaningful message instead of a bare NullPointerException.
            throw new PersistError("ACTIVATION sub-section must come after the NETWORK sub-section.");
        }

        final ActivationFunction[] functions = new ActivationFunction[layerCounts.length];
        int index = 0;
        for(final String line: section.getLines()) {
            if(index >= functions.length) {
                throw new PersistError("ACTIVATION sub-section has more entries than the " + functions.length
                        + " layers declared in the NETWORK sub-section.");
            }

            // First column is the activation simple class name, the remaining ones are its parameters.
            final List<String> cols = EncogFileSection.splitColumns(line);
            final ActivationFunction af = newActivationFunction(cols.get(0));
            final int paramCount = af.getParamNames().length;
            for(int i = 0; i < paramCount; i++) {
                af.setParam(i, CSVFormat.EG_FORMAT.parse(cols.get(i + 1)));
            }
            functions[index++] = af;
        }
        flat.setActivationFunctions(functions);
    }

    /**
     * Instantiates an activation function from the simple class name stored in the file.
     *
     * @throws PersistError
     *             if the class cannot be found or instantiated
     */
    private static ActivationFunction newActivationFunction(final String simpleName) {
        final String className = RELU_SIMPLE_NAME.equals(simpleName) ? RELU_CLASS_NAME
                : ENCOG_ACTIVATION_PACKAGE + simpleName;
        try {
            final Class<?> clazz = Class.forName(className);
            return (ActivationFunction) clazz.newInstance();
        } catch (final ClassNotFoundException e) {
            throw new PersistError(e);
        } catch (final InstantiationException e) {
            throw new PersistError(e);
        } catch (final IllegalAccessException e) {
            throw new PersistError(e);
        }
    }

    /**
     * Parses the comma separated feature indexes of the SUBSET sub-section.
     *
     * <p>
     * Surrounding whitespace of each token is ignored and empty tokens are skipped, so hand-edited values such as
     * {@code "1, 2"} are accepted.
     *
     * @param subsetStr
     *            raw property value, may be {@code null}
     * @return the parsed indexes, or {@code null} if the value is blank
     * @throws NumberFormatException
     *             if a non-empty token is not an integer
     */
    private static Set<Integer> parseFeatureSet(final String subsetStr) {
        if(StringUtils.isBlank(subsetStr)) {
            return null;
        }

        final Set<Integer> subFeatures = new HashSet<Integer>();
        for(final String token: subsetStr.split(",")) {
            final String trimmed = token.trim();
            if(trimmed.length() == 0) {
                continue;
            }
            subFeatures.add(Integer.parseInt(trimmed));
        }
        return subFeatures;
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * Writes the PARAMS, NETWORK, ACTIVATION and SUBSET sub-sections of the BASIC section, in that order, and
     * flushes the writer. The stream is not closed here.
     *
     * @param os
     *            the stream to write to
     * @param obj
     *            the network to save; it is cast to {@link BasicFloatNetwork}
     */
    @Override
    public final void save(final OutputStream os, final Object obj) {
        final EncogWriteHelper out = new EncogWriteHelper(os);
        final BasicFloatNetwork net = (BasicFloatNetwork) obj;
        final FlatNetwork flat = net.getStructure().getFlat();

        out.addSection(SECTION_BASIC);

        out.addSubSection(SUB_SECTION_PARAMS);
        out.addProperties(net.getProperties());

        out.addSubSection(SUB_SECTION_NETWORK);
        out.writeProperty(BasicNetwork.TAG_BEGIN_TRAINING, flat.getBeginTraining());
        out.writeProperty(BasicNetwork.TAG_CONNECTION_LIMIT, flat.getConnectionLimit());
        out.writeProperty(BasicNetwork.TAG_CONTEXT_TARGET_OFFSET, flat.getContextTargetOffset());
        out.writeProperty(BasicNetwork.TAG_CONTEXT_TARGET_SIZE, flat.getContextTargetSize());
        out.writeProperty(BasicNetwork.TAG_END_TRAINING, flat.getEndTraining());
        out.writeProperty(BasicNetwork.TAG_HAS_CONTEXT, flat.getHasContext());
        out.writeProperty(PersistConst.INPUT_COUNT, flat.getInputCount());
        out.writeProperty(BasicNetwork.TAG_LAYER_COUNTS, flat.getLayerCounts());
        out.writeProperty(BasicNetwork.TAG_LAYER_FEED_COUNTS, flat.getLayerFeedCounts());
        out.writeProperty(BasicNetwork.TAG_LAYER_CONTEXT_COUNT, flat.getLayerContextCount());
        out.writeProperty(BasicNetwork.TAG_LAYER_INDEX, flat.getLayerIndex());
        out.writeProperty(PersistConst.OUTPUT, flat.getLayerOutput());
        out.writeProperty(PersistConst.OUTPUT_COUNT, flat.getOutputCount());
        out.writeProperty(BasicNetwork.TAG_WEIGHT_INDEX, flat.getWeightIndex());
        out.writeProperty(PersistConst.WEIGHTS, flat.getWeights());
        out.writeProperty(BasicNetwork.TAG_BIAS_ACTIVATION, flat.getBiasActivation());

        // One line per activation function: simple class name followed by its parameters.
        out.addSubSection(SUB_SECTION_ACTIVATION);
        for(final ActivationFunction af: flat.getActivationFunctions()) {
            out.addColumn(af.getClass().getSimpleName());
            final double[] afParams = af.getParams();
            for(int i = 0; i < afParams.length; i++) {
                out.addColumn(afParams[i]);
            }
            out.writeLine();
        }

        // A missing or empty feature set is written as an empty value, which read() maps back to null.
        out.addSubSection(SUB_SECTION_SUBSET);
        final Set<Integer> featureList = net.getFeatureSet();
        if(featureList == null || featureList.isEmpty()) {
            out.writeProperty(SUBSET_FEATURES_KEY, "");
        } else {
            out.writeProperty(SUBSET_FEATURES_KEY, StringUtils.join(featureList, ","));
        }

        out.flush();
    }
}