/*
 * INESC-ID, Instituto de Engenharia de Sistemas e Computadores Investigação e Desevolvimento em Lisboa
 * Copyright 2013 INESC-ID and/or its affiliates and other
 * contributors as indicated by the @author tags. All rights reserved.
 * See the copyright.txt in the distribution for a full listing of
 * individual contributors.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 3.0 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package org.infinispan.dataplacement.c50;

import org.infinispan.configuration.cache.Configuration;
import org.infinispan.dataplacement.OwnersInfo;
import org.infinispan.dataplacement.c50.keyfeature.Feature;
import org.infinispan.dataplacement.c50.keyfeature.FeatureValue;
import org.infinispan.dataplacement.c50.keyfeature.KeyFeatureManager;
import org.infinispan.dataplacement.c50.lookup.BloomFilter;
import org.infinispan.dataplacement.c50.lookup.BloomFilter2;
import org.infinispan.dataplacement.c50.tree.DecisionTree;
import org.infinispan.dataplacement.c50.tree.DecisionTreeBuilder;
import org.infinispan.dataplacement.c50.tree.DecisionTreeParser;
import org.infinispan.dataplacement.c50.tree.ParseTreeNode;
import org.infinispan.dataplacement.lookup.ObjectLookup;
import org.infinispan.dataplacement.lookup.ObjectLookupFactory;
import org.infinispan.util.TypedProperties;
import org.infinispan.util.Util;
import org.infinispan.util.logging.Log;
import org.infinispan.util.logging.LogFactory;

import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/**
 * Object Lookup Factory when Machine Learner (C5.0) and Bloom Filters technique is used.
 * <p/>
 * For each owner index the factory writes an {@code input-N.data} and an {@code input-N.names} file in the
 * machine learner directory, runs the external {@code c5.0} executable on them and parses the resulting
 * {@code input-N.tree} file into a {@link DecisionTree}.
 *
 * @author Pedro Ruivo
 * @since 5.2
 */
@SuppressWarnings("UnusedDeclaration") //this is loaded in runtime
public class C50MLObjectLookupFactory implements ObjectLookupFactory {

   /** Property name: directory where the {@code c5.0} executable lives and where the work files are written. */
   public static final String LOCATION = "location";
   /** Property name: fully qualified class name of the {@link KeyFeatureManager} implementation. */
   public static final String KEY_FEATURE_MANAGER = "keyFeatureManager";
   /** Property name: false positive probability used to create the bloom filter. */
   public static final String BF_FALSE_POSITIVE = "bfFalsePositiveProb";

   //common prefix of every work file; also used to find the files of previous runs
   private static final String INPUT_FILE_PREFIX = "input-";
   //%1$s is the machine learner path (with trailing separator), %2$s is the iteration number
   private static final String INPUT_FORMAT = "%1$s" + INPUT_FILE_PREFIX + "%2$s";
   private static final String INPUT_ML_DATA_FORMAT = INPUT_FORMAT + ".data";
   private static final String INPUT_ML_NAMES_FORMAT = INPUT_FORMAT + ".names";
   private static final String INPUT_ML_TREE_FORMAT = INPUT_FORMAT + ".tree";
   private static final String ML_EXECUTABLE = "c5.0";
   private static final double DEFAULT_BF_FALSE_POSITIVE = 0.001;
   private static final int DRAIN_BUFFER_SIZE = 4096;

   private static final Log log = LogFactory.getLog(C50MLObjectLookupFactory.class);

   private KeyFeatureManager keyFeatureManager;
   private final Map<String, Feature> featureMap;
   //never assigned nor read in this class; the trees are built through the static DecisionTreeBuilder.build()
   private DecisionTreeBuilder decisionTreeBuilder;
   private String machineLearnerPath = System.getProperty("user.dir");
   private double bloomFilterFalsePositiveProbability = DEFAULT_BF_FALSE_POSITIVE;

   public C50MLObjectLookupFactory() {
      featureMap = new HashMap<String, Feature>();
   }

   /**
    * Reads the data placement properties: the machine learner location ({@link #LOCATION}), the key feature
    * manager class ({@link #KEY_FEATURE_MANAGER}) and the bloom filter false positive probability
    * ({@link #BF_FALSE_POSITIVE}).
    *
    * @param configuration the cache configuration
    * @throws IllegalStateException if the key feature manager is not configured or cannot be obtained
    */
   @Override
   public void setConfiguration(Configuration configuration) {
      TypedProperties typedProperties = configuration.dataPlacement().properties();

      machineLearnerPath = typedProperties.getProperty(LOCATION, machineLearnerPath);
      if (!machineLearnerPath.endsWith(File.separator)) {
         //the file name formats concatenate the path and the file name directly
         machineLearnerPath += File.separator;
      }

      String keyFeatureManagerClassName = typedProperties.getProperty(KEY_FEATURE_MANAGER, null);
      if (keyFeatureManagerClassName == null) {
         throw new IllegalStateException("Key Feature Manager cannot be null");
      }

      keyFeatureManager = Util.getInstance(keyFeatureManagerClassName, Thread.currentThread().getContextClassLoader());
      if (keyFeatureManager == null) {
         throw new IllegalStateException("Key Feature Manager cannot be null");
      }

      String tmp = typedProperties.getProperty(BF_FALSE_POSITIVE, "0.001");
      try {
         double parsed = Double.parseDouble(tmp);
         //written this way so that NaN is rejected too
         if (parsed > 0 && parsed < 1) {
            bloomFilterFalsePositiveProbability = parsed;
         } else {
            log.warnf("Invalid bloom filter false positive probability %s (it must be between 0 and 1, exclusive). " +
                            "The value is %s.", tmp, bloomFilterFalsePositiveProbability);
         }
      } catch (NumberFormatException nfe) {
         log.warnf("Error parsing bloom filter false positive probability. The value is %s. %s",
                   bloomFilterFalsePositiveProbability, nfe.getMessage());
      }

      //drop the features of a previous configuration, if any
      featureMap.clear();
      for (Feature feature : keyFeatureManager.getAllKeyFeatures()) {
         featureMap.put(feature.getName(), feature);
      }
   }

   /**
    * Sets the key feature manager in the object lookup, if it is a {@link C50MLObjectLookup}. Other
    * implementations are left untouched.
    *
    * @param objectLookup the object lookup to initialize
    */
   @Override
   public void init(ObjectLookup objectLookup) {
      if (objectLookup instanceof C50MLObjectLookup) {
         ((C50MLObjectLookup) objectLookup).setKeyFeatureManager(keyFeatureManager);
      }
   }

   /**
    * Creates the object lookup: a bloom filter with the keys to move plus one decision tree per owner index.
    *
    * @param toMoveObj      the objects to move and their new owners
    * @param numberOfOwners the number of owners, i.e. the number of decision trees to create
    * @return the object lookup, or {@code null} if a file cannot be written, the machine learner cannot be
    *         executed, the tree cannot be parsed or the thread is interrupted
    */
   @Override
   public ObjectLookup createObjectLookup(Map<Object, OwnersInfo> toMoveObj, int numberOfOwners) {
      BloomFilter bloomFilter = createBloomFilter(toMoveObj.keySet());
      C50MLObjectLookup objectLookup = new C50MLObjectLookup(numberOfOwners, bloomFilter);
      objectLookup.setKeyFeatureManager(keyFeatureManager);

      //remove the files of previous runs; otherwise an old .tree file could be parsed if the learner fails
      deleteAll();

      for (int iteration = 0; iteration < numberOfOwners; ++iteration) {
         Set<Integer> ownersIndexes = new TreeSet<Integer>();

         if (!writeObjectsToInputData(toMoveObj, ownersIndexes, iteration)) {
            log.errorf("Cannot create Object Lookup. Error writing input.data");
            return null;
         }

         if (!writeInputNames(ownersIndexes, iteration)) {
            log.errorf("Cannot create Object Lookup. Error writing input.name");
            return null;
         }

         try {
            runMachineLearner(iteration);
         } catch (IOException e) {
            log.errorf(e, "Error while trying to executing the Machine Learner");
            return null;
         } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return null;
         }

         ParseTreeNode root;
         try {
            root = DecisionTreeParser.parse(String.format(INPUT_ML_TREE_FORMAT, machineLearnerPath, iteration));
         } catch (Exception e) {
            log.errorf(e, "Error parsing Machine Learner tree");
            return null;
         }

         DecisionTree tree = DecisionTreeBuilder.build(root, featureMap);
         objectLookup.setDecisionTreeList(iteration, tree);
      }
      return objectLookup;
   }

   /**
    * @return the number of query profiling phases; this implementation always returns 3
    */
   @Override
   public int getNumberOfQueryProfilingPhases() {
      return 3;
   }

   /**
    * returns the bloom filter with the objects to move encoding on it
    *
    * @param objectsToMove the objects to move
    * @return the bloom filter
    */
   private BloomFilter createBloomFilter(Collection<Object> objectsToMove) {
      BloomFilter bloomFilter = new BloomFilter(bloomFilterFalsePositiveProbability, objectsToMove.size());
      for (Object key : objectsToMove) {
         bloomFilter.add(key);
      }
      return bloomFilter;
   }

   /**
    * same as {@link #createBloomFilter(java.util.Collection)} but using the {@link BloomFilter2} implementation
    *
    * @param objectsToMove the objects to move
    * @return the bloom filter
    */
   private BloomFilter2 createBloomFilter2(Collection<Object> objectsToMove) {
      return new BloomFilter2(objectsToMove, bloomFilterFalsePositiveProbability);
   }

   /**
    * it starts the machine learner and blocks until the process ends.
    * <p/>
    * The command is passed as an argument list, so a path with white spaces is not split. The standard error
    * is merged into the standard output and both are read until the end, because the process can block if
    * one of its output buffers gets full.
    *
    * @param iteration the iteration number
    * @throws java.io.IOException  if an error occurs when launch the process or reading its output
    * @throws InterruptedException if interrupted while waiting
    */
   private void runMachineLearner(int iteration) throws IOException, InterruptedException {
      ProcessBuilder builder = new ProcessBuilder(machineLearnerPath + ML_EXECUTABLE, "-f",
                                                  String.format(INPUT_FORMAT, machineLearnerPath, iteration));
      builder.redirectErrorStream(true);
      Process process = builder.start();

      //nothing is sent to the process
      close(process.getOutputStream());

      InputStream output = process.getInputStream();
      boolean finished = false;
      try {
         byte[] buffer = new byte[DRAIN_BUFFER_SIZE];
         //noinspection StatementWithEmptyBody
         while (output.read(buffer) != -1) {
            //the output is discarded
         }
         int exitCode = process.waitFor();
         finished = true;
         if (exitCode != 0) {
            log.warnf("Machine Learner exited with code %s in iteration %s", exitCode, iteration);
         }
      } finally {
         close(output);
         if (!finished) {
            //I/O error or interruption: do not leave the process running
            process.destroy();
         }
      }
   }

   /**
    * writes the input.name files needed to run the machine leaner
    *
    * @param possibleReturnValues the possible values of the decision
    * @param iteration            the iteration number
    * @return true if the file was correctly written, false otherwise
    */
   private boolean writeInputNames(Collection<Integer> possibleReturnValues, int iteration) {
      BufferedWriter writer = getBufferedWriter(String.format(INPUT_ML_NAMES_FORMAT, machineLearnerPath, iteration));

      if (writer == null) {
         log.errorf("Cannot create writer when tried to write the input.names");
         return false;
      }

      try {
         writer.write("home");
         writer.newLine();
         writer.newLine();

         for (Feature feature : keyFeatureManager.getAllKeyFeatures()) {
            writeInputNames(writer, feature);
         }

         //with zero or one value, extra negative values are added to the list
         //NOTE(review): presumably because the learner needs at least two classes -- confirm
         if (possibleReturnValues.isEmpty()) {
            writer.write("home: -2,-1");
         } else if (possibleReturnValues.size() == 1) {
            writer.write("home: -1,");
         } else {
            writer.write("home: ");
         }

         Iterator<Integer> iterator = possibleReturnValues.iterator();
         if (iterator.hasNext()) {
            writer.write(Integer.toString(iterator.next()));
         }
         while (iterator.hasNext()) {
            writer.write(",");
            writer.write(Integer.toString(iterator.next()));
         }
         writer.write(".");
         //flush here so that a write error is reported as a failure; close() only logs it
         writer.flush();
         return true;
      } catch (IOException e) {
         log.errorf(e, "Error writing input.names. %s", e.getMessage());
         return false;
      } finally {
         close(writer);
      }
   }

   /**
    * writes a single feature in the input.names, as {@code name: class1,class2,...,classN.}
    *
    * @param writer  the writer for the file
    * @param feature the feature instance (with type, etc...)
    * @throws IOException           if it cannot write in the file
    * @throws IllegalStateException if the feature has no machine learner classes
    */
   private void writeInputNames(BufferedWriter writer, Feature feature) throws IOException {
      String[] listOfNames = feature.getMachineLearnerClasses();
      if (listOfNames == null || listOfNames.length == 0) {
         throw new IllegalStateException("Feature " + feature.getName() + " has no machine learner classes");
      }

      writer.write(feature.getName());
      writer.write(": ");
      writer.write(listOfNames[0]);
      for (int i = 1; i < listOfNames.length; ++i) {
         writer.write(",");
         writer.write(listOfNames[i]);
      }
      writer.write(".");
      writer.newLine();
   }

   /**
    * writes the input.data with the objects to move and their new owner
    *
    * @param toMoveObj     the objects to move and new location
    * @param ownersIndexes the new owners indexes. to write in the .names file. It is filled by this method
    * @param iteration     the iteration number
    * @return true if the file was correctly wrote, false otherwise
    */
   private boolean writeObjectsToInputData(Map<Object, OwnersInfo> toMoveObj, Set<Integer> ownersIndexes,
                                           int iteration) {
      BufferedWriter writer = getBufferedWriter(String.format(INPUT_ML_DATA_FORMAT, machineLearnerPath, iteration));

      if (writer == null) {
         log.errorf("Cannot create writer when tried to write the input.data");
         return false;
      }

      try {
         for (Map.Entry<Object, OwnersInfo> entry : toMoveObj.entrySet()) {
            //TODO: hack
            //NOTE(review): the first owner is shifted by the iteration and wrapped at 40; the meaning of 40
            //is not visible in this class -- confirm before changing
            int owner = entry.getValue().getOwner(0) + iteration;
            owner %= 40;
            writeInputData(entry.getKey(), owner, writer);
            ownersIndexes.add(owner);
         }
         //flush here so that a write error is reported as a failure; close() only logs it
         writer.flush();
         return true;
      } catch (IOException e) {
         log.errorf(e, "Error writing input.data. %s", e.getMessage());
         return false;
      } finally {
         close(writer);
      }
   }

   /**
    * writes a single key in the input.data: the value of each feature (or N/A when the key does not have it),
    * separated by commas, followed by the new owner index
    *
    * @param key       the key
    * @param nodeIndex the new owner index
    * @param writer    the writer for input.data
    * @throws IOException if it cannot write on it
    */
   private void writeInputData(Object key, int nodeIndex, BufferedWriter writer) throws IOException {
      Map<Feature, FeatureValue> keyFeatures = keyFeatureManager.getFeatures(key);

      for (Feature feature : keyFeatureManager.getAllKeyFeatures()) {
         FeatureValue keyFeatureValue = keyFeatures.get(feature);
         writer.write(keyFeatureValue == null ? "N/A" : keyFeatureValue.getValueAsString());
         writer.write(",");
      }
      writer.write(Integer.toString(nodeIndex));
      writer.newLine();
   }

   /**
    * returns a buffered writer for the file in file path
    *
    * @param filePath the file path
    * @return the buffered writer or null if the file cannot be written
    */
   private BufferedWriter getBufferedWriter(String filePath) {
      try {
         return new BufferedWriter(new FileWriter(filePath));
      } catch (IOException e) {
         log.errorf(e, "Cannot create writer for file %s. %s", filePath, e.getMessage());
      }
      return null;
   }

   /**
    * deletes the files of previous runs, i.e. every regular file in the machine learner directory whose name
    * starts with {@code input-}. It is best effort: a failure is logged and ignored.
    */
   private void deleteAll() {
      File[] files = new File(machineLearnerPath).listFiles();
      if (files == null) {
         //not a directory or it cannot be listed
         log.warnf("Error deleting old files");
         return;
      }
      for (File file : files) {
         if (file.isFile() && file.getName().startsWith(INPUT_FILE_PREFIX) && !file.delete()) {
            log.warnf("Error deleting old file %s", file);
         }
      }
   }

   /**
    * close closeable instance. An error is logged and ignored.
    *
    * @param closeable the object to close; null is ignored
    */
   private void close(Closeable closeable) {
      if (closeable == null) {
         return;
      }
      try {
         closeable.close();
      } catch (IOException e) {
         log.warnf("Error closing %s. %s", closeable, e.getMessage());
      }
   }
}