package edu.brown.markov;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.log4j.Logger;
import org.voltdb.CatalogContext;
import org.voltdb.catalog.Procedure;

import weka.core.Instances;

import edu.brown.markov.features.AbstractFeature;
import edu.brown.markov.features.BasePartitionFeature;
import edu.brown.markov.features.ParamArrayAllSameHashFeature;
import edu.brown.markov.features.ParamArrayLengthFeature;
import edu.brown.markov.features.ParamHashEqualsBasePartitionFeature;
import edu.brown.markov.features.ParamHashPartitionFeature;
import edu.brown.markov.features.TransactionIdFeature;
import edu.brown.utils.ArgumentsParser;
import edu.brown.utils.ClassUtil;
import edu.brown.utils.FileUtil;
import edu.brown.utils.PartitionEstimator;
import edu.brown.workload.TransactionTrace;
import edu.brown.workload.Workload;

/**
 * Extracts a FeatureSet for each Procedure from the transactions of a Workload.
 * @author pavlo
 */
public class FeatureExtractor {
    private static final Logger LOG = Logger.getLogger(FeatureExtractor.class);

    // HACK: The position of the TransactionId attribute in all of our FeatureSets
    public static final int TXNID_ATTRIBUTE_IDX = 0;

    private final CatalogContext catalogContext;
    private final PartitionEstimator p_estimator;
    private final Map<Procedure, List<AbstractFeature>> proc_features = new HashMap<Procedure, List<AbstractFeature>>();

    private static final Class<?> DEFAULT_FEATURE_CLASSES[] = new Class<?>[] {
        TransactionIdFeature.class,
        BasePartitionFeature.class,
        // ParamNumericValuesFeature.class,
        ParamArrayAllSameHashFeature.class,
        ParamHashPartitionFeature.class,
        ParamArrayLengthFeature.class,
        ParamHashEqualsBasePartitionFeature.class,
    };

    /**
     * Full Constructor
     * @param catalogContext
     * @param p_estimator
     * @param feature_classes
     */
    public FeatureExtractor(CatalogContext catalogContext, PartitionEstimator p_estimator, Class<? extends AbstractFeature>...feature_classes) {
        this.catalogContext = catalogContext;
        this.p_estimator = p_estimator;
        for (Class<? extends AbstractFeature> fclass : feature_classes) {
            this.addFeatureClass(fclass);
        } // FOR
    }

    public FeatureExtractor(CatalogContext catalogContext, Class<? extends AbstractFeature>...feature_classes) {
        this(catalogContext, new PartitionEstimator(catalogContext), feature_classes);
    }

    @SuppressWarnings("unchecked")
    public FeatureExtractor(CatalogContext catalogContext, PartitionEstimator p_estimator) {
        this(catalogContext, p_estimator, (Class<? extends AbstractFeature>[])DEFAULT_FEATURE_CLASSES);
    }

    /**
     * Constructor
     * @param catalogContext
     */
    @SuppressWarnings("unchecked")
    public FeatureExtractor(CatalogContext catalogContext) {
        this(catalogContext, (Class<? extends AbstractFeature>[])DEFAULT_FEATURE_CLASSES);
    }
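    // Any custom feature registered through addFeatureClass() below must expose a
    // (PartitionEstimator, Procedure) constructor, because that is the signature
    // ClassUtil.newInstance() is invoked with when features are instantiated per
    // Procedure. A minimal sketch (MyFeature is hypothetical, and the exact
    // super(...) arguments depend on AbstractFeature's actual constructor):
    //
    //   public class MyFeature extends AbstractFeature {
    //       public MyFeature(PartitionEstimator p_estimator, Procedure catalog_proc) {
    //           super(p_estimator, catalog_proc, MyFeature.class); // assumed signature
    //       }
    //       @Override
    //       public void extract(FeatureSet fset, TransactionTrace txn_trace) throws Exception {
    //           // Derive a value from txn_trace and record it in fset
    //       }
    //   }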
    /**
     * Add a feature class to this extractor.
     * An instance of the class is created for every non-system Procedure in the catalog.
     * @param feature_class
     */
    public void addFeatureClass(Class<? extends AbstractFeature> feature_class) {
        assert(feature_class != null);
        if (LOG.isDebugEnabled()) LOG.debug("Adding " + feature_class.getSimpleName());
        for (Procedure catalog_proc : catalogContext.database.getProcedures()) {
            if (catalog_proc.getSystemproc()) continue;
            if (!this.proc_features.containsKey(catalog_proc)) {
                this.proc_features.put(catalog_proc, new ArrayList<AbstractFeature>());
            }
            AbstractFeature f = (AbstractFeature)ClassUtil.newInstance(
                    feature_class,
                    new Object[]{ this.p_estimator, catalog_proc },
                    new Class[]{ PartitionEstimator.class, Procedure.class });
            this.proc_features.get(catalog_proc).add(f);
        } // FOR
    }

    /**
     * Calculate a FeatureSet for each Procedure that appears in the given Workload.
     * @param workload
     * @return
     */
    public Map<Procedure, FeatureSet> calculate(Workload workload) throws Exception {
        Map<Procedure, FeatureSet> fsets = new HashMap<Procedure, FeatureSet>();
        final boolean trace = LOG.isTraceEnabled();
        for (TransactionTrace txn_trace : workload.getTransactions()) {
            if (trace) LOG.trace("Processing " + txn_trace);
            Procedure catalog_proc = txn_trace.getCatalogItem(catalogContext.database);
            assert(catalog_proc != null) : "Invalid procedure: " + txn_trace.getCatalogItemName();

            FeatureSet fset = fsets.get(catalog_proc);
            if (fset == null) {
                fset = new FeatureSet();
                fsets.put(catalog_proc, fset);
            }

            for (AbstractFeature f : this.proc_features.get(catalog_proc)) {
                if (trace) LOG.trace(txn_trace + " - " + f.getClass().getSimpleName());
                f.extract(fset, txn_trace);
            }
            if (trace) LOG.trace(txn_trace + ": " + fset.getFeatureValues(txn_trace));
        } // FOR
        return (fsets);
    }

    public static void main(String[] vargs) throws Exception {
        ArgumentsParser args = ArgumentsParser.load(vargs);
        args.require(
            ArgumentsParser.PARAM_CATALOG,
            ArgumentsParser.PARAM_WORKLOAD,
            ArgumentsParser.PARAM_MAPPINGS
        );

        FeatureExtractor extractor = new FeatureExtractor(args.catalogContext);
        Map<Procedure, FeatureSet> fsets = extractor.calculate(args.workload);

        // List<String> targets = args.getOptParams();
        for (Entry<Procedure, FeatureSet> e : fsets.entrySet()) {
            String proc_name = e.getKey().getName();
            // if (targets.contains(proc_name) == false) continue;

            // File path = new File(proc_name + ".fset");
            // e.getValue().save(path.getAbsolutePath());
            // LOG.info(String.format("Wrote FeatureSet with %d instances to '%s'", e.getValue().getTransactionCount(), path.getAbsolutePath()));

            File path = new File(proc_name + ".arff");
            Instances data = e.getValue().export(proc_name, false);
            FileUtil.writeStringToFile(path, data.toString());
            LOG.info(String.format("Wrote FeatureSet with %d instances to '%s'", data.numInstances(), path.getAbsolutePath()));
        } // FOR
    }
}
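// The .arff files written by main() above can be loaded back into Weka for
// clustering or classification. A minimal sketch (the file name is hypothetical;
// DataSource is Weka's standard converter entry point):
//
//   import weka.core.Instances;
//   import weka.core.converters.ConverterUtils.DataSource;
//
//   Instances data = new DataSource("neworder.arff").getDataSet();
//   System.out.println(data.numInstances() + " instances, " + data.numAttributes() + " attributes");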