/*
 * avenir: Predictive analytic based on Hadoop Map Reduce
 * Author: Pranab Ghosh
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package org.avenir.util;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.chombo.util.Utility;

/**
 * Holds the candidate splits registered for each attribute (keyed by attribute
 * ordinal) and walks through the splits of one selected attribute with a cursor.
 *
 * <p>Cursor behaviour: {@link #selectAttribute(int)} resets the cursor to the first
 * split, {@link #next()} only reads the key of the split under the cursor, and
 * {@link #getSegmentIndex(String)} evaluates the split under the cursor and then
 * advances the cursor by one.
 *
 * @author pranab
 */
public class AttributeSplitHandler {
    private final Map<Integer, List<Split>> attributeSplits = new HashMap<Integer, List<Split>>();
    private List<Split> curSplitList;
    private int cursor;

    /**
     * Adds a numerical split for an attribute.
     *
     * @param attrOrd attribute ordinal the split belongs to
     * @param splitPoints split points defining the segments
     */
    public void addIntSplits(int attrOrd, Integer[] splitPoints) {
        // NOTE(review): the key is joined with ";" here, while IntegerSplit.fromString()
        // and IntegerSplit.toString() use SPLIT_ELEMENT_SEPRATOR (":"). A key produced
        // here presumably cannot be parsed back by fromString() - confirm which
        // separator consumers of the key expect before changing either side.
        String key = Utility.join(splitPoints, ";");
        getSplitList(attrOrd).add(new IntegerSplit(key, splitPoints));
    }

    /**
     * Returns the split list of an attribute, creating and registering an empty
     * list the first time the attribute is seen.
     *
     * @param attrOrd attribute ordinal
     * @return the mutable split list registered for the attribute, never null
     */
    private List<Split> getSplitList(int attrOrd) {
        List<Split> splitList = attributeSplits.get(attrOrd);
        if (null == splitList) {
            splitList = new ArrayList<Split>();
            attributeSplits.put(attrOrd, splitList);
        }
        return splitList;
    }

    /**
     * Adds a categorical split for an attribute.
     *
     * @param attrOrd attribute ordinal the split belongs to
     * @param splitSets groups of categorical values, one group per segment
     */
    public void addCategoricalSplits(int attrOrd, List<List<String>> splitSets) {
        getSplitList(attrOrd).add(new CategoricalSplit(splitSets));
    }

    /**
     * Selects the attribute whose splits are to be traversed and resets the cursor
     * to the first split. If no split was registered for the attribute there is
     * nothing to traverse and {@link #next()} returns null.
     *
     * @param attrOrd attribute ordinal
     */
    public void selectAttribute(int attrOrd) {
        curSplitList = attributeSplits.get(attrOrd);
        cursor = 0;
    }

    /**
     * Returns the key of the split under the cursor without moving the cursor.
     *
     * @return the key of the current split, or null when no attribute with splits
     *         is selected or the cursor has moved past the last split
     */
    public String next() {
        // a missing split list means "no splits", not a failure
        if (curSplitList == null || cursor >= curSplitList.size()) {
            return null;
        }
        return curSplitList.get(cursor).getKey();
    }

    /**
     * Returns the segment index of a value for the split under the cursor and then
     * advances the cursor to the following split. Works for whichever kind of split
     * is under the cursor.
     *
     * <p>Must only be called while {@link #next()} returns a non null key; otherwise
     * the lookup of the current split fails with a runtime exception.
     *
     * @param value attribute value as a string
     * @return index of the segment the value falls into
     */
    public int getSegmentIndex(String value) {
        int index = curSplitList.get(cursor).getSegmentIndex(value);
        ++cursor;
        return index;
    }

    /**
     * Base class for splits. A split is identified by a string key and maps an
     * attribute value to the index of one of its segments.
     *
     * @author pranab
     */
    public static abstract class Split {
        protected String key;
        // the misspelled name is kept as is because the constant is visible to subclasses
        protected static final String SPLIT_ELEMENT_SEPRATOR = ":";

        public Split() {
        }

        public Split(String key) {
            this.key = key;
        }

        public String getKey() {
            return key;
        }

        public void setKey(String key) {
            this.key = key;
        }

        /**
         * Finds the segment a value belongs to.
         *
         * @param value attribute value as a string
         * @return index of the segment
         */
        public abstract int getSegmentIndex(String value);

        /**
         * Rebuilds the internal split definition from the key.
         */
        public abstract void fromString();
    }

    /**
     * Split of an integer attribute defined by an array of split points.
     *
     * @author pranab
     */
    public static class IntegerSplit extends Split {
        private Integer[] splitPoints;

        public IntegerSplit(String key, Integer[] splitPoints) {
            super(key);
            this.splitPoints = splitPoints;
        }

        /**
         * Creates a split from its key only. The split points stay unset until
         * {@link #fromString()} is called.
         *
         * @param key split key
         */
        public IntegerSplit(String key) {
            super(key);
        }

        /**
         * Returns the position of the first split point that is greater than or
         * equal to the value, or the number of split points when the value is
         * greater than all of them. Presumably the split points are expected in
         * ascending order - the code does not check it.
         *
         * @param value integer value as a string
         * @return segment index in the range 0 to number of split points
         * @throws NumberFormatException if the value is not a parsable integer
         */
        @Override
        public int getSegmentIndex(String value) {
            int iValue = Integer.parseInt(value);
            int i = 0;
            while (i < splitPoints.length && iValue > splitPoints[i]) {
                ++i;
            }
            return i;
        }

        /**
         * @return the split points joined with the split element separator
         */
        @Override
        public String toString() {
            return Utility.join(splitPoints, SPLIT_ELEMENT_SEPRATOR);
        }

        /**
         * Rebuilds the split points by parsing the key as integers delimited by the
         * split element separator.
         */
        @Override
        public void fromString() {
            int[] intArray = Utility.intArrayFromString(key, SPLIT_ELEMENT_SEPRATOR);
            splitPoints = new Integer[intArray.length];
            for (int i = 0; i < intArray.length; ++i) {
                splitPoints[i] = intArray[i];
            }
        }
    }

    /**
     * Split of a categorical attribute defined by groups of values, one group per
     * segment.
     *
     * @author pranab
     */
    public static class CategoricalSplit extends Split {
        private List<List<String>> splitSets;

        /**
         * Creates a split whose key is the string encoding of the groups.
         *
         * @param splitSets groups of categorical values
         */
        public CategoricalSplit(List<List<String>> splitSets) {
            this.splitSets = splitSets;
            // private encoder instead of the overridable toString(), so that a
            // subclass is never called before it is constructed
            key = encode(splitSets);
        }

        /**
         * Creates a split from its key only. The groups stay unset until
         * {@link #fromString()} is called.
         *
         * @param key split key
         */
        public CategoricalSplit(String key) {
            super(key);
        }

        public CategoricalSplit(String key, List<List<String>> splitSets) {
            super(key);
            this.splitSets = splitSets;
        }

        /**
         * Returns the position of the first group containing the value.
         *
         * @param value categorical value
         * @return index of the group containing the value
         * @throws IllegalArgumentException if no group contains the value
         */
        @Override
        public int getSegmentIndex(String value) {
            for (int indx = 0; indx < splitSets.size(); ++indx) {
                if (splitSets.get(indx).contains(value)) {
                    return indx;
                }
            }
            throw new IllegalArgumentException("split segment not found for " + value);
        }

        /**
         * @return string encoding of the groups, empty when there is no group
         */
        @Override
        public String toString() {
            return encode(splitSets);
        }

        /**
         * Encodes groups as the string form of each group joined with the split
         * element separator. An empty list of groups encodes to an empty string.
         */
        private static String encode(List<List<String>> groups) {
            StringBuilder stBld = new StringBuilder();
            boolean first = true;
            for (List<String> gr : groups) {
                // separator goes between elements, so nothing has to be trimmed
                // afterwards and an empty list no longer fails
                if (!first) {
                    stBld.append(SPLIT_ELEMENT_SEPRATOR);
                }
                stBld.append(gr.toString());
                first = false;
            }
            return stBld.toString();
        }

        /**
         * Rebuilds the groups from the key. The key is cut at the split element
         * separator, the first and last character of each piece (the brackets
         * written by the encoding) are dropped and the rest is cut at commas, with
         * each item trimmed. An empty key yields no groups.
         */
        @Override
        public void fromString() {
            splitSets = new ArrayList<List<String>>();
            if (key.isEmpty()) {
                // encoding of a split without groups; splitting it would produce
                // one empty piece that cannot be unwrapped
                return;
            }
            for (String splitSetSt : key.split(SPLIT_ELEMENT_SEPRATOR)) {
                String inner = splitSetSt.substring(1, splitSetSt.length() - 1);
                List<String> splitSet = new ArrayList<String>();
                for (String item : inner.split(",")) {
                    splitSet.add(item.trim());
                }
                splitSets.add(splitSet);
            }
        }
    }
}