package quickml.supervised.tree.bagging; import com.google.common.collect.Lists; import quickml.collections.MapUtils; import quickml.data.instances.InstanceWithAttributesMap; import java.io.Serializable; import java.util.HashSet; import java.util.List; /** * Created by alexanderhawk on 4/5/15. */ public class StationaryBagging implements Bagging { private static com.twitter.common.util.Random rand = com.twitter.common.util.Random.Util.fromSystemRandom(MapUtils.random); @Override public <L extends Serializable,E extends InstanceWithAttributesMap<L>> TrainingDataPair<L, E> separateTrainingDataFromOutOfBagData(List<E> trainingData) { List<E> baggedTrainingData = Lists.newArrayList(); List<E> outOfBagTrainingData = Lists.newArrayList(); HashSet<Integer> unusedDataIndices = new HashSet<>(); for (int i = 0; i < trainingData.size(); i++) { unusedDataIndices.add(i); } for (int i = 0; i < trainingData.size(); i++) { int toAdd = rand.nextInt(trainingData.size()); if (unusedDataIndices.contains(toAdd)) unusedDataIndices.remove(toAdd); baggedTrainingData.add(trainingData.get(toAdd)); } for (Integer index : unusedDataIndices) { outOfBagTrainingData.add(trainingData.get(index)); } return new TrainingDataPair<>(baggedTrainingData, outOfBagTrainingData); } }