/* * Copyright 2010 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.ukp.dkpro.core.io.pdf; import java.util.LinkedList; /** * Cluster values into buckets. A new bucket is opened if a new value is added that differs from the * average value of each of the existing buckets by more than a certain threshold. * */ public class Buckets { private final LinkedList<Bucket> buckets = new LinkedList<Bucket>(); private final double tolerance; private boolean modified = true; private Bucket cachedBest = null; Buckets(final double aTolerance) { tolerance = aTolerance; } public void put(final double aValue) { modified = true; // Empty so far if (buckets.size() == 0) { newBucket(aValue); return; } Bucket best = buckets.getFirst(); double best_diff = Math.abs(best.getValue() - aValue); for (final Bucket b : buckets) { final double cur_diff = Math.abs(b.getValue() - aValue); // Bail out on exact match if (cur_diff == 0.0) { b.add(aValue); return; } // Found better match? if (cur_diff < best_diff) { best = b; best_diff = cur_diff; } } // Add to existing bucket if within tolerance, otherwise create new one if (best_diff < tolerance) { best.add(aValue); } else { newBucket(aValue); } } private void newBucket(final double aValue) { buckets.add(new Bucket(aValue)); } public Bucket getBest() { if (modified == false) { return cachedBest; } Bucket best = buckets.getFirst(); for (final Bucket b : buckets) { if (best.size() < b.size()) { best = b; } } cachedBest = best; return best; } @Override public String toString() { return buckets.toString(); } } class Bucket { private final LinkedList<Double> values = new LinkedList<Double>(); private double cached_avg = 0.0; private boolean modified = true; double getValue() { if (!modified) { return cached_avg; } modified = false; double avg = 0.0; for (final Double v : values) { avg += v.doubleValue(); } cached_avg = avg / values.size(); return cached_avg; } int size() { return values.size(); } Bucket(final double aValue) { values.add(aValue); } void add(final double aValue) { modified = true; values.add(aValue); } @Override public String toString() { return "[" + getValue() + " : " + values.size() + "]"; } }