/* Copyright 2010, Google Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package com.google.refine.browsing.util; import java.util.ArrayList; import java.util.Calendar; import java.util.Collection; import java.util.Date; import java.util.List; import java.util.Properties; import com.google.refine.expr.ExpressionUtils; import com.google.refine.model.Project; import com.google.refine.model.Row; /** * A utility class for computing the base bins that form the base histograms of * temporal range facets. It evaluates an expression on all the rows of a project to * get temporal values, determines how many bins to distribute those values in, and * bins the rows accordingly. * * This class processes all rows rather than just the filtered rows because it * needs to compute the base bins of a temporal range facet, which remain unchanged * as the user interacts with the facet. */ abstract public class TimeBinIndex { protected int _totalValueCount; protected int _timeValueCount; protected long _min; protected long _max; protected long _step; protected int[] _bins; protected int _timeRowCount; protected int _nonTimeRowCount; protected int _blankRowCount; protected int _errorRowCount; protected boolean _hasError = false; protected boolean _hasNonTime = false; protected boolean _hasTime = false; protected boolean _hasBlank = false; protected long[] steps = { 1, // msec 1000, // sec 1000*60, // min 1000*60*60, // hour 1000*60*60*24, // day 1000*60*60*24*7, // week 1000l*2629746l, // month (average Gregorian year / 12) 1000l*31556952l, // year (average Gregorian year) 1000l*31556952l*10l, // decade 1000l*31556952l*100l, // century 1000l*31556952l*1000l, // millennium }; abstract protected void iterate(Project project, RowEvaluable rowEvaluable, List<Long> allValues); public TimeBinIndex(Project project, RowEvaluable rowEvaluable) { _min = Long.MAX_VALUE; _max = Long.MIN_VALUE; List<Long> allValues = new ArrayList<Long>(); iterate(project, rowEvaluable, allValues); _timeValueCount = allValues.size(); if (_min >= _max) { _step = 1; _min = Math.min(_min, _max); _max = _step; _bins = new int[1]; return; } long diff = _max - _min; for (long step : steps) { _step = step; if (diff / _step <= 100) { break; } } _bins = new int[(int) (diff / _step) + 1]; for (long d : allValues) { int bin = (int) Math.max((d - _min) / _step,0); _bins[bin]++; } } public boolean isTemporal() { return _timeValueCount > _totalValueCount / 2; } public long getMin() { return _min; } public long getMax() { return _max; } public long getStep() { return _step; } public int[] getBins() { return _bins; } public int getTimeRowCount() { return _timeRowCount; } public int getNonTimeRowCount() { return _nonTimeRowCount; } public int getBlankRowCount() { return _blankRowCount; } public int getErrorRowCount() { return _errorRowCount; } protected void processRow( Project project, RowEvaluable rowEvaluable, List<Long> allValues, int rowIndex, Row row, Properties bindings ) { Object value = rowEvaluable.eval(project, rowIndex, row, bindings); if (ExpressionUtils.isError(value)) { _hasError = true; } else if (ExpressionUtils.isNonBlankData(value)) { if (value.getClass().isArray()) { Object[] a = (Object[]) value; for (Object v : a) { _totalValueCount++; if (ExpressionUtils.isError(v)) { _hasError = true; } else if (ExpressionUtils.isNonBlankData(v)) { if (v instanceof Calendar) { v = ((Calendar) v).getTime(); } if (v instanceof Date) { _hasTime = true; processValue(((Date) v).getTime(), allValues); } else { _hasNonTime = true; } } else { _hasBlank = true; } } } else if (value instanceof Collection<?>) { for (Object v : ExpressionUtils.toObjectCollection(value)) { _totalValueCount++; if (ExpressionUtils.isError(v)) { _hasError = true; } else if (ExpressionUtils.isNonBlankData(v)) { if (v instanceof Calendar) { v = ((Calendar) v).getTime(); } if (v instanceof Date) { _hasTime = true; processValue(((Date) v).getTime(), allValues); } else { _hasNonTime = true; } } else { _hasBlank = true; } } } else { _totalValueCount++; if (value instanceof Calendar) { value = ((Calendar) value).getTime(); } if (value instanceof Date) { _hasTime = true; processValue(((Date) value).getTime(), allValues); } else { _hasNonTime = true; } } } else { _hasBlank = true; } } protected void preprocessing() { _hasBlank = false; _hasError = false; _hasNonTime = false; _hasTime = false; } protected void postprocessing() { if (_hasError) { _errorRowCount++; } if (_hasBlank) { _blankRowCount++; } if (_hasTime) { _timeRowCount++; } if (_hasNonTime) { _nonTimeRowCount++; } } protected void processValue(long v, List<Long> allValues) { _min = Math.min(_min, v); _max = Math.max(_max, v); allValues.add(v); } }