package mil.nga.giat.geowave.core.geotime.index.dimension; import java.nio.ByteBuffer; import java.text.NumberFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.List; import java.util.TimeZone; import mil.nga.giat.geowave.core.index.FloatCompareUtils; import mil.nga.giat.geowave.core.index.StringUtils; import mil.nga.giat.geowave.core.index.dimension.bin.BinRange; import mil.nga.giat.geowave.core.index.dimension.bin.BinValue; import mil.nga.giat.geowave.core.index.dimension.bin.BinningStrategy; import mil.nga.giat.geowave.core.index.sfc.data.NumericData; import mil.nga.giat.geowave.core.index.sfc.data.NumericRange; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; /** * This class is useful for establishing a consistent binning strategy using a * unit of time. Each bin will then be defined by the boundaries of that unit * within the timezone given in the constructor. So if the unit is year and the * data spreads across 2011-2013, the bins will be 2011, 2012, and 2013. The * unit chosen should represent a much more significant range than the average * query range (at least 20x larger) for efficiency purposes. So if the average * query is for a 24 hour period, the unit should not be a day, but could be * perhaps a month or a year (depending on the temporal extent of the dataset). */ public class TemporalBinningStrategy implements BinningStrategy { public static enum Unit { MINUTE( Calendar.MINUTE), HOUR( Calendar.HOUR_OF_DAY), DAY( Calendar.DAY_OF_MONTH), WEEK( Calendar.WEEK_OF_YEAR), MONTH( Calendar.MONTH), YEAR( Calendar.YEAR), DECADE( -1); // java.util.Calendar does not define a field number for decade // use -1 since that value is unused private final int calendarEnum; private Unit( final int calendarEnum ) { this.calendarEnum = calendarEnum; } public int toCalendarEnum() { return calendarEnum; } public static Unit getUnit( final int calendarEnum ) { for (final Unit u : values()) { if (u.calendarEnum == calendarEnum) { return u; } } throw new IllegalArgumentException( "Calendar enum '" + calendarEnum + "' not found as a valid unit "); } // converter that will be used later public static Unit fromString( final String code ) { for (final Unit output : Unit.values()) { if (output.toString().equalsIgnoreCase( code)) { return output; } } return null; } } protected static final long MILLIS_PER_DAY = 86400000L; private static final NumberFormat TWO_DIGIT_NUMBER = NumberFormat.getIntegerInstance(); { TWO_DIGIT_NUMBER.setMinimumIntegerDigits(2); TWO_DIGIT_NUMBER.setMaximumIntegerDigits(2); } private Unit unit; private String timezone; protected TemporalBinningStrategy() {} public TemporalBinningStrategy( final Unit unit ) { this( unit, "GMT"); } public TemporalBinningStrategy( final Unit unit, final String timezone ) { this.unit = unit; this.timezone = timezone; } @Override public double getBinMin() { return 0; } @Override public double getBinMax() { return getBinSizeMillis() - 1; } /** * Method used to bin a raw date in milliseconds to a binned value of the * Binning Strategy. */ @Override public BinValue getBinnedValue( final double value ) { // convert to a calendar and subtract the epoch for the bin final Calendar epochCal = Calendar.getInstance(TimeZone.getTimeZone(timezone)); epochCal.setTimeInMillis((long) value); setToEpoch(epochCal); // use the value to get the bin ID (although the epoch should work fine // too) final Calendar valueCal = Calendar.getInstance(TimeZone.getTimeZone(timezone)); valueCal.setTimeInMillis((long) value); return new BinValue( getBinId(valueCal), valueCal.getTimeInMillis() - epochCal.getTimeInMillis()); } private long getBinSizeMillis() { long binSizeMillis = MILLIS_PER_DAY; // use the max possible value for that unit as the bin size switch (unit) { case DECADE: binSizeMillis *= 3653; break; case YEAR: default: binSizeMillis *= 366; break; case MONTH: binSizeMillis *= 31; break; case WEEK: binSizeMillis *= 7; break; case DAY: break; case HOUR: binSizeMillis /= 24; break; case MINUTE: binSizeMillis /= 1440; break; } return binSizeMillis; } @SuppressFBWarnings(value = { "SF_SWITCH_FALLTHROUGH", "SF_SWITCH_NO_DEFAULT" }, justification = "Fallthrough intentional for time parsing; default case is provided") protected void setToEpoch( final Calendar value ) { // reset appropriate values to 0 based on the unit switch (unit) { case DECADE: value.set( Calendar.YEAR, ((value.get(Calendar.YEAR) / 10) * 10)); // don't break so that the other fields are also set to the // minimum case YEAR: default: value.set( Calendar.MONTH, value.getActualMinimum(Calendar.MONTH)); // don't break so that the other fields are also set to the // minimum case MONTH: value.set( Calendar.DAY_OF_MONTH, value.getActualMinimum(Calendar.DAY_OF_MONTH)); // don't break so that the other fields are also set to the // minimum case DAY: value.set( Calendar.HOUR_OF_DAY, value.getActualMinimum(Calendar.HOUR_OF_DAY)); // don't break so that the other fields are also set to the // minimum case HOUR: value.set( Calendar.MINUTE, value.getActualMinimum(Calendar.MINUTE)); // don't break so that the other fields are also set to the // minimum case MINUTE: value.set( Calendar.SECOND, value.getActualMinimum(Calendar.SECOND)); value.set( Calendar.MILLISECOND, value.getActualMinimum(Calendar.MILLISECOND)); break; // special handling for week case WEEK: value.set( Calendar.DAY_OF_WEEK, value.getActualMinimum(Calendar.DAY_OF_WEEK)); value.set( Calendar.HOUR_OF_DAY, value.getActualMinimum(Calendar.HOUR_OF_DAY)); value.set( Calendar.MINUTE, value.getActualMinimum(Calendar.MINUTE)); value.set( Calendar.SECOND, value.getActualMinimum(Calendar.SECOND)); value.set( Calendar.MILLISECOND, value.getActualMinimum(Calendar.MILLISECOND)); } } @Override public int getFixedBinIdSize() { switch (unit) { case YEAR: default: return 4; case MONTH: return 7; case WEEK: return 7; case DAY: return 10; case HOUR: return 13; case MINUTE: return 16; } } private byte[] getBinId( final Calendar value ) { // this is assuming we want human-readable bin ID's but alternatively we // could consider returning a more compressed representation switch (unit) { case YEAR: default: return StringUtils.stringToBinary(Integer.toString(value.get(Calendar.YEAR))); case MONTH: return StringUtils.stringToBinary((Integer.toString(value.get(Calendar.YEAR)) + "_" + TWO_DIGIT_NUMBER .format(value.get(Calendar.MONTH)))); case WEEK: return StringUtils.stringToBinary(Integer.toString(value.get(Calendar.YEAR)) + "_" + TWO_DIGIT_NUMBER.format(value.get(Calendar.WEEK_OF_YEAR))); case DAY: return StringUtils.stringToBinary((Integer.toString(value.get(Calendar.YEAR)) + "_" + TWO_DIGIT_NUMBER.format(value.get(Calendar.MONTH)) + "_" + TWO_DIGIT_NUMBER.format(value .get(Calendar.DAY_OF_MONTH)))); case HOUR: return StringUtils.stringToBinary((Integer.toString(value.get(Calendar.YEAR)) + "_" + TWO_DIGIT_NUMBER.format(value.get(Calendar.MONTH)) + "_" + TWO_DIGIT_NUMBER.format(value.get(Calendar.DAY_OF_MONTH)) + "_" + TWO_DIGIT_NUMBER .format(value.get(Calendar.HOUR_OF_DAY)))); case MINUTE: return StringUtils.stringToBinary((Integer.toString(value.get(Calendar.YEAR)) + "_" + TWO_DIGIT_NUMBER.format(value.get(Calendar.MONTH)) + "_" + TWO_DIGIT_NUMBER.format(value.get(Calendar.DAY_OF_MONTH)) + "_" + TWO_DIGIT_NUMBER.format(value.get(Calendar.HOUR_OF_DAY)) + "_" + TWO_DIGIT_NUMBER .format(value.get(Calendar.MINUTE)))); } } @SuppressFBWarnings(value = { "SF_SWITCH_FALLTHROUGH", "SF_SWITCH_NO_DEFAULT" }, justification = "Fallthrough intentional for time parsing") private Calendar getStartEpoch( final byte[] binId ) { final String str = StringUtils.stringFromBinary(binId); final Calendar cal = Calendar.getInstance(TimeZone.getTimeZone(timezone)); switch (unit) { case MINUTE: final int minute = Integer.parseInt(str.substring( 14, 16)); cal.set( Calendar.MINUTE, minute); case HOUR: final int hour = Integer.parseInt(str.substring( 11, 13)); cal.set( Calendar.HOUR_OF_DAY, hour); case DAY: final int day = Integer.parseInt(str.substring( 8, 10)); cal.set( Calendar.DAY_OF_MONTH, day); case MONTH: final int month = Integer.parseInt(str.substring( 5, 7)); cal.set( Calendar.MONTH, month); case YEAR: default: final int year = Integer.parseInt(str.substring( 0, 4)); cal.set( Calendar.YEAR, year); break; // do not automatically fall-through to decade parsing case DECADE: int decade = Integer.parseInt(str.substring( 0, 4)); decade = (decade / 10) * 10; // int division will truncate ones cal.set( Calendar.YEAR, decade); break; // special handling for week case WEEK: final int yr = Integer.parseInt(str.substring( 0, 4)); cal.set( Calendar.YEAR, yr); final int weekOfYear = Integer.parseInt(str.substring( 5, 7)); cal.set( Calendar.WEEK_OF_YEAR, weekOfYear); break; } setToEpoch(cal); return cal; } @Override public BinRange[] getNormalizedRanges( final NumericData range ) { if (range.getMax() < range.getMin()) { return new BinRange[] {}; } final Calendar startEpoch = Calendar.getInstance(TimeZone.getTimeZone(timezone)); final long binSizeMillis = getBinSizeMillis(); // initialize the epoch to the range min and then reset appropriate // values to 0 based on the units startEpoch.setTimeInMillis((long) range.getMin()); setToEpoch(startEpoch); // now make sure all bin definitions between the start and end bins // are covered final long startEpochMillis = startEpoch.getTimeInMillis(); long epochIterator = startEpochMillis; final List<BinRange> bins = new ArrayList<BinRange>(); // track this, so that we can easily declare a range to be the full // extent and use the information to perform a more efficient scan boolean firstBin = ((long) range.getMin() != startEpochMillis); boolean lastBin = false; do { // because not every year has 366 days, and not every month has 31 // days we need to reset next epoch to the actual epoch final Calendar nextEpochCal = Calendar.getInstance(TimeZone.getTimeZone(timezone)); // set it to a value in the middle of the bin just to be sure (for // example if the bin size does not get to the next epoch as is // the case when units are days and the timezone accounts for // daylight savings time) nextEpochCal.setTimeInMillis(epochIterator + (long) (binSizeMillis * 1.5)); setToEpoch(nextEpochCal); final long nextEpoch = nextEpochCal.getTimeInMillis(); final long maxOfBin = nextEpoch - 1; final Calendar cal = Calendar.getInstance(TimeZone.getTimeZone(timezone)); cal.setTimeInMillis(epochIterator); long startMillis, endMillis; boolean fullExtent; if ((long) range.getMax() <= maxOfBin) { lastBin = true; endMillis = (long) range.getMax(); // its questionable whether we use fullExtent = FloatCompareUtils.checkDoublesEqual( range.getMax(), maxOfBin); } else { endMillis = maxOfBin; fullExtent = !firstBin; } if (firstBin) { startMillis = (long) range.getMin(); firstBin = false; } else { startMillis = epochIterator; } // we have the millis for range, but to normalize for this bin we // need to subtract the epoch of the bin bins.add(new BinRange( getBinId(cal), startMillis - epochIterator, endMillis - epochIterator, fullExtent)); epochIterator = nextEpoch; // iterate until we reach our end epoch } while (!lastBin); return bins.toArray(new BinRange[bins.size()]); } @Override public byte[] toBinary() { final byte[] timeZone = StringUtils.stringToBinary(timezone); final ByteBuffer binary = ByteBuffer.allocate(timezone.length() + 4); binary.putInt(unit.calendarEnum); binary.put(timeZone); return binary.array(); } @Override public void fromBinary( final byte[] bytes ) { final ByteBuffer buffer = ByteBuffer.wrap(bytes); final int unitCalendarEnum = buffer.getInt(); final byte[] timeZoneName = new byte[bytes.length - 4]; buffer.get(timeZoneName); unit = Unit.getUnit(unitCalendarEnum); timezone = StringUtils.stringFromBinary(timeZoneName); } @Override public int hashCode() { final int prime = 31; int result = 1; final String className = getClass().getName(); result = (prime * result) + ((className == null) ? 0 : className.hashCode()); result = (prime * result) + ((timezone == null) ? 0 : timezone.hashCode()); result = (prime * result) + ((unit == null) ? 0 : unit.calendarEnum); return result; } @Override public boolean equals( final Object obj ) { if (this == obj) { return true; } if (obj == null) { return false; } if (getClass() != obj.getClass()) { return false; } final TemporalBinningStrategy other = (TemporalBinningStrategy) obj; if (timezone == null) { if (other.timezone != null) { return false; } } else if (!timezone.equals(other.timezone)) { return false; } if (unit == null) { if (other.unit != null) { return false; } } else if (unit.calendarEnum != other.unit.calendarEnum) { return false; } return true; } @Override public NumericRange getDenormalizedRanges( final BinRange binnedRange ) { final Calendar startofEpoch = getStartEpoch(binnedRange.getBinId()); final long startOfEpochMillis = startofEpoch.getTimeInMillis(); final long minMillis = startOfEpochMillis + (long) binnedRange.getNormalizedMin(); final long maxMillis = startOfEpochMillis + (long) binnedRange.getNormalizedMax(); return new NumericRange( minMillis, maxMillis); } }