/* * This file is part of the Wayback archival access software * (http://archive-access.sourceforge.net/projects/wayback/). * * Licensed to the Internet Archive (IA) by one or more individual * contributors. * * The IA licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.archive.wayback.util.partition; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.TimeZone; import java.util.logging.Logger; import org.archive.wayback.util.partition.size.DayPartitionSize; import org.archive.wayback.util.partition.size.HourPartitionSize; import org.archive.wayback.util.partition.size.MonthPartitionSize; import org.archive.wayback.util.partition.size.TwoMonthPartitionSize; import org.archive.wayback.util.partition.size.TwoYearPartitionSize; import org.archive.wayback.util.partition.size.WeekPartitionSize; import org.archive.wayback.util.partition.size.YearPartitionSize; /** * Class which divides a set of date-related objects into sub-sets by time * ranges. * * This class provides methods for: * * 1) determining the smallest PartitionSize that can be used to cover a time * range, using at most a set number of partitions * 2) creating a List of Partition objects covering a span of time, each having * a specified size * 3) efficiently populating an iterator of date-related objects into List of * Partition objects * * @author brad * * @param <T> generic class type to use with this Partitioner */ public class Partitioner<T> { private static final TimeZone TZ_UTC = TimeZone.getTimeZone("UTC"); private static final Logger LOGGER = Logger.getLogger( Partitioner.class.getName()); private ElementPartitionMap<T> map = null; /** * PartitionSize based on Hour intervals */ public static PartitionSize hourSize = new HourPartitionSize(); /** * PartitionSize based on Day intervals */ public static PartitionSize daySize = new DayPartitionSize(); /** * PartitionSize based on Week intervals */ public static PartitionSize weekSize = new WeekPartitionSize(); /** * PartitionSize based on Month intervals */ public static PartitionSize monthSize = new MonthPartitionSize(); /** * PartitionSize based on Two Month intervals */ public static PartitionSize twoMonthSize = new TwoMonthPartitionSize(); /** * PartitionSize based on Year intervals */ public static PartitionSize yearSize = new YearPartitionSize(); /** * PartitionSize based on Two Year intervals */ public static PartitionSize twoYearSize = new TwoYearPartitionSize(); private static PartitionSize[] sizes = { hourSize, daySize, weekSize, monthSize, twoMonthSize, yearSize, twoYearSize }; /** * @param map that converts from the Generic type used in this instance * to a Date, and adds a Generic type used to a Partition */ public Partitioner(ElementPartitionMap<T> map) { this.map = map; } /** * Get a PartitionSize object by it's name * @param name of the PartitionSize * @return PartitionSize matching the name, or a TwoYearPartionSize if name * is unknown */ public static PartitionSize getSize(String name) { for(PartitionSize pa : sizes) { if(pa.name().equals(name)) { return pa; } } return twoYearSize; } /** * Attempt to find the smallest PartitionSize implementation which, spanning * the range first and last specified, produces at most maxP partitions. * @param first Date of beginning of time range * @param last Date of end of time range * @param maxP maximum number of Partitions to use * @return a PartitionSize object which will divide the range into at most * maxP Partitions */ public PartitionSize getSize(Date first, Date last, int maxP) { long diffMS = last.getTime() - first.getTime(); for(PartitionSize pa : sizes) { long maxMS = maxP * pa.intervalMS(); if(maxMS > diffMS) { return pa; } } return twoYearSize; } private void logDates(String message, Date date1, Date date2) { SimpleDateFormat f = new SimpleDateFormat("H:mm:ss:SSS MMM d, yyyy", Locale.ENGLISH); f.setTimeZone(TZ_UTC); String pd1 = f.format(date1); String pd2 = f.format(date2); LOGGER.info(message + ":" + pd1 + " - " + pd2); } /** * Create a List of Partition objects of the specified size, which span the * date range specified. * * @param size of Partitions to create * @param start Date of beginning of time range to cover * @param end Date of end of time range to cover * @return List of Partitions spanning start and end, sized size, in date- * ascending order. */ public List<Partition<T>> getRange(PartitionSize size, Date start, Date end) { // logDates("Constructing partitions Size(" + size.name() + ")",start,end); // Date origStart = new Date(start.getTime()); List<Partition<T>> partitions = new ArrayList<Partition<T>>(); Calendar cStart = Calendar.getInstance(TZ_UTC); cStart.setTime(start); size.alignStart(cStart); // logDates("AlignedStart("+size.name()+")",origStart,cStart.getTime()); Calendar cEnd = size.increment(cStart, 1); // logDates("AlignedEnd("+size.name()+")",cStart.getTime(),cEnd.getTime()); while(cStart.getTime().compareTo(end) < 0) { partitions.add(new Partition<T>(cStart.getTime(), cEnd.getTime())); cStart = cEnd; cEnd = size.increment(cStart, 1); // logDates("Incremented("+size.name()+")", // cStart.getTime(),cEnd.getTime()); } return partitions; } /** * Add elements from itr into the appropriate partitions. Assumes that * all elements fit in one of the argument Partitions, that the partitions * are in ascending order by time, and that elements returned from the * Iterator are in ascending time order. * * @param partitions to populate with objects * @param itr ascending Iterator of objects to place into the partitions */ public void populate(List<Partition<T>> partitions, Iterator<T> itr) { int idx = 0; int size = partitions.size(); T element = null; while(idx < size) { Partition<T> partition = partitions.get(idx); if(element == null) { if(itr.hasNext()) { element = itr.next(); } else { // all done break; } } // will current result fit in the current partition? while(partition.containsDate(map.elementToDate(element))) { map.addElementToPartition(element, partition); element = null; if(itr.hasNext()) { element = itr.next(); } else { break; } } idx++; } if(itr.hasNext()) { // eew... Likely bad usage. is this an error? LOGGER.warning("Not all elements fit in partitions!"); } } /** * Debugging method * @param partitions to dump */ public void dumpPartitions(List<Partition<T>> partitions) { int i = 0; for(Partition<T> partition : partitions) { i++; logDates("Partition("+i+")", partition.getStart(), partition.getEnd()); } } /* * * SOME UNFINISHED/UNTESTED CODE WHICH MAY BE OF INTEREST IN THE FUTURE * FOLLOWS. NONE IS USED FOR NOW: * */ // /** // * Create a List of Partitions centered at center, extending back in time // * to start, and forward to end. If more than count partitions are required, // * then the edge partitions will be grown until the range is extended to // * start and end, with the edge partitions being non-standard size. // * // * @param center // * @param start // * @param end // * @param count // * @return // */ // public List<Partition<T>> getCentered(PartitionSize size, Date center, // Date start, Date end, int count) { // // List<Partition<T>> partitions = new ArrayList<Partition<T>>(); // Calendar cStart = Calendar.getInstance(TimeZone.getTimeZone("GMT")); // cStart.setTime(center); // size.alignStart(cStart); // Calendar cEnd = size.increment(cStart, 1); // // partitions.add(new Partition<T>(cStart.getTime(),cEnd.getTime())); // // int numSides = (count - 1) / 2; // // first add those backwards: // Partition<T> cur = null; // for(int i=1; i <= numSides; i++) { // cEnd = cStart; // cStart = size.increment(cStart, -1); // Date curStart = cStart.getTime(); // if(i == numSides) { // // first partition, maybe make longer: // if(curStart.after(start)) { // curStart = new Date(start.getTime() - 1000); // } // } // cur = new Partition<T>(curStart, cEnd.getTime()); // partitions.add(0,cur); // } // // // re-align center, and increment: // cStart.setTime(center); // size.alignStart(cStart); // cStart = size.increment(cStart, 1); // cEnd = size.increment(cStart, 1); // // for(int i=1; i <= numSides; i++) { // Date curEnd = cEnd.getTime(); // if(i == numSides) { // // last partition, maybe make longer: // if(curEnd.before(end)) { // // end is exclusive, so make 1 MS more: // curEnd = end; // } // } // cur = new Partition<T>(cStart.getTime(),curEnd); // partitions.add(cur); // cStart = cEnd; // cEnd = size.increment(cStart, 1); // } // return partitions; // } // public List<Partition<T>> partitionRange(Date start, Date end, String name) { // // PartitionSize size = getSize(name); // return getRange(size, start, end); // } // public List<Partition<T>> partitionCentered(Date center, Date start, // Date end, int count, String name) { // // PartitionSize size = getSize(name); // return getCentered(size, center, start, end, count); // } // public List<Partition<T>> partitionRange(Date start, Date end, int max) { // // PartitionSize size = getSize(start, end, max); // return getRange(size, start, end); // } // public List<Partition<T>> partitionCentered(Date center, Date start, // Date end, int count) { // // PartitionSize size = getSize(start,end,count); // return getCentered(size, center, start, end, count); // } }