/* * DateGuesser.java * * Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard * * This file is part of BEAST. * See the NOTICE file distributed with this work for additional * information regarding copyright ownership and licensing. * * BEAST is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * BEAST is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with BEAST; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301 USA */ package dr.app.beauti.options; import dr.evolution.util.Date; import dr.evolution.util.Taxon; import dr.evolution.util.TaxonList; import dr.evolution.util.Units; import java.io.*; import java.text.*; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * @author Andrew Rambaut * @author Tommy Lam */ public class DateGuesser implements Serializable { private static final long serialVersionUID = -9106689400887615213L; public enum GuessType { ORDER, PREFIX, REGEX } public boolean guessDates = false; public GuessType guessType = GuessType.ORDER; public boolean fromLast = false; public int order = 0; public String prefix; public String regex; public File loadFile; public HashMap<String, String> load; public double offset = 0.0; public double unlessLessThan = 0.0; public double offset2 = 0.0; public boolean parseCalendarDates = false; public boolean parseCalendarDatesAndPrecision = false; public String calendarDateFormat = "yyyy-MM-dd"; private DateFormat dateFormat; public void guessDates(TaxonList taxonList) { // To avoid duplicating code, add all the taxa into a list and // pass it to guessDates(List<Taxon> taxonList) List<Taxon> taxa = new ArrayList<Taxon>(); for (Taxon taxon : taxonList) { taxa.add(taxon); } guessDates(taxa); } public void guessDates(TaxonList taxonList, Map<Taxon, String> taxonDateMap) { // To avoid duplicating code, add all the taxa into a list and // pass it to guessDates(List<Taxon> taxonList) List<Taxon> taxa = new ArrayList<Taxon>(); for (Taxon taxon : taxonList) { taxa.add(taxon); } guessDates(taxa, taxonDateMap); } public void guessDates(List<Taxon> taxonList) { guessDates(taxonList, null); } public void guessDates(List<Taxon> taxonList, Map<Taxon, String> taxonDateMap) { dateFormat = new SimpleDateFormat(calendarDateFormat); dateFormat.setTimeZone(TimeZone.getTimeZone("GMT")); for (int i = 0; i < taxonList.size(); i++) { Taxon taxon = taxonList.get(i); // Allocates a Date object and initializes it to represent the specified number of milliseconds since the // standard base time known as "the epoch", namely January 1, 1970, 00:00:00 GMT java.util.Date origin = new java.util.Date(0); double[] values = new double[2]; try { if (taxonDateMap != null) { String dateString = taxonDateMap.get(taxon); parseDate(taxon.getId(), dateString, values); } else { switch (guessType) { case ORDER: guessDateFromOrder(taxonList.get(i).getId(), order, fromLast, values); break; case PREFIX: guessDateFromPrefix(taxonList.get(i).getId(), prefix, order, fromLast, values); break; case REGEX: guessDateFromRegex(taxonList.get(i).getId(), regex, values); break; default: throw new IllegalArgumentException("unknown GuessType"); } } } catch (GuessDatesException gfe) { // @todo catch errors and give to user } double d = values[0]; if (!parseCalendarDates && !parseCalendarDatesAndPrecision) { if (offset > 0) { if (unlessLessThan > 0) { if (d < unlessLessThan) { d += offset2; } else { d += offset; } } else { d += offset; } } } // @todo if any taxa aren't set then return warning Date date = Date.createTimeSinceOrigin(d, Units.Type.YEARS, origin); date.setPrecision(values[1]); taxon.setAttribute("date", date); } } public Date parseDate(String value) throws GuessDatesException { double[] values = new double[2]; parseDate("", value, values); // Allocates a Date object and initializes it to represent the specified number of milliseconds since the // standard base time known as "the epoch", namely January 1, 1970, 00:00:00 GMT java.util.Date origin = new java.util.Date(0); return Date.createTimeSinceOrigin(values[0], Units.Type.YEARS, origin); } private void guessDateFromOrder(String label, int order, boolean fromLast, double[] values) throws GuessDatesException { String field; if (fromLast) { int count = 0; int i = label.length() - 1; char c = label.charAt(i); do { // first find a part of a number while (!Character.isDigit(c) && c != '.') { i--; if (i < 0) break; c = label.charAt(i); } if (i < 0) throw new GuessDatesException("Missing number field in taxon label, " + label); int j = i + 1; // now find the beginning of the number while (Character.isDigit(c) || c == '.') { i--; if (i < 0) break; c = label.charAt(i); } field = label.substring(i + 1, j); count++; } while (count <= order); } else { int count = 0; int i = 0; char c = label.charAt(i); do { // first find a part of a number while (!Character.isDigit(c)) { i++; if (i == label.length()) break; c = label.charAt(i); } int j = i; if (i == label.length()) throw new GuessDatesException("Missing number field in taxon label, " + label); // now find the beginning of the number while (Character.isDigit(c) || c == '.') { i++; if (i == label.length()) break; c = label.charAt(i); } field = label.substring(j, i); count++; } while (count <= order); } parseDate(label, field, values); } private static final String REGEX_CHARACTERS = "|[].*()-^$"; private void guessDateFromPrefix(String label, String prefix, int order, boolean fromLast, double[] values) throws GuessDatesException { StringBuilder sb = new StringBuilder(); for (int i = 0; i < prefix.length(); i++) { if (REGEX_CHARACTERS.contains("" + prefix.charAt(i))) { sb.append("\\").append(prefix.charAt(i)); } else { sb.append(prefix.charAt(i)); } } String[] fields = label.split(sb.toString()); int index; if (fromLast) { index = fields.length - order - 1; } else { index = order; } if (index < 0) { index = 0; } if (index >= fields.length) { index = fields.length - 1; } parseDate(label, fields[index], values); } private void guessDateFromPrefix(String label, String prefix, double[] values) throws GuessDatesException { int i = label.indexOf(prefix); if (i == -1) throw new GuessDatesException("Missing prefix in taxon label, " + label); i += prefix.length(); int j = i; // now find the beginning of the number char c = label.charAt(i); while (i < label.length() - 1 && (Character.isDigit(c) || c == '.')) { i++; c = label.charAt(i); } if (i == j) throw new GuessDatesException("Missing field after prefix in taxon label, " + label); String field = label.substring(j, i + 1); parseDate(label, field, values); } private void guessDateFromRegex(String label, String regex, double[] values) throws GuessDatesException { if (!regex.contains("(")) { // if user hasn't specified a replace element, assume the whole regex should match regex = "(" + regex + ")"; } Pattern pattern = Pattern.compile(regex); Matcher matcher = pattern.matcher(label); if (!matcher.find()) { throw new GuessDatesException("Regular expression doesn't find a match in taxon label, " + label); } if (matcher.groupCount() < 1) { throw new GuessDatesException("Date group not defined in regular expression"); } parseDate(label, matcher.group(0), values); } private void parseDateFromValue(String label, HashMap<String, String> myload, double[] values) throws GuessDatesException { String dateStr = ""; if (myload.containsKey(label)) { dateStr = (String)(myload.get(label)); } else { throw new GuessDatesException("The imported table doesn't contain the taxon label, " + label); } parseDate(label, dateStr, values); } private DateFormat dateFormat1 = null; private DateFormat dateFormat2 = null; private DateFormat dateFormat3 = null; private void parseDate(String label, String value, double[] values) throws GuessDatesException { double d; double p = 0.0; if (dateFormat1 == null) { // set the timezones to GMT so they match the origin date... dateFormat1 = new SimpleDateFormat("yyyy-MM-dd"); dateFormat1.setTimeZone(TimeZone.getTimeZone("GMT")); dateFormat2 = new SimpleDateFormat("yyyy-MM"); dateFormat2.setTimeZone(TimeZone.getTimeZone("GMT")); dateFormat3 = new SimpleDateFormat("yyyy"); dateFormat3.setTimeZone(TimeZone.getTimeZone("GMT")); } if (parseCalendarDatesAndPrecision) { try { Date date = new Date(dateFormat1.parse(value)); d = date.getTimeValue(); p = 0.0; } catch (ParseException pe) { try { Date date = new Date(dateFormat2.parse(value)); d = date.getTimeValue(); p = 1.0 / 12.0; } catch (ParseException pe2) { try { Date date = new Date(dateFormat3.parse(value)); d = date.getTimeValue(); p = 1.0; } catch (ParseException pe3) { throw new GuessDatesException("Badly formatted date for taxon, " + label); } } } } else if (parseCalendarDates) { try { Date date = new Date(dateFormat.parse(value)); d = date.getTimeValue(); } catch (ParseException pe) { throw new GuessDatesException("Badly formatted date for taxon, " + label); } } else { try { d = Double.parseDouble(value); } catch (NumberFormatException nfe) { throw new GuessDatesException("Badly formatted date for taxon, " + label); } } values[0] = d; values[1] = p; } }