package net.filebot.similarity;

import static java.util.stream.Collectors.*;
import static net.filebot.util.FileUtilities.*;
import static net.filebot.util.StringUtilities.*;

import java.io.File;
import java.time.DateTimeException;
import java.time.LocalDate;
import java.time.Month;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.DateTimeParseException;
import java.time.format.TextStyle;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.function.Predicate;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import net.filebot.web.SimpleDate;
import one.util.streamex.StreamEx;

/**
 * Finds calendar dates embedded in text (or in the trailing elements of a file path) by trying a fixed
 * list of date layouts, one compiled {@link DatePattern} per layout and locale language.
 */
public class DateMatcher {

    /** Default plausibility filter, built from the years 1930 and 2050 (see {@link DateFilter}). */
    public static final DateFilter DEFAULT_SANITY = new DateFilter(1930, 2050);

    /** Compiled patterns in priority order; the first pattern that yields a date wins. */
    private final DatePattern[] patterns;

    /**
     * Creates a matcher for the built-in date layouts.
     *
     * @param sanity filter applied to every parsed date; {@code null} disables filtering
     * @param locale locales used for month names; one pattern set is generated per distinct language.
     *               If no locale is given, no patterns are generated and nothing will ever match.
     */
    public DateMatcher(DateFilter sanity, Locale... locale) {
        // generate default date format patterns
        String[] format = new String[7];

        // match yyyy-mm-dd patterns like 2010-10-24, 2009/6/1, etc
        format[0] = "y M d";

        // match dd-mm-yyyy patterns like 1.1.2010, 01/06/2010, etc
        format[1] = "d M y";

        // match yyyy.MMMMM.dd patterns like 2015.October.05
        format[2] = "y MMMM d";

        // match yyyy.MMM.dd patterns like 2015.Oct.6
        format[3] = "y MMM d";

        // match dd.MMMMM.yyyy patterns like 25 July 2014
        format[4] = "d MMMM y";

        // match dd.MMM.yyyy patterns like 8 Sep 2015
        format[5] = "d MMM y";

        // match yyyymmdd patterns like 20140408
        format[6] = "yyyyMMdd";

        this.patterns = compile(format, sanity, locale);
    }

    /**
     * Compiles each date format into one {@link DateFormatPattern} per distinct locale language.
     * <p>
     * The tokens of a format (split on {@link DateFormatPattern#DELIMITER}) become capturing groups that are
     * separated by exactly one non-digit character; the whole expression must not be directly preceded or
     * followed by an ASCII alphanumeric character. Patterns that end up with the same regex for a given
     * format (e.g. purely numeric formats) are emitted only once.
     *
     * @param pattern date formats, tokens separated by {@link DateFormatPattern#DELIMITER}
     * @param sanity  filter passed on to every compiled pattern, may be {@code null}
     * @param locale  locales used for month names and for parsing
     * @return the compiled patterns, ordered by format and then by locale
     */
    protected DatePattern[] compile(String[] pattern, DateFilter sanity, Locale... locale) {
        return StreamEx.of(pattern).flatMap(dateFormat -> {
            return StreamEx.of(locale).distinct(Locale::getLanguage).map(formatLocale -> {
                String regex = StreamEx.split(dateFormat, DateFormatPattern.DELIMITER)
                        .map(g -> getPatternGroup(g, formatLocale))
                        .joining("\\D", "(?<!\\p{Alnum})", "(?!\\p{Alnum})");
                return new DateFormatPattern(regex, dateFormat, formatLocale, sanity);
            }).distinct(DateFormatPattern::toString);
        }).toArray(DateFormatPattern[]::new);
    }

    /**
     * Maps a single date format token to the regex capturing group that recognizes it.
     *
     * @param token  one of {@code y}, {@code M}, {@code d}, {@code yyyyMMdd}, {@code MMMM}, {@code MMM}
     * @param locale locale used for month name tokens
     * @return regex source of a single capturing group
     * @throws IllegalArgumentException if the token is not supported
     */
    protected String getPatternGroup(String token, Locale locale) {
        switch (token) {
        case "y":
            return "(\\d{4})";
        case "M":
            return "(\\d{1,2})";
        case "d":
            return "(\\d{1,2})";
        case "yyyyMMdd":
            return "(\\d{8})";
        case "MMMM":
            return getMonthNamePatternGroup(TextStyle.FULL, locale);
        case "MMM":
            return getMonthNamePatternGroup(TextStyle.SHORT, locale);
        default:
            throw new IllegalArgumentException(token);
        }
    }

    /**
     * Builds a capturing group that matches any of the twelve month display names literally.
     *
     * @param style  display style of the month names (full or short)
     * @param locale locale of the month names
     * @return regex alternation of the quoted month names wrapped in parentheses
     */
    protected String getMonthNamePatternGroup(TextStyle style, Locale locale) {
        return StreamEx.of(Month.values())
                .map(m -> m.getDisplayName(style, locale))
                .map(Pattern::quote)
                .joining("|", "(", ")");
    }

    /**
     * Returns the date found by the first pattern (in priority order) that yields one.
     *
     * @param seq text to search
     * @return the matched date, or {@code null} if no pattern yields a date
     */
    public SimpleDate match(CharSequence seq) {
        for (DatePattern pattern : patterns) {
            SimpleDate match = pattern.match(seq);
            if (match != null) {
                return match;
            }
        }
        return null;
    }

    /**
     * Returns the start index reported by the first pattern (in priority order) that finds a date at or
     * after {@code fromIndex}. Note that this is the first successful pattern, not necessarily the
     * left-most date in the text.
     *
     * @param seq       text to search
     * @param fromIndex index at which the search region starts
     * @return start index of the date, or {@code -1} if no pattern finds one
     */
    public int find(CharSequence seq, int fromIndex) {
        for (DatePattern pattern : patterns) {
            int pos = pattern.find(seq, fromIndex);
            if (pos >= 0) {
                return pos;
            }
        }
        return -1;
    }

    /**
     * Searches the names produced by {@link #tokenizeTail(File)} in order and returns the first date found.
     *
     * @param file file whose path tail is searched
     * @return the matched date, or {@code null} if none of the names contains a date
     */
    public SimpleDate match(File file) {
        for (String name : tokenizeTail(file)) {
            SimpleDate match = match(name);
            if (match != null) {
                return match;
            }
        }
        return null;
    }

    /**
     * Collects the names of the path elements returned by {@code listPathTail(file, 2, true)}.
     * NOTE(review): presumably the file itself and its parent folder, nearest first; confirm against
     * FileUtilities.
     *
     * @param file file whose path tail is tokenized
     * @return the names of the listed path elements, in the order they were listed
     */
    protected List<String> tokenizeTail(File file) {
        List<String> tail = new ArrayList<>(2);
        for (File f : listPathTail(file, 2, true)) {
            tail.add(getName(f));
        }
        return tail;
    }

    /** A single strategy for recognizing a date inside a character sequence. */
    public static interface DatePattern {

        /**
         * @param seq text to search
         * @return the date found in {@code seq}, or {@code null} if there is none
         */
        public SimpleDate match(CharSequence seq);

        /**
         * @param seq       text to search
         * @param fromIndex index at which the search starts
         * @return start index of the date found, or {@code -1} if there is none
         */
        public int find(CharSequence seq, int fromIndex);
    }

    /**
     * {@link DatePattern} that locates candidates with a regex and validates them by parsing the captured
     * groups with a {@link DateTimeFormatter}.
     */
    public static class DateFormatPattern implements DatePattern {

        /** Separator between format tokens, also used to join captured groups before parsing. */
        public static final String DELIMITER = " ";

        public final Pattern pattern;
        public final DateTimeFormatter format;
        public final DateFilter sanity;

        /**
         * @param pattern regex with one capturing group per token of {@code format}
         * @param format  date format whose tokens are separated by {@link #DELIMITER}
         * @param locale  locale used for parsing month names
         * @param sanity  filter applied to parsed dates; {@code null} accepts every valid date
         */
        public DateFormatPattern(String pattern, String format, Locale locale, DateFilter sanity) {
            // UNICODE_CASE makes case-insensitive matching work for non-ASCII month names as well
            this.pattern = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

            // the formatter must be case-insensitive too, otherwise text accepted by the regex
            // (e.g. "2015.OCT.6") fails to parse because formatters are case-sensitive by default
            this.format = new DateTimeFormatterBuilder().parseCaseInsensitive().appendPattern(format).toFormatter(locale);

            this.sanity = sanity;
        }

        /**
         * Parses the captured groups of a regex match into a date.
         *
         * @param match regex match whose capturing groups correspond to the format tokens
         * @return the parsed date, or {@code null} if the text is not a valid date or the sanity filter
         *         rejects it
         */
        protected SimpleDate process(MatchResult match) {
            try {
                String dateString = streamCapturingGroups(match).collect(joining(DELIMITER));
                LocalDate date = LocalDate.parse(dateString, format);

                if (sanity == null || sanity.test(date)) {
                    return new SimpleDate(date.getYear(), date.getMonthValue(), date.getDayOfMonth());
                }
            } catch (DateTimeParseException e) {
                // date is invalid
            }
            return null;
        }

        /**
         * Returns the first regex candidate in {@code seq} that is a valid, accepted date. Candidates that
         * look like a date but are invalid or rejected (e.g. {@code 2010-13-45}) are skipped so that they
         * cannot hide a valid date later in the same text.
         */
        @Override
        public SimpleDate match(CharSequence seq) {
            Matcher matcher = pattern.matcher(seq);

            while (matcher.find()) {
                SimpleDate date = process(matcher);
                if (date != null) {
                    return date;
                }
            }
            return null;
        }

        /**
         * Returns the start index of the first regex candidate at or after {@code fromIndex} that is a
         * valid, accepted date; invalid or rejected candidates are skipped.
         *
         * @throws IndexOutOfBoundsException if {@code fromIndex} is negative or greater than the length of
         *                                   {@code seq}
         */
        @Override
        public int find(CharSequence seq, int fromIndex) {
            Matcher matcher = pattern.matcher(seq).region(fromIndex, seq.length());

            while (matcher.find()) {
                if (process(matcher) != null) {
                    return matcher.start();
                }
            }
            return -1;
        }

        /** Returns the regex source; used to de-duplicate equivalent patterns. */
        @Override
        public String toString() {
            return pattern.pattern();
        }
    }

    /**
     * Plausibility filter for dates.
     * <p>
     * {@link #test(LocalDate)} uses <b>exclusive</b> bounds, whereas {@link #acceptYear(int)} uses
     * <b>inclusive</b> year bounds. With {@link #DateFilter(int, int)} the upper bound is January 1st of
     * {@code maxYear}, so {@code acceptYear(maxYear)} is {@code true} although no date of that year passes
     * {@code test}.
     */
    public static class DateFilter implements Predicate<LocalDate> {

        public final LocalDate min;
        public final LocalDate max;

        private final int minYear;
        private final int maxYear;

        /**
         * @param min exclusive lower bound
         * @param max exclusive upper bound
         */
        public DateFilter(LocalDate min, LocalDate max) {
            this.min = min;
            this.max = max;
            this.minYear = min.getYear();
            this.maxYear = max.getYear();
        }

        /**
         * Uses January 1st of {@code minYear} and January 1st of {@code maxYear} as the (exclusive) date
         * bounds.
         *
         * @param minYear first year accepted by {@link #acceptYear(int)}
         * @param maxYear last year accepted by {@link #acceptYear(int)}
         */
        public DateFilter(int minYear, int maxYear) {
            this.min = LocalDate.of(minYear, Month.JANUARY, 1);
            this.max = LocalDate.of(maxYear, Month.JANUARY, 1);
            this.minYear = minYear;
            this.maxYear = maxYear;
        }

        /**
         * @return {@code true} if the date is strictly after {@link #min} and strictly before {@link #max}
         */
        @Override
        public boolean test(LocalDate date) {
            return date.isAfter(min) && date.isBefore(max);
        }

        /**
         * @return {@code true} if the year lies within the inclusive year range of this filter
         */
        public boolean acceptYear(int year) {
            return minYear <= year && year <= maxYear;
        }

        /**
         * Checks a date given as separate fields.
         *
         * @return {@code true} if the year is accepted and the date passes {@link #test(LocalDate)};
         *         {@code false} otherwise, including when the fields do not form a valid calendar date
         */
        public boolean acceptDate(int year, int month, int day) {
            if (!acceptYear(year)) {
                return false;
            }
            try {
                return test(LocalDate.of(year, month, day));
            } catch (DateTimeException e) {
                // not a real calendar date (e.g. month 13 or February 30th), so it cannot be accepted
                return false;
            }
        }
    }
}