package org.araqne.log.api;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Extracts a date from a log line: a regular expression locates candidate
 * substrings and a {@link SimpleDateFormat} parses them.
 *
 * <p>
 * The extractor regex is either derived from the date format pattern (see
 * {@link #dateFormatToRegex(String)}) or supplied by the caller. Every capture
 * group of every match is tried in order until one of them parses.
 *
 * <p>
 * Each instance holds a single {@link SimpleDateFormat}, which the JDK
 * documents as not synchronized; this class adds no locking of its own, so an
 * instance should not be used by several threads at once without external
 * synchronization.
 */
public class DefaultDateParser implements DateParser {
    private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(DefaultDateParser.class);

    /** Characters that must be backslash-escaped to be matched literally by a regex. */
    private static final String REGEX_META_CHARS = "\\.[]{}()*+?^$|";

    private final SimpleDateFormat dateFormat;
    private final Pattern p;

    /**
     * Creates a parser whose extractor regex is generated from the date format.
     *
     * @param simpleDateFormat
     *            a {@link SimpleDateFormat} pattern such as
     *            {@code yyyy-MM-dd HH:mm:ss}
     */
    public DefaultDateParser(String simpleDateFormat) {
        this.dateFormat = new SimpleDateFormat(simpleDateFormat);
        this.p = Pattern.compile(dateFormatToRegex(simpleDateFormat));
    }

    /**
     * Creates a parser with an explicit extractor regex.
     *
     * @param dateFormat
     *            the format used to parse each extracted substring
     * @param dateExtractor
     *            a regex with at least one capture group; a regex without
     *            capture groups never yields a date (see {@link #parse(String)})
     */
    public DefaultDateParser(SimpleDateFormat dateFormat, String dateExtractor) {
        this.dateFormat = dateFormat;
        this.p = Pattern.compile(dateExtractor);
    }

    /**
     * Finds and parses the first date in the given line.
     *
     * <p>
     * Matches of the extractor regex are visited left to right; within a
     * match, capture groups are tried in ascending order and the first one
     * that the date format accepts wins. A parsed year of 1970 is replaced by
     * the current year, which covers formats that carry no year field.
     *
     * @param line
     *            the log line to scan; may be null
     * @return the parsed date, or null when the line is null, the extractor
     *         does not match, the extractor has no capture groups, or no
     *         captured text can be parsed
     */
    @Override
    public Date parse(String line) {
        if (line == null)
            return null;

        Matcher m = p.matcher(line);
        if (!m.find() || m.groupCount() == 0) {
            logger.trace("araqne log api: cannot find date extractor pattern in line: {}", line);
            return null;
        }

        do {
            for (int group = 1; group <= m.groupCount(); group++) {
                String dateString = m.group(group);
                // A group that did not take part in the match (e.g. one side
                // of an alternation) is null; parsing it would throw a
                // NullPointerException instead of a ParseException.
                if (dateString == null)
                    continue;

                try {
                    return applyCurrentYearIfMissing(dateFormat.parse(dateString));
                } catch (ParseException ignored) {
                    // this candidate is not a date; try the next group or match
                }
            }
        } while (m.find());

        logger.error("araqne log api: cannot find date in line: " + line);
        return null;
    }

    /**
     * Replaces a year of 1970 with the current year of the default calendar.
     */
    private static Date applyCurrentYearIfMissing(Date date) {
        Calendar c = Calendar.getInstance();
        int currentYear = c.get(Calendar.YEAR);
        c.setTime(date);
        if (c.get(Calendar.YEAR) == 1970)
            c.set(Calendar.YEAR, currentYear);
        return c.getTime();
    }

    /**
     * Translates a {@link SimpleDateFormat} pattern into a regular expression
     * that matches text written in that format. The whole expression is
     * wrapped in one capture group (group 1).
     *
     * <p>
     * Quoting follows {@link SimpleDateFormat}: text between single quotes is
     * literal, and {@code ''} stands for one single quote both inside and
     * outside a quoted section. Literal characters that are regex
     * metacharacters are backslash-escaped. An unterminated quote is tolerated
     * here: the remainder of the pattern is treated as literal text.
     *
     * <p>
     * Example: {@code yyyy-MM-dd'T'HH:mm:ss} becomes
     * {@code (\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})}.
     *
     * @param pattern
     *            the date format pattern
     * @return the generated regular expression
     */
    public static String dateFormatToRegex(String pattern) {
        StringBuilder regex = new StringBuilder();
        boolean inQuote = false;
        int length = pattern.length();

        regex.append("(");
        int i = 0;
        while (i < length) {
            char c = pattern.charAt(i);

            if (c == '\'') {
                if (i + 1 < length && pattern.charAt(i + 1) == '\'') {
                    // doubled quote: a literal single quote
                    regex.append('\'');
                    i += 2;
                } else {
                    // opening or closing quote: emits nothing by itself
                    inQuote = !inQuote;
                    i++;
                }
                continue;
            }

            if (inQuote) {
                // quoted text is literal, even when it consists of pattern letters
                appendLiteral(regex, c);
                i++;
                continue;
            }

            // length of the run of identical characters, e.g. 4 for "yyyy"
            int count = 1;
            while (i + count < length && pattern.charAt(i + count) == c)
                count++;

            appendField(regex, c, count);
            i += count;
        }
        regex.append(")");

        return regex.toString();
    }

    /**
     * Appends the regex for an unquoted run of {@code count} identical pattern
     * characters. Characters that are not handled pattern letters are emitted
     * as (escaped) literals repeated {@code count} times.
     */
    private static void appendField(StringBuilder regex, char letter, int count) {
        switch (letter) {
        case 'G':
            regex.append("(AD|BC)");
            break;
        case 'W':
        case 'F':
            regex.append("\\d" + repeat(1, count));
            break;
        case 'E':
            if (count <= 3)
                regex.append(".{3}");
            else
                regex.append("\\p{Upper}\\p{Lower}+day");
            break;
        case 'a':
            regex.append("(AM|PM)");
            break;
        case 'M':
            if (count > 3) {
                // NOTE: the embedded (?i) flag stays active for the rest of the
                // enclosing group, not only for the month name; kept as is so
                // that existing extractors keep matching the same input.
                regex.append("(?i)(January|February|March|April|May|June|July|August|September|"
                        + "October|November|December|Undecimber)");
                break;
            } else if (count == 3) {
                regex.append(".{3}");
                break;
            }
            // one or two letters: numeric month, handled like the fields below
        case 'w':
        case 'd':
        case 'H':
        case 'k':
        case 'K':
        case 'h':
        case 'm':
        case 's':
        case 'y':
            regex.append("\\d" + repeat(count, Math.max(2, count)));
            break;
        case 'D':
        case 'S':
            regex.append("\\d" + repeat(count, Math.max(3, count)));
            break;
        case 'Z':
            regex.append("[+-]\\d" + repeat(4));
            break;
        default:
            appendLiteral(regex, letter);
            if (count > 1)
                regex.append(repeat(count));
        }
    }

    /**
     * Appends one character so that the regex matches it literally.
     */
    private static void appendLiteral(StringBuilder regex, char c) {
        if (REGEX_META_CHARS.indexOf(c) >= 0)
            regex.append('\\');
        regex.append(c);
    }

    /** Returns the exact-count quantifier, e.g. <code>{4}</code>. */
    private static String repeat(int num) {
        return "{" + num + "}";
    }

    /**
     * Returns the range quantifier, e.g. <code>{1,2}</code>, collapsing to the
     * exact-count form when both bounds are equal.
     */
    private static String repeat(int min, int max) {
        if (min == max)
            return repeat(min);
        return "{" + min + "," + max + "}";
    }
}