/* * Copyright 2013 Eediom Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.araqne.log.api; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; import java.text.ParsePosition; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.TimeZone; import java.util.concurrent.atomic.AtomicLong; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.araqne.api.SystemProperty; /** * @since 2.4.6 * @author xeraph * */ public class MultilineLogExtractor { private final org.slf4j.Logger slog = org.slf4j.LoggerFactory.getLogger(MultilineLogExtractor.class); private static boolean collectEmptyLine; private Logger logger; private String charset = "utf-8"; private Matcher beginMatcher; private Matcher endMatcher; private Matcher dateMatcher; private SimpleDateFormat dateFormat; private boolean eofFlush = false;; private LogPipe pipe; // XXX private boolean isUTF16LE = false; // assign current year to date private Calendar yearModifier; static { collectEmptyLine = SystemProperty.isEnabled("araqne.logapi.collect_empty_line"); } public static MultilineLogExtractor build(Logger logger, LogPipe receiver) { MultilineLogExtractor extractor = new MultilineLogExtractor(logger, receiver); Map<String, String> configs = logger.getConfigs(); // optional String datePatternRegex = configs.get("date_pattern"); if (datePatternRegex != null) { extractor.setDateMatcher(Pattern.compile(datePatternRegex).matcher("")); } // optional String dateLocale = configs.get("date_locale"); if (dateLocale == null) dateLocale = "en"; // optional String dateFormatString = configs.get("date_format"); String timeZone = configs.get("timezone"); if (dateFormatString != null) extractor.setDateFormat(new SimpleDateFormat(dateFormatString, new Locale(dateLocale)), timeZone); // optional String beginRegex = configs.get("begin_regex"); if (beginRegex != null) extractor.setBeginMatcher(Pattern.compile(beginRegex).matcher("")); String endRegex = configs.get("end_regex"); if (endRegex != null) extractor.setEndMatcher(Pattern.compile(endRegex).matcher("")); // optional String charset = configs.get("charset"); if (charset == null) charset = "utf-8"; extractor.setCharset(charset); String newlogRegex = configs.get("newlog_designator"); if (newlogRegex != null) extractor.setBeginMatcher(Pattern.compile(newlogRegex).matcher("")); String newlogEndRegex = configs.get("newlog_end_designator"); if (newlogEndRegex != null) extractor.setEndMatcher(Pattern.compile(newlogEndRegex).matcher("")); String eofFlush = configs.get("eof_flush"); if (eofFlush != null) extractor.setEofFlush(Boolean.parseBoolean(eofFlush)); return extractor; } public MultilineLogExtractor(Logger logger, LogPipe pipe) { this.logger = logger; this.pipe = pipe; } public String getCharset() { return charset; } public void setCharset(String charset) { this.charset = charset; isUTF16LE = charset.equalsIgnoreCase("UTF-16LE"); } public Matcher getBeginMatcher() { return beginMatcher; } public void setBeginMatcher(Matcher beginMatcher) { this.beginMatcher = beginMatcher; } public Matcher getEndMatcher() { return endMatcher; } public void setEndMatcher(Matcher endMatcher) { this.endMatcher = endMatcher; } public Matcher getDateMatcher() { return dateMatcher; } public void setDateMatcher(Matcher dateMatcher) { this.dateMatcher = dateMatcher; } public SimpleDateFormat getDateFormat() { return dateFormat; } public void setDateFormat(SimpleDateFormat dateFormat) { setDateFormat(dateFormat, null); } public void setDateFormat(SimpleDateFormat dateFormat, String timeZone) { this.dateFormat = dateFormat; if (timeZone != null) { if (TimeZoneMappings.getTimeZone(timeZone) != null) timeZone = (String) TimeZoneMappings.getTimeZone(timeZone); dateFormat.setTimeZone(TimeZone.getTimeZone(timeZone)); } if (dateFormat != null && !dateFormat.toPattern().contains("yyyy")) { yearModifier = Calendar.getInstance(); if (timeZone != null) yearModifier.setTimeZone(TimeZone.getTimeZone(timeZone)); } } public boolean isEofFlush() { return eofFlush; } public void setEofFlush(boolean eofFlush) { this.eofFlush = eofFlush; } public void extract(InputStream is, AtomicLong lastPosition) throws IOException { extract(is, lastPosition, null); } public void extract(InputStream is, AtomicLong lastPosition, String dateFromFileName) throws IOException { ByteArrayOutputStream logBuf = new ByteArrayOutputStream(); // last chunk of page which does not contains new line ByteArrayOutputStream temp = new ByteArrayOutputStream(); byte[] b = new byte[512 * 1024]; while (true) { if (logger != null) { LoggerStatus status = logger.getStatus(); if (status == LoggerStatus.Stopping || status == LoggerStatus.Stopped) break; } int next = 0; int len = is.read(b); if (len < 0) break; ArrayList<Log> output = new ArrayList<Log>(4000); for (int i = 0; i < len; i++) { if (b[i] == 0xa) { if (isUTF16LE) i += 1; buildLogOutput(logBuf, b, next, i - next + 1, lastPosition, temp, dateFromFileName, output); next = i + 1; } } if (output.size() > 0) pipe.onLogBatch(logger, output.toArray(new Log[0])); // temp should be matched later (line regex test) temp.write(b, next, len - next); } if (eofFlush && temp.size() > 0) { String line = new String(temp.toByteArray(), charset); Map<String, Object> m = new HashMap<String, Object>(); m.put("line", line); SimpleLog log = new SimpleLog(parseDate(line, dateFromFileName), logger == null ? null : logger.getFullName(), m); pipe.onLog(logger, log); } } private void buildLogOutput(ByteArrayOutputStream logBuf, byte[] b, int offset, int length, AtomicLong lastPosition, ByteArrayOutputStream temp, String dateFromFileName, List<Log> output) { String log = null; long dataLength = 0; try { long before = lastPosition.get(); log = buildLog(logBuf, b, offset, length, lastPosition, temp); dataLength = lastPosition.get() - before; } catch (UnsupportedEncodingException e) { } if (log != null) { int l = log.length(); boolean cr = false; if (l >= 2) cr = log.charAt(l - 2) == '\r'; boolean lf = log.charAt(l - 1) == '\n'; if (cr && lf) log = log.substring(0, l - 2); else if (lf) log = log.substring(0, l - 1); if (log.length() > 0) { Date d = parseDate(log, dateFromFileName); Map<String, Object> m = new HashMap<String, Object>(); m.put("line", log); SimpleLog simpleLog = new SimpleLog(d, logger == null ? null : logger.getFullName(), m); simpleLog.setDataLength(dataLength); output.add(simpleLog); } else if (collectEmptyLine) { Date d = parseDate("", dateFromFileName); Map<String, Object> m = new HashMap<String, Object>(); m.put("line", ""); SimpleLog simpleLog = new SimpleLog(d, logger == null ? null : logger.getFullName(), m); simpleLog.setDataLength(0); output.add(simpleLog); } } } /** * @param buf * the buffer which hold partial multiline log * @param b * read block which contains new line * @param offset * the new line offset * @param len * the new line length * @param lastPosition * the last position which read and written as log * @return new (multiline) log * @throws UnsupportedEncodingException */ private String buildLog(ByteArrayOutputStream buf, byte[] b, int offset, int len, AtomicLong lastPosition, ByteArrayOutputStream temp) throws UnsupportedEncodingException { String line = null; if (temp.size() > 0) { temp.write(b, offset, len); line = new String(temp.toByteArray(), charset); } else { line = new String(b, offset, len, charset); } if (!line.endsWith("\n")) { if (temp.size() == 0) temp.write(b, offset, len); return null; } if (beginMatcher != null) beginMatcher.reset(line); if (endMatcher != null) endMatcher.reset(line); if (beginMatcher == null && endMatcher == null) { if (temp.size() > 0) { byte[] t = temp.toByteArray(); buf.write(t, 0, t.length); temp.reset(); } else { buf.write(b, offset, len); } byte[] old = buf.toByteArray(); buf.reset(); lastPosition.addAndGet(old.length); return new String(old, charset); } if (beginMatcher != null && beginMatcher.find()) { byte[] old = buf.toByteArray(); String log = null; if (old.length > 0) { log = new String(old, charset); lastPosition.addAndGet(old.length); buf.reset(); } if (temp.size() > 0) { byte[] t = temp.toByteArray(); buf.write(t, 0, t.length); temp.reset(); } else { buf.write(b, offset, len); } return log; } else if (endMatcher != null && endMatcher.find()) { if (temp.size() > 0) { byte[] t = temp.toByteArray(); buf.write(t, 0, t.length); temp.reset(); } else { buf.write(b, offset, len); } byte[] old = buf.toByteArray(); lastPosition.addAndGet(old.length); String log = new String(old, charset); buf.reset(); return log; } else { if (temp.size() > 0) { byte[] t = temp.toByteArray(); buf.write(t, 0, t.length); temp.reset(); } else { buf.write(b, offset, len); } } return null; } protected Date parseDate(String line, String dateFromFileName) { if (dateFormat == null) return new Date(); String s = dateFromFileName; if (dateMatcher != null) { dateMatcher.reset(line); if (dateMatcher.find()) { int count = dateMatcher.groupCount(); for (int i = 1; i <= count; i++) { if (s == null) s = dateMatcher.group(i); else s += dateMatcher.group(i); } } } if (s == null) return new Date(); Date d = null; try { d = dateFormat.parse(s, new ParsePosition(0)); if (d == null) return new Date(); } catch (NumberFormatException e) { slog.debug("araqne log api: cannot parse date [{}] line [{}]", s, line); return new Date(); } if (yearModifier != null) { int year = Calendar.getInstance().get(Calendar.YEAR); yearModifier.setTime(d); yearModifier.set(Calendar.YEAR, year); d = yearModifier.getTime(); } return d; } public static void main(String[] args) { Charset forName = Charset.forName("UTF-16LE"); System.out.println(forName); } }