/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.addthis.hydra.task.source; import java.io.File; import java.io.IOException; import java.io.InterruptedIOException; import java.io.RandomAccessFile; import java.nio.channels.FileLock; import com.addthis.basis.util.LessFiles; import com.addthis.basis.util.Parameter; import com.addthis.bundle.channel.DataChannelError; import com.addthis.bundle.core.Bundle; import com.addthis.hydra.store.db.DBKey; import com.addthis.hydra.store.db.PageDB; import com.addthis.hydra.store.nonconcurrent.NonConcurrentPage; import com.addthis.hydra.store.nonconcurrent.NonConcurrentPageCache; import com.addthis.hydra.store.skiplist.ConcurrentPage; import com.addthis.meshy.service.stream.StreamService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * tracks and auto-indexes (when possible) SourceTypeStateful sources */ public class SourceTracker { private static final Logger log = LoggerFactory.getLogger(SourceTracker.class); private final boolean ignoreBundleCorruption = Parameter.boolValue("sourceTracker.ignoreBundleCorruption", true); private final PageDB<SimpleMark> db; private final FileLock lockDir; public SourceTracker(String dir) { File dirFile = LessFiles.initDirectory(dir); try { lockDir = new RandomAccessFile(new File(dirFile, "tracker.lock"), "rw").getChannel().lock(); db = new PageDB<SimpleMark>(dirFile, SimpleMark.class, 100, 100, ConcurrentPage.ConcurrentPageFactory.singleton); } catch (Exception e) { throw new RuntimeException(e); } } /** * tracker MUST be closed when finished or data could be lost. */ public void close() { db.close(); try { lockDir.release(); } catch (Exception ex) { ex.printStackTrace(); } } public void open(final TaskDataSource source) { source.init(); } /** * return a wrapped source that saves state automatically on close(). * failure to close() will result in a loss of tracking state. close() * can be called on a wrapped/tracked source even if next() fails. * <p/> * this method is responsible for init()ing the source because * some sources may be skipped and not opened. closing a source that * didn't need to be opened is wasteful. * <p/> * NOTE: the source must be init before calling this method * * @param source source to index and track * @return wrapped source or null if it could not be tracked */ public TaskDataSource init(final TaskDataSource source) { if (!(source instanceof SourceTypeStateful)) { return null; } final SourceTypeStateful stateful = (SourceTypeStateful) source; if (initStateful(stateful)) { if (log.isDebugEnabled()) log.debug("[init] wrapping " + source + " // " + source.getClass().getSimpleName()); return new AbstractDataSourceWrapper(source) { private long read = 0; private boolean end = false; @Override public Bundle next() throws DataChannelError { Bundle next = null; try { if (log.isTraceEnabled()) log.trace("ADSW.next(" + getKey(stateful) + ")"); next = super.next(); if (log.isTraceEnabled()) log.trace("ADSW.next(" + getKey(stateful) + ")=" + next); } catch (Exception ex) { if (log.isTraceEnabled()) log.trace("ADSW.next.err(" + getKey(stateful) + ") = " + ex); // We have made no progress and have tried before, give up forever return handleSourceError(read, end, stateful, ex); } if (next == null) { end = true; } read++; return next; } @Override public Bundle peek() throws DataChannelError { Bundle peek = null; try { if (log.isTraceEnabled()) log.trace("ADSW.peek(" + getKey(stateful) + ")"); peek = super.peek(); if (log.isTraceEnabled()) log.trace("ADSW.peek(" + getKey(stateful) + ")=" + peek); } catch (Exception ex) { if (log.isTraceEnabled()) log.trace("ADSW.peek.err(" + getKey(stateful) + ") = " + ex); // We have made no progress and have tried before, give up forever return handleSourceError(read, end, stateful, ex); } if (peek == null) { end = true; } return peek; } @Override public void close() { checkpoint(stateful, read, end); super.close(); } }; } return null; } /** * Opens and initializes a stateful data source * <p/> * return a wrapped source that saves state automatically on close(). * failure to close() will result in a loss of tracking state. close() * can be called on a wrapped/tracked source even if next() fails. * <p/> * this method is responsible for init()ing the source because * some sources may be skipped and not opened. closing a source that * didn't need to be opened is wasteful. * * @param source source to index and track * @return wrapped source or null if it could not be tracked */ public TaskDataSource openAndInit(final TaskDataSource source) { open(source); return init(source); } private Bundle handleSourceError(long read, boolean end, SourceTypeStateful stateful, Exception ex) { // on shutdown rejected execution may be through, we shouldn't abandon a source because of that // or because the socket to the streamserver/kafka stalled String exToString = ex.toString(); if (read == 0 && !isNew(stateful) && !(ex instanceof RuntimeException) && !(ex instanceof InterruptedIOException) && !(exToString.contains(StreamService.ERROR_EXCEED_OPEN)) // guard against overloaded mesh && !(exToString.contains(StreamService.ERROR_CHANNEL_LOST)) // guard against downed mesh && !(exToString.contains("java.util.concurrent.RejectedExecutionException")) && !(ignoreBundleCorruption && ex instanceof IOException && exToString.contains("!= valid Bundle type")) ) { log.warn("[error] declaring stream " + stateful.getSourceIdentifier() + " dead to us forever because of..." + ex, ex); end = true; checkpoint(stateful, read, end); return null; } // otherwise just give up for now else { log.warn("[error] terminate/end stream " + stateful.getSourceIdentifier() + " on ", ex); checkpoint(stateful, read, false); return null; } } public boolean hasChanged(SourceTypeStateful source) { String key = getKey(source); String val = source.getSourceStateMarker(); SimpleMark rec = getRecord(key); if (rec != null) { if (val.equals(rec.getValue()) && rec.isEnd()) { return false; } } return true; } private String getKey(SourceTypeStateful source) { String key = source.getSourceIdentifier(); if (log.isTraceEnabled()) { log.trace("original key: " + key); } /*------------------- HACK - SHOULD BE REMOVED WHEN WE SWITCH TO MESH FULLY ------------------------*/ if (key.contains("/live/../gold/")) { key = key.replace("/live/../gold/", "/gold/"); } /*------------------ END HACK ---------------------------------------------------------------------*/ if (log.isTraceEnabled()) { log.trace("returning key: " + key); } return key; } /** * Have we ever saved information about this source before? */ private boolean isNew(SourceTypeStateful source) { String key = getKey(source); SimpleMark rec = getRecord(key); return rec == null; } /** * check source against records in tracker and auto-index source to last position. * * @param source source to track * @return true if init was successful and source was set to last position. false is index failed or source is up to date. */ public boolean initStateful(SourceTypeStateful source) { try { String key = getKey(source); String val = source.getSourceStateMarker(); SimpleMark rec = getRecord(key); if (rec == null) { if (log.isDebugEnabled()) log.debug("new " + key); return true; } if (!val.equals(rec.getValue()) || !rec.isEnd()) { if (source instanceof SourceTypeIndexable && ((SourceTypeIndexable) source).setOffset(rec.getIndex())) { if (log.isDebugEnabled()) log.debug("restore " + key + " skipped " + rec.getIndex()); return true; } else { long count = rec.getIndex(); while (count > 0) { source.next(); count--; } if (log.isDebugEnabled()) { log.debug("restore " + key + " indexed " + rec.getIndex() + " missed " + count + " end " + rec.isEnd()); } return count == 0; } } else { if (log.isDebugEnabled()) log.debug("unchanged " + key); return false; } } catch (Exception ex) { handleSourceError(0, false, source, ex); return false; } } private SimpleMark getRecord(String key) { try { SimpleMark rec = db.get(new DBKey(0, key)); if (rec == null) { return null; } return rec; } catch (Exception ex) { throw new RuntimeException(ex); } } /** * save position of source in tracker so it can later be re-initialized. * * @param source source to track * @param read number of records read/consumed since last init() */ public void checkpoint(final SourceTypeStateful source, final long read, final boolean end) { try { String key = getKey(source); SimpleMark rec = getRecord(key); if (rec == null) { rec = new SimpleMark(); } rec.setValue(source.getSourceStateMarker()); if (source instanceof SourceTypeIndexable) { rec.setIndex(((SourceTypeIndexable) source).getOffset()); } else { rec.setIndex(rec.getIndex() + read); } rec.setEnd(end); if (log.isDebugEnabled()) { log.debug("save " + key + " @ " + rec.getIndex() + " [read=" + read + "] " + (end ? " [end]" : " [more]")); } db.put(new DBKey(0, key), rec); } catch (Exception ex) { throw new RuntimeException(ex); } } }