/* * Copyright 2013 Future Systems * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.krakenapps.logstorage.engine; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.Collection; import java.util.Date; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.CopyOnWriteArraySet; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.ThreadFactory; import java.util.concurrent.atomic.AtomicInteger; import org.apache.felix.ipojo.annotations.Component; import org.apache.felix.ipojo.annotations.Invalidate; import org.apache.felix.ipojo.annotations.Provides; import org.apache.felix.ipojo.annotations.Requires; import org.apache.felix.ipojo.annotations.Validate; import org.krakenapps.confdb.Config; import org.krakenapps.confdb.ConfigDatabase; import org.krakenapps.confdb.ConfigService; import org.krakenapps.confdb.Predicate; import org.krakenapps.confdb.Predicates; import org.krakenapps.logstorage.BatchIndexingStatus; import org.krakenapps.logstorage.BatchIndexingTask; import org.krakenapps.logstorage.IndexTokenizer; import org.krakenapps.logstorage.IndexTokenizerRegistry; import org.krakenapps.logstorage.Log; import org.krakenapps.logstorage.LogCallback; import org.krakenapps.logstorage.LogCursor; import org.krakenapps.logstorage.LogIndexCursor; import org.krakenapps.logstorage.LogIndexQuery; import org.krakenapps.logstorage.LogIndexSchema; import org.krakenapps.logstorage.LogIndexer; import org.krakenapps.logstorage.LogIndexerStatus; import org.krakenapps.logstorage.LogRetentionPolicy; import org.krakenapps.logstorage.LogStorage; import org.krakenapps.logstorage.LogTableEventListener; import org.krakenapps.logstorage.LogTableRegistry; import org.krakenapps.logstorage.index.InvertedIndexFileSet; import org.krakenapps.logstorage.index.InvertedIndexItem; import org.krakenapps.logstorage.index.InvertedIndexUtil; import org.krakenapps.logstorage.index.InvertedIndexWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * @since 0.9 * @author xeraph */ @Component(name = "logstorage-indexer") @Provides public class LogIndexerEngine implements LogIndexer { private final Logger logger = LoggerFactory.getLogger(LogIndexerEngine.class); private final File indexBaseDir; private final File queueDir; @Requires private IndexTokenizerRegistry tokenizerRegistry; @Requires private LogStorage storage; @Requires private LogTableRegistry tableRegistry; @Requires private ConfigService conf; // table name to index config mappings private ConcurrentMap<String, Set<LogIndexSchema>> tableIndexes; // memory-buffered indexes private ConcurrentMap<OnlineIndexerKey, OnlineIndexer> onlineIndexers; // background index building private ExecutorService executor; // for realtime log indexing private LogReceiver receiver; // drop index when table drops, cache table name-id mappings private TableEventListener tableRegistryListener; // periodically flush and evict indexers private IndexerSweeper sweeper; private Thread sweeperThread; // to assign unique index id private AtomicInteger indexIdCounter; // cache table name->id mappings for index drop private ConcurrentMap<String, Integer> tableNameIdMap; private ConcurrentMap<BatchIndexKey, BatchIndexingTask> batchJobs; // index id, prevent also online indexing private CopyOnWriteArraySet<Integer> deleteLocks; // index id private CopyOnWriteArraySet<Integer> mergeLocks; public LogIndexerEngine() { indexBaseDir = new File(System.getProperty("kraken.data.dir"), "kraken-logstorage/index"); queueDir = new File(System.getProperty("kraken.data.dir"), "kraken-logstorage/index/queue"); queueDir.mkdirs(); receiver = new LogReceiver(); tableRegistryListener = new TableEventListener(); tableIndexes = new ConcurrentHashMap<String, Set<LogIndexSchema>>(); onlineIndexers = new ConcurrentHashMap<OnlineIndexerKey, OnlineIndexer>(); tableNameIdMap = new ConcurrentHashMap<String, Integer>(); batchJobs = new ConcurrentHashMap<BatchIndexKey, BatchIndexingTask>(); deleteLocks = new CopyOnWriteArraySet<Integer>(); mergeLocks = new CopyOnWriteArraySet<Integer>(); sweeper = new IndexerSweeper(); indexIdCounter = new AtomicInteger(); } @Validate public void start() { tableIndexes.clear(); onlineIndexers.clear(); tableNameIdMap.clear(); // build threads int cpuCount = Runtime.getRuntime().availableProcessors(); executor = Executors.newFixedThreadPool(cpuCount, new ThreadFactory() { @Override public Thread newThread(Runnable r) { return new Thread(r, "Batch Log Indexer"); } }); // load table name-id mappings for (String tableName : tableRegistry.getTableNames()) tableNameIdMap.put(tableName, tableRegistry.getTableId(tableName)); // load index configurations ConfigDatabase db = conf.ensureDatabase("kraken-logstorage"); Collection<LogIndexSchema> indexes = db.findAll(LogIndexSchema.class).getDocuments(LogIndexSchema.class); for (LogIndexSchema index : indexes) { Set<LogIndexSchema> s = tableIndexes.get(index.getTableName()); if (s == null) { s = new CopyOnWriteArraySet<LogIndexSchema>(); tableIndexes.put(index.getTableName(), s); } s.add(index); if (indexIdCounter.get() < index.getId()) indexIdCounter.set(index.getId()); } // start index sweeper sweeperThread = new Thread(sweeper, "LogStorage IndexWriter Sweeper"); sweeperThread.start(); // to listen drop event and drop all related indexes tableRegistry.addListener(tableRegistryListener); // receive all logs and index storage.addLogListener(receiver); } @Invalidate public void stop() { if (storage != null) storage.removeLogListener(receiver); if (tableRegistry != null) tableRegistry.removeListener(tableRegistryListener); sweeper.doStop = true; sweeperThread.interrupt(); executor.shutdownNow(); } @Override public void createIndex(LogIndexSchema config) { if (config == null) throw new IllegalArgumentException("config can not be null"); if (!tableRegistry.exists(config.getTableName())) throw new IllegalStateException("table does not exist: " + config.getTableName()); // assign new index id config.setId(indexIdCounter.incrementAndGet()); // check duplicated index ConfigDatabase db = conf.ensureDatabase("kraken-logstorage"); Predicate cond = Predicates.or( Predicates.field("id", config.getId()), Predicates.and(Predicates.field("table_name", config.getTableName()), Predicates.field("index_name", config.getIndexName()))); if (db.findOne(LogIndexSchema.class, cond) != null) throw new IllegalStateException("same index id (" + config.getId() + ") or name (" + config.getIndexName() + ") already exist in metadata"); // register index Set<LogIndexSchema> indexes = tableIndexes.get(config.getTableName()); if (indexes == null) { indexes = new CopyOnWriteArraySet<LogIndexSchema>(); tableIndexes.putIfAbsent(config.getTableName(), indexes); } // ensure index directory int tableId = tableRegistry.getTableId(config.getTableName()); File dir = new File(indexBaseDir, tableId + "/" + config.getId()); dir.mkdirs(); // save index metadata db.add(config); indexes.add(config); enqueueBatchIndexJobs(config); logger.info("kraken logstorage: created index => " + config); } private void enqueueBatchIndexJobs(LogIndexSchema config) { BatchIndexingTask task = new BatchIndexingTask(); task.setTableName(config.getTableName()); task.setIndexName(config.getIndexName()); task.setTableId(tableNameIdMap.get(config.getTableName())); task.setIndexId(config.getId()); task.setMinDay(config.getMinIndexDay()); batchJobs.put(new BatchIndexKey(config.getTableName(), config.getIndexName()), task); List<BatchIndexingStatus> builders = new ArrayList<BatchIndexingStatus>(); SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); List<Date> days = DateUtil.filt(storage.getLogDates(task.getTableName()), task.getMinDay(), task.getMaxDay()); for (Date day : days) { try { int tableId = tableRegistry.getTableId(task.getTableName()); String prefix = tableId + "-" + config.getId() + "-" + dateFormat.format(day) + "-"; File indexFile = File.createTempFile(prefix, ".pos", queueDir); File dataFile = File.createTempFile(prefix, ".seg", queueDir); BatchIndexingStatus s = new BatchIndexingStatus(); s.setTask(task); s.setDay(day); s.setFiles(new InvertedIndexFileSet(indexFile, dataFile)); task.getBuilds().put(day, s); builders.add(s); } catch (IOException e) { logger.error("kraken logstorage: cannot queue batch indexing for table " + config.getTableName() + ", index " + config.getIndexName(), e); } } for (BatchIndexingStatus s : builders) executor.execute(new IndexRunner(s)); } @Override public void dropIndex(String tableName, String indexName) { // check metadata LogIndexSchema found = getIndexConfig(tableName, indexName); // check database metadata ConfigDatabase db = conf.ensureDatabase("kraken-logstorage"); Config c = db.findOne(LogIndexSchema.class, Predicates.field("id", found.getId())); if (c == null) throw new IllegalStateException("index metadata not found, table=" + tableName + ", index=" + indexName); // set delete lock try { deleteLocks.add(found.getId()); // remove from memory and database Set<LogIndexSchema> s = tableIndexes.get(tableName); s.remove(found); db.remove(c); // cancel all batch jobs BatchIndexingTask task = batchJobs.get(new BatchIndexKey(tableName, indexName)); if (task != null) { task.setCanceled(true); } // evict online indexer for dropping index id for (OnlineIndexer indexer : new ArrayList<OnlineIndexer>(onlineIndexers.values())) { if (indexer.id != found.getId()) continue; onlineIndexers.remove(new OnlineIndexerKey(indexer.id, indexer.day, indexer.tableId, indexer.tableName, indexer.indexName)); logger.trace("kraken logstorage: closing online indexer [{}, {}] due to [{}] table drop", new Object[] { found.getId(), found.getIndexName(), found.getTableName() }); indexer.close(); } // purge index files int tableId = tableNameIdMap.get(tableName); File dir = new File(indexBaseDir, tableId + "/" + found.getId()); File[] files = dir.listFiles(); if (files != null) { for (File f : files) { String fileName = f.getName(); if (f.isFile() && (fileName.endsWith(".pos") || fileName.endsWith(".seg"))) { ensureDelete(f); } } } // try to delete empty directory dir.delete(); } finally { deleteLocks.remove(found.getId()); } } private LogIndexSchema getIndexSchema(String tableName, String indexName) { Set<LogIndexSchema> s = tableIndexes.get(tableName); if (s == null) throw new IllegalStateException("index not found, table=" + tableName + ", index=" + indexName); LogIndexSchema found = null; for (LogIndexSchema c : s) { if (c.getIndexName().equals(indexName)) found = c; } if (found == null) throw new IllegalStateException("index not found, table=" + tableName + ", index=" + indexName); return found; } private boolean ensureDelete(File f) { final int MAX_TIMEOUT = 30000; long begin = System.currentTimeMillis(); while (true) { if (f.delete()) { logger.trace("kraken logstorage: deleted index file [{}]", f.getAbsolutePath()); return true; } if (System.currentTimeMillis() - begin > MAX_TIMEOUT) { logger.error("kraken logstorage: delete timeout, cannot delete index file [{}]", f.getAbsolutePath()); return false; } } } @Override public void dropAllIndexes(String tableName) { Set<LogIndexSchema> indexes = tableIndexes.get(tableName); if (indexes == null) return; for (LogIndexSchema c : indexes) { try { dropIndex(tableName, c.getIndexName()); } catch (Throwable t) { logger.error("kraken logstorage: cannot drop index [" + c.getIndexName() + "] of table [" + tableName + "]", t); } } // try to delete index directory if empty int tableId = tableNameIdMap.get(tableName); File dir = new File(indexBaseDir, Integer.toString(tableId)); dir.delete(); } @Override public Set<String> getIndexNames(String tableName) { TreeSet<String> names = new TreeSet<String>(); Set<LogIndexSchema> indexes = tableIndexes.get(tableName); if (indexes == null) return names; for (LogIndexSchema c : indexes) names.add(c.getIndexName()); return names; } @Override public LogIndexSchema getIndexConfig(String tableName, String indexName) { Set<LogIndexSchema> indexes = tableIndexes.get(tableName); for (LogIndexSchema c : indexes) if (c.getIndexName().equals(indexName)) return c; return null; } @Override public List<Date> getIndexedDays(String tableName, String indexName) { if (!tableRegistry.exists(tableName)) return null; int tableId = tableRegistry.getTableId(tableName); LogIndexSchema config = getIndexConfig(tableName, indexName); if (config == null) return null; return getIndexedDays(tableId, config.getId()); } private List<Date> getIndexedDays(int tableId, int indexId) { File tableIndexDir = new File(indexBaseDir, tableId + "/" + indexId); SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); ArrayList<Date> days = new ArrayList<Date>(); File[] files = tableIndexDir.listFiles(); if (files == null) return days; for (File f : files) { if (!f.isFile() || !f.canRead()) continue; String fileName = f.getName(); if (!fileName.endsWith(".seg")) continue; try { String dateString = fileName.substring(0, fileName.length() - 4); Date day = dateFormat.parse(dateString); days.add(day); logger.debug("kraken logstorage: fetched indexed days, file={}, day={}", f.getAbsolutePath(), day); } catch (Throwable t) { logger.error("kraken logstorage: cannot parse index file name", t); } } return days; } @Override public LogIndexCursor search(LogIndexQuery q) throws IOException { List<IndexCursorItem> cursorItems = new ArrayList<IndexCursorItem>(); for (Entry<String, Set<LogIndexSchema>> pair : tableIndexes.entrySet()) { String tableName = pair.getKey(); if (q.getTableName() != null && !q.getTableName().equals(tableName)) continue; for (LogIndexSchema c : pair.getValue()) { // skip delete locked index (no more access) if (deleteLocks.contains(c.getId())) { logger.trace("kraken logstorage: skipping deleted locked index, table={}, index={}", c.getTableName(), c.getIndexName()); continue; } // skip merging index if (mergeLocks.contains(c.getId())) { logger.trace("kraken logstorage: skipping merging index, table={}, index={}", c.getTableName(), c.getIndexName()); continue; } if (q.getIndexName() != null && !q.getIndexName().equals(c.getIndexName())) continue; Integer tableId = tableNameIdMap.get(tableName); if (tableId == null) { logger.warn("kraken logstorage: garbage index metadata found [table={}, index={}]", tableName, c.getIndexName()); continue; } List<InvertedIndexItem> buffer = getIndexBuffer(tableName); cursorItems.add(new IndexCursorItem(tableId, c.getId(), tableName, c.getIndexName(), buffer)); } } return new MergedIndexCursor(this, q, cursorItems, indexBaseDir); } private List<InvertedIndexItem> getIndexBuffer(String tableName) { List<InvertedIndexItem> l = new LinkedList<InvertedIndexItem>(); for (OnlineIndexer indexer : onlineIndexers.values()) { l.addAll(indexer.queue); } return l; } private class IndexRunner implements Runnable { private BatchIndexingStatus status; public IndexRunner(BatchIndexingStatus status) { this.status = status; } @Override public void run() { boolean fail = false; try { buildIndex(); } catch (Throwable t) { logger.error("kraken logstorage: indexing failed, " + status, t); fail = true; } finally { if (!fail) logger.trace("kraken logstorage: batch indexing is completed - [{}]", status); } } private void buildIndex() throws IOException { long begin = System.currentTimeMillis(); SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); BatchIndexingTask task = status.getTask(); logger.trace("kraken logstorage: building index for table [{}], day [{}]", task.getTableName(), dateFormat.format(status.getDay())); // open index writer LogCursor cursor = null; InvertedIndexWriter writer = null; try { cursor = storage.openCursor(task.getTableName(), status.getDay(), true); writer = new InvertedIndexWriter(status.getFiles()); // prepare tokenizer LogIndexSchema schema = getIndexSchema(task.getTableName(), task.getIndexName()); IndexTokenizer tok = tokenizerRegistry.newTokenizer(schema.getTokenizerName(), schema.getTokenizerConfigs()); while (cursor.hasNext()) { if (task.isCanceled()) break; Log log = cursor.next(); Set<String> tokens = tok.tokenize(log.getData()); if (tokens == null) continue; status.addLogCount(1); status.addTokenCount(tokens.size()); long timestamp = log.getDate().getTime(); writer.write(new InvertedIndexItem(task.getTableName(), timestamp, log.getId(), tokens.toArray(new String[0]))); } } finally { if (cursor != null) cursor.close(); if (writer != null) writer.close(); if (task.isCanceled()) { boolean indexDeleted = status.getIndexFile().delete(); boolean dataDeleted = status.getDataFile().delete(); logger.trace( "kraken logstorage: batch indexing is canceled, table [{}], index [{}], day [{}], index deleted={}, data deleted={}", new Object[] { task.getTableName(), task.getIndexName(), dateFormat.format(status.getDay()), indexDeleted, dataDeleted }); status.setDone(true); return; } // move to index directory (or copy if partition is different) long elapsed = System.currentTimeMillis() - begin; logger.trace("kraken logstorage: indexing completed for table [{}], day [{}], elapsed [{}]sec", new Object[] { task.getTableName(), dateFormat.format(status.getDay()), elapsed / 1000 }); File destIndexFile = getIndexFilePath(task.getTableId(), task.getIndexId(), status.getDay(), ".pos"); File destDataFile = getIndexFilePath(task.getTableId(), task.getIndexId(), status.getDay(), ".seg"); OnlineIndexerKey key = new OnlineIndexerKey(task.getIndexId(), status.getDay(), task.getTableId(), task.getTableName(), task.getIndexName()); if (!destIndexFile.exists() && !destDataFile.exists()) { move(status.getIndexFile(), destIndexFile); move(status.getDataFile(), destDataFile); } else { InvertedIndexFileSet newer = new InvertedIndexFileSet(destIndexFile, destDataFile); InvertedIndexFileSet older = new InvertedIndexFileSet(status.getIndexFile(), status.getDataFile()); merge(key, older, newer); } status.setDone(true); } } } private void merge(OnlineIndexerKey key, InvertedIndexFileSet older, InvertedIndexFileSet newer) throws IOException { // prevent flush and evict OnlineIndexer indexer = getOnlineIndexer(key); indexer.prepareMerge(); // do merge File mergedIndexFile = null; File mergedDataFile = null; try { // prevent read/write file access mergeLocks.add(key.indexId); File dir = newer.getIndexFile().getParentFile(); mergedIndexFile = File.createTempFile("index-", ".mpos", dir); mergedDataFile = File.createTempFile("index-", ".mseg", dir); InvertedIndexFileSet merged = new InvertedIndexFileSet(mergedIndexFile, mergedDataFile); InvertedIndexUtil.merge(older, newer, merged); boolean success = true; if (!ensureDelete(newer.getIndexFile())) { success = false; logger.error("kraken logstorage: cannot delete online index file, {}", newer.getIndexFile().getName()); } if (!ensureDelete(newer.getDataFile())) { success = false; logger.error("kraken logstorage: cannot delete online index file, {}", newer.getDataFile().getName()); } if (!ensureDelete(older.getIndexFile())) { success = false; logger.error("kraken logstorage: cannot delete batch index file, {}", older.getIndexFile().getName()); } if (!ensureDelete(older.getDataFile())) { success = false; logger.error("kraken logstorage: cannot delete batch index file, {}", older.getDataFile().getName()); } if (!merged.getIndexFile().renameTo(newer.getIndexFile())) { success = false; logger.error("kraken logstorage: cannot rename [{}] to [{}]", merged.getIndexFile().getName(), newer .getIndexFile().getName()); } if (!merged.getDataFile().renameTo(newer.getDataFile())) { success = false; logger.error("kraken logstorage: cannot rename [{}] to [{}]", merged.getDataFile().getName(), newer.getDataFile() .getName()); } if (success) logger.trace("kraken logstorage: merge success for {}", key); else logger.error("kraken logstorage: merge failed for {}", key); } catch (Throwable t) { logger.error("kraken logstorage: merge failed for " + key + ", deleting temp files", t); // purge temporary files if failed if (mergedIndexFile != null) mergedIndexFile.delete(); if (mergedDataFile != null) mergedDataFile.delete(); } finally { // allow index access mergeLocks.remove(key.indexId); indexer.finishMerge(); } } private void move(File src, File dst) { String srcPath = src.getAbsolutePath(); if (!src.exists()) throw new IllegalStateException("source file not found: " + srcPath); // try rename String dstPath = dst.getAbsolutePath(); if (src.renameTo(dst)) { logger.trace("kraken logstorage: moved index file [{}] to [{}]", srcPath, dstPath); return; } // if rename fails, copy and delete old file if (dst.exists()) { logger.warn("kraken logstorage: need to merge file [{}] with [{}], not supported yet", dstPath, srcPath); return; } FileInputStream is = null; FileOutputStream os = null; try { byte[] b = new byte[8096]; is = new FileInputStream(src); os = new FileOutputStream(dst); while (true) { int len = is.read(b); if (len <= 0) break; os.write(b, 0, len); } logger.trace("kraken logstorage: rename failed, copied index file [{}] to [{}], and deleted old one", srcPath, dstPath); } catch (IOException e) { logger.error("kraken logstorage: cannot copy file [" + srcPath + "] to [" + dstPath + "]", e); } finally { if (is != null) { try { is.close(); } catch (IOException e) { } } if (os != null) { try { os.close(); } catch (IOException e) { } } boolean ret = src.delete(); if (!ret) logger.error("kraken logstorage: cannot delete temporary index file, {}", src.getAbsolutePath()); } } @Override public File getIndexDirectory(String tableName, String indexName) { LogIndexSchema schema = getIndexSchema(tableName, indexName); int tableId = tableRegistry.getTableId(tableName); return new File(indexBaseDir, tableId + "/" + schema.getId()); } private File getIndexFilePath(int tableId, int indexId, Date day, String suffix) { SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); String relativePath = tableId + "/" + indexId + "/" + dateFormat.format(day) + suffix; return new File(indexBaseDir, relativePath); } @Override public Date getPurgeBaseline(String tableName, String indexName) { LogRetentionPolicy p = storage.getRetentionPolicy(tableName); if (p == null || p.getRetentionDays() == 0) return null; List<Date> days = getIndexedDays(tableName, indexName); Date lastDay = getMaxDay(days.iterator()); if (lastDay == null) return null; return getBaseline(lastDay, p.getRetentionDays()); } private Date getMaxDay(Iterator<Date> days) { Date max = null; while (days.hasNext()) { Date day = days.next(); if (max == null) max = day; else if (max != null && day.after(max)) max = day; } return max; } private Date getBaseline(Date lastDay, int days) { Calendar c = Calendar.getInstance(); c.setTime(lastDay); c.add(Calendar.DAY_OF_MONTH, -days); c.set(Calendar.HOUR_OF_DAY, 0); c.set(Calendar.MINUTE, 0); c.set(Calendar.SECOND, 0); c.set(Calendar.MILLISECOND, 0); return c.getTime(); } @Override public void purge(String tableName, String indexName, Date fromDay, Date toDay) { LogIndexSchema schema = getIndexConfig(tableName, indexName); if (schema == null) throw new IllegalStateException("index not found for table=" + tableName + ", index=" + indexName); File dir = getIndexDirectory(tableName, indexName); if (dir == null) return; SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); for (File f : dir.listFiles()) { if (!f.isFile()) continue; if (!f.getName().endsWith(".pos") && !f.getName().endsWith(".seg")) continue; String dayStr = null; Date day = null; try { dayStr = f.getName().substring(0, f.getName().indexOf('.')); day = dateFormat.parse(dayStr); } catch (ParseException e) { continue; } if (fromDay != null && day.before(fromDay)) continue; if (toDay != null && day.after(toDay)) continue; try { deleteLocks.add(schema.getId()); logger.trace("kraken logstorage: try to purge index [{}] of table [{}], day [{}]", new Object[] { schema.getIndexName(), schema.getTableName(), dayStr }); ensureDelete(f); } finally { deleteLocks.remove(schema.getId()); } } } private class LogReceiver implements LogCallback { @Override public void onLog(Log log) { Set<LogIndexSchema> indexes = tableIndexes.get(log.getTableName()); if (indexes == null) return; int tableId = tableRegistry.getTableId(log.getTableName()); for (LogIndexSchema index : indexes) { try { // skip if delete locked if (deleteLocks.contains(index.getId())) continue; OnlineIndexer indexer = getOnlineIndexer(new OnlineIndexerKey(index.getId(), log.getDay(), tableId, index.getTableName(), index.getIndexName())); indexer.write(log); } catch (IOException e) { String msg = "kraken logstorage: cannot index log, table " + index.getTableName() + ", index " + index.getIndexName(); logger.error(msg, e); } } } } @Override public List<LogIndexerStatus> getIndexerStatuses() { List<LogIndexerStatus> indexers = new ArrayList<LogIndexerStatus>(onlineIndexers.size()); for (OnlineIndexer indexer : onlineIndexers.values()) { LogIndexerStatus s = new LogIndexerStatus(); s.setTableName(indexer.tableName); s.setIndexName(indexer.indexName); s.setDay(indexer.day); s.setQueueCount(indexer.queue.size()); s.setLastFlush(indexer.getLastFlush()); indexers.add(s); } return indexers; } private OnlineIndexer getOnlineIndexer(OnlineIndexerKey key) { OnlineIndexer online = onlineIndexers.get(key); if (online != null && online.isOpen()) return online; try { OnlineIndexer oldIndexer = onlineIndexers.get(key); if (oldIndexer != null) { synchronized (oldIndexer) { if (!oldIndexer.isOpen() && !oldIndexer.isClosed()) { // closing while (!oldIndexer.isClosed()) { try { oldIndexer.wait(1000); } catch (InterruptedException e) { } } while (onlineIndexers.get(key) == oldIndexer) { Thread.yield(); } online = loadNewIndexer(key); } else if (oldIndexer.isClosed()) { while (onlineIndexers.get(key) == oldIndexer) { Thread.yield(); } online = loadNewIndexer(key); } else { online = oldIndexer; } } } else { online = loadNewIndexer(key); } } catch (Exception e) { throw new IllegalStateException("cannot open indexer: " + key.indexId + ", date=" + key.day, e); } return online; } private OnlineIndexer loadNewIndexer(OnlineIndexerKey key) throws IOException { OnlineIndexer online = null; LogIndexSchema schema = getIndexSchema(key.tableName, key.indexName); IndexTokenizer tok = tokenizerRegistry.newTokenizer(schema.getTokenizerName(), schema.getTokenizerConfigs()); OnlineIndexer newWriter = new OnlineIndexer(key.tableName, key.indexName, key.tableId, key.indexId, key.day, tok); OnlineIndexer consensus = onlineIndexers.putIfAbsent(key, newWriter); if (consensus == null) online = newWriter; else { online = consensus; if (consensus != newWriter) newWriter.close(); } return online; } @Override public List<BatchIndexingTask> getBatchIndexingTasks() { // TODO: clone each items (prevent modification) return new ArrayList<BatchIndexingTask>(batchJobs.values()); } private class IndexerSweeper implements Runnable { private volatile boolean doStop = false; @Override public void run() { try { while (true) { try { if (doStop) break; Thread.sleep(1000); sweep(); cleanBatchJobs(); } catch (InterruptedException e) { logger.trace("kraken logstorage: indexer sweeper interrupted"); } catch (Exception e) { logger.error("krakne logstorage: indexer sweeper error", e); } } } finally { doStop = false; } logger.info("kraken logstorage: indexer sweeper stopped"); } private void sweep() { List<OnlineIndexer> evicts = new ArrayList<OnlineIndexer>(); long now = System.currentTimeMillis(); for (OnlineIndexer indexer : onlineIndexers.values()) { // skip delete locked index if (deleteLocks.contains(indexer.id)) continue; boolean doFlush = (now - indexer.getLastFlush().getTime()) > 10000 || indexer.needFlush(); if (doFlush) { try { logger.trace("kraken logstorage: flushing index [{}]", indexer.id); indexer.flush(); } catch (IOException e) { logger.error("kraken logstorage: cannot flush index " + indexer.id, e); } } // close file if indexer is in idle state int interval = (int) (now - indexer.lastAccess); if (interval > 30000 && !indexer.merging) evicts.add(indexer); } // evict for (OnlineIndexer indexer : evicts) { logger.trace("kraken logstorage: closing index writer [{}]", indexer); indexer.close(); onlineIndexers.remove(new OnlineIndexerKey(indexer.id, indexer.day, indexer.tableId, indexer.tableName, indexer.indexName)); } } private void cleanBatchJobs() { for (BatchIndexKey key : new ArrayList<BatchIndexKey>(batchJobs.keySet())) { BatchIndexingTask task = batchJobs.get(key); if (task.isDone()) { batchJobs.remove(key); logger.info("kraken logstorage: batch indexing job [table={}, index={}] is completed", key.tableName, key.indexName); } } } } private class OnlineIndexer { private int id; /** * only yyyy-MM-dd (excluding hour, min, sec, milli) */ private Date day; private int tableId; private String tableName; private String indexName; /** * is in closing state? */ private boolean closing; /** * maintain last write access time. idle indexer should be evicted */ private long lastAccess = System.currentTimeMillis(); private boolean merging; // waiting flush queue private List<InvertedIndexItem> queue; private InvertedIndexWriter writer; private IndexTokenizer tokenizer; private File indexFile; private File dataFile; public OnlineIndexer(String tableName, String indexName, int tableId, int indexId, Date day, IndexTokenizer tokenizer) throws IOException { this.tableName = tableName; this.indexName = indexName; this.tableId = tableId; this.id = indexId; this.day = day; this.tokenizer = tokenizer; this.queue = new ArrayList<InvertedIndexItem>(10000); this.indexFile = getIndexFilePath(tableId, indexId, day, ".pos"); this.dataFile = getIndexFilePath(tableId, indexId, day, ".seg"); this.writer = new InvertedIndexWriter(indexFile, dataFile); } public void prepareMerge() { synchronized (this) { this.merging = true; writer.close(); } } public void finishMerge() throws IOException { synchronized (this) { writer = new InvertedIndexWriter(indexFile, dataFile); this.merging = false; } } public boolean isOpen() { return writer != null && closing == false; } public boolean isClosed() { return closing == true && writer == null; } public Date getLastFlush() { return writer.getLastFlush(); } public void write(Log log) throws IOException { if (logger.isDebugEnabled()) logger.debug("kraken logstorage: write to index, {}", log.getData()); Set<String> tokens = tokenizer.tokenize(log.getData()); if (tokens == null) return; long timestamp = log.getDate().getTime(); synchronized (this) { queue.add(new InvertedIndexItem(log.getTableName(), timestamp, log.getId(), tokens.toArray(new String[0]))); if (logger.isDebugEnabled()) logger.debug("kraken logstorage: queued tokens for index, {}", tokens); if (needFlush()) flush(); } } public boolean needFlush() { return queue.size() > 10000; } public void flush() throws IOException { if (merging) { logger.trace("kraken logstorage: merging in-progress, ignore flush index [{}]", id); return; } if (logger.isTraceEnabled()) { SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); logger.trace("kraken logstorage: flushing index [{}], day [{}]", id, dateFormat.format(day)); } synchronized (this) { for (InvertedIndexItem item : queue) writer.write(item); queue.clear(); writer.flush(); notifyAll(); } } public void close() { if (closing) return; try { synchronized (this) { closing = true; flush(); writer.close(); notifyAll(); writer = null; } } catch (IOException e) { logger.error("cannot close online index writer", e); } } @Override public String toString() { SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); return "id=" + id + ", day=" + dateFormat.format(day); } } private class TableEventListener implements LogTableEventListener { @Override public void onCreate(String tableName, Map<String, String> tableMetadata) { tableNameIdMap.put(tableName, tableRegistry.getTableId(tableName)); } @Override public void onDrop(String tableName) { // cancel index build job for (BatchIndexKey key : new ArrayList<BatchIndexKey>(batchJobs.keySet())) { if (!key.tableName.equals(tableName)) continue; BatchIndexingTask task = batchJobs.remove(key); task.setCanceled(true); } // delete index logger.info("kraken logstorage: dropping all indexes of table " + tableName); dropAllIndexes(tableName); tableNameIdMap.remove(tableName); } } }