/** * This software is licensed to you under the Apache License, Version 2.0 (the * "Apache License"). * * LinkedIn's contributions are made under the Apache License. If you contribute * to the Software, the contributions will be deemed to have been made under the * Apache License, unless you expressly indicate otherwise. Please do not make any * contributions that would be inconsistent with the Apache License. * * You may obtain a copy of the Apache License at http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, this software * distributed under the Apache License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the Apache * License for the specific language governing permissions and limitations for the * software governed under the Apache License. * * © 2012 LinkedIn Corp. All Rights Reserved. */ package com.senseidb.indexing; import com.senseidb.metrics.MetricFactory; import java.util.Collection; import java.util.Comparator; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; import javax.management.StandardMBean; import org.apache.commons.configuration.Configuration; import org.apache.commons.configuration.ConfigurationException; import org.apache.log4j.Logger; import org.json.JSONObject; import proj.zoie.api.DataConsumer; import proj.zoie.api.DataConsumer.DataEvent; import proj.zoie.api.DataProvider; import proj.zoie.api.Zoie; import proj.zoie.api.ZoieException; import proj.zoie.api.ZoieIndexReader; import proj.zoie.impl.indexing.StreamDataProvider; import proj.zoie.impl.indexing.ZoieConfig; import proj.zoie.mbean.DataProviderAdmin; import proj.zoie.mbean.DataProviderAdminMBean; import com.browseengine.bobo.api.BoboIndexReader; import com.senseidb.conf.SenseiSchema; import com.senseidb.gateway.SenseiGateway; import com.senseidb.jmx.JmxUtil; import com.senseidb.metrics.MetricsConstants; import com.senseidb.plugin.SenseiPluginRegistry; import com.senseidb.search.node.SenseiIndexingManager; import com.senseidb.search.plugin.PluggableSearchEngineManager; import com.senseidb.util.JSONUtil.FastJSONArray; import com.senseidb.util.JSONUtil.FastJSONObject; import com.yammer.metrics.Metrics; import com.yammer.metrics.core.Timer; import com.yammer.metrics.core.Meter; import com.yammer.metrics.core.MetricName; public class DefaultStreamingIndexingManager implements SenseiIndexingManager<JSONObject> { private static final Logger logger = Logger.getLogger(DefaultStreamingIndexingManager.class); public static final String CONFIG_PREFIX = "sensei.index.manager.default"; private static final String MAX_PARTITION_ID = "maxpartition.id"; private static final String EVTS_PER_MIN = "eventsPerMin"; private static final String BATCH_SIZE = "batchSize"; private static final String EVENT_CREATED_TIMESTAMP_FIELD = "eventCreatedTimestampField"; private Meter _providerBatchSizeMeter; private Meter _eventMeter; private Meter _updateBatchSizeMeter; private Meter _indexSizeMeter; private long _lastMeasureTime; private static final long MEASURE_INTERVAL = 1000 * 60; // 1 minute private Timer _indexingLatencyTimer; private StreamDataProvider<JSONObject> _dataProvider; private String _oldestSinceKey; private String _eventCreatedTimestampField; private final SenseiSchema _senseiSchema; private final Configuration _myconfig; private Map<Integer, Zoie<BoboIndexReader, JSONObject>> _zoieSystemMap; private final LinkedHashMap<Integer, Collection<DataEvent<JSONObject>>> _dataCollectorMap; private final SenseiGateway<?> _gateway; private final ShardingStrategy _shardingStrategy; private final Comparator<String> _versionComparator; private final PluggableSearchEngineManager pluggableSearchEngineManager; private SenseiPluginRegistry pluginRegistry; public DefaultStreamingIndexingManager(SenseiSchema schema,Configuration senseiConfig, SenseiPluginRegistry pluginRegistry, SenseiGateway<?> gateway, ShardingStrategy shardingStrategy, PluggableSearchEngineManager pluggableSearchEngineManager){ _dataProvider = null; _myconfig = senseiConfig.subset(CONFIG_PREFIX); _eventCreatedTimestampField = _myconfig.getString(EVENT_CREATED_TIMESTAMP_FIELD, null); this.pluginRegistry = pluginRegistry; _oldestSinceKey = null; _senseiSchema = schema; _zoieSystemMap = null; _dataCollectorMap = new LinkedHashMap<Integer, Collection<DataEvent<JSONObject>>>(); _gateway = gateway; this.pluggableSearchEngineManager = pluggableSearchEngineManager; if (_gateway!=null){ _versionComparator = _gateway.getVersionComparator(); } else{ _versionComparator = ZoieConfig.DEFAULT_VERSION_COMPARATOR; } _shardingStrategy = shardingStrategy; } public void updateOldestSinceKey(String sinceKey){ if(_oldestSinceKey == null){ _oldestSinceKey = sinceKey; if (_dataProvider != null) { _dataProvider.setStartingOffset(_oldestSinceKey); } } else if(sinceKey!=null && _versionComparator.compare(sinceKey, _oldestSinceKey) <0 ){ _oldestSinceKey = sinceKey; if (_dataProvider != null) { _dataProvider.setStartingOffset(_oldestSinceKey); } } } private Meter registerMeter(String name, String eventType) { return MetricFactory.newMeter(new MetricName(MetricsConstants.Domain, "meter", name, "indexing-manager"), eventType, TimeUnit.SECONDS); } private Timer registerTimer(String name) { return MetricFactory.newTimer(new MetricName(MetricsConstants.Domain, "timer", name, "indexing-manager"), TimeUnit.MILLISECONDS, TimeUnit.SECONDS); } @Override public void initialize( Map<Integer, Zoie<BoboIndexReader, JSONObject>> zoieSystemMap) throws Exception { int maxPartitionId = _myconfig.getInt(MAX_PARTITION_ID)+1; String uidField = _senseiSchema.getUidField(); DataDispatcher consumer = new DataDispatcher(maxPartitionId,uidField); _zoieSystemMap = zoieSystemMap; Iterator<Integer> it = zoieSystemMap.keySet().iterator(); while(it.hasNext()){ int part = it.next(); Zoie<BoboIndexReader,JSONObject> zoie = zoieSystemMap.get(part); updateOldestSinceKey(zoie.getVersion()); _dataCollectorMap.put(part, new LinkedList<DataEvent<JSONObject>>()); } if (pluggableSearchEngineManager != null && pluggableSearchEngineManager.getOldestVersion() != null && !("".equals(pluggableSearchEngineManager.getOldestVersion()))) { updateOldestSinceKey(pluggableSearchEngineManager.getOldestVersion()); } _dataProvider = buildDataProvider(); if (_dataProvider!=null){ _dataProvider.setDataConsumer(consumer); } } @Override public DataProvider<JSONObject> getDataProvider() { return _dataProvider; } private StreamDataProvider<JSONObject> buildDataProvider() throws ConfigurationException{ StreamDataProvider<JSONObject> dataProvider = null; if (_gateway!=null){ try{ dataProvider = _gateway.buildDataProvider(_senseiSchema, _oldestSinceKey,_shardingStrategy,_zoieSystemMap.keySet()); long maxEventsPerMin = _myconfig.getLong(EVTS_PER_MIN,40000); dataProvider.setMaxEventsPerMinute(maxEventsPerMin); int batchSize = _myconfig.getInt(BATCH_SIZE,1); dataProvider.setBatchSize(batchSize); } catch(Exception e){ throw new ConfigurationException(e.getMessage(),e); } try { StandardMBean dataProviderMbean = new StandardMBean(new DataProviderAdmin(dataProvider), DataProviderAdminMBean.class); JmxUtil.registerMBean(dataProviderMbean, "indexing-manager","stream-data-provider"); } catch (Exception e) { logger.error(e.getMessage(),e); } } return dataProvider; } @Override public void shutdown() { if (pluggableSearchEngineManager != null) { pluggableSearchEngineManager.close(); } if (_dataProvider!=null){ _dataProvider.stop(); } if (_providerBatchSizeMeter != null) { _providerBatchSizeMeter.stop(); } if (_updateBatchSizeMeter != null) { _updateBatchSizeMeter.stop(); } if (_indexSizeMeter != null) { _indexSizeMeter.stop(); } if (_eventMeter != null) { _eventMeter.stop(); } } @Override public void start() throws Exception { if (_dataProvider==null){ logger.warn("no data stream configured, no indexing events are flowing."); } else{ _providerBatchSizeMeter = registerMeter("provider-batch-size", "provide-batch-size"); _updateBatchSizeMeter = registerMeter("update-batch-size", "update-batch-size"); _eventMeter = registerMeter("indexing-events", "indexing-events"); _indexSizeMeter = registerMeter("index-size", "index-size"); _indexingLatencyTimer = registerTimer("indexing-latency"); _dataProvider.start(); } } @Override public void syncWithVersion(long timeToWait, String version) throws ZoieException { Iterator<Integer> itr = _zoieSystemMap.keySet().iterator(); while (itr.hasNext()) { int part_num = itr.next(); Zoie<BoboIndexReader,JSONObject> dataConsumer = _zoieSystemMap.get(part_num); if (dataConsumer != null) { dataConsumer.syncWithVersion(timeToWait, version); } } } private class DataDispatcher implements DataConsumer<JSONObject> { int _maxPartitionId; // the total number of partitions over all the nodes; private final String _uidField; private volatile String _currentVersion; public DataDispatcher(int maxPartitionId,String uidField){ _maxPartitionId = maxPartitionId; _uidField = uidField; _currentVersion = null; } private void reportIndexingLatency(JSONObject obj) { if (_eventCreatedTimestampField != null) { long createdTimestamp = obj.optLong(_eventCreatedTimestampField); if (createdTimestamp > 0) { _indexingLatencyTimer.update(System.currentTimeMillis() - createdTimestamp, TimeUnit.MILLISECONDS); } } } private JSONObject rewriteData(JSONObject obj, int partNum) { String type = obj.optString(SenseiSchema.EVENT_TYPE_FIELD, null); JSONObject event = obj.optJSONObject(SenseiSchema.EVENT_FIELD); if (event == null) event = obj; else if (type != null) { try { event.put(SenseiSchema.EVENT_TYPE_FIELD, type); } catch(Exception e) { logger.error("Should never happen", e); } } reportIndexingLatency(event); if (SenseiSchema.EVENT_TYPE_UPDATE.equalsIgnoreCase(type)) { Zoie<BoboIndexReader, JSONObject> zoie = _zoieSystemMap.get(partNum); List<ZoieIndexReader<BoboIndexReader>> readers; try { readers = zoie.getIndexReaders(); } catch(Exception e) { logger.error(e.getMessage(), e); return null; } if (readers == null) { logger.error("Cannot found original doc for and update event: " + obj); return null; } try { byte[] src = null; long uid = Long.parseLong(event.getString(_senseiSchema.getUidField())); for (ZoieIndexReader<BoboIndexReader> reader : readers) { src = reader.getStoredValue(uid); if (src != null) break; } byte[] data = null; if (_senseiSchema.isCompressSrcData()) data = DefaultJsonSchemaInterpreter.decompress(src); else data = src; if (data == null) { logger.error("Cannot found original doc for and update event: " + obj); return null; } JSONObject newEvent = new FastJSONObject(new String(data, "UTF-8")); Iterator<String> keys = event.keys(); while(keys.hasNext()) { String key = keys.next(); newEvent.put(key, event.get(key)); } event = newEvent; } catch (Exception e) { logger.error(e.getMessage(), e); return null; } finally { zoie.returnIndexReaders(readers); } } return event; } @Override public void consume(Collection<proj.zoie.api.DataConsumer.DataEvent<JSONObject>> data) throws ZoieException { _updateBatchSizeMeter.mark(data.size()); _providerBatchSizeMeter.mark(_dataProvider.getBatchSize()); _eventMeter.mark(_dataProvider.getEventCount()); try{ for(DataEvent<JSONObject> dataEvt : data){ JSONObject obj = dataEvt.getData(); if (obj == null) // Just ignore this event. continue; String version = dataEvt.getVersion(); _currentVersion = (_versionComparator.compare(_currentVersion, version) < 0) ? version : _currentVersion; if (pluggableSearchEngineManager != null && pluggableSearchEngineManager.acceptEventsForAllPartitions()) { obj = pluggableSearchEngineManager.update(obj, _currentVersion); } int routeToPart = _shardingStrategy.caculateShard(_maxPartitionId, obj); Collection<DataEvent<JSONObject>> partDataSet = _dataCollectorMap.get(routeToPart); if (partDataSet != null) { JSONObject rewrited = obj; if (pluggableSearchEngineManager != null && !pluggableSearchEngineManager.acceptEventsForAllPartitions()) { rewrited = pluggableSearchEngineManager.update(obj, dataEvt.getVersion()); } rewrited = rewriteData(obj, routeToPart); if (rewrited != null) { if (rewrited != obj) dataEvt = new DataEvent<JSONObject>(rewrited, dataEvt.getVersion(), dataEvt.getWeight()); partDataSet.add(dataEvt); } } } long indexSize = 0; long now = System.currentTimeMillis(); boolean measureIndexSize = now - _lastMeasureTime > MEASURE_INTERVAL ? true : false; _lastMeasureTime = now; Iterator<Integer> it = _zoieSystemMap.keySet().iterator(); while(it.hasNext()){ int part_num = it.next(); Zoie<BoboIndexReader,JSONObject> dataConsumer = _zoieSystemMap.get(part_num); if (dataConsumer!=null){ LinkedList<DataEvent<JSONObject>> partDataSet = (LinkedList<DataEvent<JSONObject>>) _dataCollectorMap.get(part_num); if (partDataSet != null) { if (partDataSet.size() == 0) { JSONObject markerObj = new FastJSONObject(); //markerObj.put(_senseiSchema.getSkipField(), "true"); markerObj.put(SenseiSchema.EVENT_TYPE_FIELD, SenseiSchema.EVENT_TYPE_SKIP); markerObj.put(_uidField, 0L); // Add a dummy uid partDataSet.add(new DataEvent<JSONObject>(markerObj, _currentVersion)); } else if (_currentVersion != null && !_currentVersion.equals(partDataSet.getLast().getVersion())) { DataEvent<JSONObject> last = partDataSet.pollLast(); partDataSet.add(new DataEvent<JSONObject>(last.getData(), _currentVersion, last.getWeight())); } dataConsumer.consume(partDataSet); } if (measureIndexSize) indexSize += dataConsumer.getAdminMBean().getDiskIndexSizeBytes(); } _dataCollectorMap.put(part_num, new LinkedList<DataEvent<JSONObject>>()); if (measureIndexSize) _indexSizeMeter.mark(indexSize); } } catch(Exception e){ throw new ZoieException(e.getMessage(),e); } } @Override public String getVersion() { return _currentVersion; } @Override public Comparator<String> getVersionComparator() { return _versionComparator; } } }