package org.lilyproject.indexer.hbase.mapper;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Optional;
import com.google.common.base.Predicate;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Collections2;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import com.ngdata.hbaseindexer.Configurable;
import com.ngdata.hbaseindexer.parse.SolrUpdateWriter;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrInputDocument;
import org.lilyproject.client.LilyClient;
import org.lilyproject.hbaseindex.IndexNotFoundException;
import org.lilyproject.indexer.derefmap.DependantRecordIdsIterator;
import org.lilyproject.indexer.derefmap.DependencyEntry;
import org.lilyproject.indexer.derefmap.DerefMap;
import org.lilyproject.indexer.derefmap.DerefMapHbaseImpl;
import org.lilyproject.indexer.engine.SolrDocumentBuilder;
import org.lilyproject.indexer.engine.ValueEvaluator;
import org.lilyproject.indexer.model.api.LResultToSolrMapper;
import org.lilyproject.indexer.model.indexerconf.DynamicFieldNameTemplateResolver;
import org.lilyproject.indexer.model.indexerconf.DynamicIndexField;
import org.lilyproject.indexer.model.indexerconf.IndexCase;
import org.lilyproject.indexer.model.indexerconf.IndexField;
import org.lilyproject.indexer.model.indexerconf.IndexerConfException;
import org.lilyproject.indexer.model.indexerconf.LilyIndexerConf;
import org.lilyproject.indexer.model.indexerconf.MappingNode;
import org.lilyproject.indexer.model.util.IndexRecordFilterUtil;
import org.lilyproject.repository.api.AbsoluteRecordId;
import org.lilyproject.repository.api.FieldType;
import org.lilyproject.repository.api.FieldTypeNotFoundException;
import org.lilyproject.repository.api.IdGenerator;
import org.lilyproject.repository.api.IdRecord;
import org.lilyproject.repository.api.LRepository;
import org.lilyproject.repository.api.LTable;
import org.lilyproject.repository.api.Record;
import org.lilyproject.repository.api.RecordId;
import org.lilyproject.repository.api.RecordNotFoundException;
import org.lilyproject.repository.api.RepositoryException;
import org.lilyproject.repository.api.RepositoryManager;
import org.lilyproject.repository.api.SchemaId;
import org.lilyproject.repository.api.Scope;
import org.lilyproject.repository.api.ValueType;
import org.lilyproject.repository.api.VersionNotFoundException;
import org.lilyproject.repository.impl.RecordDecoder;
import org.lilyproject.repository.impl.id.AbsoluteRecordIdImpl;
import org.lilyproject.repository.impl.id.SchemaIdImpl;
import org.lilyproject.sep.LilyEventPublisherManager;
import org.lilyproject.util.Pair;
import org.lilyproject.util.hbase.HBaseTableFactory;
import org.lilyproject.util.hbase.HBaseTableFactoryImpl;
import org.lilyproject.util.hbase.LilyHBaseSchema;
import org.lilyproject.util.hbase.RepoAndTableUtil;
import org.lilyproject.util.io.Closer;
import org.lilyproject.util.repo.RecordEvent;
import org.lilyproject.util.repo.RecordEventHelper;
import org.lilyproject.util.repo.VTaggedRecord;
import org.lilyproject.util.zookeeper.ZooKeeperItf;

import static org.lilyproject.util.repo.RecordEvent.Type.CREATE;
import static org.lilyproject.util.repo.RecordEvent.Type.DELETE;
import static org.lilyproject.util.repo.RecordEvent.Type.INDEX;

/**
 * Translates HBase {@link Result}s holding Lily records (and the {@link RecordEvent} payload stored
 * with them) into Solr adds and deletes written to a {@link SolrUpdateWriter}.
 *
 * <p>Besides indexing the record itself, the mapper maintains the {@link DerefMap} (when the indexer
 * configuration contains dereference expressions and the deref map is enabled) and publishes
 * reindex events for records whose denormalized data may have become stale.
 */
public class LilyResultToSolrMapper implements LResultToSolrMapper, Configurable {
    private final Log log = LogFactory.getLog(getClass());

    private String repositoryName;
    private String indexName;
    private ZooKeeperItf zooKeeperItf;
    private RecordDecoder recordDecoder;
    private RepositoryManager repositoryManager;
    private LRepository repository;
    private IdGenerator idGenerator;
    private LilyIndexerConf lilyIndexerConf;
    private ValueEvaluator valueEvaluator;
    private DerefMap derefMap;
    private LilyEventPublisherManager eventPublisherManager;
    private String subscriptionId;
    private boolean enableDerefMap = true;

    /**
     * Creates an unconfigured mapper; {@link #configure(Map)} (or {@link #init()}) must be called
     * before it can map results.
     *
     * @param indexName name of the index, also used to derive the subscription id
     * @param lilyIndexerConf indexer configuration describing what to index
     * @param repositoryManager source of the {@link LRepository} to read from
     * @param zooKeeperItf ZooKeeper handle used to obtain the HBase configuration
     */
    public LilyResultToSolrMapper(String indexName, LilyIndexerConf lilyIndexerConf,
            RepositoryManager repositoryManager, ZooKeeperItf zooKeeperItf) {
        setIndexName(indexName);
        this.repositoryManager = repositoryManager;
        this.lilyIndexerConf = lilyIndexerConf;
        this.zooKeeperItf = zooKeeperItf;
    }

    /**
     * Reads the repository name and the deref-map switch from {@code params} and initializes the mapper.
     *
     * <p>Failures are logged (with stack trace) rather than propagated, so a failed configuration
     * leaves the mapper uninitialized.
     *
     * @param params configuration parameters; {@link LResultToSolrMapper#REPO_KEY} defaults to the
     *        default repository and {@link LResultToSolrMapper#ENABLE_DEREFMAP_KEY} defaults to {@code true}
     */
    @Override
    public void configure(Map<String, String> params) {
        try {
            String repoParam = Optional.fromNullable(params.get(LResultToSolrMapper.REPO_KEY))
                    .or(RepoAndTableUtil.DEFAULT_REPOSITORY);
            setRepositoryName(repoParam);
            enableDerefMap = Boolean.parseBoolean(
                    Optional.fromNullable(params.get(LResultToSolrMapper.ENABLE_DEREFMAP_KEY)).or("true"));
            init();
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the calling thread instead of silently consuming it.
            Thread.currentThread().interrupt();
            log.error("Interrupted while configuring mapper for index " + indexName, e);
        } catch (Exception e) {
            // Pass the exception as the throwable argument so the stack trace ends up in the log.
            log.error("Failed to configure mapper for index " + indexName, e);
        }
    }

    /**
     * Resolves the repository and builds the helper objects used while mapping. The deref map and the
     * event publisher are only created when the configuration contains deref expressions and the
     * deref map is enabled.
     */
    protected void init() throws RepositoryException, InterruptedException, IndexerConfException,
            IndexNotFoundException, IOException {
        repository = repositoryManager.getRepository(
                repositoryName != null ? repositoryName : RepoAndTableUtil.DEFAULT_REPOSITORY);
        idGenerator = repository.getIdGenerator();
        valueEvaluator = new ValueEvaluator(lilyIndexerConf);
        recordDecoder = new RecordDecoder(repository.getTypeManager(), repository.getIdGenerator(),
                repository.getRecordFactory());

        if (lilyIndexerConf.containsDerefExpressions() && enableDerefMap) {
            HBaseTableFactory tableFactory =
                    new HBaseTableFactoryImpl(LilyClient.getHBaseConfiguration(zooKeeperItf));
            eventPublisherManager = new LilyEventPublisherManager(tableFactory);
            derefMap = DerefMapHbaseImpl.create(repository.getRepositoryName(), indexName,
                    LilyClient.getHBaseConfiguration(zooKeeperItf), null, repository.getIdGenerator());
        }
    }

    /**
     * Closes the event publisher manager, the repository, the repository manager and the ZooKeeper handle.
     */
    public void stop() {
        Closer.close(eventPublisherManager);
        Closer.close(repository);
        Closer.close(repositoryManager);
        Closer.close(zooKeeperItf);
    }

    /**
     * Tells whether {@code result} already carries everything needed for mapping.
     *
     * @return {@code true} only when the payload column is present and decodes to an {@code INDEX}
     *         event; {@code false} otherwise, including when the payload cannot be decoded
     */
    @Override
    public boolean containsRequiredData(Result result) {
        try {
            // If we have a request to reindex then all the information we need should be in the payload
            if (result.containsColumn(LilyHBaseSchema.RecordCf.DATA.bytes,
                    LilyHBaseSchema.RecordColumn.PAYLOAD.bytes)) {
                RecordEvent event = new RecordEvent(
                        result.getFamilyMap(LilyHBaseSchema.RecordCf.DATA.bytes)
                                .get(LilyHBaseSchema.RecordColumn.PAYLOAD.bytes), idGenerator);
                return INDEX.equals(event.getType());
            }
        } catch (IOException e) {
            log.error("Unable to decode record event", e);
        }
        // in other cases force a get for safe measure.
        return false;
    }

    /**
     * Decides whether a key-value can influence the index.
     *
     * <p>Only the data column family is considered. A data-prefixed qualifier is relevant when its
     * field type is the target of a static index field or matches a dynamic index field; a
     * system-prefixed qualifier is relevant when it equals one of the known record columns.
     *
     * @return {@code true} when the key-value is relevant, {@code false} otherwise (including for an
     *         empty qualifier or a field type that cannot be resolved)
     */
    @Override
    public boolean isRelevantKV(final KeyValue kv) {
        if (!Arrays.equals(LilyHBaseSchema.RecordCf.DATA.bytes, kv.getFamily())) {
            return false;
        }

        final byte[] qualifier = kv.getQualifier();
        if (qualifier == null || qualifier.length == 0) {
            // Without a prefix byte this cannot be one of the columns we know about.
            return false;
        }

        if (qualifier[0] == LilyHBaseSchema.RecordColumn.DATA_PREFIX) {
            FieldType fieldType = resolveFieldType(new SchemaIdImpl(Bytes.tail(qualifier, qualifier.length - 1)));
            if (fieldType == null) {
                return false;
            }

            StaticFieldTypeFinder finder = new StaticFieldTypeFinder(fieldType);
            lilyIndexerConf.getIndexFields().visitAll(finder);
            if (finder.foundRelevant) {
                return true;
            }

            for (DynamicIndexField indexField : lilyIndexerConf.getDynamicFields()) {
                if (indexField.matches(fieldType).match) {
                    return true;
                }
            }
            return false;
        }

        if (qualifier[0] == LilyHBaseSchema.RecordColumn.SYSTEM_PREFIX) {
            for (LilyHBaseSchema.RecordColumn column : LilyHBaseSchema.RecordColumn.values()) {
                if (Arrays.equals(column.bytes, qualifier)) {
                    return true;
                }
            }
            return false;
        }

        // is this a lily key value?
        return false;
    }

    /**
     * Looks up a field type by id for {@link #isRelevantKV(KeyValue)}.
     *
     * @return the field type, or {@code null} when it does not exist or could not be loaded
     */
    private FieldType resolveFieldType(SchemaId fieldId) {
        try {
            return repository.getTypeManager().getFieldTypeById(fieldId);
        } catch (IllegalArgumentException e) {
            // no qualifier
            return null;
        } catch (FieldTypeNotFoundException e) {
            // field doesn't exist
            return null;
        } catch (RepositoryException e) {
            log.warn("Unable to load field type " + fieldId, e);
            return null;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.warn("Interrupted while loading field type " + fieldId, e);
            return null;
        }
    }

    /** Returns a plain {@link Get} for the whole row. */
    @Override
    public Get getGet(byte[] row) {
        return new Get(row);
    }

    /**
     * Decodes the record and its event from {@code result} and applies the matching index updates.
     *
     * <p>The event type selects the handling: {@code INDEX} reindexes the requested vtags,
     * {@code DELETE} removes the record from the index, anything else is treated as a create or
     * update. Exceptions are logged and not propagated.
     */
    @Override
    public void map(Result result, SolrUpdateWriter solrUpdateWriter) {
        try {
            Record record = recordDecoder.decodeRecord(result);
            RecordEvent event = new RecordEvent(
                    result.getFamilyMap(LilyHBaseSchema.RecordCf.DATA.bytes)
                            .get(LilyHBaseSchema.RecordColumn.PAYLOAD.bytes), idGenerator);

            // Resolve the table name once and use that single value everywhere below, so that index
            // ids, index-case lookups and deref-map keys all agree when the event carries no table name.
            String eventTableName = event.getTableName();
            String tableName = eventTableName != null ? eventTableName : LilyHBaseSchema.Table.RECORD.name;
            LTable table = repository.getTable(tableName);

            if (event.getType().equals(INDEX)) {
                handleIndexEvent(record, event, table, solrUpdateWriter);
            } else if (event.getType().equals(DELETE)) {
                handleDeleteEvent(record, tableName, solrUpdateWriter);
            } else {
                // CREATE or UPDATE
                handleCreateOrUpdateEvent(record, event, table, tableName, solrUpdateWriter);
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.warn("Something went wrong while indexing", e);
        } catch (Exception e) {
            log.warn("Something went wrong while indexing", e);
        }
    }

    /** Handles an explicit reindex request: indexes the requested vtags that apply to and exist on the record. */
    private void handleIndexEvent(Record record, RecordEvent event, LTable table,
            SolrUpdateWriter solrUpdateWriter) throws Exception {
        if (log.isDebugEnabled()) {
            log.debug(String.format("Record %1$s: reindex requested for these vtags: %2$s", record.getId(),
                    vtagSetToNameString(event.getVtagsToIndex())));
        }

        RecordEventHelper eventHelper = new RecordEventHelper(event, null, repository.getTypeManager());
        VTaggedRecord vtRecord = new VTaggedRecord(record.getId(), eventHelper, table, repository);

        IndexCase indexCase = lilyIndexerConf.getIndexCase(table.getTableName(), vtRecord.getRecord());
        if (indexCase == null) {
            return;
        }

        Set<SchemaId> vtagsToIndex = event.getVtagsToIndex();
        // Only keep vtags which should be indexed
        vtagsToIndex.retainAll(indexCase.getVersionTags());
        // Only keep vtags which exist on the record
        vtagsToIndex.retainAll(vtRecord.getVTags().keySet());
        if (log.isDebugEnabled()) {
            log.debug(vtagsToIndex.toString());
        }

        index(table, vtRecord, vtagsToIndex, solrUpdateWriter);
    }

    /** Handles a delete event: removes all index entries of the record and refreshes denormalized data. */
    private void handleDeleteEvent(Record record, String tableName, SolrUpdateWriter solrUpdateWriter)
            throws Exception {
        solrUpdateWriter.deleteByQuery(lilyIdQuery(record.getId()));

        if (log.isDebugEnabled()) {
            log.debug(String.format("Record %1$s: deleted from index (if present) because of "
                    + "delete record event", record.getId()));
        }

        // After this we can go to update denormalized data
        if (derefMap != null) {
            updateDenormalizedData(repository.getRepositoryName(), tableName, record.getId(), null, null);
        }
    }

    /**
     * Handles a create or update event: removes index entries for vtags dropped by an index-case
     * change, (re)indexes the record when needed and refreshes denormalized data.
     */
    private void handleCreateOrUpdateEvent(Record record, RecordEvent event, LTable table, String tableName,
            SolrUpdateWriter solrUpdateWriter) throws Exception {
        // Based on the partial old/new record state stored in the RecordEvent, determine whether we
        // now match a different IndexCase than before, and if so, if the new case would have less vtags
        // than the old one, perform the necessary deletes on Solr.
        Pair<Record, Record> oldAndNewRecords =
                IndexRecordFilterUtil.getOldAndNewRecordForRecordFilterEvaluation(record.getId(), event, repository);
        Record oldRecord = oldAndNewRecords.getV1();
        Record newRecord = oldAndNewRecords.getV2();
        IndexCase caseOld = oldRecord != null ? lilyIndexerConf.getIndexCase(tableName, oldRecord) : null;
        IndexCase caseNew = newRecord != null ? lilyIndexerConf.getIndexCase(tableName, newRecord) : null;

        // A non-null case implies the corresponding record was non-null.
        if (caseOld != null && caseNew != null) {
            Set<SchemaId> droppedVtags = new HashSet<SchemaId>(caseOld.getVersionTags());
            droppedVtags.removeAll(caseNew.getVersionTags());
            for (SchemaId vtag : droppedVtags) {
                solrUpdateWriter.deleteById(getIndexId(tableName, record.getId(), vtag));
            }
        }

        // This is an optimization: an IndexCase with empty vtags list means that this record is
        // included in this index only to trigger updating of denormalized data.
        boolean doIndexing = true;
        if (caseNew != null) {
            doIndexing = caseNew.getVersionTags().size() > 0;
        } else if (caseOld != null) {
            // caseNew == null means either the record has been deleted, or means the record does
            // not match the recordFilter anymore. In either case, we only need to trigger update
            // of denormalized data (if the vtags list was empty on caseOld).
            doIndexing = caseOld.getVersionTags().size() > 0;
        }

        RecordEventHelper eventHelper = new RecordEventHelper(event, null, repository.getTypeManager());

        if (doIndexing) {
            VTaggedRecord vtRecord;
            try {
                // Read the vtags of the record. Note that while this algorithm is running, the record can
                // meanwhile undergo changes. However, we continuously work with the snapshot of the vtags
                // mappings read here. The processing of later events will bring the index up to date with
                // any new changes.
                vtRecord = new VTaggedRecord(record.getId(), eventHelper, table, repository);
            } catch (RecordNotFoundException e) {
                // The record has been deleted in the meantime.
                // For now, we do nothing, when the delete event is received the record will be removed
                // from the index (as well as update of denormalized data).
                // TODO: we should process all outstanding messages for the record (up to delete) in one go
                return;
            }

            handleRecordCreateUpdate(vtRecord, table, solrUpdateWriter);
        }

        if (derefMap != null) {
            updateDenormalizedData(repository.getRepositoryName(), tableName, record.getId(),
                    eventHelper.getUpdatedFieldsByScope(), eventHelper.getModifiedVTags());
        }
    }

    /** Builds the Solr query that selects every index entry of the given record. */
    private static String lilyIdQuery(RecordId recordId) {
        return "lily.id:" + ClientUtils.escapeQueryChars(recordId.toString());
    }

    /**
     * Finds the records that depend on {@code recordId} according to the deref map and publishes an
     * {@code INDEX} event for each of them.
     *
     * @param repo repository name used to select the event publisher
     * @param table table of the changed record
     * @param recordId id of the changed record
     * @param updatedFieldsByScope fields that changed, or {@code null} to look up dependants
     *        regardless of fields (used for deletes)
     * @param changedVTagFields vtags whose value changed, may be {@code null}
     */
    private void updateDenormalizedData(String repo, String table, RecordId recordId,
            Map<Scope, Set<FieldType>> updatedFieldsByScope, Set<SchemaId> changedVTagFields)
            throws RepositoryException, InterruptedException, IOException {

        Multimap<AbsoluteRecordId, SchemaId> referrersAndVTags = ArrayListMultimap.create();

        Set<SchemaId> allVTags = lilyIndexerConf.getVtags();

        if (log.isDebugEnabled()) {
            log.debug("Updating denormalized data for " + recordId + ", vtags: " + changedVTagFields);
        }

        // Neither of these depends on the vtag, so build them once instead of once per vtag.
        AbsoluteRecordId absRecordId = new AbsoluteRecordIdImpl(table, recordId);
        Set<SchemaId> updatedFields = null;
        if (updatedFieldsByScope != null) {
            updatedFields = new HashSet<SchemaId>();
            for (Set<FieldType> scopeFields : updatedFieldsByScope.values()) {
                updatedFields.addAll(toSchemaIds(scopeFields));
            }
        }

        // The reason to iterate over all vtags is because a field from a record without versions might be
        // dereferenced into multiple vtagged versions of another record, and we don't know what the [indexed]
        // vtags of that other record are.
        for (SchemaId vtag : allVTags) {
            boolean vtagChanged = changedVTagFields != null && changedVTagFields.contains(vtag);
            boolean regardlessOfFields = vtagChanged || updatedFields == null;

            DependantRecordIdsIterator dependants = null;
            try {
                if (regardlessOfFields) {
                    // changed vtags or delete: reindex regardless of fields
                    dependants = derefMap.findDependantsOf(absRecordId);
                } else {
                    // vtag didn't change, but some fields did change:
                    dependants = derefMap.findDependantsOf(absRecordId, updatedFields, vtag);
                }

                // Drain the iterator once; the same list feeds both the debug message and the multimap,
                // which avoids a second deref-map query made only for logging.
                List<AbsoluteRecordId> dependantIds = depIds(dependants);
                if (regardlessOfFields && log.isDebugEnabled()) {
                    log.debug("changed vtag: dependants of " + recordId + ": " + dependantIds);
                }
                for (AbsoluteRecordId dependant : dependantIds) {
                    referrersAndVTags.put(dependant, vtag);
                }
            } finally {
                // Release whatever the iterator holds on to, also when iteration fails half-way.
                Closer.close(dependants);
            }
        }

        if (log.isDebugEnabled()) {
            log.debug(String.format("Record %1$s: found %2$s records (times vtags) to be updated because they "
                    + "might contain outdated denormalized data." + " The records are: %3$s", recordId,
                    referrersAndVTags.keySet().size(), referrersAndVTags.keySet()));
        }

        //
        // Now add an index message to each of the found referrers, their actual indexing
        // will be triggered by the message queue.
        //
        for (AbsoluteRecordId referrer : referrersAndVTags.keySet()) {
            RecordEvent payload = new RecordEvent();
            payload.setTableName(referrer.getTable());
            payload.setType(INDEX);
            for (SchemaId vtag : referrersAndVTags.get(referrer)) {
                payload.addVTagToIndex(vtag);
            }
            RecordEvent.IndexRecordFilterData filterData = new RecordEvent.IndexRecordFilterData();
            filterData.setSubscriptionInclusions(ImmutableSet.of(this.subscriptionId));
            payload.setIndexRecordFilterData(filterData);

            try {
                eventPublisherManager.getEventPublisher(repo, referrer.getTable())
                        .publishEvent(referrer.getRecordId().toBytes(), payload.toJsonBytes());
            } catch (Exception e) {
                // We failed to put the message: this is pretty important since it means the record's index
                // won't get updated, therefore log as error, but after this we continue with the next one.
                log.error("Error putting index message on queue of record " + referrer, e);
            }
        }
    }

    /** Returns the ids of the given field types as a new set. */
    private Set<SchemaId> toSchemaIds(Set<FieldType> fieldTypes) {
        return new HashSet<SchemaId>(Collections2.transform(fieldTypes, new Function<FieldType, SchemaId>() {
            @Override
            public SchemaId apply(FieldType input) {
                return input.getId();
            }
        }));
    }

    /** Drains the iterator into a list; the caller remains responsible for closing the iterator. */
    private List<AbsoluteRecordId> depIds(DependantRecordIdsIterator dependants) throws IOException {
        List<AbsoluteRecordId> recordIds = Lists.newArrayList();
        while (dependants.hasNext()) {
            recordIds.add(dependants.next());
        }
        return recordIds;
    }

    @Override
    public LRepository getRepository() {
        return this.repository;
    }

    /** Builds the Solr document id for one vtag of a record: {@code table-recordId-vtag}. */
    protected static String getIndexId(String table, RecordId recordId, SchemaId vtag) {
        return table + "-" + recordId + "-" + vtag.toString();
    }

    /**
     * Evaluates the name template of a dynamic index field for a matched field type.
     *
     * <p>Type names are lower-cased with {@link Locale#ROOT} so the generated Solr field names do not
     * depend on the JVM's default locale.
     */
    private String evalName(DynamicIndexField dynField, DynamicIndexField.DynamicIndexFieldMatch match,
            FieldType fieldType) {
        // Calculate the name, then add the value
        Map<String, Object> nameContext = new HashMap<String, Object>();
        nameContext.put("namespace", fieldType.getName().getNamespace());
        nameContext.put("name", fieldType.getName().getName());

        ValueType valueType = fieldType.getValueType();
        nameContext.put("type", formatValueTypeName(valueType));
        nameContext.put("baseType", valueType.getBaseName().toLowerCase(Locale.ROOT));

        // If there's no nested value type, revert to the current value type. This is practical for dynamic
        // fields that match on types like "*,LIST<+>".
        ValueType nestedValueType = valueType.getNestedValueType() != null
                ? valueType.getNestedValueType() : valueType;
        nameContext.put("nestedType", formatValueTypeName(nestedValueType));
        nameContext.put("nestedBaseType", nestedValueType.getBaseName().toLowerCase(Locale.ROOT));

        nameContext.put("deepestNestedBaseType",
                valueType.getDeepestValueType().getBaseName().toLowerCase(Locale.ROOT));

        boolean isList = valueType.getBaseName().equals("LIST");
        nameContext.put("multiValue", isList);
        nameContext.put("list", isList);

        if (match.nameMatch != null) {
            nameContext.put("nameMatch", match.nameMatch);
        }

        if (match.namespaceMatch != null) {
            nameContext.put("namespaceMatch", match.namespaceMatch);
        }

        return dynField.getNameTemplate().format(new DynamicFieldNameTemplateResolver(nameContext));
    }

    /** Joins the lower-cased base names of a value type and its nested value types with underscores. */
    private String formatValueTypeName(ValueType valueType) {
        StringBuilder builder = new StringBuilder();

        while (valueType != null) {
            if (builder.length() > 0) {
                builder.append("_");
            }
            builder.append(valueType.getBaseName().toLowerCase(Locale.ROOT));
            valueType = valueType.getNestedValueType();
        }

        return builder.toString();
    }

    /** Stores the dependencies collected while building a Solr document in the deref map, if there is one. */
    private void processDependencies(String table, Record record, SchemaId vtag,
            SolrDocumentBuilder solrDocumentBuilder)
            throws IOException, RepositoryException, InterruptedException {
        if (log.isDebugEnabled()) {
            log.debug("Constructed Solr doc: " + solrDocumentBuilder.build());
            log.debug("Updating dependencies for " + record.getId());
            logDependencies(record.getId(), solrDocumentBuilder.getDependencies());
        }

        if (derefMap != null) {
            derefMap.updateDependants(new AbsoluteRecordIdImpl(table, record.getId()), vtag,
                    solrDocumentBuilder.getDependencies());
        }
    }

    /** Writes one debug line per dependency entry. */
    private void logDependencies(RecordId recordId, Map<DependencyEntry, Set<SchemaId>> dependencies) {
        for (Map.Entry<DependencyEntry, Set<SchemaId>> dependency : dependencies.entrySet()) {
            DependencyEntry entry = dependency.getKey();
            StringBuilder line = new StringBuilder();
            line.append(recordId).append(" | ")
                    .append(entry.getDependency())
                    .append("|").append(entry.getMoreDimensionedVariants())
                    .append("|").append(dependency.getValue());
            log.debug(line.toString());
        }
    }

    /**
     * Determines which vtags of a created or updated record need (re)indexing or removal from the
     * index, performs the removals and then indexes the remaining vtags.
     */
    private void handleRecordCreateUpdate(VTaggedRecord vtRecord, LTable table, SolrUpdateWriter solrUpdateWriter)
            throws Exception {

        RecordEvent event = vtRecord.getRecordEvent();
        Map<Long, Set<SchemaId>> vtagsByVersion = vtRecord.getVTagsByVersion();

        // Determine the IndexCase:
        //  The indexing of all versions is determined by the record type of the non-versioned scope.
        //  This makes that the indexing behavior of all versions is equal, and can be changed (the
        //  record type of the versioned scope is immutable).
        IndexCase indexCase = lilyIndexerConf.getIndexCase(table.getTableName(), vtRecord.getRecord());

        if (indexCase == null) {
            // The record should not be indexed

            // The record might however have been previously indexed, therefore we need to perform a delete
            // (note that IndexAwareMQFeeder only sends events to us if either the current or old record
            //  state matched the inclusion rules)
            solrUpdateWriter.deleteByQuery(lilyIdQuery(vtRecord.getId()));

            if (log.isDebugEnabled()) {
                log.debug(String.format("Record %1$s: no index case found, record deleted from index.",
                        vtRecord.getId()));
            }

            // The data from this record might be denormalized into other index entries
            // After this we go to update denormalized data
            return;
        }

        final Set<SchemaId> vtagsToIndex;

        if (event.getType().equals(CREATE)) {
            // New record: just index everything
            vtagsToIndex = indexableVtags(indexCase, vtRecord);
            // After this we go to the indexing

        } else if (event.getRecordTypeChanged()) {
            // When the record type changes, the rules to index (= the IndexCase) change

            // Delete everything: we do not know the previous record type, so we do not know what
            // version tags were indexed, so we simply delete everything
            solrUpdateWriter.deleteByQuery(lilyIdQuery(vtRecord.getId()));

            if (log.isDebugEnabled()) {
                log.debug(String.format("Record %1$s: deleted existing entries from index (if present) "
                        + "because of record type change", vtRecord.getId()));
            }

            // Reindex all needed vtags
            vtagsToIndex = indexableVtags(indexCase, vtRecord);

            // After this we go to the indexing
        } else { // a normal update

            //
            // Handle changes to non-versioned fields
            //
            if (lilyIndexerConf.changesAffectIndex(vtRecord, Scope.NON_VERSIONED)) {
                vtagsToIndex = indexableVtags(indexCase, vtRecord);
                // After this we go to the treatment of changed vtag fields
                if (log.isDebugEnabled()) {
                    log.debug(
                            String.format("Record %1$s: non-versioned fields changed, will reindex all vtags.",
                                    vtRecord.getId()));
                }
            } else {
                vtagsToIndex = new HashSet<SchemaId>();
            }

            //
            // Handle changes to versioned(-mutable) fields
            //
            // If there were non-versioned fields changed, then we already reindex all versions
            // so this can be skipped.
            //
            // In the case of newly created versions that should be indexed: this will often be
            // accompanied by corresponding changes to vtag fields, which are handled next, and in which case
            // it would work as well if this code would not be here.
            //
            if (vtagsToIndex.isEmpty() && (event.getVersionCreated() != -1 || event.getVersionUpdated() != -1)) {
                if (lilyIndexerConf.changesAffectIndex(vtRecord, Scope.VERSIONED)
                        || lilyIndexerConf.changesAffectIndex(vtRecord, Scope.VERSIONED_MUTABLE)) {

                    long version =
                            event.getVersionCreated() != -1 ? event.getVersionCreated() : event.getVersionUpdated();
                    if (vtagsByVersion.containsKey(version)) {
                        Set<SchemaId> tmp = new HashSet<SchemaId>(indexCase.getVersionTags());
                        tmp.retainAll(vtagsByVersion.get(version));
                        vtagsToIndex.addAll(tmp);

                        if (log.isDebugEnabled()) {
                            log.debug(String.format("Record %1$s: versioned(-mutable) fields changed, will "
                                    + "index for all tags of modified version %2$s that require indexing: %3$s",
                                    vtRecord.getId(), version, Joiner.on(",").join(vtagsToIndex)));
                        }
                    }
                }
            }

            //
            // Handle changes to vtag fields themselves
            //
            Map<SchemaId, Long> vtags = vtRecord.getVTags();
            // Work on a copy: the set belongs to the event helper, which the caller reads again afterwards
            // when updating denormalized data.
            Set<SchemaId> changedVTagFields =
                    new HashSet<SchemaId>(vtRecord.getRecordEventHelper().getModifiedVTags());
            // Remove the vtags which are going to be reindexed anyway
            changedVTagFields.removeAll(vtagsToIndex);
            for (SchemaId vtag : changedVTagFields) {
                if (vtags.containsKey(vtag) && indexCase.getVersionTags().contains(vtag)) {
                    if (log.isDebugEnabled()) {
                        log.debug(String.format("Record %1$s: will index for created or updated vtag %2$s",
                                vtRecord.getId(), this.safeLoadTagName(vtag)));
                    }
                    vtagsToIndex.add(vtag);
                } else {
                    // The vtag does not exist anymore on the document, or does not need to be indexed:
                    // delete from index
                    solrUpdateWriter.deleteById(getIndexId(table.getTableName(), vtRecord.getId(), vtag));

                    if (log.isDebugEnabled()) {
                        log.debug(String.format("Record %1$s: deleted from index for deleted vtag %2$s",
                                vtRecord.getId(), this.safeLoadTagName(vtag)));
                    }
                }
            }
        }

        //
        // Index
        //
        this.index(table, vtRecord, vtagsToIndex, solrUpdateWriter);
    }

    /**
     * Returns the vtags that the index case wants indexed and that exist on the record.
     *
     * <p>The result is a mutable copy rather than the unmodifiable view returned by
     * {@link Sets#intersection(Set, Set)}, because callers add further vtags to it.
     */
    private Set<SchemaId> indexableVtags(IndexCase indexCase, VTaggedRecord vtRecord) throws Exception {
        return new HashSet<SchemaId>(
                Sets.intersection(indexCase.getVersionTags(), vtRecord.getVTags().keySet()));
    }

    /** Groups the vtags to index by the version they point to; vtags without a version are dropped. */
    private Map<Long, Set<SchemaId>> getVtagsByVersion(Set<SchemaId> vtagsToIndex, Map<SchemaId, Long> vtags) {
        Map<Long, Set<SchemaId>> result = new HashMap<Long, Set<SchemaId>>();

        for (SchemaId vtag : vtagsToIndex) {
            Long version = vtags.get(vtag);
            if (version == null) {
                continue;
            }
            Set<SchemaId> vtagsOfVersion = result.get(version);
            if (vtagsOfVersion == null) {
                vtagsOfVersion = new HashSet<SchemaId>();
                result.put(version, vtagsOfVersion);
            }
            vtagsOfVersion.add(vtag);
        }

        return result;
    }

    /**
     * Indexes the given vtags of a record: one Solr document per vtag, built from the record version
     * the vtag points to. Versions that no longer exist, and documents without any field, result in a
     * delete of the corresponding index entry.
     */
    private void index(LTable table, VTaggedRecord vtRecord, Set<SchemaId> vtagsToIndex,
            SolrUpdateWriter solrUpdateWriter) throws Exception {
        IdRecord idRecord = vtRecord.getRecord();
        String tableName = table.getTableName();

        Map<Long, Set<SchemaId>> vtagsToIndexByVersion = getVtagsByVersion(vtagsToIndex, vtRecord.getVTags());
        for (Map.Entry<Long, Set<SchemaId>> entry : vtagsToIndexByVersion.entrySet()) {
            IdRecord version = null;
            try {
                version = vtRecord.getIdRecord(entry.getKey());
            } catch (VersionNotFoundException e) {
                // ok
            } catch (RecordNotFoundException e) {
                // ok
            }

            if (version == null) {
                // If the version does not exist, we pro-actively delete it, though the IndexUpdater should
                // do this any way when it later receives a message about the delete.
                for (SchemaId vtag : entry.getValue()) {
                    solrUpdateWriter.deleteById(getIndexId(tableName, vtRecord.getId(), vtag));
                }

                if (log.isDebugEnabled()) {
                    log.debug(String.format("Record %1$s, version %2$s: does not exist, deleted index"
                            + " entries for vtags %3$s", vtRecord.getId().toString(), entry.getKey(),
                            vtagSetToNameString(entry.getValue())));
                }
                continue;
            }

            for (SchemaId vtag : entry.getValue()) {
                String indexId = getIndexId(tableName, vtRecord.getId(), vtag);
                SolrDocumentBuilder solrDocumentBuilder = new SolrDocumentBuilder(repository,
                        lilyIndexerConf.getRecordFilter(), lilyIndexerConf.getSystemFields(), valueEvaluator,
                        tableName, version, indexId, vtag, entry.getKey());

                lilyIndexerConf.getIndexFields().collectIndexUpdate(solrDocumentBuilder);
                addDynamicFields(tableName, vtRecord, idRecord, solrDocumentBuilder);

                if (solrDocumentBuilder.isEmptyDocument()) {
                    // No single field was added to the Solr document.
                    // In this case we do not add it to the index.
                    // Besides being somewhat logical, it should also be noted that if a record would not contain
                    // any (modified) fields that serve as input to indexFields, we would never have arrived here
                    // anyway. It is only because some fields potentially would resolve to a value (potentially:
                    // because with deref-expressions we are never sure) that we did.

                    // There can be a previous entry in the index which we should try to delete
                    solrUpdateWriter.deleteById(indexId);

                    if (log.isDebugEnabled()) {
                        log.debug(String.format("Record %1$s, vtag %2$s: no index fields produced output, "
                                + "removed from index if present", vtRecord.getId(), safeLoadTagName(vtag)));
                    }

                    processDependencies(tableName, idRecord, vtag, solrDocumentBuilder);
                } else {
                    SolrInputDocument solrDoc = solrDocumentBuilder.build();

                    processDependencies(tableName, idRecord, vtag, solrDocumentBuilder);

                    solrUpdateWriter.add(solrDoc);

                    if (log.isDebugEnabled()) {
                        log.debug(String.format("Record %1$s, vtag %2$s: indexed, doc = %3$s", vtRecord.getId(),
                                safeLoadTagName(vtag), solrDoc));
                    }
                }
            }
        }
    }

    /**
     * Adds the values of all fields of {@code idRecord} that match a dynamic index field to the document.
     * For each field, matching stops at the first dynamic field whose continue attribute is not set.
     */
    private void addDynamicFields(String tableName, VTaggedRecord vtRecord, IdRecord idRecord,
            SolrDocumentBuilder solrDocumentBuilder) throws Exception {
        if (lilyIndexerConf.getDynamicFields().isEmpty()) {
            return;
        }

        // NOTE(review): fields are taken from vtRecord.getRecord(), not from the version being indexed;
        // this mirrors the existing behaviour - confirm it is intended.
        for (Map.Entry<SchemaId, Object> field : idRecord.getFieldsById().entrySet()) {
            FieldType fieldType = repository.getTypeManager().getFieldTypeById(field.getKey());
            for (DynamicIndexField dynField : lilyIndexerConf.getDynamicFields()) {
                DynamicIndexField.DynamicIndexFieldMatch match = dynField.matches(fieldType);
                if (!match.match) {
                    continue;
                }

                String fieldName = evalName(dynField, match, fieldType);
                List<String> values = valueEvaluator.format(tableName, vtRecord.getRecord(), fieldType,
                        dynField.extractContext(), dynField.getFormatter(), repository);
                solrDocumentBuilder.addField(fieldName, values);

                if (!dynField.getContinue()) {
                    // stop on first match, unless continue attribute is true
                    break;
                }
            }
        }
    }

    /** Formats a set of vtags as a comma-separated list of their names, for use in debug logs. */
    protected String vtagSetToNameString(Set<SchemaId> vtags) {
        StringBuilder builder = new StringBuilder();
        for (SchemaId vtag : vtags) {
            if (builder.length() > 0) {
                builder.append(", ");
            }
            builder.append(safeLoadTagName(vtag));
        }
        return builder.toString();
    }

    /**
     * Visitor over the static index configuration that records whether any {@link IndexField}
     * targets the given field type. Returns {@code false} from {@link #apply} once a match is found.
     */
    private static class StaticFieldTypeFinder implements Predicate<MappingNode> {
        boolean foundRelevant = false;
        final FieldType fieldType;

        private StaticFieldTypeFinder(FieldType fieldType) {
            this.fieldType = fieldType;
        }

        @Override
        public boolean apply(MappingNode mappingNode) {
            if (mappingNode instanceof IndexField) {
                IndexField indexField = (IndexField) mappingNode;
                if (fieldType.equals(indexField.getValue().getTargetFieldType())) {
                    foundRelevant = true;
                }
            }
            return !foundRelevant;
        }
    }

    /**
     * Lookup name of field type, for use in debug logs. Beware, this might be slow.
     *
     * <p>Never throws: any failure yields a placeholder string. An interrupt is recorded on the
     * current thread before the placeholder is returned.
     */
    protected String safeLoadTagName(SchemaId fieldTypeId) {
        if (fieldTypeId == null) {
            return "null";
        }

        try {
            return this.repository.getTypeManager().getFieldTypeById(fieldTypeId).getName().getName();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return "failed to load name";
        } catch (Throwable t) {
            return "failed to load name";
        }
    }

    protected void setRepositoryManager(RepositoryManager repositoryManager) {
        this.repositoryManager = repositoryManager;
    }

    protected void setRepositoryName(String repositoryName) {
        this.repositoryName = repositoryName;
    }

    /** Sets the index name and derives the subscription id ({@code "Indexer_" + indexName}) from it. */
    protected void setIndexName(String indexName) {
        this.indexName = indexName;
        subscriptionId = "Indexer_" + indexName;
    }
}