/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.usergrid.corepersistence.pipeline.read.search; import java.util.ArrayList; import java.util.List; import java.util.UUID; import org.apache.usergrid.corepersistence.index.IndexLocationStrategyFactory; import org.apache.usergrid.persistence.index.*; import org.apache.usergrid.persistence.index.impl.IndexProducer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.usergrid.corepersistence.pipeline.read.AbstractFilter; import org.apache.usergrid.corepersistence.pipeline.read.FilterResult; import org.apache.usergrid.persistence.collection.EntityCollectionManager; import org.apache.usergrid.persistence.collection.EntityCollectionManagerFactory; import org.apache.usergrid.persistence.collection.MvccLogEntry; import org.apache.usergrid.persistence.collection.VersionSet; import org.apache.usergrid.persistence.core.scope.ApplicationScope; import org.apache.usergrid.persistence.model.entity.Id; import com.fasterxml.uuid.UUIDComparator; import com.google.inject.Inject; import rx.Observable; /** * Responsible for verifying candidate result versions, then emitting the Ids of these versions Input is a batch of * candidate results, output is a stream of validated Ids */ public class CandidateIdFilter extends AbstractFilter<FilterResult<Candidate>, FilterResult<Id>> { private final EntityCollectionManagerFactory entityCollectionManagerFactory; private final EntityIndexFactory entityIndexFactory; private final IndexLocationStrategyFactory indexLocationStrategyFactory; private final IndexProducer indexProducer; @Inject public CandidateIdFilter( final EntityCollectionManagerFactory entityCollectionManagerFactory, final EntityIndexFactory entityIndexFactory, final IndexLocationStrategyFactory indexLocationStrategyFactory, final IndexProducer indexProducer) { this.entityCollectionManagerFactory = entityCollectionManagerFactory; this.entityIndexFactory = entityIndexFactory; this.indexLocationStrategyFactory = indexLocationStrategyFactory; this.indexProducer = indexProducer; } @Override public Observable<FilterResult<Id>> call( final Observable<FilterResult<Candidate>> filterResultObservable ) { /** * A bit kludgy from old 1.0 -> 2.0 apis. Refactor this as we clean up our lower levels and create new results * objects */ final ApplicationScope applicationScope = pipelineContext.getApplicationScope(); final EntityCollectionManager entityCollectionManager = entityCollectionManagerFactory.createCollectionManager( applicationScope ); final EntityIndex applicationIndex = entityIndexFactory.createEntityIndex(indexLocationStrategyFactory.getIndexLocationStrategy(applicationScope)); final Observable<FilterResult<Id>> searchIdSetObservable = filterResultObservable.buffer( pipelineContext.getLimit() ).flatMap( candidateResults -> { //flatten toa list of ids to load final Observable<List<Id>> candidateIds = Observable.from( candidateResults ).map( candidate -> candidate.getValue().getCandidateResult().getId() ).toList(); //load the ids final Observable<VersionSet> versionSetObservable = candidateIds.flatMap( ids -> entityCollectionManager.getLatestVersion( ids ) ); //now we have a collection, validate our canidate set is correct. return versionSetObservable.map( entitySet -> new EntityCollector( applicationIndex.createBatch(), entitySet, candidateResults, indexProducer ) ).doOnNext( entityCollector -> entityCollector.merge() ).flatMap( entityCollector -> Observable.from( entityCollector.collectResults() ) ); } ); return searchIdSetObservable; } /** * Map a new cp entity to an old entity. May be null if not present */ private static final class EntityCollector { private static final Logger logger = LoggerFactory.getLogger( EntityCollector.class ); private List<FilterResult<Id>> results = new ArrayList<>(); private final EntityIndexBatch batch; private final List<FilterResult<Candidate>> candidateResults; private final IndexProducer indexProducer; private final VersionSet versionSet; public EntityCollector( final EntityIndexBatch batch, final VersionSet versionSet, final List<FilterResult<Candidate>> candidateResults, final IndexProducer indexProducer ) { this.batch = batch; this.versionSet = versionSet; this.candidateResults = candidateResults; this.indexProducer = indexProducer; this.results = new ArrayList<>( versionSet.size() ); } /** * Merge our candidates and our entity set into results */ public void merge() { for ( final FilterResult<Candidate> candidateResult : candidateResults ) { validate( candidateResult ); } indexProducer.put( batch.build()).toBlocking().lastOrDefault(null);//want to rethrow if batch fails } public List<FilterResult<Id>> collectResults() { return results; } /** * Validate each candidate results vs the data loaded from cass */ private void validate( final FilterResult<Candidate> filterCandidate ) { final CandidateResult candidateResult = filterCandidate.getValue().getCandidateResult(); final SearchEdge searchEdge = filterCandidate.getValue().getSearchEdge(); final MvccLogEntry logEntry = versionSet.getMaxVersion( candidateResult.getId() ); final UUID candidateVersion = candidateResult.getVersion(); final UUID entityVersion = logEntry.getVersion(); final Id entityId = logEntry.getEntityId(); //entity is newer than ES version if ( UUIDComparator.staticCompare( entityVersion, candidateVersion ) > 0 ) { logger.warn( "Deindexing stale entity on edge {} for entityId {} and version {}", searchEdge, entityId, entityVersion ); batch.deindex( searchEdge, entityId, entityVersion ); return; } //ES is newer than cass, it means we haven't repaired the record in Cass, we don't want to //remove the ES record, since the read in cass should cause a read repair, just ignore if ( UUIDComparator.staticCompare( candidateVersion, entityVersion ) > 0 ) { logger.warn( "Found a newer version in ES over cassandra for edge {} for entityId {} and version {}. Repair should be run", searchEdge, entityId, entityVersion ); } //they're the same add it final FilterResult<Id> result = new FilterResult<>( entityId, filterCandidate.getPath() ); results.add( result ); } } }