/* * Copyright (c) 2013-2017 Cinchapi Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.cinchapi.concourse.server.storage.db; import static com.cinchapi.concourse.server.GlobalState.STOPWORDS; import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import javax.annotation.concurrent.ThreadSafe; import com.cinchapi.concourse.annotate.DoNotInvoke; import com.cinchapi.concourse.annotate.PackagePrivate; import com.cinchapi.concourse.server.model.Position; import com.cinchapi.concourse.server.model.PrimaryKey; import com.cinchapi.concourse.server.model.Text; import com.cinchapi.concourse.server.model.Value; import com.cinchapi.concourse.server.storage.Action; import com.cinchapi.concourse.thrift.Type; import com.cinchapi.concourse.util.ConcurrentSkipListMultiset; import com.cinchapi.concourse.util.TStrings; import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.base.Throwables; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.common.collect.SortedMultiset; import com.google.common.util.concurrent.ThreadFactoryBuilder; /** * A Block that stores SearchRevision data to be used in a SearchRecord. * <p> * Text is indexed in a block such that that a value matches a query if it * contains a sequence of terms where each term or a substring of that term * matches the term in the same relative position of the query (i.e. if the * query is for 'fo ar' then value 'foo bar' will match, etc). * </p> * <p> * </p> * * @author Jeff Nelson */ @ThreadSafe @PackagePrivate final class SearchBlock extends Block<Text, Text, Position> { /** * The executor service that is responsible for multithread search indexing. * <p> * The executor is static (and therefore shared by each SearchBlock) because * only one search block at a time should be mutable and able to process * inserts. * </p> */ private static final ExecutorService indexer = Executors .newFixedThreadPool(Runtime.getRuntime().availableProcessors(), new ThreadFactoryBuilder().setDaemon(true) .setNameFormat("Search Indexer" + " %d").build()); @SuppressWarnings("rawtypes") @Override protected SortedMultiset<Revision<Text, Text, Position>> createBackingStore( Comparator<Revision> comparator) { return ConcurrentSkipListMultiset.create(comparator); } /** * DO NOT CALL!! * * @param id * @param directory * @param diskLoad */ @PackagePrivate @DoNotInvoke SearchBlock(String id, String directory, boolean diskLoad) { super(id, directory, diskLoad); this.concurrent = true; } /** * DO NOT CALL. Use {@link #insert(Text, Value, PrimaryKey)} instead. */ @Override @DoNotInvoke public final SearchRevision insert(Text locator, Text key, Position value, long version, Action type) { throw new UnsupportedOperationException(); } /** * Insert a revision for {@code key} as {@code value} in {@code record} at * {@code version} * * @param key * @param value * @param record * @param version * @param type */ public final void insert(Text key, Value value, PrimaryKey record, long version, Action type) { Preconditions.checkState(mutable, "Cannot modify a block that is not mutable"); if(value.getType() == Type.STRING) { String string = value.getObject().toString().toLowerCase(); // CON-10 String[] toks = string .split(TStrings.REGEX_GROUP_OF_ONE_OR_MORE_WHITESPACE_CHARS); int pos = 0; List<Future<?>> futures = Lists.newArrayList(); for (String tok : toks) { futures.addAll(process(key, tok, pos, record, version, type)); ++pos; } for (Future<?> future : futures) { // wait for completion try { future.get(); } catch (ExecutionException | InterruptedException e) { throw Throwables.propagate(e); } } } } @Override protected SearchRevision makeRevision(Text locator, Text key, Position value, long version, Action type) { return Revision .createSearchRevision(locator, key, value, version, type); } @Override protected Class<SearchRevision> xRevisionClass() { return SearchRevision.class; } /** * Call super.{@link #insert(Text, Text, Position, long)} * * @param locator * @param key * @param value * @param version * @param type */ private final void doInsert(Text locator, Text key, Position value, long version, Action type) { super.insertUnsafe(locator, key, value, version, type); } /** * Calculate all possible substrings for {@code term} and submit a task to * the {@link #indexer} that will store a revision for the {@code term} at * {@code position} for {@code key} in {@code record} at {@code version}. * * @param key * @param term * @param position * @param record * @param version * @param type * @return {@link Future Futures} that can be used to wait for all the * submitted tasks to complete */ private List<Future<?>> process(final Text key, final String term, final int position, final PrimaryKey record, final long version, final Action type) { if(!STOPWORDS.contains(term)) { int upperBound = (int) Math.pow(term.length(), 2); List<Future<?>> futures = Lists .newArrayListWithCapacity(upperBound); // The set of substrings that have been indexed from {@code term} at // {@code position} for {@code key} in {@code record} at {@code // version}. This is used to ensure that we do not add duplicate // indexes (i.e. 'abrakadabra') Set<String> indexed = Sets.newHashSetWithExpectedSize(upperBound); for (int i = 0; i < term.length(); ++i) { for (int j = i + 1; j < term.length() + 1; ++j) { final String substring = term.substring(i, j).trim(); if(!Strings.isNullOrEmpty(substring) && !STOPWORDS.contains(substring) && !indexed.contains(substring)) { indexed.add(substring); futures.add(indexer.submit(new Runnable() { @Override public void run() { doInsert(key, Text.wrap(substring), Position.wrap(record, position), version, type); } })); } } } indexed = null; // make eligible for immediate GC return futures; } else { return Collections.emptyList(); } } }