PrimaryNode.java example

Explorer
lucene-solr-master
- lucene
- solr
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.replicator.nrt;

import java.io.IOException;
import java.io.PrintStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.StandardDirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.RAMFile;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ThreadInterruptedException;

/*
 * This just asks IndexWriter to open new NRT reader, in order to publish a new NRT point.  This could be improved, if we separated out 1)
 * nrt flush (and incRef the SIS) from 2) opening a new reader, but this is tricky with IW's concurrency, and it would also be hard-ish to share
 * IW's reader pool with our searcher manager.  So we do the simpler solution now, but that adds some unecessary latency to NRT refresh on
 * replicas since step 2) could otherwise be done concurrently with replicas copying files over.
 */

/** Node that holds an IndexWriter, indexing documents into its local index.
 *
 * @lucene.experimental */

public abstract class PrimaryNode extends Node {

  // Current NRT segment infos, incRef'd with IndexWriter.deleter:
  private SegmentInfos curInfos;

  protected final IndexWriter writer;

  // IncRef'd state of the last published NRT point; when a replica comes asking, we give it this as the current NRT point:
  private CopyState copyState;

  protected final long primaryGen;

  /** Contains merged segments that have been copied to all running replicas (as of when that merge started warming). */
  final Set<String> finishedMergedFiles = Collections.synchronizedSet(new HashSet<String>());

  private final AtomicInteger copyingCount = new AtomicInteger();

  public PrimaryNode(IndexWriter writer, int id, long primaryGen, long forcePrimaryVersion,
                     SearcherFactory searcherFactory, PrintStream printStream) throws IOException {
    super(id, writer.getDirectory(), searcherFactory, printStream);
    message("top: now init primary");
    this.writer = writer;
    this.primaryGen = primaryGen;

    try {
      // So that when primary node's IndexWriter finishes a merge, but before it cuts over to the merged segment,
      // it copies it out to the replicas.  This ensures the whole system's NRT latency remains low even when a
      // large merge completes:
      writer.getConfig().setMergedSegmentWarmer(new PreCopyMergedSegmentWarmer(this));

      message("IWC:\n" + writer.getConfig());
      message("dir:\n" + writer.getDirectory());
      message("commitData: " + writer.getLiveCommitData());

      // Record our primaryGen in the userData, and set initial version to 0:
      Map<String,String> commitData = new HashMap<>();
      Iterable<Map.Entry<String,String>> iter = writer.getLiveCommitData();
      if (iter != null) {
        for(Map.Entry<String,String> ent : iter) {
          commitData.put(ent.getKey(), ent.getValue());
        }
      }
      commitData.put(PRIMARY_GEN_KEY, Long.toString(primaryGen));
      if (commitData.get(VERSION_KEY) == null) {
        commitData.put(VERSION_KEY, "0");
        message("add initial commitData version=0");
      } else {
        message("keep current commitData version=" + commitData.get(VERSION_KEY));
      }
      writer.setLiveCommitData(commitData.entrySet(), false);

      // We forcefully advance the SIS version to an unused future version.  This is necessary if the previous primary crashed and we are
      // starting up on an "older" index, else versions can be illegally reused but show different results:
      if (forcePrimaryVersion != -1) {
        message("now forcePrimaryVersion to version=" + forcePrimaryVersion);
        writer.advanceSegmentInfosVersion(forcePrimaryVersion);
      }

      mgr = new SearcherManager(writer, true, true, searcherFactory);
      setCurrentInfos(Collections.<String>emptySet());
      message("init: infos version=" + curInfos.getVersion());

      IndexSearcher s = mgr.acquire();
      try {
        // TODO: this is test code specific!!
        message("init: marker count: " + s.count(new TermQuery(new Term("marker", "marker"))));
      } finally {
        mgr.release(s);
      }

    } catch (Throwable t) {
      message("init: exception");
      t.printStackTrace(printStream);
      throw new RuntimeException(t);
    }
  }

  /** Returns the current primary generation, which is incremented each time a new primary is started for this index */
  public long getPrimaryGen() {
    return primaryGen;
  }

  // TODO: in the future, we should separate "flush" (returns an incRef'd SegmentInfos) from "refresh" (open new NRT reader from
  // IndexWriter) so that the latter can be done concurrently while copying files out to replicas, minimizing the refresh time from the
  // replicas.  But fixing this is tricky because e.g. IndexWriter may complete a big merge just after returning the incRef'd SegmentInfos
  // and before we can open a new reader causing us to close the just-merged readers only to then open them again from the (now stale)
  // SegmentInfos.  To fix this "properly" I think IW.inc/decRefDeleter must also incread the ReaderPool entry

  /** Flush all index operations to disk and opens a new near-real-time reader.
   *  new NRT point, to make the changes visible to searching.  Returns true if there were changes. */
  public boolean flushAndRefresh() throws IOException {
    message("top: now flushAndRefresh");
    Set<String> completedMergeFiles;
    synchronized(finishedMergedFiles) {
      completedMergeFiles = Collections.unmodifiableSet(new HashSet<>(finishedMergedFiles));
    }
    mgr.maybeRefreshBlocking();
    boolean result = setCurrentInfos(completedMergeFiles);
    if (result) {
      message("top: opened NRT reader version=" + curInfos.getVersion());
      finishedMergedFiles.removeAll(completedMergeFiles);
      message("flushAndRefresh: version=" + curInfos.getVersion() + " completedMergeFiles=" + completedMergeFiles + " finishedMergedFiles=" + finishedMergedFiles);
    } else {
      message("top: no changes in flushAndRefresh; still version=" + curInfos.getVersion());
    }
    return result;
  }

  public long getCopyStateVersion() {
    return copyState.version;
  }

  public synchronized long getLastCommitVersion() {
    Iterable<Map.Entry<String,String>> iter = writer.getLiveCommitData();
    assert iter != null;
    for(Map.Entry<String,String> ent : iter) {
      if (ent.getKey().equals(VERSION_KEY)) {
        return Long.parseLong(ent.getValue());
      }
    }

    // In ctor we always install an initial version:
    throw new AssertionError("missing VERSION_KEY");
  }

  @Override
  public void commit() throws IOException {
    Map<String,String> commitData = new HashMap<>();
    commitData.put(PRIMARY_GEN_KEY, Long.toString(primaryGen));
    // TODO (opto): it's a bit wasteful that we put "last refresh" version here, not the actual version we are committing, because it means
    // on xlog replay we are replaying more ops than necessary.
    commitData.put(VERSION_KEY, Long.toString(copyState.version));
    message("top: commit commitData=" + commitData);
    writer.setLiveCommitData(commitData.entrySet(), false);
    writer.commit();
  }

  /** IncRef the current CopyState and return it */
  public synchronized CopyState getCopyState() throws IOException {
    ensureOpen(false);
    //message("top: getCopyState replicaID=" + replicaID + " replicaNodeID=" + replicaNodeID + " version=" + curInfos.getVersion() + " infos=" + curInfos.toString());
    assert curInfos == copyState.infos;
    writer.incRefDeleter(copyState.infos);
    int count = copyingCount.incrementAndGet();
    assert count > 0;
    return copyState;
  }

  /** Called once replica is done (or failed) copying an NRT point */
  public void releaseCopyState(CopyState copyState) throws IOException {
    //message("top: releaseCopyState version=" + copyState.version);
    assert copyState.infos != null;
    writer.decRefDeleter(copyState.infos);
    int count = copyingCount.decrementAndGet();
    assert count >= 0;
  }

  @Override
  public boolean isClosed() {
    return isClosed(false);
  }

  boolean isClosed(boolean allowClosing) {
    return "closed".equals(state) || (allowClosing == false && "closing".equals(state));
  }

  private void ensureOpen(boolean allowClosing) {
    if (isClosed(allowClosing)) {
      throw new AlreadyClosedException(state);
    }
  }

  /** Steals incoming infos refCount; returns true if there were changes. */
  private synchronized boolean setCurrentInfos(Set<String> completedMergeFiles) throws IOException {

    IndexSearcher searcher = null;
    SegmentInfos infos;
    try {
      searcher = mgr.acquire();
      infos = ((StandardDirectoryReader) searcher.getIndexReader()).getSegmentInfos();
      // TODO: this is test code specific!!
      message("setCurrentInfos: marker count: " + searcher.count(new TermQuery(new Term("marker", "marker"))) + " version=" + infos.getVersion() + " searcher=" + searcher);
    } finally {
      if (searcher != null) {
        mgr.release(searcher);
      }
    }
    if (curInfos != null && infos.getVersion() == curInfos.getVersion()) {
      // no change
      message("top: skip switch to infos: version=" + infos.getVersion() + " is unchanged: " + infos.toString());
      return false;
    }

    SegmentInfos oldInfos = curInfos;
    writer.incRefDeleter(infos);
    curInfos = infos;
    if (oldInfos != null) {
      writer.decRefDeleter(oldInfos);
    }

    message("top: switch to infos=" + infos.toString() + " version=" + infos.getVersion());

    // Serialize the SegmentInfos:
    RAMOutputStream out = new RAMOutputStream(new RAMFile(), true);
    infos.write(dir, out);
    byte[] infosBytes = new byte[(int) out.getFilePointer()];
    out.writeTo(infosBytes, 0);

    Map<String,FileMetaData> filesMetaData = new HashMap<String,FileMetaData>();
    for(SegmentCommitInfo info : infos) {
      for(String fileName : info.files()) {
        FileMetaData metaData = readLocalFileMetaData(fileName);
        // NOTE: we hold a refCount on this infos, so this file better exist:
        assert metaData != null;
        assert filesMetaData.containsKey(fileName) == false;
        filesMetaData.put(fileName, metaData);
      }
    }

    lastFileMetaData = Collections.unmodifiableMap(filesMetaData);

    message("top: set copyState primaryGen=" + primaryGen + " version=" + infos.getVersion() + " files=" + filesMetaData.keySet());
    copyState = new CopyState(lastFileMetaData,
                              infos.getVersion(), infos.getGeneration(), infosBytes, completedMergeFiles,
                              primaryGen, curInfos);
    return true;
  }

  private synchronized void waitForAllRemotesToClose() throws IOException {

    // Wait for replicas to finish or crash:
    while (true) {
      int count = copyingCount.get();
      if (count == 0) {
        return;
      }
      message("pendingCopies: " + count);

      try {
        wait(10);
      } catch (InterruptedException ie) {
        throw new ThreadInterruptedException(ie);
      }
    }
  }

  @Override
  public void close() throws IOException {
    state = "closing";
    message("top: close primary");

    synchronized (this) {
      waitForAllRemotesToClose();
      if (curInfos != null) {
        writer.decRefDeleter(curInfos);
        curInfos = null;
      }
    }

    mgr.close();

    writer.rollback();
    dir.close();

    state = "closed";
  }

  /** Called when a merge has finished, but before IW switches to the merged segment */
  protected abstract void preCopyMergedSegmentFiles(SegmentCommitInfo info, Map<String,FileMetaData> files) throws IOException;
}