TraceContext.java example

Explorer
pinot-master
/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.util.trace;

import com.linkedin.pinot.common.request.InstanceRequest;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The main entry point for servers to record the trace information.
 *
 * To enable tracing, the request handler thread should register the requestId by calling {@link #register(InstanceRequest)}.
 *
 * For any other {@link Runnable} jobs the request handler creates and will be executed in other threads,
 * They need to be changed to {@link TraceRunnable} interface which handles register/unregister automatically.
 *
 * At the end of tracing a request, the request handler thread should unregister by calling
 * {@link #unregister(InstanceRequest)} to avoid any resource leaks.
 *
 * For place where trace info needs to be recorded, just call {@link TraceContext#logInfo(CONSTANT, long)},
 * {@link TraceContext#logInfo(CONSTANT, InstanceRequest)}, {@link TraceContext#logException(String, String)} or
 * {@link TraceContext#logLatency(String, long)}.
 */
public class TraceContext {

  /**
   * Only for tests
   */
  protected static class Info {
    public CONSTANT message;
    public Long requestId;

    public Info(CONSTANT message, Long requestId) {
      this.message = message;
      this.requestId = requestId;
    }

    @Override
    public String toString() {
      return "[" + requestId + "] " + message;
    }
  }

  /**
   * Enum order matters!
   */
  protected enum CONSTANT {
    // control constants
    REGISTER_REQUEST,
    REGISTER_THREAD_TO_REQUEST,
    UNREGISTER_THREAD_FROM_REQUEST,
    UNREGISTER_REQUEST,

    // info constants
    START_NEW_TRACE,

    // failure constants
    UNREGISTER_REQUEST_FAILURE,
    REGISTER_THREAD_FAILURE,

    // trace log failure constants
    REQUEST_FOR_THREAD_NOT_FOUND,

    // retrieve trace failure
    RETRIEVE_TRACE_FAILURE;
  }

  private static final Logger LOGGER = LoggerFactory.getLogger(TraceContext.class);

  public static boolean TEST_ENABLED = false; // whether to log test-related info for this class

  private static final ThreadLocal<Trace> _localTrace = new ThreadLocal<Trace>() {
    @Override
    protected Trace initialValue() {
      return null;
    }
  };
  private static final ThreadLocal<Boolean> _isActive = new ThreadLocal<Boolean>() {
    @Override
    protected Boolean initialValue() {
      return false;
    }
  };
  private static final ThreadLocal<InstanceRequest> _request = new ThreadLocal<InstanceRequest>() {
    @Override
    protected InstanceRequest initialValue() {
      return null;
    }
  };

  /**
   * Requests may arrive simultaneously, so we need a concurrent collection to manage these requests.
   *
   * Also, each requests may issue a number of concurrent {@link Runnable} jobs,
   * so a thread-safe queue is needed for collecting trace info from threads.
   */
  private static ConcurrentHashMap<Long, ConcurrentLinkedDeque<Trace>> allTraceInfoMap;

  /**
   * Simple way to log all info into a same concurrent list, only used when {@link #TEST_ENABLED}
   * Map a thread to all info it logged in a linked deque.
   */
  private static ConcurrentHashMap<Long, ConcurrentLinkedDeque<Info>> tidToTestInfoMap;

  static {
    reset();
  }

  /**
   * This method is served for debugging purpose, no need to explicitly call it.
   */
  protected static void reset() {
    allTraceInfoMap = new ConcurrentHashMap<Long, ConcurrentLinkedDeque<Trace>>();
    if (TEST_ENABLED) {
      tidToTestInfoMap = new ConcurrentHashMap<Long, ConcurrentLinkedDeque<Info>>();
    }
  }

  /**
   * call this method to log info for this class, requestId -1 means requestId is invalid.
   *
   * @param info
   * @param requestId
   */
  private static void logInfo(CONSTANT info, long requestId) {
    Long tid = Thread.currentThread().getId();
    LOGGER.debug("[TID: {}] {} in Request: {}", tid, info, requestId);
    if (TEST_ENABLED) {
      tidToTestInfoMap.putIfAbsent(tid, new ConcurrentLinkedDeque<Info>());
      tidToTestInfoMap.get(tid).offerLast(new Info(info, requestId));
    }
  }

  private static void logInfo(CONSTANT info, InstanceRequest request) {
    logInfo(info, (request == null) ? -1 : request.getRequestId());
  }

  /**
   * Register current thread to a given requestId.
   * It must be called as the first statement in any job if trace needed.
   *
   * @param request
   * @param parent
   */
  protected static void registerThreadToRequest(InstanceRequest request, Trace parent) {
    ConcurrentLinkedDeque<Trace> traceInfo = allTraceInfoMap.get(request.getRequestId());

    if (traceInfo != null && _request.get() == null) {
      logInfo(CONSTANT.REGISTER_THREAD_TO_REQUEST, request);
      long threadId = Thread.currentThread().getId();
      Trace trace = new Trace(threadId, parent);
      _localTrace.set(trace);
      _request.set(request);
      traceInfo.offerLast(trace);
    } else {
      logInfo(CONSTANT.REGISTER_THREAD_FAILURE, request);
    }
  }

  /**
   * Threads are managed by {@link ExecutorService}, so that a thread may be reused and assigned to different requests.
   * It is always necessary to call this method at the end of tracing so any ThreadLocal fields are clean.
   *
   */
  protected static void unregisterThreadFromRequest() {
    logInfo(CONSTANT.UNREGISTER_THREAD_FROM_REQUEST, _request.get());
    _request.remove();
    _isActive.remove();
    _localTrace.remove();
  }

  protected static ConcurrentHashMap<Long, ConcurrentLinkedDeque<Info>> getTestInfoMap() {
    return tidToTestInfoMap;
  }

  protected static ConcurrentHashMap<Long, ConcurrentLinkedDeque<Trace>> getAllTraceInfoMap() {
    return allTraceInfoMap;
  }

  protected static InstanceRequest getRequestForCurrentThread() {
    return _request.get();
  }

  protected static Trace getLocalTraceForCurrentThread() {
    return _localTrace.get();
  }

  // ------ Public APIs ------

  /**
   * We do not want to mix one request with others, so each request should start the trace by calling this method.
   *
   * Test whether the requestId exists in the ConcurrentHashMap is meaningless since the return may not up-to-date.
   * As a result, just make sure NOT to call this method for same requestId in the same request handler thread twice,
   * we also register the calling thread to the request.
   *
   */
  public static void register(InstanceRequest request) {
    logInfo(CONSTANT.REGISTER_REQUEST, request);
    allTraceInfoMap.put(request.getRequestId(), new ConcurrentLinkedDeque<Trace>());
    registerThreadToRequest(request, null);
  }

  /**
   * Call this method at the end of request processing to avoid resource leak!
   * Remember to save trace info of current request before calling,
   * we also unregister the calling thread from the request.
   *
   * @param request
   */
  public static void unregister(InstanceRequest request) {
    unregisterThreadFromRequest();
    logInfo(CONSTANT.UNREGISTER_REQUEST, request);
    if (!TEST_ENABLED) {
      ConcurrentLinkedDeque<Trace> traces = allTraceInfoMap.remove(request.getRequestId());
      if (traces == null) {
        logInfo(CONSTANT.UNREGISTER_REQUEST_FAILURE, request);
        return;
      }
      traces.clear(); // release all references, this may be optional
    }
  }

  /**
   * Thread-safe log operation, all fields referenced in this method should be ThreadLocal.
   *
   * @param operator
   * @param latencyMs
   */
  public static void logLatency(String operator, long latencyMs) {
    if (shouldTrace()) {
      _localTrace.get().log(operator + "Time", latencyMs);
    }
  }

  public static void logException(String key, String value) {
    if (shouldTrace()) {
      _localTrace.get().log(key, value);
    }
  }

  private static boolean shouldTrace() {
    if (_request.get() == null) {
      logInfo(CONSTANT.REQUEST_FOR_THREAD_NOT_FOUND, null);
      return false;
    }

    if (!_request.get().isEnableTrace()) {
      return false;
    }

    if (!_isActive.get()) {
      logInfo(CONSTANT.START_NEW_TRACE, _request.get());
      _isActive.set(true);
    }

    return true;
  }

  /**
   * This method does not guarantee it returns the complete trace info.
   * It is caller's responsibility to make sure all threads belong to requestId
   * finish their work before this method is called.
   *
   * This method won't throw any exceptions, so feel free to use it safely.
   *
   * Eg. the caller could use a {@link CountDownLatch} to wait for all jobs finished.
   *
   * @param requestId
   * @return
   */
  public static String getTraceInfoOfRequestId(Long requestId) {
    try {
      ConcurrentLinkedDeque<Trace> deque = allTraceInfoMap.get(requestId);
      return (deque == null) ? "{ \"error\": \"trace for " + requestId + " not found\" }" : TraceUtils.getTraceString(deque);
    } catch (Exception e) {
      logInfo(CONSTANT.RETRIEVE_TRACE_FAILURE, requestId);
      return "{ \"error\": \"retrieve trace for " + requestId + " failed\" }";
    }
  }
}