/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.core.util.trace; import com.linkedin.pinot.common.request.InstanceRequest; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedDeque; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * The main entry point for servers to record the trace information. * * To enable tracing, the request handler thread should register the requestId by calling {@link #register(InstanceRequest)}. * * For any other {@link Runnable} jobs the request handler creates and will be executed in other threads, * They need to be changed to {@link TraceRunnable} interface which handles register/unregister automatically. * * At the end of tracing a request, the request handler thread should unregister by calling * {@link #unregister(InstanceRequest)} to avoid any resource leaks. * * For place where trace info needs to be recorded, just call {@link TraceContext#logInfo(CONSTANT, long)}, * {@link TraceContext#logInfo(CONSTANT, InstanceRequest)}, {@link TraceContext#logException(String, String)} or * {@link TraceContext#logLatency(String, long)}. */ public class TraceContext { /** * Only for tests */ protected static class Info { public CONSTANT message; public Long requestId; public Info(CONSTANT message, Long requestId) { this.message = message; this.requestId = requestId; } @Override public String toString() { return "[" + requestId + "] " + message; } } /** * Enum order matters! */ protected enum CONSTANT { // control constants REGISTER_REQUEST, REGISTER_THREAD_TO_REQUEST, UNREGISTER_THREAD_FROM_REQUEST, UNREGISTER_REQUEST, // info constants START_NEW_TRACE, // failure constants UNREGISTER_REQUEST_FAILURE, REGISTER_THREAD_FAILURE, // trace log failure constants REQUEST_FOR_THREAD_NOT_FOUND, // retrieve trace failure RETRIEVE_TRACE_FAILURE; } private static final Logger LOGGER = LoggerFactory.getLogger(TraceContext.class); public static boolean TEST_ENABLED = false; // whether to log test-related info for this class private static final ThreadLocal<Trace> _localTrace = new ThreadLocal<Trace>() { @Override protected Trace initialValue() { return null; } }; private static final ThreadLocal<Boolean> _isActive = new ThreadLocal<Boolean>() { @Override protected Boolean initialValue() { return false; } }; private static final ThreadLocal<InstanceRequest> _request = new ThreadLocal<InstanceRequest>() { @Override protected InstanceRequest initialValue() { return null; } }; /** * Requests may arrive simultaneously, so we need a concurrent collection to manage these requests. * * Also, each requests may issue a number of concurrent {@link Runnable} jobs, * so a thread-safe queue is needed for collecting trace info from threads. */ private static ConcurrentHashMap<Long, ConcurrentLinkedDeque<Trace>> allTraceInfoMap; /** * Simple way to log all info into a same concurrent list, only used when {@link #TEST_ENABLED} * Map a thread to all info it logged in a linked deque. */ private static ConcurrentHashMap<Long, ConcurrentLinkedDeque<Info>> tidToTestInfoMap; static { reset(); } /** * This method is served for debugging purpose, no need to explicitly call it. */ protected static void reset() { allTraceInfoMap = new ConcurrentHashMap<Long, ConcurrentLinkedDeque<Trace>>(); if (TEST_ENABLED) { tidToTestInfoMap = new ConcurrentHashMap<Long, ConcurrentLinkedDeque<Info>>(); } } /** * call this method to log info for this class, requestId -1 means requestId is invalid. * * @param info * @param requestId */ private static void logInfo(CONSTANT info, long requestId) { Long tid = Thread.currentThread().getId(); LOGGER.debug("[TID: {}] {} in Request: {}", tid, info, requestId); if (TEST_ENABLED) { tidToTestInfoMap.putIfAbsent(tid, new ConcurrentLinkedDeque<Info>()); tidToTestInfoMap.get(tid).offerLast(new Info(info, requestId)); } } private static void logInfo(CONSTANT info, InstanceRequest request) { logInfo(info, (request == null) ? -1 : request.getRequestId()); } /** * Register current thread to a given requestId. * It must be called as the first statement in any job if trace needed. * * @param request * @param parent */ protected static void registerThreadToRequest(InstanceRequest request, Trace parent) { ConcurrentLinkedDeque<Trace> traceInfo = allTraceInfoMap.get(request.getRequestId()); if (traceInfo != null && _request.get() == null) { logInfo(CONSTANT.REGISTER_THREAD_TO_REQUEST, request); long threadId = Thread.currentThread().getId(); Trace trace = new Trace(threadId, parent); _localTrace.set(trace); _request.set(request); traceInfo.offerLast(trace); } else { logInfo(CONSTANT.REGISTER_THREAD_FAILURE, request); } } /** * Threads are managed by {@link ExecutorService}, so that a thread may be reused and assigned to different requests. * It is always necessary to call this method at the end of tracing so any ThreadLocal fields are clean. * */ protected static void unregisterThreadFromRequest() { logInfo(CONSTANT.UNREGISTER_THREAD_FROM_REQUEST, _request.get()); _request.remove(); _isActive.remove(); _localTrace.remove(); } protected static ConcurrentHashMap<Long, ConcurrentLinkedDeque<Info>> getTestInfoMap() { return tidToTestInfoMap; } protected static ConcurrentHashMap<Long, ConcurrentLinkedDeque<Trace>> getAllTraceInfoMap() { return allTraceInfoMap; } protected static InstanceRequest getRequestForCurrentThread() { return _request.get(); } protected static Trace getLocalTraceForCurrentThread() { return _localTrace.get(); } // ------ Public APIs ------ /** * We do not want to mix one request with others, so each request should start the trace by calling this method. * * Test whether the requestId exists in the ConcurrentHashMap is meaningless since the return may not up-to-date. * As a result, just make sure NOT to call this method for same requestId in the same request handler thread twice, * we also register the calling thread to the request. * */ public static void register(InstanceRequest request) { logInfo(CONSTANT.REGISTER_REQUEST, request); allTraceInfoMap.put(request.getRequestId(), new ConcurrentLinkedDeque<Trace>()); registerThreadToRequest(request, null); } /** * Call this method at the end of request processing to avoid resource leak! * Remember to save trace info of current request before calling, * we also unregister the calling thread from the request. * * @param request */ public static void unregister(InstanceRequest request) { unregisterThreadFromRequest(); logInfo(CONSTANT.UNREGISTER_REQUEST, request); if (!TEST_ENABLED) { ConcurrentLinkedDeque<Trace> traces = allTraceInfoMap.remove(request.getRequestId()); if (traces == null) { logInfo(CONSTANT.UNREGISTER_REQUEST_FAILURE, request); return; } traces.clear(); // release all references, this may be optional } } /** * Thread-safe log operation, all fields referenced in this method should be ThreadLocal. * * @param operator * @param latencyMs */ public static void logLatency(String operator, long latencyMs) { if (shouldTrace()) { _localTrace.get().log(operator + "Time", latencyMs); } } public static void logException(String key, String value) { if (shouldTrace()) { _localTrace.get().log(key, value); } } private static boolean shouldTrace() { if (_request.get() == null) { logInfo(CONSTANT.REQUEST_FOR_THREAD_NOT_FOUND, null); return false; } if (!_request.get().isEnableTrace()) { return false; } if (!_isActive.get()) { logInfo(CONSTANT.START_NEW_TRACE, _request.get()); _isActive.set(true); } return true; } /** * This method does not guarantee it returns the complete trace info. * It is caller's responsibility to make sure all threads belong to requestId * finish their work before this method is called. * * This method won't throw any exceptions, so feel free to use it safely. * * Eg. the caller could use a {@link CountDownLatch} to wait for all jobs finished. * * @param requestId * @return */ public static String getTraceInfoOfRequestId(Long requestId) { try { ConcurrentLinkedDeque<Trace> deque = allTraceInfoMap.get(requestId); return (deque == null) ? "{ \"error\": \"trace for " + requestId + " not found\" }" : TraceUtils.getTraceString(deque); } catch (Exception e) { logInfo(CONSTANT.RETRIEVE_TRACE_FAILURE, requestId); return "{ \"error\": \"retrieve trace for " + requestId + " failed\" }"; } } }