/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.disruption;
import org.elasticsearch.common.SuppressForbidden;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.test.InternalTestCluster;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.regex.Pattern;
/**
* Suspends all threads on the specified node in order to simulate a long gc.
*/
public class LongGCDisruption extends SingleNodeDisruption {
private final static Pattern[] unsafeClasses = new Pattern[]{
// logging has shared JVM locks - we may suspend a thread and block other nodes from doing their thing
Pattern.compile("Logger")
};
protected final String disruptedNode;
private Set<Thread> suspendedThreads;
public LongGCDisruption(Random random, String disruptedNode) {
super(random);
this.disruptedNode = disruptedNode;
}
@Override
public synchronized void startDisrupting() {
if (suspendedThreads == null) {
suspendedThreads = new HashSet<>();
stopNodeThreads(disruptedNode, suspendedThreads);
} else {
throw new IllegalStateException("can't disrupt twice, call stopDisrupting() first");
}
}
@Override
public synchronized void stopDisrupting() {
if (suspendedThreads != null) {
resumeThreads(suspendedThreads);
suspendedThreads = null;
}
}
@Override
public void removeAndEnsureHealthy(InternalTestCluster cluster) {
removeFromCluster(cluster);
ensureNodeCount(cluster);
}
@Override
public TimeValue expectedTimeToHeal() {
return TimeValue.timeValueMillis(0);
}
@SuppressForbidden(reason = "stops/resumes threads intentionally")
protected boolean stopNodeThreads(String node, Set<Thread> nodeThreads) {
Thread[] allThreads = null;
while (allThreads == null) {
allThreads = new Thread[Thread.activeCount()];
if (Thread.enumerate(allThreads) > allThreads.length) {
// we didn't make enough space, retry
allThreads = null;
}
}
boolean stopped = false;
final String nodeThreadNamePart = "[" + node + "]";
for (Thread thread : allThreads) {
if (thread == null) {
continue;
}
String name = thread.getName();
if (name.contains(nodeThreadNamePart)) {
if (thread.isAlive() && nodeThreads.add(thread)) {
stopped = true;
thread.suspend();
// double check the thread is not in a shared resource like logging. If so, let it go and come back..
boolean safe = true;
safe:
for (StackTraceElement stackElement : thread.getStackTrace()) {
String className = stackElement.getClassName();
for (Pattern unsafePattern : unsafeClasses) {
if (unsafePattern.matcher(className).find()) {
safe = false;
break safe;
}
}
}
if (!safe) {
thread.resume();
nodeThreads.remove(thread);
}
}
}
}
return stopped;
}
@SuppressForbidden(reason = "stops/resumes threads intentionally")
protected void resumeThreads(Set<Thread> threads) {
for (Thread thread : threads) {
thread.resume();
}
}
}