package com.linkedin.thirdeye.hadoop.push;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;
import org.apache.commons.io.IOUtils;
import org.apache.helix.AccessOption;
import org.apache.helix.BaseDataAccessor;
import org.apache.helix.HelixDataAccessor;
import org.apache.helix.PropertyKey;
import org.apache.helix.PropertyKey.Builder;
import org.apache.helix.ZNRecord;
import org.apache.helix.manager.zk.ZKHelixDataAccessor;
import org.apache.helix.manager.zk.ZNRecordSerializer;
import org.apache.helix.manager.zk.ZkBaseDataAccessor;
import org.apache.helix.manager.zk.ZkClient;
import org.apache.helix.model.ExternalView;
import org.apache.helix.model.IdealState;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Admin tool that removes redundant HOURLY segments from a Pinot table's Helix
 * IdealState: an HOURLY segment is considered redundant when a DAILY segment
 * already exists for the same start day. The current IdealState is backed up to
 * /tmp before modification, and the tool waits until the ExternalView converges
 * to the updated IdealState before exiting.
 */
public class DeleteOverlappingSegmentsInPinot {

  private static final Logger LOG = LoggerFactory.getLogger(DeleteOverlappingSegmentsInPinot.class);

  // Segment names embed dates of the form yyyy-mm-dd...; we only compare the day prefix.
  private static final int DAY_PREFIX_LENGTH = "yyyy-mm-dd".length();

  // Poll interval while waiting for ExternalView to match IdealState.
  private static final long STABLE_STATE_POLL_INTERVAL_MS = 10000L;

  /**
   * Entry point.
   *
   * @param args [0] ZooKeeper URL, [1] Helix cluster name, [2] Pinot table name
   *             (the "_OFFLINE" suffix is appended if missing)
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 3) {
      System.err.println("Usage: DeleteOverlappingSegmentsInPinot <zkUrl> <zkCluster> <tableName>");
      System.exit(1);
    }
    String zkUrl = args[0];
    String zkCluster = args[1];
    String tableName = args[2];
    deleteOverlappingSegments(zkUrl, zkCluster, tableName);
  }

  /**
   * Extracts the "yyyy-mm-dd" day prefix from one of the underscore-separated
   * time fields at the end of a segment name.
   *
   * @param splits        segment name split on '_'; assumes the format
   *                      ..._&lt;startTime&gt;_&lt;endTime&gt;_&lt;sequence&gt; — TODO confirm against
   *                      the segment-name generator
   * @param offsetFromEnd 2 for the end time field, 3 for the start time field
   * @return the day prefix, e.g. "2016-01-31"
   */
  private static String extractDay(String[] splits, int offsetFromEnd) {
    return splits[splits.length - offsetFromEnd].substring(0, DAY_PREFIX_LENGTH);
  }

  /**
   * Reads the latest IdealState and returns a copy with every HOURLY segment
   * whose start day is already covered by a DAILY segment removed.
   *
   * @param helixDataAccessor accessor for the Helix cluster
   * @param idealStatesKey    property key of the table's IdealState
   * @return a new IdealState (sharing the freshly-read ZNRecord, so its version
   *         is suitable for a compare-and-set write)
   */
  private static IdealState computeNewIdealStateAfterDeletingOverlappingSegments(
      HelixDataAccessor helixDataAccessor, PropertyKey idealStatesKey) {
    IdealState is = helixDataAccessor.getProperty(idealStatesKey);
    // Pass 1: collect the start days for which a DAILY segment exists.
    Set<String> daysWithDailySegments = new HashSet<>();
    for (String segmentName : is.getPartitionSet()) {
      LOG.info("Segment Name : {}", segmentName);
      if (segmentName.contains("DAILY")) {
        String[] splits = segmentName.split("_");
        String endDay = extractDay(splits, 2);
        String startDay = extractDay(splits, 3);
        LOG.info("Start : {} End : {}", startDay, endDay);
        daysWithDailySegments.add(startDay);
      }
    }
    // Pass 2: any HOURLY segment starting on a day covered by a DAILY segment is redundant.
    Set<String> hourlySegmentsToDelete = new TreeSet<>();
    for (String segmentName : is.getPartitionSet()) {
      LOG.info("Segment name {}", segmentName);
      if (segmentName.contains("HOURLY")) {
        String[] splits = segmentName.split("_");
        String endDay = extractDay(splits, 2);
        String startDay = extractDay(splits, 3);
        LOG.info("Start : {} End : {}", startDay, endDay);
        if (daysWithDailySegments.contains(startDay)) {
          hourlySegmentsToDelete.add(segmentName);
        }
      }
    }
    LOG.info("HOURLY segments that can be deleted: {}", hourlySegmentsToDelete.size());
    LOG.info("Hourly segments to delete {}", hourlySegmentsToDelete.toString().replaceAll(",", "\n"));
    IdealState newIdealState = new IdealState(is.getRecord());
    for (String hourlySegmentToDelete : hourlySegmentsToDelete) {
      newIdealState.getRecord().getMapFields().remove(hourlySegmentToDelete);
    }
    return newIdealState;
  }

  /**
   * Deletes overlapping HOURLY segments from the table's IdealState and blocks
   * until the ExternalView matches the new IdealState.
   *
   * <p>The pre-update IdealState is serialized to /tmp as a recovery backup. The
   * IdealState write is a versioned compare-and-set, retried until it succeeds
   * (a concurrent writer bumps the version, so each retry recomputes from the
   * latest IdealState).
   *
   * @param zkUrl     ZooKeeper connect string
   * @param zkCluster Helix cluster name
   * @param tableName Pinot table name; "_OFFLINE" is appended if absent
   * @return true if the IdealState update succeeded, false if the backup write failed
   */
  public static boolean deleteOverlappingSegments(String zkUrl, String zkCluster, String tableName) {
    boolean updateSuccessful = false;
    if (!tableName.endsWith("_OFFLINE")) {
      tableName = tableName + "_OFFLINE";
    }
    ZkClient zkClient = new ZkClient(zkUrl);
    try {
      ZNRecordSerializer zkSerializer = new ZNRecordSerializer();
      zkClient.setZkSerializer(zkSerializer);
      BaseDataAccessor<ZNRecord> baseDataAccessor = new ZkBaseDataAccessor<>(zkClient);
      HelixDataAccessor helixDataAccessor = new ZKHelixDataAccessor(zkCluster, baseDataAccessor);
      Builder keyBuilder = helixDataAccessor.keyBuilder();
      PropertyKey idealStateKey = keyBuilder.idealStates(tableName);
      PropertyKey externalViewKey = keyBuilder.externalView(tableName);
      IdealState currentIdealState = helixDataAccessor.getProperty(idealStateKey);
      // Back up the current IdealState so a bad run can be rolled back by hand.
      byte[] serializeIS = zkSerializer.serialize(currentIdealState.getRecord());
      String name = tableName + ".idealstate." + System.currentTimeMillis();
      File outputFile = new File("/tmp", name);
      try (FileOutputStream fileOutputStream = new FileOutputStream(outputFile)) {
        IOUtils.write(serializeIS, fileOutputStream);
      } catch (IOException e) {
        // Refuse to modify the IdealState without a backup on disk.
        LOG.error("Exception in delete overlapping segments", e);
        return updateSuccessful;
      }
      LOG.info("Saved current idealstate to {}", outputFile);
      IdealState newIdealState;
      do {
        // Recompute from the latest IdealState on every attempt so the
        // ZNRecord version used by the compare-and-set below stays fresh.
        newIdealState = computeNewIdealStateAfterDeletingOverlappingSegments(helixDataAccessor, idealStateKey);
        LOG.info("Updating IdealState");
        updateSuccessful = helixDataAccessor.getBaseDataAccessor().set(idealStateKey.getPath(),
            newIdealState.getRecord(), newIdealState.getRecord().getVersion(), AccessOption.PERSISTENT);
        if (updateSuccessful) {
          int numSegmentsDeleted =
              currentIdealState.getPartitionSet().size() - newIdealState.getPartitionSet().size();
          LOG.info("Successfully updated IdealState: Removed segments: {}", (numSegmentsDeleted));
        }
      } while (!updateSuccessful);
      // Wait for the cluster to apply the change: done when ExternalView's
      // segment set matches the IdealState's.
      try {
        while (true) {
          Thread.sleep(STABLE_STATE_POLL_INTERVAL_MS);
          ExternalView externalView = helixDataAccessor.getProperty(externalViewKey);
          IdealState idealState = helixDataAccessor.getProperty(idealStateKey);
          if (externalView == null || idealState == null) {
            // ExternalView may lag behind or be momentarily absent; keep polling.
            LOG.info("ExternalView or IdealState for table {} not available yet, waiting", tableName);
            continue;
          }
          Set<String> evPartitionSet = externalView.getPartitionSet();
          Set<String> isPartitionSet = idealState.getPartitionSet();
          if (evPartitionSet.equals(isPartitionSet)) {
            LOG.info("Table {} has reached stable state. i.e segments in external view match idealstates", tableName);
            break;
          }
        }
      } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can observe the interruption.
        Thread.currentThread().interrupt();
        LOG.error("Interrupted while waiting for table {} to reach stable state", tableName, e);
      }
      return updateSuccessful;
    } finally {
      // ZkClient holds a live ZooKeeper session; always release it.
      zkClient.close();
    }
  }
}