/**
 * Copyright (C) 2014-2015 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.thirdeye.hadoop.backfill;

import java.io.File;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.io.Files;
import com.linkedin.pinot.common.utils.TarGzCompressionUtils;
import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;

/**
 * Contains APIs which are used for backfilling the pinot segments with star tree index
 */
public class BackfillControllerAPIs {

  private static final Logger LOGGER = LoggerFactory.getLogger(BackfillControllerAPIs.class);
  private HttpHost controllerHttpHost;
  private String tableName;

  private static final String SEGMENTS_ENDPOINT = "segments/";
  private static final String TABLES_ENDPOINT = "tables/";
  private static final String METADATA_ENDPOINT = "metadata";

  private static final String UTF_8 = "UTF-8";

  private static final String SEGMENT_NAME = "segment.name";
  private static final String SEGMENT_TABLE_NAME = "segment.table.name";
  private static final String SEGMENT_END_TIME = "segment.end.time";
  private static final String SEGMENT_START_TIME = "segment.start.time";
  private static final String SEGMENT_TIME_UNIT = "segment.time.unit";

  BackfillControllerAPIs(String controllerHost, int controllerPort, String tableName) {
    this.tableName = tableName;
    LOGGER.info("Connecting to {} {} table {}", controllerHost, controllerPort, tableName);
    controllerHttpHost = new HttpHost(controllerHost, controllerPort);
  }
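
  /*
   * Illustrative usage (not part of the original class): a minimal sketch of the backfill flow
   * this class supports. The controller host, port, table name, time range and HDFS path below
   * are hypothetical placeholders, and error handling for the checked exceptions is omitted.
   *
   *   BackfillControllerAPIs backfillApis = new BackfillControllerAPIs("localhost", 11984, "myTable");
   *   long startTime = 16800L; // hypothetical values, in the time unit used by the segment metadata
   *   long endTime = 16802L;
   *   List<String> allSegments = backfillApis.getAllSegments("myTable");
   *   List<String> segmentsToBackfill = backfillApis.findSegmentsInRange("myTable", allSegments, startTime, endTime);
   *   for (String segmentName : segmentsToBackfill) {
   *     backfillApis.downloadSegment(segmentName, new Path("/hypothetical/hdfs/backfill/path"));
   *   }
   */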

  /**
   * Downloads a segment from the controller, given the table name and segment name
   * @param segmentName - name of the segment to download
   * @param hdfsSegmentPath - hdfs path to which the downloaded segment should be copied
   * @throws IOException
   * @throws ArchiveException
   */
  public void downloadSegment(String segmentName, Path hdfsSegmentPath) throws IOException, ArchiveException {

    FileSystem fs = FileSystem.get(new Configuration());
    HttpClient controllerClient = new DefaultHttpClient();
    HttpGet req = new HttpGet(SEGMENTS_ENDPOINT + URLEncoder.encode(tableName, UTF_8)
        + "/" + URLEncoder.encode(segmentName, UTF_8));
    HttpResponse res = controllerClient.execute(controllerHttpHost, req);
    try {
      if (res.getStatusLine().getStatusCode() != 200) {
        throw new IllegalStateException(res.getStatusLine().toString());
      }
      LOGGER.info("Fetching segment {}", segmentName);
      InputStream content = res.getEntity().getContent();

      File tempDir = new File(Files.createTempDir(), "thirdeye_temp");
      tempDir.mkdir();
      LOGGER.info("Creating temporary dir for staging segments {}", tempDir);
      File tempSegmentDir = new File(tempDir, segmentName);
      File tempSegmentTar = new File(tempDir, segmentName + ThirdEyeConstants.TAR_SUFFIX);

      LOGGER.info("Downloading {} to {}", segmentName, tempSegmentTar);
      OutputStream out = new FileOutputStream(tempSegmentTar);
      try {
        IOUtils.copy(content, out);
      } finally {
        // Close the stream so the tar file is fully flushed to disk before it is extracted
        IOUtils.closeQuietly(out);
      }
      if (!tempSegmentTar.exists()) {
        throw new IllegalStateException("Download of " + segmentName + " unsuccessful");
      }

      LOGGER.info("Extracting segment {} to {}", tempSegmentTar, tempDir);
      TarGzCompressionUtils.unTar(tempSegmentTar, tempDir);
      File[] files = tempDir.listFiles(new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
          // Pick up the extracted segment directory, skipping the downloaded tar file itself
          return !name.endsWith(ThirdEyeConstants.TAR_SUFFIX) && new File(dir, name).isDirectory();
        }
      });
      if (files.length == 0) {
        throw new IllegalStateException("Failed to extract " + tempSegmentTar + " to " + tempDir);
      } else if (!files[0].getName().equals(tempSegmentDir.getName())) {
        LOGGER.info("Moving extracted segment to the segment dir {}", tempSegmentDir);
        FileUtils.moveDirectory(files[0], tempSegmentDir);
      }
      if (!tempSegmentDir.exists()) {
        throw new IllegalStateException("Failed to move " + files[0] + " to " + tempSegmentDir);
      }

      LOGGER.info("Copying segment from {} to hdfs {}", tempSegmentDir, hdfsSegmentPath);
      fs.copyFromLocalFile(new Path(tempSegmentDir.toString()), hdfsSegmentPath);
      Path hdfsSegmentDir = new Path(hdfsSegmentPath, segmentName);
      if (!fs.exists(hdfsSegmentDir)) {
        throw new IllegalStateException("Failed to copy segment " + segmentName + " from local path " + tempSegmentDir
            + " to hdfs path " + hdfsSegmentPath);
      }
    } finally {
      if (res.getEntity() != null) {
        EntityUtils.consume(res.getEntity());
      }
    }
    LOGGER.info("Successfully downloaded segment {} to {}", segmentName, hdfsSegmentPath);
  }

  /**
   * Given a time range and list of all segments for a table, returns all segments which are in the time range
   * @param tableName - table to which the segments belong
   * @param allSegments - list of all segment names for the table
   * @param startTime - start of the backfill range, in the same time unit as the segment metadata
   * @param endTime - end of the backfill range, in the same time unit as the segment metadata
   * @return list of segment names whose start time or end time falls within [startTime, endTime]
   * @throws Exception
   */
  public List<String> findSegmentsInRange(String tableName, List<String> allSegments, long startTime, long endTime)
      throws Exception {
    List<String> segmentsInRange = new ArrayList<>();
    for (String segmentName : allSegments) {
      Map<String, String> metadata = getSegmentMetadata(tableName, segmentName);
      long segmentStartTime = Long.valueOf(metadata.get(SEGMENT_START_TIME));
      long segmentEndTime = Long.valueOf(metadata.get(SEGMENT_END_TIME));
      String segmentTableName = metadata.get(SEGMENT_TABLE_NAME);

      // TODO:
      // Using time value directly for now, as we only have time unit and not time size in metadata
      // Once we have time size in metadata, we can accept the time in millis and then convert time from metadata accordingly
      if (segmentTableName.equals(tableName)
          && ((segmentStartTime >= startTime && segmentStartTime <= endTime)
              || (segmentEndTime >= startTime && segmentEndTime <= endTime))) {
        LOGGER.info("Segment name : {}, Segment start : {}, Segment end : {}, Segment table : {}",
            segmentName, segmentStartTime, segmentEndTime, segmentTableName);
        segmentsInRange.add(segmentName);
      }
    }
    return segmentsInRange;
  }
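
  /*
   * Illustrative example (not from the original source) of the range check above, assuming segment
   * times are expressed in days since epoch: for a backfill range [16800, 16802], a segment with
   * segment.start.time = 16801 and segment.end.time = 16803 is selected because its start time
   * falls inside the range, while a segment with start 16795 and end 16798 is skipped because
   * neither endpoint falls inside the range. A segment that fully encloses the range (e.g. start
   * 16790, end 16810) is also skipped by the current check, since neither endpoint lies within it.
   */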

  /**
   * Fetches the list of all segment names for a table
   * @param tableName - table for which to fetch segment names
   * @return list of segment names
   * @throws IOException
   */
  public List<String> getAllSegments(String tableName) throws IOException {
    List<String> allSegments = new ArrayList<>();

    HttpClient controllerClient = new DefaultHttpClient();
    HttpGet req = new HttpGet(SEGMENTS_ENDPOINT + URLEncoder.encode(tableName, UTF_8));
    HttpResponse res = controllerClient.execute(controllerHttpHost, req);
    try {
      if (res.getStatusLine().getStatusCode() != 200) {
        throw new IllegalStateException(res.getStatusLine().toString());
      }
      InputStream content = res.getEntity().getContent();
      String response = IOUtils.toString(content);
      List<String> allSegmentsPaths = getSegmentsFromResponse(response);
      for (String segment : allSegmentsPaths) {
        // The controller returns segment paths; keep only the segment name after the last '/'
        allSegments.add(segment.substring(segment.lastIndexOf("/") + 1));
      }
      LOGGER.info("All segments : {}", allSegments);
    } finally {
      if (res.getEntity() != null) {
        EntityUtils.consume(res.getEntity());
      }
    }
    return allSegments;
  }

  /**
   * Returns the metadata of a segment, given the segment name and table name
   * @param tableName - table where segment resides
   * @param segmentName - name of the segment
   * @return map of segment metadata property names to values
   * @throws IOException
   */
  public Map<String, String> getSegmentMetadata(String tableName, String segmentName) throws IOException {
    Map<String, String> metadata = null;

    HttpClient controllerClient = new DefaultHttpClient();
    HttpGet req = new HttpGet(TABLES_ENDPOINT + URLEncoder.encode(tableName, UTF_8)
        + "/" + SEGMENTS_ENDPOINT + URLEncoder.encode(segmentName, UTF_8) + "/" + METADATA_ENDPOINT);
    HttpResponse res = controllerClient.execute(controllerHttpHost, req);
    try {
      if (res.getStatusLine().getStatusCode() != 200) {
        throw new IllegalStateException(res.getStatusLine().toString());
      }
      InputStream content = res.getEntity().getContent();
      String metadataResponse = IOUtils.toString(content);
      metadata = getMetadataFromResponse(metadataResponse);
    } finally {
      if (res.getEntity() != null) {
        EntityUtils.consume(res.getEntity());
      }
    }
    return metadata;
  }

  /**
   * Strips brackets and quotes from the controller response and splits it on commas into segment paths
   */
  private List<String> getSegmentsFromResponse(String response) {
    String[] allSegments = response.replaceAll("\\[|\\]|\"", "").split(",");
    return Arrays.asList(allSegments);
  }

  /**
   * Strips braces, brackets, quotes and escape characters from the metadata response
   * and splits the remainder into key:value pairs
   */
  private Map<String, String> getMetadataFromResponse(String response) {
    Map<String, String> metadata = new HashMap<>();
    String cleanUpResponse = response.replaceAll("\\[|\\]|\"|\\{|\\}|\\\\", "");
    String[] allProperties = cleanUpResponse.replace("state:", "").split(",");
    for (String property : allProperties) {
      String[] tokens = property.split(":", 2);
      metadata.put(tokens[0], tokens[1]);
    }
    return metadata;
  }
}
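
/*
 * Illustrative example of the metadata parsing above (the response shape is an assumption,
 * not taken from the original source): for a metadata response such as
 *   {"segment.table.name":"myTable","segment.start.time":"16800","segment.end.time":"16801","segment.time.unit":"DAYS"}
 * getMetadataFromResponse would strip the braces and quotes and produce a map with entries
 *   segment.table.name -> myTable, segment.start.time -> 16800,
 *   segment.end.time -> 16801, segment.time.unit -> DAYS,
 * which is how findSegmentsInRange reads the segment start and end times.
 */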