/* (c) 2014 LinkedIn Corp. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use * this file except in compliance with the License. You may obtain a copy of the * License at http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. */ package com.linkedin.cubert.analyzer.physical; import static com.linkedin.cubert.utils.JsonUtils.getText; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.util.HashMap; import java.util.Map; import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; import org.codehaus.jackson.JsonNode; import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jackson.node.ArrayNode; import org.codehaus.jackson.node.ObjectNode; import com.linkedin.cubert.utils.FileSystemUtils; /** * Analyzes the plan for cached files. * * @author Maneesh Varshney * */ public class CachedFileAnalyzer implements PlanRewriter { private final Configuration conf = new JobConf(); private static String cleanLatestTag(String fname){ if (fname.contains("#LATEST")) return fname.replace("#LATEST", "!LATEST"); return fname; } private static String restoreLatestTag(String fname){ if (fname.contains("!LATEST")) return fname.replace("!LATEST", "#LATEST"); return fname; } @Override public JsonNode rewrite(JsonNode plan, Set<String> namesUsed, boolean debugMode, boolean revisit) throws IOException { FileSystem fs = FileSystem.get(conf); Map<String, String> symlinkMap = new HashMap<String, String>(); ObjectMapper mapper = new ObjectMapper(); int symlinkCounter = 0; for (JsonNode job : plan.path("jobs")) { if (job.has("cachedFiles") && !job.get("cachedFiles").isNull()) { ArrayNode cachedFiles = mapper.createArrayNode(); for (JsonNode file : job.path("cachedFiles")) { String filename = file.getTextValue(); filename = cleanLatestTag(filename); URI uri = null; String path, fragment; try { uri = new URI(filename); path = uri.getPath(); path = restoreLatestTag(path); fragment = uri.getFragment(); if (path.contains("#LATEST")) { path = FileSystemUtils.getLatestPath(fs, new Path(path)) .toString(); path = new URI(path).getPath(); } } catch (URISyntaxException e) { throw new PlanRewriteException(e); } // check if the fragment was already created earlier if (fragment == null) fragment = symlinkMap.get(path); // create a new one if (fragment == null) fragment = "cached_" + (symlinkCounter++); symlinkMap.put(path, fragment); // if (fs.isDirectory(new Path(path))) // { // Path childPath = null; // // FileStatus[] children = fs.globStatus(new Path(path + "/*")); // for (FileStatus child : children) // { // childPath = child.getPath(); // // if (fs.isDirectory(childPath)) // continue; // // String name = childPath.getName(); // if (name.startsWith("_") || name.startsWith(".")) // continue; // // break; // } // // if (childPath == null) // throw new IOException("No files found in directory: " + path); // // path = childPath.toString(); // } cachedFiles.add(path + "#" + fragment); } ((ObjectNode) job).put("cachedFiles", cachedFiles); } } new PhysicalPlanWalker(plan, new AddSymlinksToCachedPath(symlinkMap)).walk(); return plan; } static final class AddSymlinksToCachedPath extends PhysicalPlanVisitor { private final Map<String, String> map; private final Configuration conf = new JobConf(); AddSymlinksToCachedPath(Map<String, String> map) { this.map = map; } @Override public void visitOperator(JsonNode json, boolean isMapper) { String type = getText(json, "operator"); if (type.equals("LOAD_CACHED_FILE") || type.equals("DICT_ENCODE") || type.equals("DICT_DECODE")) { if (!json.has("path")) return; try { String originalPath = getText(json, "path"); originalPath = cleanLatestTag(originalPath); URI uri = new URI(originalPath); String path = uri.getPath(); path = restoreLatestTag(path); if (path.contains("#LATEST")) { path = FileSystemUtils.getLatestPath(FileSystem.get(conf), new Path(path)) .toString(); path = new URI(path).getPath(); } if (map.containsKey(path)) { String fragment = map.get(path); ((ObjectNode) json).put("path", path + "#" + fragment); } } catch (URISyntaxException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e){ throw new RuntimeException(e); } } } } }