/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.falcon.metadata;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.falcon.FalconException;
import org.apache.falcon.hadoop.HadoopClientFactory;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.json.simple.JSONValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

/**
 * Utility called in the post process of oozie workflow to record lineage information.
*/ public class LineageRecorder extends Configured implements Tool { private static final Logger LOG = LoggerFactory.getLogger(LineageRecorder.class); public static void main(String[] args) throws Exception { ToolRunner.run(new LineageRecorder(), args); } @Override public int run(String[] arguments) throws Exception { CommandLine command = getCommand(arguments); LOG.info("Parsing lineage metadata from: {}", command); Map<String, String> lineageMetadata = getLineageMetadata(command); LOG.info("Lineage Metadata: {}", lineageMetadata); String lineageFile = getFilePath(command.getOptionValue(LineageArgs.LOG_DIR.getOptionName()), command.getOptionValue(LineageArgs.ENTITY_NAME.getOptionName()) ); LOG.info("Persisting lineage metadata to: {}", lineageFile); persistLineageMetadata(lineageMetadata, lineageFile); return 0; } protected static CommandLine getCommand(String[] arguments) throws ParseException { Options options = new Options(); for (LineageArgs arg : LineageArgs.values()) { addOption(options, arg); } return new GnuParser().parse(options, arguments); } private static void addOption(Options options, LineageArgs arg) { addOption(options, arg, true); } private static void addOption(Options options, LineageArgs arg, boolean isRequired) { Option option = arg.getOption(); option.setRequired(isRequired); options.addOption(option); } protected Map<String, String> getLineageMetadata(CommandLine command) { Map<String, String> lineageMetadata = new HashMap<String, String>(); for (LineageArgs arg : LineageArgs.values()) { lineageMetadata.put(arg.getOptionName(), arg.getOptionValue(command)); } return lineageMetadata; } public static String getFilePath(String logDir, String entityName) { return logDir + entityName + "-lineage.json"; } /** * this method is invoked from with in the workflow. 
* * @param lineageMetadata metadata to persist * @param lineageFile file to serialize the metadata * @throws IOException * @throws FalconException */ protected void persistLineageMetadata(Map<String, String> lineageMetadata, String lineageFile) throws IOException, FalconException { OutputStream out = null; Path file = new Path(lineageFile); try { FileSystem fs = HadoopClientFactory.get().createFileSystem(file.toUri(), getConf()); out = fs.create(file); // making sure falcon can read this file FsPermission permission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL); fs.setPermission(file, permission); out.write(JSONValue.toJSONString(lineageMetadata).getBytes()); } finally { if (out != null) { try { out.close(); } catch (IOException ignore) { // ignore } } } } public static Map<String, String> parseLineageMetadata(String lineageFile) throws FalconException { try { Path lineageDataPath = new Path(lineageFile); // file has 777 permissions FileSystem fs = HadoopClientFactory.get().createFileSystem(lineageDataPath.toUri()); if (fs.exists(lineageDataPath)) { BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(lineageDataPath))); return (Map<String, String>) JSONValue.parse(in); } return null; } catch (IOException e) { throw new FalconException("Error opening lineage file", e); } } }