/* * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and limitations under the License. */ package org.apache.pig.pigunit.pig; import java.io.IOException; import java.io.Reader; import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Slightly modified GruntParser that accepts a list of aliases to override. * * <p>This is a way to replace a pig query by another query. * * <p>For example, if we have this map of overrides: Map<alias,query> * <ul> * <li><A, A = LOAD '/path'> --> replace the alias A by A = LOAD '/path'</li> * <li><DUMP, > --> remove the DUMP queries</li> * </ul> * * <p>It might be possible to do the same thing in a less hacky way. * e.g. pig.registerQuery replace the query of a certain alias... */ public class GruntParser extends org.apache.pig.tools.grunt.GruntParser { /** A mapping <alias,query> to apply to the pig script. */ private final Map<String, String> aliasOverride; /** * Initializes the Pig parser with its list of aliases to override. * * @param stream The Pig script stream. * @param aliasOverride The list of aliases to override in the Pig script. */ public GruntParser(Reader stream, Map<String, String> aliasOverride) { super(stream); this.aliasOverride = aliasOverride; } /** * Pig relations that have been blanked are dropped. */ @Override protected void processPig(String cmd) throws IOException { String command = override(cmd); if (! command.equals("")) { super.processPig(command); } } /** * Overrides the relations of the pig script that we want to change. * * @param query The current pig query processed by the parser. * @return The same query, or a modified query, or blank. */ public String override(String query) { // a path to be prepended to all the file names in the script String fsRoot = System.getProperty("pigunit.filesystem.prefix"); if(fsRoot != null) { query = Pattern.compile("(LOAD\\s+'(([^:/?#]+)://)?)", Pattern.CASE_INSENSITIVE). matcher(query). replaceFirst("$1" + fsRoot); query = Pattern.compile("(STORE\\s+([^']+)\\s+INTO\\s+'(([^:/?#]+)://)?)", Pattern.CASE_INSENSITIVE). matcher(query). replaceFirst("$1" + fsRoot); } Map<String, String> metaData = new HashMap<String, String>(); for (Entry<String, String> alias : aliasOverride.entrySet()) { saveLastStoreAlias(query, metaData); if (query.toLowerCase().startsWith(alias.getKey().toLowerCase() + " ")) { System.out.println( String.format("%s\n--> %s", query, alias.getValue() == "" ? "none" : alias.getValue())); query = alias.getValue(); } } aliasOverride.putAll(metaData); return query; } /** * Saves the name of the alias of the last store. * * <p>Maybe better to replace it by PigServer.getPigContext().getLastAlias(). */ void saveLastStoreAlias(String cmd, Map<String, String> metaData) { if (cmd.toUpperCase().startsWith("STORE")) { Pattern outputFile = Pattern.compile("STORE +([^']+) INTO.*", Pattern.CASE_INSENSITIVE); Matcher matcher = outputFile.matcher(cmd); if (matcher.matches()) { metaData.put("LAST_STORE_ALIAS", matcher.group(1)); } } } }