/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.test; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintStream; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.HashMap; import java.util.Properties; import org.junit.Test; import junit.framework.TestCase; import org.apache.pig.PigServer; import org.apache.pig.ExecType; import org.apache.pig.data.Tuple; import org.apache.pig.impl.PigContext; import org.apache.pig.impl.plan.OperatorKey; import org.apache.pig.impl.logicalLayer.* ; import org.apache.pig.impl.logicalLayer.parser.* ; public class TestPigScriptParser extends TestCase { @Test public void testParserWithEscapeCharacters() throws Exception { // All the needed variables Map<LogicalOperator, LogicalPlan> aliases = new HashMap<LogicalOperator, LogicalPlan>(); Map<OperatorKey, LogicalOperator> opTable = new HashMap<OperatorKey, LogicalOperator>() ; Map<String, LogicalOperator> aliasOp = new HashMap<String, LogicalOperator>() ; Map<String, String> fileNameMap = new HashMap<String, String>(); PigContext pigContext = new PigContext(ExecType.LOCAL, new Properties()) ; pigContext.connect(); String tempFile = this.prepareTempFile() ; // Start the real parsing job { // Initial statement String query = String.format("A = LOAD '%s' ;", Util.encodeEscape(tempFile)) ; ByteArrayInputStream in = new ByteArrayInputStream(query.getBytes()); QueryParser parser = new QueryParser(in, pigContext, "scope", aliases, opTable, aliasOp, fileNameMap) ; LogicalPlan lp = parser.Parse() ; } { // Normal condition String query = "B1 = filter A by $0 eq 'This is a test string' ;" ; checkParsedConstContent(aliases, opTable, pigContext, aliasOp, fileNameMap, query, "This is a test string") ; } { // single-quote condition String query = "B2 = filter A by $0 eq 'This is a test \\'string' ;" ; checkParsedConstContent(aliases, opTable, pigContext, aliasOp, fileNameMap, query, "This is a test 'string") ; } { // escaping dot // the reason we have 4 backslashes below is we really want to put two backslashes but // since this is to be represented in a Java String, we escape each backslash with one more // backslash - hence 4. In a pig script in a file, this would be // \\.string String query = "B2 = filter A by $0 eq 'This is a test \\\\.string' ;" ; checkParsedConstContent(aliases, opTable, pigContext, aliasOp, fileNameMap, query, "This is a test \\.string") ; } { // newline condition String query = "B3 = filter A by $0 eq 'This is a test \\nstring' ;" ; checkParsedConstContent(aliases, opTable, pigContext, aliasOp, fileNameMap, query, "This is a test \nstring") ; } { // Unicode String query = "B4 = filter A by $0 eq 'This is a test \\uD30C\\uC774string' ;" ; checkParsedConstContent(aliases, opTable, pigContext, aliasOp, fileNameMap, query, "This is a test \uD30C\uC774string") ; } } @Test public void testDefineUDF() throws Exception { String inputData[] = { "dshfdskfwww.xyz.com/sportsjoadfjdslpdshfdskfwww.xyz.com/sportsjoadfjdsl" , "kas;dka;sd" , "jsjsjwww.xyz.com/sports" , "jsdLSJDcom/sports" , "wwwJxyzMcom/sports" }; File f = Util.createFile(inputData); String[] queryLines = new String[] { // the reason we have 4 backslashes below is we really want to put two backslashes but // since this is to be represented in a Java String, we escape each backslash with one more // backslash - hence 4. In a pig script in a file, this would be // www\\.xyz\\.com "define minelogs org.apache.pig.test.RegexGroupCount('www\\\\.xyz\\\\.com/sports');" , "A = load 'file://" + f.getAbsolutePath() + "' using PigStorage() as (source : chararray);" , "B = foreach A generate minelogs(source) as sportslogs;" }; PigServer ps = new PigServer(ExecType.LOCAL); for (String line : queryLines) { ps.registerQuery(line); } Iterator<Tuple> it = ps.openIterator("B"); int[] expectedResults = new int[] {2,0,1,0,0}; int i = 0; while(it.hasNext()) { Tuple t = it.next(); assertEquals(expectedResults[i++], t.get(0)); } } private void checkParsedConstContent(Map<LogicalOperator, LogicalPlan> aliases, Map<OperatorKey, LogicalOperator> opTable, PigContext pigContext, Map<String, LogicalOperator> aliasOp, Map<String, String> fileNameMap, String query, String expectedContent) throws Exception { // Run the parser ByteArrayInputStream in = new ByteArrayInputStream(query.getBytes()); QueryParser parser = new QueryParser(in, pigContext, "scope", aliases, opTable, aliasOp, fileNameMap) ; LogicalPlan lp = parser.Parse() ; // Digging down the tree LogicalOperator root = lp.getRoots().get(0) ; LogicalOperator filter = lp.getSuccessors(root).get(0); LogicalPlan comparisonPlan = ((LOFilter)filter).getComparisonPlan(); List<LogicalOperator> comparisonPlanRoots = comparisonPlan.getRoots(); LogicalOperator compRootOne = comparisonPlanRoots.get(0); LogicalOperator compRootTwo = comparisonPlanRoots.get(1); // Here is the actual check logic if (compRootOne instanceof LOConst) { assertTrue("Must be equal", ((String)((LOConst)compRootOne).getValue()).equals(expectedContent)) ; } // If not left, it must be right. else { assertTrue("Must be equal", ((String)((LOConst)compRootTwo).getValue()).equals(expectedContent)) ; } } private String prepareTempFile() throws IOException { File inputFile = File.createTempFile("test", "txt"); inputFile.deleteOnExit() ; PrintStream ps = new PrintStream(new FileOutputStream(inputFile)); ps.println("hohoho") ; ps.close(); return inputFile.getPath() ; } }