/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.piggybank.test.storage;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.builtin.mock.Storage;
import org.apache.pig.data.Tuple;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
import static org.apache.pig.builtin.mock.Storage.resetData;
import static org.apache.pig.builtin.mock.Storage.tuple;
import static org.apache.pig.builtin.mock.Storage.map;
import static org.junit.Assert.assertEquals;
public class TestLogFormatLoader {
@Test
public void testLogFormatLoader() throws Exception {
final String logformat = "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" \"%{Cookie}i\"";
final String testLine = "2001:980:91c0:1:8d31:a232:25e5:85d - - [05/Sep/2010:11:27:50 +0200] " +
"\"GET /b/ss/advbolprod2/1/H.22.1/s73176445413647?AQB=1&pccr=true&vidn=27F07A1B85012045-403" +
"&&ndh=1&t=19%2F5%2F2012%2023%3A51%3A27%202%20-120&ce=UTF-8&ns=bol&pageName=%2Fnl%2Fp%2Ffissler-" +
"speciaal-pannen-grillpan-28-x-28-cm%2F9200000002876066%2F&g=http%3A%2F%2Fwww.bol.com%2Fnl%2Fp%2F" +
"fissler-speciaal-pannen-grillpan-28-x-28-cm%2F9200000002876066%2F%3Fpromo%3Dkoken-pannen_303_hs-" +
"koken-pannen-afj-120601_B3_product_1_9200000002876066%26bltg.pg_nm%3Dkoken-pannen%26bltg.slt_id%3D" +
"303%26bltg.slt_nm%3Dhs-koken-pannen-afj-120601%26bltg.slt_p&r=http%3A%2F%2Fwww.bol.com%2Fnl%2Fm%2F" +
"koken-tafelen%2Fkoken-pannen%2FN%2F11766%2Findex.html%3Fblabla%3Dblablawashere&cc=EUR&ch=D%3Dv3&" +
"server=ps316&events=prodView%2Cevent1%2Cevent2%2Cevent31&products=%3B9200000002876066%3B%3B%3B%3B" +
"evar3%3Dkth%7Cevar8%3D9200000002876066_Fissler%20Speciaal%20Pannen%20-%20Grillpan%20-%2028%20x%2028" +
"%20cm%7Cevar35%3D170%7Cevar47%3DKTH%7Cevar9%3DNew%7Cevar40%3Dno%20reviews%2C%3B%3B%3B%3Bevent31%3D423" +
"&c1=catalog%3Akth%3Aproduct-detail&v1=D%3Dc1&h1=catalog%2Fkth%2Fproduct-detail&h2=D%3DpageName&v3=kth" +
"&l3=endeca_001-mensen_default%2Cendeca_exact-boeken_default%2Cendeca_verschijningsjaar_default%2C" +
"endeca_hardgoodscategoriesyn_default%2Cendeca_searchrank-hadoop_default%2Cendeca_genre_default%2C" +
"endeca_uitvoering_default&v4=ps316&v6=koken-pannen_303_hs-koken-pannen-afj-120601_B3_product_1_" +
"9200000002876066&v10=Tu%2023%3A30&v12=logged%20in&v13=New&c25=niet%20ssl&c26=3631&" +
"c30=1.2.3.4.1323208998208762&v31=2000285551&c45=20120619235127&c46=20120501%204.3.4.1&" +
"c47=D%3Ds_vi&c49=%2Fnl%2Fcatalog%2Fproduct-detail.jsp&c50=%2Fnl%2Fcatalog%2Fproduct-detail.jsp&" +
"v51=www.bol.com&s=1280x800&c=24&j=1.7&v=N&k=Y&bw=1280&bh=272&p=Shockwave%20Flash%3B&AQE=1 " +
"HTTP/1.1\" 200 23617 \"http://www.google.nl/imgres?imgurl=http://daniel_en_sander.basjes.nl/" +
"fotos/geboorte-kaartje/geboortekaartje-binnenkant.jpg&imgrefurl=http://daniel_en_sander.basjes.nl/" +
"fotos/geboorte-kaartje&usg=__LDxRMkacRs6yLluLcIrwoFsXY6o=&h=521&w=1024&sz=41&hl=nl&start=13&zoom=1" +
"&um=1&itbs=1&tbnid=Sqml3uGbjoyBYM:&tbnh=76&tbnw=150&prev=/images%3Fq%3Dbinnenkant%2Bgeboortekaartje" +
"%26um%3D1%26hl%3Dnl%26sa%3DN%26biw%3D1882%26bih%3D1014%26tbs%3Disch:1\" " +
"\"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; nl-nl) AppleWebKit/533.17.8 (KHTML, like Gecko) " +
"Version/5.0.1 Safari/533.17.8\" \"jquery-ui-theme=Eggplant; BuI=SomeThing; " +
"Apache=127.0.0.1.1351111543699529\"";
PigServer pigServer = new PigServer(ExecType.LOCAL);
Storage.Data data = resetData(pigServer);
ArrayList<String[]> input = new ArrayList<String[]>();
input.add(new String[] { testLine });
String filename = TestHelper.createTempFile(input, " ");
filename = filename.replace("\\", "\\\\");
pigServer.registerQuery(
"Clicks = " +
" LOAD '" + filename + "' " +
" USING org.apache.pig.piggybank.storage.apachelog.LogFormatLoader(" +
" '"+logformat+"'," +
" 'IP:connection.client.host'," +
" 'TIME.STAMP:request.receive.time'," +
" '-map:request.firstline.uri.query.g:HTTP.URI'," +
" 'STRING:request.firstline.uri.query.g.query.promo'," +
" 'STRING:request.firstline.uri.query.g.query.*'," +
" 'STRING:request.firstline.uri.query.s'," +
" '-map:request.firstline.uri.query.r:HTTP.URI'," +
" 'STRING:request.firstline.uri.query.r.query.blabla'," +
" 'HTTP.COOKIE:request.cookies.bui'," +
" 'HTTP.USERAGENT:request.user-agent'" +
" )" +
" AS (" +
" ConnectionClientHost," +
" RequestReceiveTime," +
" Promo," +
" QueryParams:map[]," +
" ScreenResolution," +
" GoogleQuery," +
" BUI," +
" RequestUseragent" +
" );"
);
pigServer.registerQuery("STORE Clicks INTO 'Clicks' USING mock.Storage();");
List<Tuple> out = data.get("Clicks");
assertEquals(1, out.size());
Tuple actual = out.get(0);
Tuple expected = tuple(
"2001:980:91c0:1:8d31:a232:25e5:85d",
"05/Sep/2010:11:27:50 +0200",
"koken-pannen_303_hs-koken-pannen-afj-120601_B3_product_1_9200000002876066",
map(
"promo" , "koken-pannen_303_hs-koken-pannen-afj-120601_B3_product_1_9200000002876066",
"bltg.pg_nm" , "koken-pannen",
"bltg.slt_nm" , "hs-koken-pannen-afj-120601",
"bltg.slt_id" , "303",
"bltg.slt_p" , ""
),
"1280x800",
"blablawashere",
"SomeThing",
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; nl-nl) AppleWebKit/533.17.8 " +
"(KHTML, like Gecko) Version/5.0.1 Safari/533.17.8"
);
assertEquals(expected, actual);
}
}