/** * Copyright 2010 Nube Technologies * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and limitations under the License. */ package co.nubetech.hiho.job; import java.io.File; import java.io.IOException; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; import java.util.List; import java.util.Random; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.ToolRunner; import org.apache.log4j.Logger; import org.apache.pig.PigServer; import org.apache.pig.backend.executionengine.ExecException; import org.hsqldb.Server; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; import co.nubetech.hiho.common.HIHOException; import co.nubetech.hiho.common.HihoTestCase; public class TestDBQueryInputJobWithCluster extends HihoTestCase { final static Logger logger = Logger .getLogger(co.nubetech.hiho.job.TestDBQueryInputJobWithCluster.class); private static final String DB_URL = "jdbc:hsqldb:hsql://localhost/URLAccess"; private static final String DRIVER_CLASS = "org.hsqldb.jdbcDriver"; private static Server server; private static Connection connection; @Test public void testBasicTableImport() throws Exception{ DBQueryInputJob job = new DBQueryInputJob(); String[] args = new String[] {"-jdbcDriver", "org.hsqldb.jdbcDriver", "-jdbcUrl", "jdbc:hsqldb:hsql://localhost/URLAccess", // "-jdbcUsername", "", // "-jdbcPassword", "", "-outputPath", "testBasicTableImport", "-outputStrategy", "delimited", "-delimiter", "DELIM", "-numberOfMappers", "2", "-inputTableName", "Pageview", "-inputOrderBy", "pageview" }; int res = ToolRunner.run(createJobConf(), job, args); assertEquals(0, res); //lets verify the result now FileSystem outputFS = getFileSystem(); //Path outputPath = getOutputDir(); Path outputPath = new Path(outputFS.getHomeDirectory(), "testBasicTableImport"); FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter()); assertTrue(outputFS.exists(outputPath)); List<String> expectedOutput = new ArrayList<String>(); expectedOutput.add("/aDELIM1000"); expectedOutput.add("/bDELIM2000"); expectedOutput.add("/cDELIM3000"); expectedOutput.add("/dDELIM4000"); expectedOutput.add("/eDELIM5000"); expectedOutput.add("/fDELIM6000"); expectedOutput.add("/gDELIM7000"); expectedOutput.add("/hDELIM8000"); expectedOutput.add("/iDELIM9000"); expectedOutput.add("/jDELIM10000"); int count = 0; for (FileStatus fileStat: status) { logger.debug("File status is " + fileStat.getPath()); FSDataInputStream in = outputFS.open(fileStat.getPath()); String line = null; while ((line = in.readLine()) != null) { logger.debug("Output is " + line); assertTrue("Matched output " + line , expectedOutput.contains(line)); expectedOutput.remove(line); count++; } in.close(); } assertEquals(10, count); } //Still to complete @Test public void testBasicAvroTableImport() throws Exception{ DBQueryInputJob job = new DBQueryInputJob(); String[] args = new String[] {"-jdbcDriver", "org.hsqldb.jdbcDriver", "-jdbcUrl", "jdbc:hsqldb:hsql://localhost/URLAccess", "-outputPath", "testQueryBasedImport", "-inputQuery", "select url,pageview,commentCount from Pageview, PageComment where Pageview.url = PageComment.url", "-inputBoundingQuery", "select min(commentCount), max(commentCount) from PageComment", "-outputStrategy", "AVRO", "-delimiter", "DELIM", "-numberOfMappers", "2", "-inputOrderBy", "Pageview.pageview" }; int res = ToolRunner.run(createJobConf(), job, args); assertEquals(0, res); //lets verify the result now FileSystem outputFS = getFileSystem(); //Path outputPath = getOutputDir(); Path outputPath = new Path(outputFS.getHomeDirectory(), "testBasicTableImport"); FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter()); assertTrue(outputFS.exists(outputPath)); /* List<String> expectedOutput = new ArrayList<String>(); expectedOutput.add("/aDELIM1000"); expectedOutput.add("/bDELIM2000"); expectedOutput.add("/cDELIM3000"); expectedOutput.add("/dDELIM4000"); expectedOutput.add("/eDELIM5000"); expectedOutput.add("/fDELIM6000"); expectedOutput.add("/gDELIM7000"); expectedOutput.add("/hDELIM8000"); expectedOutput.add("/iDELIM9000"); expectedOutput.add("/jDELIM10000"); int count = 0; for (FileStatus fileStat: status) { logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory()); FSDataInputStream in = outputFS.open(fileStat.getPath()); String line = null; while ((line = in.readLine()) != null) { logger.debug("Output is " + line); assertTrue("Matched output " + line , expectedOutput.contains(line)); expectedOutput.remove(line); count++; } in.close(); } assertEquals(10, count); */ } @Test public void testQueryBasedImport() throws Exception{ DBQueryInputJob job = new DBQueryInputJob(); String[] args = new String[] {"-jdbcDriver", "org.hsqldb.jdbcDriver", "-jdbcUrl", "jdbc:hsqldb:hsql://localhost/URLAccess", "-outputPath", "testQueryBasedImport", "-inputQuery", "select url,pageview,commentCount from Pageview, PageComment where Pageview.url = PageComment.url", "-inputBoundingQuery", "select min(commentCount), max(commentCount) from PageComment", "-outputStrategy", "delimited", "-delimiter", "DELIM", "-numberOfMappers", "2", "-inputOrderBy", "Pageview.pageview" }; int res = ToolRunner.run(createJobConf(), job, args); assertEquals(0, res); //lets verify the result now FileSystem outputFS = getFileSystem(); Path outputPath = new Path(outputFS.getHomeDirectory(), "testQueryBasedImport"); FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter()); assertTrue(outputFS.exists(outputPath)); List<String> expectedOutput = new ArrayList<String>(); expectedOutput.add("/aDELIM1000DELIM10"); expectedOutput.add("/bDELIM2000DELIM10"); expectedOutput.add("/cDELIM3000DELIM10"); expectedOutput.add("/dDELIM4000DELIM10"); expectedOutput.add("/eDELIM5000DELIM10"); expectedOutput.add("/fDELIM6000DELIM10"); expectedOutput.add("/gDELIM7000DELIM10"); expectedOutput.add("/hDELIM8000DELIM10"); expectedOutput.add("/iDELIM9000DELIM10"); expectedOutput.add("/jDELIM10000DELIM10"); int count = 0; for (FileStatus fileStat: status) { logger.debug("File status is " + fileStat.getPath()); FSDataInputStream in = outputFS.open(fileStat.getPath()); String line = null; while ((line = in.readLine()) != null) { logger.debug("Output is " + line); assertTrue("Matched output " + line , expectedOutput.contains(line)); expectedOutput.remove(line); count++; } in.close(); } assertEquals(10, count); } @Test public void testGeneratePigScript() throws Exception, HIHOException { DBQueryInputJob job = new DBQueryInputJob(); String[] args = new String[] { "-jdbcDriver", "org.hsqldb.jdbcDriver", "-jdbcUrl", "jdbc:hsqldb:hsql://localhost/URLAccess", "-inputLoadTo", "pig", "-inputLoadToPath", "/tmp", "-outputPath", "output", "-outputStrategy", "delimited", "-delimiter", ",", "-numberOfMappers", "2", "-inputTableName", "Pageview" , "-inputOrderBy", "pageview" }; int res = ToolRunner.run(createJobConf(), job, args); assertEquals(0, res); // lets verify the result now File pigScript = new File("/tmp/pigScript.txt"); if(!(pigScript.exists())){ throw new HIHOException("Unable to generate Pig script"); } logger.debug("Pig script output is " + pigScript.exists()); } //still to complete. @Test public void testContentOfPigScript() throws ExecException, IOException { PigServer pigServer = new PigServer("LOCAL"); pigServer.registerScript("/tmp/pigScript.txt"); //pigServer.registerQuery("A = LOAD 'mapreduce.jdbc.hiho.input.outputPath' USING PigStorage(',') AS (URL:chararray,PAGEVIEW:long);"); /*pigServer.dumpSchema("A") ; String s; InputStream fileWithStdOutContents = new DataInputStream( new BufferedInputStream( new FileInputStream(new File("stdout.redirected")))); BufferedReader reader = new BufferedReader(new InputStreamReader(fileWithStdOutContents)); while ((s = reader.readLine()) != null) { assertTrue(s.equals("a: {field1: int,field2: float,field3: chararray}") == true); } reader.close();*/ } private static void startHsqldbServer() { server = new Server(); server.setDatabasePath(0, System.getProperty("test.build.data", "/tmp") + "/URLAccess"); server.setDatabaseName(0, "URLAccess"); server.start(); } private static void createConnection(String driverClassName, String url) throws Exception { Class.forName(driverClassName); connection = DriverManager.getConnection(url); connection.setAutoCommit(false); } @AfterClass public static void shutdown() { try { connection.commit(); connection.close(); } catch (Throwable ex) { logger.warn("Exception occurred while closing connection :" + ex); } finally { try { if (server != null) { server.shutdown(); } } catch (Throwable ex) { logger.warn("Exception occurred while shutting down HSQLDB :" + ex); } } } @BeforeClass public static void initialize() throws Exception { startHsqldbServer(); createConnection(DRIVER_CLASS, DB_URL); dropTables(); createTables(); populateAccess(); verify(); } private static void dropTables() { String dropAccess = "DROP TABLE Access"; String dropPageview = "DROP TABLE Pageview"; String dropPageComment="DROP Table PageComment"; Statement st = null; try { st = connection.createStatement(); st.executeUpdate(dropAccess); st.executeUpdate(dropPageview); st.executeUpdate(dropPageComment); connection.commit(); st.close(); } catch (SQLException ex) { try { if (st != null) { st.close(); } } catch (Exception e) { } } } private static void createTables() throws SQLException { String createAccess = "CREATE TABLE " + "Access(url VARCHAR(100) NOT NULL," + " referrer VARCHAR(100)," + " time BIGINT NOT NULL, " + " PRIMARY KEY (url, time))"; String createPageview = "CREATE TABLE " + "Pageview(url VARCHAR(100) NOT NULL," + " pageview BIGINT NOT NULL, " + " PRIMARY KEY (url))"; String createPageComment = "CREATE TABLE " + "PageComment(url VARCHAR(100) NOT NULL," + " commentCount BIGINT NOT NULL, " + " PRIMARY KEY (url))"; Statement st = connection.createStatement(); try { st.executeUpdate(createAccess); st.executeUpdate(createPageview); st.executeUpdate(createPageComment); connection.commit(); } finally { st.close(); } } /** * Populates the Access table with generated records. */ private static void populateAccess() throws SQLException { PreparedStatement statement = null; try { statement = connection .prepareStatement("INSERT INTO Access(url, referrer, time)" + " VALUES (?, ?, ?)"); Random random = new Random(); int time = random.nextInt(50) + 50; final int PROBABILITY_PRECISION = 100; // 1 / 100 final int NEW_PAGE_PROBABILITY = 15; // 15 / 100 // Pages in the site : String[] pages = { "/a", "/b", "/c", "/d", "/e", "/f", "/g", "/h", "/i", "/j" }; // linkMatrix[i] is the array of pages(indexes) that page_i links // to. int[][] linkMatrix = { { 1, 5, 7 }, { 0, 7, 4, 6, }, { 0, 1, 7, 8 }, { 0, 2, 4, 6, 7, 9 }, { 0, 1 }, { 0, 3, 5, 9 }, { 0 }, { 0, 1, 3 }, { 0, 2, 6 }, { 0, 2, 6 } }; // a mini model of user browsing a la pagerank int currentPage = random.nextInt(pages.length); String referrer = null; for (int i = 0; i < time; i++) { statement.setString(1, pages[currentPage]); statement.setString(2, referrer); statement.setLong(3, i); statement.execute(); int action = random.nextInt(PROBABILITY_PRECISION); // go to a new page with probability // NEW_PAGE_PROBABILITY / PROBABILITY_PRECISION if (action < NEW_PAGE_PROBABILITY) { currentPage = random.nextInt(pages.length); // a random page referrer = null; } else { referrer = pages[currentPage]; action = random.nextInt(linkMatrix[currentPage].length); currentPage = linkMatrix[currentPage][action]; } } PreparedStatement statement1 = connection .prepareStatement("INSERT INTO Pageview(url, pageview)" + " VALUES (?, ?)"); int i = 1; for (String page: pages) { statement1.setString(1, page); statement1.setInt(2, 1000*i); i++; statement1.execute(); } PreparedStatement statement2 = connection .prepareStatement("INSERT INTO PageComment(url, commentCount)" + " VALUES (?, ?)"); int j = 1; for (String page: pages) { statement2.setString(1, page); statement2.setInt(2, 10*j); i++; statement2.execute(); } connection.commit(); } catch (SQLException ex) { connection.rollback(); throw ex; } finally { if (statement != null) { statement.close(); } } } /** Verifies the results are correct */ private static boolean verify() throws SQLException { // check total num pageview String countAccessQuery = "SELECT COUNT(*) FROM Access"; String sumPageviewQuery = "SELECT SUM(pageview) FROM Pageview"; Statement st = null; ResultSet rs = null; try { st = connection.createStatement(); rs = st.executeQuery(countAccessQuery); rs.next(); long totalPageview = rs.getLong(1); rs = st.executeQuery(sumPageviewQuery); rs.next(); long sumPageview = rs.getLong(1); logger.info("totalPageview=" + totalPageview); logger.info("sumPageview=" + sumPageview); return totalPageview == sumPageview && totalPageview != 0; } finally { if (st != null) st.close(); if (rs != null) rs.close(); } } }