/*******************************************************************************
 *
 * Pentaho Big Data
 *
 * Copyright (C) 2002-2017 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.hadoop.shim.common;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assume.assumeTrue;

import java.io.IOException;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.commons.vfs2.AllFileSelector;
import org.apache.commons.vfs2.FileObject;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.hadoop.shim.HadoopConfiguration;
import org.pentaho.hadoop.shim.spi.MockHadoopShim;

/**
 * These tests exercise DistributedCacheUtil against the Hadoop local file system implementation, so they require
 * additional setup to run on Windows: the <b>hadoop.home.dir</b> property must point to a directory containing
 * <i>\bin\winutils.exe</i>.
 * <p>
 * If the Hadoop file system misbehaves on Windows, any of these tests can be skipped, e.g. with:
 *
 * <pre>
 * <code>
 * // Don't run this test on a Windows env
 * assumeTrue( !isWindows() );
 * </code>
 * </pre>
 */
public class DistributedCacheUtilImplOSDependentTest {

  private static HadoopConfiguration TEST_CONFIG;
  private static String PLUGIN_BASE = null;
  private static final String OS_NAME = System.getProperty( "os.name", "unknown" );

  protected static boolean isWindows() {
    return OS_NAME.startsWith( "Windows" );
  }

  @BeforeClass
  public static void setup() throws Exception {
    // Create some Hadoop configuration specific pmr libraries
    TEST_CONFIG = new HadoopConfiguration(
      DistributedCacheTestUtil.createTestHadoopConfiguration(
        "bin/test/" + DistributedCacheUtilImplOSDependentTest.class.getSimpleName() ),
      "test-config", "name", new MockHadoopShim() );

    PLUGIN_BASE = System.getProperty( Const.PLUGIN_BASE_FOLDERS_PROP );
    // Fake out the "plugins" directory for the project's root directory
    System.setProperty( Const.PLUGIN_BASE_FOLDERS_PROP,
      KettleVFS.getFileObject( "." ).getURL().toURI().getPath() );
  }
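
  // A minimal sketch of the Windows prerequisite described in the class javadoc: Hadoop's local file
  // system resolves winutils.exe via the hadoop.home.dir system property (or the HADOOP_HOME environment
  // variable). The path below is a placeholder, not part of this project; it must name a directory whose
  // bin/ folder contains winutils.exe, and the property must be set before the first Hadoop class loads.
  // Equivalently, pass -Dhadoop.home.dir=... on the JVM command line. This helper is illustrative only
  // and is not invoked by the tests.
  @SuppressWarnings( "unused" )
  private static void pointHadoopHomeAtWinutils() {
    if ( isWindows() && System.getProperty( "hadoop.home.dir" ) == null ) {
      System.setProperty( "hadoop.home.dir", "C:\\hadoop" ); // hypothetical location
    }
  }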

  @AfterClass
  public static void teardown() {
    if ( PLUGIN_BASE != null ) {
      System.setProperty( Const.PLUGIN_BASE_FOLDERS_PROP, PLUGIN_BASE );
    }
  }

  @Test
  public void stageForCache() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl( TEST_CONFIG );

    // Copy the contents of test folder
    FileObject source = DistributedCacheTestUtil.createTestFolderWithContent();
    try {
      Path root = new Path( "bin/test/stageArchiveForCacheTest" );
      Path dest = new Path( root, "org/pentaho/mapreduce/" );
      Configuration conf = new Configuration();
      FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem( conf );
      DistributedCacheTestUtil.stageForCacheTester( ch, source, fs, root, dest, 6, 6 );
    } finally {
      source.delete( new AllFileSelector() );
    }
  }

  @Test
  public void stageForCache_destination_exists() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl( TEST_CONFIG );

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem( conf );

    FileObject source = DistributedCacheTestUtil.createTestFolderWithContent();
    try {
      Path root = new Path( "bin/test/stageForCache_destination_exists" );
      Path dest = new Path( root, "dest" );

      fs.mkdirs( dest );
      assertTrue( fs.exists( dest ) );
      assertTrue( fs.getFileStatus( dest ).isDir() );

      DistributedCacheTestUtil.stageForCacheTester( ch, source, fs, root, dest, 6, 6 );
    } finally {
      source.delete( new AllFileSelector() );
    }
  }

  @Test
  public void stagePluginsForCache() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl( TEST_CONFIG );

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem( conf );

    Path pluginsDir = new Path( "bin/test/plugins-installation-dir" );

    FileObject pluginDir = DistributedCacheTestUtil.createTestFolderWithContent();
    try {
      ch.stagePluginsForCache( fs, pluginsDir, "bin/test/sample-folder" );
      Path pluginInstallPath = new Path( pluginsDir, "bin/test/sample-folder" );
      assertTrue( fs.exists( pluginInstallPath ) );
      ContentSummary summary = fs.getContentSummary( pluginInstallPath );
      assertEquals( 6, summary.getFileCount() );
      assertEquals( 6, summary.getDirectoryCount() );
    } finally {
      pluginDir.delete( new AllFileSelector() );
      fs.delete( pluginsDir, true );
    }
  }

  @Test
  public void findFiles_hdfs_native() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl( TEST_CONFIG );

    // Copy the contents of test folder
    FileObject source = DistributedCacheTestUtil.createTestFolderWithContent();
    Path root = new Path( "bin/test/stageArchiveForCacheTest" );
    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem( conf );
    Path dest = new Path( root, "org/pentaho/mapreduce/" );
    try {
      try {
        ch.stageForCache( source, fs, dest, true );

        List<Path> files = ch.findFiles( fs, dest, null );
        assertEquals( 5, files.size() );

        files = ch.findFiles( fs, dest, Pattern.compile( ".*jar$" ) );
        assertEquals( 2, files.size() );

        files = ch.findFiles( fs, dest, Pattern.compile( ".*folder$" ) );
        assertEquals( 1, files.size() );
      } finally {
        fs.delete( root, true );
      }
    } finally {
      source.delete( new AllFileSelector() );
    }
  }

  @Test
  public void installKettleEnvironment() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl( TEST_CONFIG );

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem( conf );

    // This "empty pmr" contains a lib/ folder but with no content
    FileObject pmrArchive = KettleVFS.getFileObject( getClass().getResource( "/empty-pmr.zip" ).toURI().getPath() );

    FileObject bigDataPluginDir =
      DistributedCacheTestUtil.createTestFolderWithContent( DistributedCacheUtilImpl.PENTAHO_BIG_DATA_PLUGIN_FOLDER_NAME );

    Path root = new Path( "bin/test/installKettleEnvironment" );
    try {
      ch.installKettleEnvironment( pmrArchive, fs, root, bigDataPluginDir, null );
      assertTrue( ch.isKettleEnvironmentInstalledAt( fs, root ) );
    } finally {
      bigDataPluginDir.delete( new AllFileSelector() );
      fs.delete( root, true );
    }
  }

  @Test
  public void installKettleEnvironment_additional_plugins() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl( TEST_CONFIG );

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem( conf );

    // This "empty pmr" contains a lib/ folder but with no content
    FileObject pmrArchive = KettleVFS.getFileObject( getClass().getResource( "/empty-pmr.zip" ).toURI().getPath() );

    FileObject bigDataPluginDir =
      DistributedCacheTestUtil.createTestFolderWithContent( DistributedCacheUtilImpl.PENTAHO_BIG_DATA_PLUGIN_FOLDER_NAME );

    String pluginName = "additional-plugin";
    FileObject additionalPluginDir = DistributedCacheTestUtil.createTestFolderWithContent( pluginName );
    Path root = new Path( "bin/test/installKettleEnvironment" );
    try {
      ch.installKettleEnvironment( pmrArchive, fs, root, bigDataPluginDir, "bin/test/" + pluginName );
      assertTrue( ch.isKettleEnvironmentInstalledAt( fs, root ) );
      assertTrue( fs.exists( new Path( root, "plugins/bin/test/" + pluginName ) ) );
    } finally {
      bigDataPluginDir.delete( new AllFileSelector() );
      additionalPluginDir.delete( new AllFileSelector() );
      fs.delete( root, true );
    }
  }

  @Test
  public void isPmrInstalledAt() throws IOException {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl( TEST_CONFIG );

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem( conf );

    Path root = new Path( "bin/test/ispmrInstalledAt" );
    Path lib = new Path( root, "lib" );
    Path plugins = new Path( root, "plugins" );
    Path bigDataPlugin = new Path( plugins, DistributedCacheUtilImpl.PENTAHO_BIG_DATA_PLUGIN_FOLDER_NAME );

    Path lockFile = ch.getLockFileAt( root );
    FSDataOutputStream lockFileOut = null;
    FSDataOutputStream bigDataPluginFileOut = null;
    try {
      // Create all directories (parent directories created automatically)
      fs.mkdirs( lib );
      fs.mkdirs( bigDataPlugin );

      assertTrue( ch.isKettleEnvironmentInstalledAt( fs, root ) );

      // If the lock file is there, pmr is not installed
      lockFileOut = fs.create( lockFile );
      assertFalse( ch.isKettleEnvironmentInstalledAt( fs, root ) );

      // Try to create a file instead of a directory for the pentaho-big-data-plugin. This should be detected.
      fs.delete( bigDataPlugin, true );
      bigDataPluginFileOut = fs.create( bigDataPlugin );
      assertFalse( ch.isKettleEnvironmentInstalledAt( fs, root ) );
    } finally {
      // Null checks so a failure before fs.create() isn't masked by an NPE here
      if ( lockFileOut != null ) {
        lockFileOut.close();
      }
      if ( bigDataPluginFileOut != null ) {
        bigDataPluginFileOut.close();
      }
      fs.delete( root, true );
    }
  }

  @Test
  public void configureWithPmr() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl( TEST_CONFIG );

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem( conf );

    // This "empty pmr" contains a lib/ folder and some empty kettle-*.jar files but no actual content
    FileObject pmrArchive = KettleVFS.getFileObject( getClass().getResource( "/empty-pmr.zip" ).toURI().getPath() );

    FileObject bigDataPluginDir =
      DistributedCacheTestUtil.createTestFolderWithContent( DistributedCacheUtilImpl.PENTAHO_BIG_DATA_PLUGIN_FOLDER_NAME );

    Path root = new Path( "bin/test/installKettleEnvironment" );
    try {
      ch.installKettleEnvironment( pmrArchive, fs, root, bigDataPluginDir, null );
      assertTrue( ch.isKettleEnvironmentInstalledAt( fs, root ) );

      ch.configureWithKettleEnvironment( conf, fs, root );

      // Make sure our libraries are on the classpath
      assertTrue( conf.get( "mapred.cache.files" ).contains( "lib/kettle-core.jar" ) );
      assertTrue( conf.get( "mapred.cache.files" ).contains( "lib/kettle-engine.jar" ) );
      assertTrue( conf.get( "mapred.job.classpath.files" ).contains( "lib/kettle-core.jar" ) );
      assertTrue( conf.get( "mapred.job.classpath.files" ).contains( "lib/kettle-engine.jar" ) );

      // Make sure the configuration specific jar made it!
      assertTrue( conf.get( "mapred.cache.files" ).contains( "lib/configuration-specific.jar" ) );

      // Make sure our plugins folder is registered
      assertTrue( conf.get( "mapred.cache.files" ).contains( "#plugins" ) );

      // Make sure our libraries aren't included twice
      assertFalse( conf.get( "mapred.cache.files" ).contains( "#lib" ) );

      // We should not have individual files registered
      assertFalse( conf.get( "mapred.cache.files" ).contains( "pentaho-big-data-plugin/jar1.jar" ) );
      assertFalse( conf.get( "mapred.cache.files" ).contains( "pentaho-big-data-plugin/jar2.jar" ) );
      assertFalse( conf.get( "mapred.cache.files" ).contains( "pentaho-big-data-plugin/folder/file.txt" ) );
    } finally {
      bigDataPluginDir.delete( new AllFileSelector() );
      fs.delete( root, true );
    }
  }
}