/*
 * Copyright 2014 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.springframework.data.hadoop.store.output;

import static org.hamcrest.CoreMatchers.everyItem;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.CoreMatchers.not;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.endsWith;
import static org.junit.Assert.assertNotNull;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import java.util.concurrent.CountDownLatch;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.junit.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.AnnotationConfigApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.task.TaskExecutor;
import org.springframework.data.hadoop.fs.FsShell;
import org.springframework.data.hadoop.store.AbstractStoreTests;
import org.springframework.data.hadoop.store.DataStoreWriter;
import org.springframework.data.hadoop.store.TestUtils;
import org.springframework.data.hadoop.store.event.DefaultStoreEventPublisher;
import org.springframework.data.hadoop.store.event.LoggingListener;
import org.springframework.data.hadoop.store.event.StoreEventPublisher;
import org.springframework.data.hadoop.store.partition.PartitionKeyResolver;
import org.springframework.data.hadoop.store.partition.PartitionResolver;
import org.springframework.data.hadoop.store.partition.PartitionStrategy;
import org.springframework.data.hadoop.store.strategy.naming.FileNamingStrategy;
import org.springframework.data.hadoop.store.strategy.naming.RollingFileNamingStrategy;
import org.springframework.data.hadoop.store.strategy.rollover.RolloverStrategy;
import org.springframework.data.hadoop.store.strategy.rollover.SizeRolloverStrategy;
import org.springframework.data.hadoop.test.context.HadoopDelegatingSmartContextLoader;
import org.springframework.data.hadoop.test.context.MiniHadoopCluster;
import org.springframework.data.hadoop.test.tests.Assume;
import org.springframework.data.hadoop.test.tests.TestGroup;
import org.springframework.scheduling.TaskScheduler;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.scheduling.concurrent.ThreadPoolTaskScheduler;
import org.springframework.test.annotation.DirtiesContext;
import org.springframework.test.annotation.DirtiesContext.ClassMode;
import org.springframework.test.context.ContextConfiguration;

/**
 * Smoke tests for {@link PartitionTextFileWriter} doing concurrent partitioned
 * writes against a mini Hadoop cluster.
 */
@ContextConfiguration(loader=HadoopDelegatingSmartContextLoader.class, classes=PartitionTextFileWriterSmokeTests.EmptyConfig.class)
@MiniHadoopCluster
@DirtiesContext(classMode=ClassMode.AFTER_EACH_TEST_METHOD)
public class PartitionTextFileWriterSmokeTests extends AbstractStoreTests {

	@Autowired
	private ApplicationContext context;

	@Autowired
	private org.apache.hadoop.conf.Configuration hadoopConfiguration;

	private final static String PATH1 = "/tmp/PartitionTextFileWriterSmokeTests/testWritePartitions/default";

	private final static String PATH2 = "/tmp/PartitionTextFileWriterSmokeTests/testWritePartitionsWithContextClose/default";

	private final static String PATH3 = "/tmp/PartitionTextFileWriterSmokeTests/testWritePartitionsWithRolloverAndContextClose/default";

	@Test
	public void testWritePartitions() throws Exception {
		Assume.group(TestGroup.PERFORMANCE);
		AnnotationConfigApplicationContext ctx = new AnnotationConfigApplicationContext();
		ctx.setParent(context);
		ctx.register(BaseConfig.class, Config1.class);
		ctx.refresh();

		int threads = 30;
		int count = 20000;
		int iterations = 10;

		@SuppressWarnings("unchecked")
		PartitionTextFileWriter<String> writer = ctx.getBean("writer1", PartitionTextFileWriter.class);
		assertNotNull(writer);

		for (int i = 0; i < iterations; i++) {
			doConcurrentWrites(writer, threads, count);
			// sleep to kick off the idle timeout and to leave
			// enough time to rename the file away from its in-writing postfix
			Thread.sleep(2000);
		}
		Thread.sleep(3000);

		Map<Path, DataStoreWriter<String>> writers = TestUtils.readField("writers", writer);
		TestUtils.printLsR(PATH1, getConfiguration());
		assertThat(writers.size(), is(0));

		writer.flush();
		writer.close();

		// assuming all items in DATA09ARRAY have the same length as DATA10
		assertThat(getTotalWritten(PATH1), is((long) count * (DATA10.length() + 1) * threads * iterations));

		@SuppressWarnings("resource")
		FsShell shell = new FsShell(getConfiguration());
		Collection<FileStatus> files = shell.ls(true, PATH1);
		Collection<String> names = statusesToNames(files);
		assertThat(names, everyItem(not(endsWith("tmp"))));
		ctx.close();
	}

	@Test
	public void testWritePartitionsWithContextClose() throws Exception {
		Assume.group(TestGroup.PERFORMANCE);
		AnnotationConfigApplicationContext ctx = new AnnotationConfigApplicationContext();
		ctx.setParent(context);
		ctx.register(BaseConfig.class, Config2.class);
		ctx.refresh();

		int threads = 30;
		int count = 20000;
		int iterations = 10;

		@SuppressWarnings("unchecked")
		PartitionTextFileWriter<String> writer = ctx.getBean("writer1", PartitionTextFileWriter.class);
		assertNotNull(writer);

		for (int i = 0; i < iterations; i++) {
			doConcurrentWrites(writer, threads, count);
		}
		ctx.close();

		Map<Path, DataStoreWriter<String>> writers = TestUtils.readField("writers", writer);
		TestUtils.printLsR(PATH2, getConfiguration());
		assertThat(writers.size(), is(0));

		// assuming all items in DATA09ARRAY have the same length as DATA10
		assertThat(getTotalWritten(PATH2), is((long) count * (DATA10.length() + 1) * threads * iterations));

		@SuppressWarnings("resource")
		FsShell shell = new FsShell(getConfiguration());
		Collection<FileStatus> files = shell.ls(true, PATH2);
		Collection<String> names = statusesToNames(files);
		assertThat(names, everyItem(not(endsWith("tmp"))));
	}

	@Test
	public void testWritePartitionsWithRolloverAndContextClose() throws Exception {
		Assume.group(TestGroup.PERFORMANCE);
		AnnotationConfigApplicationContext ctx = new AnnotationConfigApplicationContext();
		ctx.setParent(context);
		ctx.register(BaseConfig.class, Config3.class);
		ctx.refresh();

		int threads = 30;
		int count = 20000;
		int iterations = 10;

		@SuppressWarnings("unchecked")
		PartitionTextFileWriter<String> writer = ctx.getBean("writer1", PartitionTextFileWriter.class);
		assertNotNull(writer);

		for (int i = 0; i < iterations; i++) {
			doConcurrentWrites(writer, threads, count);
		}
		Thread.sleep(3000);
		ctx.close();

		Map<Path, DataStoreWriter<String>> writers = TestUtils.readField("writers", writer);
		TestUtils.printLsR(PATH3, getConfiguration());
		assertThat(writers.size(), is(0));

		// assuming all items in DATA09ARRAY have the same length as DATA10
		assertThat(getTotalWritten(PATH3), is((long) count * (DATA10.length() + 1) * threads * iterations));

		@SuppressWarnings("resource")
		FsShell shell = new FsShell(getConfiguration());
		Collection<FileStatus> files = shell.ls(true, PATH3);
		Collection<String> names = statusesToNames(files);
		assertThat(names, everyItem(not(endsWith("tmp"))));
	}

	private long getTotalWritten(String path) {
		@SuppressWarnings("resource")
		FsShell shell = new FsShell(hadoopConfiguration);
		long total = 0;
		for (FileStatus s : shell.ls(true, path)) {
			if (s.isFile()) {
				total += s.getLen();
			}
		}
		return total;
	}

	private static Collection<String> statusesToNames(Collection<FileStatus> statuses) {
		Collection<String> names = new ArrayList<String>();
		for (FileStatus s : statuses) {
			String p = s.getPath().toString();
			// strip the scheme and authority, i.e. hdfs://host:port, from the path
			int index = p.indexOf('/', 8);
			names.add(p.substring(index));
		}
		return names;
	}

	private void doConcurrentWrites(final PartitionTextFileWriter<String> writer, int threadCount, final int writeCount) {
		final CountDownLatch latch = new CountDownLatch(1);
		final ArrayList<Thread> joins = new ArrayList<Thread>();

		for (int i = 0; i < threadCount; ++i) {
			Runnable runner = new Runnable() {

				public void run() {
					try {
						latch.await();
						for (int j = 0; j < writeCount; j++) {
							writer.write(DATA09ARRAY[j % DATA09ARRAY.length]);
						}
					} catch (Exception ie) {
					}
				}
			};
			Thread t = new Thread(runner, "SmokeThread" + i);
			joins.add(t);
			t.start();
		}

		// release all writer threads at once, then wait for them to finish
		latch.countDown();
		for (Thread t : joins) {
			try {
				t.join();
			} catch (InterruptedException e) {
			}
		}
	}

	private static class TestPartitionStrategy implements PartitionStrategy<String, String> {

		TestPartitionResolver partitionResolver = new TestPartitionResolver();

		TestPartitionKeyResolver keyResolver = new TestPartitionKeyResolver();

		@Override
		public PartitionResolver<String> getPartitionResolver() {
			return partitionResolver;
		}

		@Override
		public PartitionKeyResolver<String, String> getPartitionKeyResolver() {
			return keyResolver;
		}
	}

	private static class TestPartitionResolver implements PartitionResolver<String> {

		@Override
		public Path resolvePath(String partitionKey) {
			return new Path(partitionKey);
		}
	}

	private static class TestPartitionKeyResolver implements PartitionKeyResolver<String, String> {

		@Override
		public String resolvePartitionKey(String entity) {
			return entity.substring(0, 2);
		}
	}

	@Configuration
	public static class Config1 {

		@Autowired
		private org.apache.hadoop.conf.Configuration hadoopConfiguration;

		@Bean
		public Path testBasePath() {
			return new Path(PATH1);
		}

		@Bean
		public RollingFileNamingStrategy fileNamingStrategy() {
			return new RollingFileNamingStrategy();
		}

		@Bean
		public PartitionStrategy<String, String> partitionStrategy() {
			return new TestPartitionStrategy();
		}

		@Bean
		public PartitionTextFileWriter<String> writer1() {
			PartitionTextFileWriter<String> writer = new PartitionTextFileWriter<String>(hadoopConfiguration,
					testBasePath(), null, partitionStrategy());
			writer.setIdleTimeout(1000);
			writer.setFileNamingStrategyFactory(fileNamingStrategy());
			writer.setInWritingSuffix(".tmp");
			return writer;
		}
	}

	@Configuration
	public static class Config2 {

		@Autowired
		private org.apache.hadoop.conf.Configuration hadoopConfiguration;

		@Bean
		public Path testBasePath() {
			return new Path(PATH2);
		}

		@Bean
		public RollingFileNamingStrategy fileNamingStrategy() {
			return new RollingFileNamingStrategy();
		}

		@Bean
		public PartitionStrategy<String, String> partitionStrategy() {
			return new TestPartitionStrategy();
		}

		@Bean
		public PartitionTextFileWriter<String> writer1() {
			PartitionTextFileWriter<String> writer = new PartitionTextFileWriter<String>(hadoopConfiguration,
					testBasePath(), null, partitionStrategy());
			writer.setIdleTimeout(60000);
			writer.setFileNamingStrategyFactory(fileNamingStrategy());
			writer.setInWritingSuffix(".tmp");
			return writer;
		}
	}

	@Configuration
	public static class Config3 {

		@Autowired
		private org.apache.hadoop.conf.Configuration hadoopConfiguration;

		@Bean
		public Path testBasePath() {
			return new Path(PATH3);
		}

		@Bean
		public FileNamingStrategy fileNamingStrategy() {
			return new RollingFileNamingStrategy();
		}

		@Bean
		public RolloverStrategy rolloverStrategy() {
			return new SizeRolloverStrategy("1M");
		}

		@Bean
		public PartitionStrategy<String, String> partitionStrategy() {
			return new TestPartitionStrategy();
		}

		@Bean
		public PartitionTextFileWriter<String> writer1() {
			PartitionTextFileWriter<String> writer = new PartitionTextFileWriter<String>(hadoopConfiguration,
					testBasePath(), null, partitionStrategy());
			writer.setIdleTimeout(1000);
			writer.setFileNamingStrategyFactory(fileNamingStrategy());
			writer.setRolloverStrategyFactory(rolloverStrategy());
			writer.setInWritingSuffix(".tmp");
			return writer;
		}
	}

	@Configuration
	public static class BaseConfig {

		@Bean
		public TaskExecutor taskExecutor() {
			return new ThreadPoolTaskExecutor();
		}

		@Bean
		public TaskScheduler taskScheduler() {
			return new ThreadPoolTaskScheduler();
		}

		@Bean
		public StoreEventPublisher storeEventPublisher() {
			return new DefaultStoreEventPublisher();
		}

		@Bean
		public LoggingListener loggingListener() {
			return new LoggingListener("INFO");
		}
	}

	@Configuration
	static class EmptyConfig {
	}

}