/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.hdfstests;

import org.apache.commons.io.FileUtils;

import org.apache.flink.api.common.JobID;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.core.fs.FileStatus;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.core.testutils.CommonTestUtils;
import org.apache.flink.runtime.state.CheckpointStreamFactory;
import org.apache.flink.runtime.state.StateBackendTestBase;
import org.apache.flink.runtime.state.filesystem.FileStateHandle;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.runtime.state.memory.ByteStreamStateHandle;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.MiniDFSCluster;

import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Random;
import java.util.UUID;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

/**
 * Tests for the {@link FsStateBackend}, running against an HDFS mini cluster.
 */
public class FileStateBackendTest extends StateBackendTestBase<FsStateBackend> {

	private static File TEMP_DIR;

	private static String HDFS_ROOT_URI;

	private static MiniDFSCluster HDFS_CLUSTER;

	private static FileSystem FS;

	// ------------------------------------------------------------------------
	//  startup / shutdown
	// ------------------------------------------------------------------------

	@BeforeClass
	public static void createHDFS() {
		try {
			TEMP_DIR = new File(ConfigConstants.DEFAULT_TASK_MANAGER_TMP_PATH, UUID.randomUUID().toString());

			Configuration hdConf = new Configuration();
			hdConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEMP_DIR.getAbsolutePath());

			MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(hdConf);
			HDFS_CLUSTER = builder.build();

			HDFS_ROOT_URI = "hdfs://" + HDFS_CLUSTER.getURI().getHost() + ":"
					+ HDFS_CLUSTER.getNameNodePort() + "/";

			FS = FileSystem.get(new URI(HDFS_ROOT_URI));
		}
		catch (Exception e) {
			e.printStackTrace();
			fail("Could not create HDFS mini cluster: " + e.getMessage());
		}
	}

	@AfterClass
	public static void destroyHDFS() {
		try {
			HDFS_CLUSTER.shutdown();
			FileUtils.deleteDirectory(TEMP_DIR);
		}
		catch (Exception ignored) {}
	}

	@Override
	protected FsStateBackend getStateBackend() throws Exception {
		URI stateBaseURI = new URI(HDFS_ROOT_URI + UUID.randomUUID().toString());
		return new FsStateBackend(stateBaseURI);
	}

	// ------------------------------------------------------------------------
	//  Tests
	// ------------------------------------------------------------------------

	// disable these because the verification does not work for this state backend
	@Override
	@Test
	public void testValueStateRestoreWithWrongSerializers() {}

	@Override
	@Test
	public void testListStateRestoreWithWrongSerializers() {}

	@Override
	@Test
	public void testReducingStateRestoreWithWrongSerializers() {}

	@Override
	@Test
	public void testMapStateRestoreWithWrongSerializers() {}

	@Test
	public void testStateOutputStream() {
		URI basePath = randomHdfsFileUri();

		try {
			FsStateBackend backend = CommonTestUtils.createCopySerializable(new FsStateBackend(basePath, 15));
			JobID jobId = new JobID();

			CheckpointStreamFactory streamFactory = backend.createStreamFactory(jobId, "test_op");

			// we know how FsCheckpointStreamFactory is implemented, so we know where it
			// will store checkpoints
			Path checkpointPath = new Path(new Path(basePath), jobId.toString());

			byte[] state1 = new byte[1274673];
			byte[] state2 = new byte[1];
			byte[] state3 = new byte[0];
			byte[] state4 = new byte[177];

			Random rnd = new Random();
			rnd.nextBytes(state1);
			rnd.nextBytes(state2);
			rnd.nextBytes(state3);
			rnd.nextBytes(state4);

			long checkpointId = 97231523452L;

			CheckpointStreamFactory.CheckpointStateOutputStream stream1 =
					streamFactory.createCheckpointStateOutputStream(checkpointId, System.currentTimeMillis());
			CheckpointStreamFactory.CheckpointStateOutputStream stream2 =
					streamFactory.createCheckpointStateOutputStream(checkpointId, System.currentTimeMillis());
			CheckpointStreamFactory.CheckpointStateOutputStream stream3 =
					streamFactory.createCheckpointStateOutputStream(checkpointId, System.currentTimeMillis());

			stream1.write(state1);
			stream2.write(state2);
			stream3.write(state3);

			FileStateHandle handle1 = (FileStateHandle) stream1.closeAndGetHandle();
			ByteStreamStateHandle handle2 = (ByteStreamStateHandle) stream2.closeAndGetHandle();
			ByteStreamStateHandle handle3 = (ByteStreamStateHandle) stream3.closeAndGetHandle();

			// use a try-with-resources block
			FileStateHandle handle4;
			try (CheckpointStreamFactory.CheckpointStateOutputStream stream4 =
					streamFactory.createCheckpointStateOutputStream(checkpointId, System.currentTimeMillis())) {
				stream4.write(state4);
				handle4 = (FileStateHandle) stream4.closeAndGetHandle();
			}

			// close the stream before accessing the handle
			CheckpointStreamFactory.CheckpointStateOutputStream stream5 =
					streamFactory.createCheckpointStateOutputStream(checkpointId, System.currentTimeMillis());
			stream5.write(state4);
			stream5.close();
			try {
				stream5.closeAndGetHandle();
				fail("closeAndGetHandle() must fail on a stream that was already closed");
			}
			catch (IOException e) {
				// expected
			}

			validateBytesInStream(handle1.openInputStream(), state1);
			handle1.discardState();
			assertFalse(isDirectoryEmpty(checkpointPath));
			ensureFileDeleted(handle1.getFilePath());

			validateBytesInStream(handle2.openInputStream(), state2);
			handle2.discardState();

			// stream 3 has zero bytes, so it should not return anything
			assertNull(handle3);

			validateBytesInStream(handle4.openInputStream(), state4);
			handle4.discardState();
			assertTrue(isDirectoryEmpty(checkpointPath));
		}
		catch (Exception e) {
			e.printStackTrace();
			fail(e.getMessage());
		}
	}

	// ------------------------------------------------------------------------
	//  Utilities
	// ------------------------------------------------------------------------

	private static void ensureFileDeleted(Path path) {
		try {
			assertFalse(FS.exists(path));
		}
		catch (IOException ignored) {}
	}

	private static boolean isDirectoryEmpty(URI directory) {
		return isDirectoryEmpty(new Path(directory));
	}

	private static boolean isDirectoryEmpty(Path directory) {
		try {
			FileStatus[] nested = FS.listStatus(directory);
			return nested == null || nested.length == 0;
		}
		catch (IOException e) {
			// a non-existent directory counts as empty
			return true;
		}
	}

	private static URI randomHdfsFileUri() {
		String uriString = HDFS_ROOT_URI + UUID.randomUUID().toString();
		try {
			return new URI(uriString);
		}
		catch (URISyntaxException e) {
			throw new RuntimeException("Invalid test directory URI: " + uriString, e);
		}
	}

	private static void validateBytesInStream(InputStream is, byte[] data) throws IOException {
		try {
			byte[] holder = new byte[data.length];

			// read fully; a single read() may return fewer bytes than requested
			int pos = 0;
			int read;
			while (pos < holder.length && (read = is.read(holder, pos, holder.length - pos)) != -1) {
				pos += read;
			}

			assertEquals("not enough data", holder.length, pos);
			assertEquals("too much data", -1, is.read());
			assertArrayEquals("wrong data", data, holder);
		}
		finally {
			is.close();
		}
	}
}