/*
 * Copyright 2014 GoDataDriven B.V.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.divolte.server.hdfs;

import static io.divolte.server.hdfs.FileCreateAndSyncStrategy.HdfsOperationResult.*;
import static io.divolte.server.processing.ItemProcessor.ProcessingDirective.*;

import java.io.IOException;
import java.util.Objects;

import javax.annotation.ParametersAreNonnullByDefault;
import javax.annotation.concurrent.NotThreadSafe;

import io.divolte.server.config.HdfsSinkConfiguration;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import io.divolte.server.AvroRecordBuffer;
import io.divolte.server.config.ValidatedConfiguration;
import io.divolte.server.hdfs.FileCreateAndSyncStrategy.HdfsOperationResult;
import io.divolte.server.processing.Item;
import io.divolte.server.processing.ItemProcessor;

/**
 * Item processor that flushes Avro record buffers to HDFS via a
 * {@link FileCreateAndSyncStrategy}.
 *
 * <p>While the most recent HDFS operation succeeded, records are appended as
 * they arrive; after a failure the processor returns {@code PAUSE} and relies
 * on {@link #heartbeat()} to retry (via the file strategy) until HDFS becomes
 * available again.
 *
 * <p>Not thread-safe: each instance is expected to be confined to a single
 * processing thread.
 */
@ParametersAreNonnullByDefault
@NotThreadSafe
public final class HdfsFlusher implements ItemProcessor<AvroRecordBuffer> {
    private static final Logger logger = LoggerFactory.getLogger(HdfsFlusher.class);

    private final FileCreateAndSyncStrategy fileStrategy;
    // Result of the most recent HDFS operation; gates whether process() appends or pauses.
    private HdfsOperationResult lastHdfsResult;

    /**
     * Creates a flusher for the named HDFS sink.
     *
     * @param vc     validated server configuration; supplies optional HDFS client
     *               properties and the sink's replication factor
     * @param name   name of the HDFS sink whose configuration should be used
     * @param schema Avro schema of the records that will be written
     * @throws RuntimeException if the HDFS {@link FileSystem} cannot be
     *                          instantiated (e.g. a malformed filesystem URI)
     */
    public HdfsFlusher(final ValidatedConfiguration vc, final String name, final Schema schema) {
        Objects.requireNonNull(vc, "vc");

        final Configuration hdfsConfiguration = vc.configuration().global.hdfs.client
            .map(clientProperties -> {
                // Explicit client properties supplied: start from an empty configuration
                // (no default resources) and copy the properties across verbatim.
                final Configuration configuration = new Configuration(false);
                for (final String propertyName : clientProperties.stringPropertyNames()) {
                    configuration.set(propertyName, clientProperties.getProperty(propertyName));
                }
                return configuration;
            })
            // orElseGet: only construct (and load default resources for) a default
            // Configuration when no client properties were provided.
            .orElseGet(Configuration::new);
        /*
         * The HDFS client creates a JVM shutdown hook, which interferes with our own server shutdown hook.
         * This config option disabled the built in shutdown hook. We call FileSystem.closeAll() ourselves
         * in the server shutdown hook instead.
         */
        hdfsConfiguration.setBoolean("fs.automatic.close", false);
        final FileSystem hadoopFs;
        try {
            hadoopFs = FileSystem.get(hdfsConfiguration);
        } catch (final IOException e) {
            /*
             * It is possible to create a FileSystem instance when HDFS is not available (e.g. NameNode down).
             * This exception only occurs when there is a configuration error in the URI (e.g. wrong scheme).
             * So we fail to start up in this case. Below we create the actual HDFS connection, by opening
             * files. If that fails, we do startup and initiate the regular retry cycle.
             */
            logger.error("Could not initialize HDFS filesystem.", e);
            throw new RuntimeException("Could not initialize HDFS filesystem", e);
        }

        final short hdfsReplication = vc.configuration()
                                        .getSinkConfiguration(Objects.requireNonNull(name, "name"),
                                                              HdfsSinkConfiguration.class).replication;

        fileStrategy = new SimpleRollingFileStrategy(vc, name, hadoopFs, hdfsReplication,
                                                     Objects.requireNonNull(schema, "schema"));
        lastHdfsResult = fileStrategy.setup();
    }

    /** Releases any resources held by the underlying file strategy. */
    @Override
    public void cleanup() {
        fileStrategy.cleanup();
    }

    /**
     * Appends a single record buffer to HDFS.
     *
     * @param item the queued record buffer
     * @return {@code CONTINUE} when the append succeeded; {@code PAUSE} when it
     *         failed or a previous HDFS operation has not yet recovered
     */
    @Override
    public ProcessingDirective process(final Item<AvroRecordBuffer> item) {
        final AvroRecordBuffer record = item.payload;
        if (lastHdfsResult == SUCCESS) {
            return (lastHdfsResult = fileStrategy.append(record)) == SUCCESS ? CONTINUE : PAUSE;
        } else {
            // A previous HDFS operation failed; wait for heartbeat() to recover.
            return PAUSE;
        }
    }

    /**
     * Periodic tick: lets the file strategy perform housekeeping (e.g. file
     * rolling, reconnection attempts) and updates the pause/continue state.
     */
    @Override
    public ProcessingDirective heartbeat() {
        lastHdfsResult = fileStrategy.heartbeat();
        return lastHdfsResult == SUCCESS ? CONTINUE : PAUSE;
    }
}