DatasetSink.java example

Explorer

flume-master
- flume-trunk
  - flume-ng-auth
    - src
      - main
        java
        org
        apache
        flume
        api
        SecureRpcClientFactory.java
        SecureThriftRpcClient.java
        auth
        FlumeAuthenticationUtil.java
        FlumeAuthenticator.java
        KerberosAuthenticator.java
        KerberosUser.java
        PrivilegedExecutor.java
        SecurityException.java
        SimpleAuthenticator.java
        UGIExecutor.java
      - test
        java
        org
        apache
        flume
        auth
        TestFlumeAuthenticator.java
  - flume-ng-channels
    - flume-file-channel
      - src
        main
        java
        org
        apache
        flume
        channel
        file
        BadCheckpointException.java
        CheckpointRebuilder.java
        Commit.java
        CorruptEventException.java
        EventQueueBackingStore.java
        EventQueueBackingStoreFactory.java
        EventQueueBackingStoreFile.java
        EventQueueBackingStoreFileV2.java
        EventQueueBackingStoreFileV3.java
        EventUtils.java
        FileChannel.java
        FileChannelConfiguration.java
        FlumeEvent.java
        FlumeEventPointer.java
        FlumeEventQueue.java
        Log.java
        LogFile.java
        LogFileFactory.java
        LogFileRetryableIOException.java
        LogFileV2.java
        LogFileV3.java
        LogRecord.java
        LogUtils.java
        NoopRecordException.java
        Pair.java
        Put.java
        ReplayHandler.java
        Rollback.java
        Serialization.java
        Take.java
        TransactionEventRecord.java
        TransactionIDOracle.java
        Writable.java
        WritableUtils.java
        WriteOrderOracle.java
        encryption
        AESCTRNoPaddingProvider.java
        CipherProvider.java
        CipherProviderFactory.java
        CipherProviderType.java
        DecryptionFailureException.java
        EncryptionConfiguration.java
        JCEFileKeyProvider.java
        KeyProvider.java
        KeyProviderFactory.java
        KeyProviderType.java
        instrumentation
        FileChannelCounter.java
        FileChannelCounterMBean.java
        proto
        ProtosFactory.java
        test
        java
        org
        apache
        flume
        channel
        file
        CountingSinkRunner.java
        CountingSourceRunner.java
        TestCheckpoint.java
        TestCheckpointRebuilder.java
        TestEventQueueBackingStoreFactory.java
        TestEventUtils.java
        TestFileChannel.java
        TestFileChannelBase.java
        TestFileChannelFormatRegression.java
        TestFileChannelRestart.java
        TestFileChannelRollback.java
        TestFlumeEvent.java
        TestFlumeEventPointer.java
        TestFlumeEventQueue.java
        TestIntegration.java
        TestLog.java
        TestLogFile.java
        TestLogRecord.java
        TestTransactionEventRecordV2.java
        TestTransactionEventRecordV3.java
        TestTransactionIDOracle.java
        TestUtils.java
        TestWriteOrderOracle.java
        encryption
        CipherProviderTestSuite.java
        EncryptionTestUtils.java
        TestAESCTRNoPaddingProvider.java
        TestFileChannelEncryption.java
        TestJCEFileKeyProvider.java
    - flume-jdbc-channel
      - src
        main
        java
        org
        apache
        flume
        channel
        jdbc
        ConfigurationConstants.java
        DatabaseType.java
        JdbcChannel.java
        JdbcChannelException.java
        JdbcChannelProvider.java
        JdbcChannelProviderFactory.java
        TransactionIsolation.java
        impl
        DerbySchemaHandler.java
        JdbcChannelProviderImpl.java
        JdbcTransactionFactory.java
        JdbcTransactionImpl.java
        MySQLSchemaHandler.java
        PersistableEvent.java
        SchemaHandler.java
        SchemaHandlerFactory.java
        test
        java
        org
        apache
        flume
        channel
        jdbc
        BaseJdbcChannelProviderTest.java
        MockEvent.java
        MockEventUtils.java
        TestDatabaseTypeEnum.java
        TestDerbySchemaHandlerQueries.java
        TestJdbcChannelProvider.java
        TestJdbcChannelProviderNoFK.java
        TestPersistentEvent.java
        TestTransactionIsolationLevelEnum.java
    - flume-kafka-channel
      - src
        main
        java
        org
        apache
        flume
        channel
        kafka
        KafkaChannel.java
        KafkaChannelConfiguration.java
        test
        java
        org
        apache
        flume
        channel
        kafka
        TestKafkaChannel.java
    - flume-spillable-memory-channel
      - src
        main
        java
        org
        apache
        flume
        channel
        SpillableMemoryChannel.java
        test
        java
        org
        apache
        flume
        channel
        TestSpillableMemoryChannel.java
  - flume-ng-clients
    - flume-ng-log4jappender
      - src
        main
        java
        org
        apache
        flume
        clients
        log4jappender
        LoadBalancingLog4jAppender.java
        Log4jAppender.java
        Log4jAvroHeaders.java
        test
        java
        org
        apache
        flume
        clients
        log4jappender
        TestLoadBalancingLog4jAppender.java
        TestLog4jAppender.java
        TestLog4jAppenderWithAvro.java
  - flume-ng-configuration
    - src
      - main
        java
        org
        apache
        flume
        Context.java
        conf
        BasicConfigurationConstants.java
        ComponentConfiguration.java
        ComponentConfigurationFactory.java
        ConfigurationException.java
        FlumeConfiguration.java
        FlumeConfigurationError.java
        FlumeConfigurationErrorType.java
        LogPrivacyUtil.java
        channel
        ChannelConfiguration.java
        ChannelSelectorConfiguration.java
        ChannelSelectorType.java
        ChannelType.java
        sink
        SinkConfiguration.java
        SinkGroupConfiguration.java
        SinkProcessorConfiguration.java
        SinkProcessorType.java
        SinkType.java
        source
        SourceConfiguration.java
        SourceType.java
      - test
        java
        org
        apache
        flume
        conf
        TestFlumeConfiguration.java
        source
        TestSourceConfiguration.java
  - flume-ng-core
    - src
      - main
        java
        org
        apache
        flume
        Channel.java
        ChannelException.java
        ChannelFactory.java
        ChannelFullException.java
        ChannelSelector.java
        Clock.java
        Constants.java
        CounterGroup.java
        EventDrivenSource.java
        NamedComponent.java
        PollableSource.java
        Sink.java
        SinkFactory.java
        SinkProcessor.java
        SinkRunner.java
        Source.java
        SourceFactory.java
        SourceRunner.java
        SystemClock.java
        Transaction.java
        VersionAnnotation.java
        annotations
        Disposable.java
        InterfaceAudience.java
        InterfaceStability.java
        Recyclable.java
        channel
        AbstractChannel.java
        AbstractChannelSelector.java
        BasicChannelSemantics.java
        BasicTransactionSemantics.java
        ChannelProcessor.java
        ChannelSelectorFactory.java
        ChannelUtils.java
        DefaultChannelFactory.java
        MemoryChannel.java
        MultiplexingChannelSelector.java
        PseudoTxnMemoryChannel.java
        ReplicatingChannelSelector.java
        client
        avro
        AvroCLIClient.java
        EventReader.java
        ReliableEventReader.java
        ReliableSpoolingFileEventReader.java
        SimpleTextLineEventReader.java
        conf
        Configurable.java
        ConfigurableComponent.java
        Configurables.java
        event
        EventHelper.java
        formatter
        output
        BucketPath.java
        DefaultPathManager.java
        EventFormatter.java
        PathManager.java
        PathManagerFactory.java
        PathManagerType.java
        RollTimePathManager.java
        TextDelimitedOutputFormatter.java
        instrumentation
        ChannelCounter.java
        ChannelCounterMBean.java
        ChannelProcessorCounter.java
        GangliaServer.java
        MonitorService.java
        MonitoredCounterGroup.java
        MonitoringType.java
        SinkCounter.java
        SinkCounterMBean.java
        SinkProcessorCounter.java
        SourceCounter.java
        SourceCounterMBean.java
        http
        HTTPMetricsServer.java
        kafka
        KafkaChannelCounter.java
        KafkaChannelCounterMBean.java
        KafkaSinkCounter.java
        KafkaSinkCounterMBean.java
        KafkaSourceCounter.java
        KafkaSourceCounterMBean.java
        util
        JMXPollUtil.java
        interceptor
        HostInterceptor.java
        Interceptor.java
        InterceptorBuilderFactory.java
        InterceptorChain.java
        InterceptorType.java
        RegexExtractorInterceptor.java
        RegexExtractorInterceptorMillisSerializer.java
        RegexExtractorInterceptorPassThroughSerializer.java
        RegexExtractorInterceptorSerializer.java
        RegexFilteringInterceptor.java
        RemoveHeaderInterceptor.java
        SearchAndReplaceInterceptor.java
        StaticInterceptor.java
        TimestampInterceptor.java
        lifecycle
        LifecycleAware.java
        LifecycleController.java
        LifecycleException.java
        LifecycleState.java
        LifecycleSupervisor.java
        serialization
        AbstractAvroEventSerializer.java
        AvroEventDeserializer.java
        AvroEventSerializerConfigurationConstants.java
        BodyTextEventSerializer.java
        DecodeErrorPolicy.java
        DurablePositionTracker.java
        EventDeserializer.java
        EventDeserializerFactory.java
        EventDeserializerType.java
        EventSerDe.java
        EventSerializer.java
        EventSerializerFactory.java
        EventSerializerType.java
        FlumeEventAvroEventSerializer.java
        HeaderAndBodyTextEventSerializer.java
        LengthMeasurable.java
        LineDeserializer.java
        PositionTracker.java
        RemoteMarkable.java
        Resettable.java
        ResettableFileInputStream.java
        ResettableInputStream.java
        Seekable.java
        sink
        AbstractRpcSink.java
        AbstractSink.java
        AbstractSinkProcessor.java
        AbstractSinkSelector.java
        AvroSink.java
        DefaultSinkFactory.java
        DefaultSinkProcessor.java
        FailoverSinkProcessor.java
        LoadBalancingSinkProcessor.java
        LoggerSink.java
        NullSink.java
        RollingFileSink.java
        SinkGroup.java
        SinkProcessorFactory.java
        ThriftSink.java
        source
        AbstractEventDrivenSource.java
        AbstractPollableSource.java
        AbstractSource.java
        AvroSource.java
        BasicSourceSemantics.java
        DefaultSourceFactory.java
        EventDrivenSourceRunner.java
        ExecSource.java
        ExecSourceConfigurationConstants.java
        MultiportSyslogTCPSource.java
        NetcatSource.java
        NetcatSourceConfigurationConstants.java
        PollableSourceConstants.java
        PollableSourceRunner.java
        SequenceGeneratorSource.java
        SpoolDirectorySource.java
        SpoolDirectorySourceConfigurationConstants.java
        StressSource.java
        SyslogParser.java
        SyslogSourceConfigurationConstants.java
        SyslogTcpSource.java
        SyslogUDPSource.java
        SyslogUtils.java
        ThriftSource.java
        http
        BLOBHandler.java
        HTTPBadRequestException.java
        HTTPSource.java
        HTTPSourceConfigurationConstants.java
        HTTPSourceHandler.java
        JSONHandler.java
        tools
        DirectMemoryUtils.java
        GetJavaProperty.java
        HTTPServerConstraintUtil.java
        PlatformDetect.java
        TimestampRoundDownUtil.java
        VersionInfo.java
      - test
        java
        org
        apache
        flume
        TestContext.java
        TestCounterGroup.java
        channel
        AbstractBasicChannelSemanticsTest.java
        MockChannel.java
        MockEvent.java
        TestBasicChannelSemantics.java
        TestChannelProcessor.java
        TestChannelUtils.java
        TestMemoryChannel.java
        TestMemoryChannelConcurrency.java
        TestMemoryChannelTransaction.java
        TestMultiplexingChannelSelector.java
        TestReplicatingChannelSelector.java
        client
        avro
        TestBufferedLineReader.java
        TestReliableSpoolingFileEventReader.java
        TestSpoolingFileLineReader.java
        event
        TestEventHelper.java
        formatter
        output
        TestBucketPath.java
        instrumentation
        TestMonitoredCounterGroup.java
        http
        TestHTTPMetricsServer.java
        kafka
        KafkaSourceCounterTest.java
        util
        JMXTestUtils.java
        TestJMXPollUtil.java
        interceptor
        CensoringInterceptor.java
        RemoveHeaderInterceptorTest.java
        TestCensoringInterceptor.java
        TestHostInterceptor.java
        TestRegexExtractorInterceptor.java
        TestRegexExtractorInterceptorMillisSerializer.java
        TestRegexExtractorInterceptorPassThroughSerializer.java
        TestRegexFilteringInterceptor.java
        TestSearchAndReplaceInterceptor.java
        TestStaticInterceptor.java
        TestTimestampInterceptor.java
        lifecycle
        TestLifecycleController.java
        TestLifecycleSupervisor.java
        serialization
        ResettableTestStringInputStream.java
        SyslogAvroEventSerializer.java
        TestAvroEventDeserializer.java
        TestBodyTextEventSerializer.java
        TestDurablePositionTracker.java
        TestFlumeEventAvroEventSerializer.java
        TestHeaderAndBodyTextEventSerializer.java
        TestLineDeserializer.java
        TestResettableFileInputStream.java
        TestSyslogAvroEventSerializer.java
        TransientPositionTracker.java
        sink
        FixedOrderSelector.java
        SinkProcessorFactoryTest.java
        TestAvroSink.java
        TestDefaultSinkFactory.java
        TestFailoverSinkProcessor.java
        TestLoadBalancingSinkProcessor.java
        TestLoggerSink.java
        TestRollingFileSink.java
        TestThriftSink.java
        source
        MockSource.java
        TestAbstractPollableSource.java
        TestAvroSource.java
        TestBasicSourceSemantics.java
        TestDefaultSourceFactory.java
        TestExecSource.java
        TestMultiportSyslogTCPSource.java
        TestNetcatSource.java
        TestPollableSourceRunner.java
        TestSequenceGeneratorSource.java
        TestSpoolDirectorySource.java
        TestStressSource.java
        TestSyslogParser.java
        TestSyslogTcpSource.java
        TestSyslogUdpSource.java
        TestSyslogUtils.java
        TestThriftSource.java
        http
        FlumeHttpServletRequestWrapper.java
        TestBLOBHandler.java
        TestHTTPSource.java
        TestJSONHandler.java
        tools
        TestTimestampRoundDownUtil.java
        TestVersionInfo.java
  - flume-ng-embedded-agent
    - src
      - main
        java
        org
        apache
        flume
        agent
        embedded
        EmbeddedAgent.java
        EmbeddedAgentConfiguration.java
        EmbeddedSource.java
        MaterializedConfigurationProvider.java
        MemoryConfigurationProvider.java
        package-info.java
      - test
        java
        org
        apache
        flume
        agent
        embedded
        TestEmbeddedAgent.java
        TestEmbeddedAgentConfiguration.java
        TestEmbeddedAgentEmbeddedSource.java
        TestEmbeddedAgentState.java
  - flume-ng-legacy-sources
    - flume-avro-source
      - src
        main
        java
        org
        apache
        flume
        source
        avroLegacy
        AvroLegacySource.java
        test
        java
        org
        apache
        flume
        source
        avroLegacy
        TestLegacyAvroSource.java
    - flume-thrift-source
      - src
        main
        java
        com
        cloudera
        flume
        handlers
        thrift
        EventStatus.java
        Priority.java
        ThriftFlumeEvent.java
        ThriftFlumeEventServer.java
        org
        apache
        flume
        source
        thriftLegacy
        ThriftLegacySource.java
        test
        java
        org
        apache
        flume
        source
        thriftLegacy
        TestThriftLegacySource.java
  - flume-ng-node
    - src
      - main
        java
        org
        apache
        flume
        node
        AbstractConfigurationProvider.java
        AbstractZooKeeperConfigurationProvider.java
        Application.java
        ConfigurationProvider.java
        EnvVarResolverProperties.java
        MaterializedConfiguration.java
        PollingPropertiesFileConfigurationProvider.java
        PollingZooKeeperConfigurationProvider.java
        PropertiesFileConfigurationProvider.java
        SimpleMaterializedConfiguration.java
        StaticZooKeeperConfigurationProvider.java
      - test
        java
        org
        apache
        flume
        node
        TestAbstractConfigurationProvider.java
        TestAbstractZooKeeperConfigurationProvider.java
        TestApplication.java
        TestEnvVarResolverProperties.java
        TestPollingPropertiesFileConfigurationProvider.java
        TestPollingZooKeeperConfigurationProvider.java
        TestPropertiesFileConfigurationProvider.java
        TestStaticZooKeeperConfigurationProvider.java
        source
        TestNetcatSource.java
  - flume-ng-sdk
    - src
      - main
        java
        org
        apache
        flume
        Event.java
        EventDeliveryException.java
        FlumeException.java
        api
        AbstractRpcClient.java
        FailoverRpcClient.java
        HostInfo.java
        LoadBalancingRpcClient.java
        NettyAvroRpcClient.java
        RpcClient.java
        RpcClientConfigurationConstants.java
        RpcClientFactory.java
        ThriftRpcClient.java
        event
        EventBuilder.java
        JSONEvent.java
        SimpleEvent.java
        thrift
        Status.java
        ThriftFlumeEvent.java
        ThriftSourceProtocol.java
        util
        OrderSelector.java
        RandomOrderSelector.java
        RoundRobinOrderSelector.java
        SpecificOrderIterator.java
      - test
        java
        org
        apache
        flume
        api
        RpcTestUtils.java
        TestFailoverRpcClient.java
        TestLoadBalancingRpcClient.java
        TestNettyAvroRpcClient.java
        TestRpcClientFactory.java
        TestThriftRpcClient.java
        ThriftTestingSource.java
        event
        TestEventBuilder.java
  - flume-ng-sinks
    - flume-dataset-sink
      - src
        main
        java
        org
        apache
        flume
        sink
        kite
        DatasetSink.java
        DatasetSinkConstants.java
        NonRecoverableEventException.java
        parser
        AvroParser.java
        EntityParser.java
        EntityParserFactory.java
        policy
        FailurePolicy.java
        FailurePolicyFactory.java
        RetryPolicy.java
        SavePolicy.java
        test
        java
        org
        apache
        flume
        sink
        kite
        TestDatasetSink.java
    - flume-hdfs-sink
      - src
        main
        java
        org
        apache
        flume
        sink
        hdfs
        AbstractHDFSWriter.java
        AvroEventSerializer.java
        BucketClosedException.java
        BucketWriter.java
        HDFSCompressedDataStream.java
        HDFSDataStream.java
        HDFSEventSink.java
        HDFSSequenceFile.java
        HDFSTextSerializer.java
        HDFSWritableSerializer.java
        HDFSWriter.java
        HDFSWriterFactory.java
        KerberosUser.java
        SequenceFileSerializer.java
        SequenceFileSerializerFactory.java
        SequenceFileSerializerType.java
        test
        java
        org
        apache
        flume
        sink
        hdfs
        HDFSBadDataStream.java
        HDFSTestSeqWriter.java
        HDFSTestWriterFactory.java
        MockDataStream.java
        MockFileSystem.java
        MockFsDataOutputStream.java
        MockHDFSWriter.java
        MyCustomSerializer.java
        TestAvroEventSerializer.java
        TestBucketWriter.java
        TestHDFSCompressedDataStream.java
        TestHDFSEventSink.java
        TestHDFSEventSinkOnMiniCluster.java
        TestSequenceFileSerializerFactory.java
        TestUseRawLocalFileSystem.java
    - flume-hive-sink
      - src
        main
        java
        org
        apache
        flume
        sink
        hive
        Config.java
        HiveDelimitedTextSerializer.java
        HiveEventSerializer.java
        HiveJsonSerializer.java
        HiveSink.java
        HiveWriter.java
        test
        java
        org
        apache
        flume
        sink
        hive
        TestHiveSink.java
        TestHiveWriter.java
        TestUtil.java
    - flume-http-sink
      - src
        main
        java
        org
        apache
        flume
        sink
        http
        HttpSink.java
        package-info.java
        test
        java
        org
        apache
        flume
        sink
        http
        TestHttpSink.java
        TestHttpSinkIT.java
    - flume-irc-sink
      - src
        main
        java
        org
        apache
        flume
        sink
        irc
        IRCSink.java
        test
        java
        org
        apache
        flume
        sink
        irc
        TestIRCSink.java
    - flume-ng-elasticsearch-sink
      - src
        main
        java
        org
        apache
        flume
        sink
        elasticsearch
        AbstractElasticSearchIndexRequestBuilderFactory.java
        ContentBuilderUtil.java
        ElasticSearchDynamicSerializer.java
        ElasticSearchEventSerializer.java
        ElasticSearchIndexRequestBuilderFactory.java
        ElasticSearchLogStashEventSerializer.java
        ElasticSearchSink.java
        ElasticSearchSinkConstants.java
        EventSerializerIndexRequestBuilderFactory.java
        IndexNameBuilder.java
        SimpleIndexNameBuilder.java
        TimeBasedIndexNameBuilder.java
        TimestampedEvent.java
        client
        ElasticSearchClient.java
        ElasticSearchClientFactory.java
        ElasticSearchRestClient.java
        ElasticSearchTransportClient.java
        NoSuchClientTypeException.java
        RoundRobinList.java
        test
        java
        org
        apache
        flume
        sink
        elasticsearch
        AbstractElasticSearchSinkTest.java
        TestElasticSearchDynamicSerializer.java
        TestElasticSearchIndexRequestBuilderFactory.java
        TestElasticSearchLogStashEventSerializer.java
        TestElasticSearchSink.java
        TestElasticSearchSinkCreation.java
        TimeBasedIndexNameBuilderTest.java
        TimestampedEventTest.java
        client
        RoundRobinListTest.java
        TestElasticSearchClientFactory.java
        TestElasticSearchRestClient.java
        TestElasticSearchTransportClient.java
    - flume-ng-hbase-sink
      - src
        main
        java
        org
        apache
        flume
        sink
        hbase
        AsyncHBaseSink.java
        AsyncHbaseEventSerializer.java
        BatchAware.java
        HBaseSink.java
        HBaseSinkConfigurationConstants.java
        HbaseEventSerializer.java
        RegexHbaseEventSerializer.java
        SimpleAsyncHbaseEventSerializer.java
        SimpleHbaseEventSerializer.java
        SimpleRowKeyGenerator.java
        test
        java
        org
        apache
        flume
        sink
        hbase
        IncrementAsyncHBaseSerializer.java
        IncrementHBaseSerializer.java
        MockSimpleHbaseEventSerializer.java
        TestAsyncHBaseSink.java
        TestHBaseSink.java
        TestHBaseSinkCreation.java
        TestRegexHbaseEventSerializer.java
    - flume-ng-kafka-sink
      - src
        main
        java
        org
        apache
        flume
        sink
        kafka
        KafkaSink.java
        KafkaSinkConstants.java
        test
        java
        org
        apache
        flume
        sink
        kafka
        TestConstants.java
        TestKafkaSink.java
        util
        KafkaConsumer.java
        KafkaLocal.java
        TestUtil.java
        ZooKeeperLocal.java
    - flume-ng-morphline-solr-sink
      - src
        main
        java
        org
        apache
        flume
        sink
        solr
        morphline
        BlobDeserializer.java
        BlobHandler.java
        MorphlineHandler.java
        MorphlineHandlerImpl.java
        MorphlineInterceptor.java
        MorphlineSink.java
        MorphlineSolrSink.java
        UUIDInterceptor.java
        test
        java
        org
        apache
        flume
        sink
        solr
        morphline
        EmbeddedSource.java
        FlumeHttpServletRequestWrapper.java
        ResettableTestStringInputStream.java
        TestBlobDeserializer.java
        TestBlobHandler.java
        TestEnvironment.java
        TestMorphlineInterceptor.java
        TestMorphlineSolrSink.java
        TestUUIDInterceptor.java
  - flume-ng-sources
    - flume-jms-source
      - src
        main
        java
        org
        apache
        flume
        source
        jms
        DefaultJMSMessageConverter.java
        InitialContextFactory.java
        JMSDestinationLocator.java
        JMSDestinationType.java
        JMSMessageConsumer.java
        JMSMessageConsumerFactory.java
        JMSMessageConverter.java
        JMSSource.java
        JMSSourceConfiguration.java
        test
        java
        org
        apache
        flume
        source
        jms
        JMSMessageConsumerTestBase.java
        TestDefaultJMSMessageConverter.java
        TestIntegrationActiveMQ.java
        TestJMSMessageConsumer.java
        TestJMSSource.java
        TestJMSSourceCreation.java
    - flume-kafka-source
      - src
        main
        java
        org
        apache
        flume
        source
        kafka
        KafkaSource.java
        KafkaSourceConstants.java
        test
        java
        org
        apache
        flume
        source
        kafka
        KafkaSourceEmbeddedKafka.java
        KafkaSourceEmbeddedZookeeper.java
        TestKafkaSource.java
    - flume-scribe-source
      - src
        main
        java
        org
        apache
        flume
        source
        scribe
        LogEntry.java
        ResultCode.java
        Scribe.java
        ScribeSource.java
        test
        java
        org
        apache
        flume
        source
        scribe
        TestScribeSource.java
    - flume-taildir-source
      - src
        main
        java
        org
        apache
        flume
        source
        taildir
        ReliableTaildirEventReader.java
        TailFile.java
        TaildirMatcher.java
        TaildirSource.java
        TaildirSourceConfigurationConstants.java
        test
        java
        org
        apache
        flume
        source
        taildir
        TestTaildirEventReader.java
        TestTaildirMatcher.java
        TestTaildirSource.java
    - flume-twitter-source
      - src
        main
        java
        org
        apache
        flume
        source
        twitter
        TwitterSource.java
        test
        java
        org
        apache
        flume
        source
        twitter
        TestTwitterSource.java
  - flume-ng-tests
    - src
      - main
        java
        org
        apache
        flume
        Dummy.java
      - test
        java
        org
        apache
        flume
        test
        agent
        TestFileChannel.java
        TestRpcClient.java
        TestRpcClientCommunicationFailure.java
        TestSpooldirSource.java
        TestSyslogSource.java
        util
        StagedInstall.java
        SyslogAgent.java
  - flume-shared
    - flume-shared-kafka-test
      - src
        main
        java
        org
        apache
        flume
        shared
        kafka
        test
        KafkaPartitionTestUtil.java
        PartitionOption.java
        PartitionTestScenario.java
  - flume-tools
    - src
      - main
        java
        org
        apache
        flume
        tools
        EventValidator.java
        FileChannelIntegrityTool.java
        FlumeTool.java
        FlumeToolType.java
        FlumeToolsMain.java
      - test
        java
        org
        apache
        flume
        tools
        TestFileChannelIntegrityTool.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.flume.sink.kite;

import org.apache.flume.auth.FlumeAuthenticationUtil;
import org.apache.flume.auth.PrivilegedExecutor;
import org.apache.flume.sink.kite.parser.EntityParserFactory;
import org.apache.flume.sink.kite.parser.EntityParser;
import org.apache.flume.sink.kite.policy.FailurePolicy;
import org.apache.flume.sink.kite.policy.FailurePolicyFactory;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;

import java.net.URI;
import java.security.PrivilegedAction;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.flume.Channel;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.Transaction;
import org.apache.flume.conf.Configurable;
import org.apache.flume.instrumentation.SinkCounter;
import org.apache.flume.sink.AbstractSink;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.DatasetIOException;
import org.kitesdk.data.DatasetNotFoundException;
import org.kitesdk.data.DatasetWriter;
import org.kitesdk.data.Datasets;
import org.kitesdk.data.Flushable;
import org.kitesdk.data.Syncable;
import org.kitesdk.data.View;
import org.kitesdk.data.spi.Registration;
import org.kitesdk.data.URIBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.flume.sink.kite.DatasetSinkConstants.*;
import org.kitesdk.data.Format;
import org.kitesdk.data.Formats;

/**
 * Sink that writes events to a Kite Dataset. This sink will parse the body of
 * each incoming event and store the resulting entity in a Kite Dataset. It
 * determines the destination Dataset by opening a dataset URI
 * {@code kite.dataset.uri} or opening a repository URI, {@code kite.repo.uri},
 * and loading a Dataset by name, {@code kite.dataset.name}, and namespace,
 * {@code kite.dataset.namespace}.
 */
public class DatasetSink extends AbstractSink implements Configurable {

  private static final Logger LOG = LoggerFactory.getLogger(DatasetSink.class);

  private Context context = null;
  private PrivilegedExecutor privilegedExecutor;

  private String datasetName = null;
  private URI datasetUri = null;
  private Schema datasetSchema = null;
  private DatasetWriter<GenericRecord> writer = null;

  /**
   * The number of events to process as a single batch.
   */
  private long batchSize = DEFAULT_BATCH_SIZE;

  /**
   * The number of seconds to wait before rolling a writer.
   */
  private int rollIntervalSeconds = DEFAULT_ROLL_INTERVAL;

  /**
   * Flag that says if Flume should commit on every batch.
   */
  private boolean commitOnBatch = DEFAULT_FLUSHABLE_COMMIT_ON_BATCH;

  /**
   * Flag that says if Flume should sync on every batch.
   */
  private boolean syncOnBatch = DEFAULT_SYNCABLE_SYNC_ON_BATCH;

  /**
   * The last time the writer rolled.
   */
  private long lastRolledMillis = 0L;

  /**
   * The raw number of bytes parsed.
   */
  private long bytesParsed = 0L;

  /**
   * A class for parsing Kite entities from Flume Events.
   */
  private EntityParser<GenericRecord> parser = null;

  /**
   * A class implementing a failure newPolicy for events that had a
 non-recoverable error during processing.
   */
  private FailurePolicy failurePolicy = null;

  private SinkCounter counter = null;

  /**
   * The Kite entity
   */
  private GenericRecord entity = null;
  // TODO: remove this after PARQUET-62 is released
  private boolean reuseEntity = true;

  /**
   * The Flume transaction. Used to keep transactions open across calls to
   * process.
   */
  private Transaction transaction = null;

  /**
   * Internal flag on if there has been a batch of records committed. This is
   * used during rollback to know if the current writer needs to be closed.
   */
  private boolean committedBatch = false;

  // Factories
  private static final EntityParserFactory ENTITY_PARSER_FACTORY =
      new EntityParserFactory();
  private static final FailurePolicyFactory FAILURE_POLICY_FACTORY =
      new FailurePolicyFactory();

  /**
   * Return the list of allowed formats.
   * @return The list of allowed formats.
   */
  protected List<String> allowedFormats() {
    return Lists.newArrayList("avro", "parquet");
  }

  @Override
  public void configure(Context context) {
    this.context = context;

    String principal = context.getString(AUTH_PRINCIPAL);
    String keytab = context.getString(AUTH_KEYTAB);
    String effectiveUser = context.getString(AUTH_PROXY_USER);

    this.privilegedExecutor = FlumeAuthenticationUtil.getAuthenticator(
            principal, keytab).proxyAs(effectiveUser);

    // Get the dataset URI and name from the context
    String datasetURI = context.getString(CONFIG_KITE_DATASET_URI);
    if (datasetURI != null) {
      this.datasetUri = URI.create(datasetURI);
      this.datasetName = uriToName(datasetUri);
    } else {
      String repositoryURI = context.getString(CONFIG_KITE_REPO_URI);
      Preconditions.checkNotNull(repositoryURI, "No dataset configured. Setting "
          + CONFIG_KITE_DATASET_URI + " is required.");

      this.datasetName = context.getString(CONFIG_KITE_DATASET_NAME);
      Preconditions.checkNotNull(datasetName, "No dataset configured. Setting "
          + CONFIG_KITE_DATASET_URI + " is required.");

      String namespace = context.getString(CONFIG_KITE_DATASET_NAMESPACE,
          DEFAULT_NAMESPACE);

      this.datasetUri = new URIBuilder(repositoryURI, namespace, datasetName)
          .build();
    }
    this.setName(datasetUri.toString());

    if (context.getBoolean(CONFIG_SYNCABLE_SYNC_ON_BATCH,
        DEFAULT_SYNCABLE_SYNC_ON_BATCH)) {
      Preconditions.checkArgument(
          context.getBoolean(CONFIG_FLUSHABLE_COMMIT_ON_BATCH,
              DEFAULT_FLUSHABLE_COMMIT_ON_BATCH), "Configuration error: "
                  + CONFIG_FLUSHABLE_COMMIT_ON_BATCH + " must be set to true when "
                  + CONFIG_SYNCABLE_SYNC_ON_BATCH + " is set to true.");
    }

    // Create the configured failure failurePolicy
    this.failurePolicy = FAILURE_POLICY_FACTORY.newPolicy(context);

    // other configuration
    this.batchSize = context.getLong(CONFIG_KITE_BATCH_SIZE,
        DEFAULT_BATCH_SIZE);
    this.rollIntervalSeconds = context.getInteger(CONFIG_KITE_ROLL_INTERVAL,
        DEFAULT_ROLL_INTERVAL);

    this.counter = new SinkCounter(datasetName);
  }

  @Override
  public synchronized void start() {
    this.lastRolledMillis = System.currentTimeMillis();
    counter.start();
    // signal that this sink is ready to process
    LOG.info("Started DatasetSink " + getName());
    super.start();
  }

  /**
   * Causes the sink to roll at the next {@link #process()} call.
   */
  @VisibleForTesting
  void roll() {
    this.lastRolledMillis = 0L;
  }

  @VisibleForTesting
  DatasetWriter<GenericRecord> getWriter() {
    return writer;
  }

  @VisibleForTesting
  void setWriter(DatasetWriter<GenericRecord> writer) {
    this.writer = writer;
  }

  @VisibleForTesting
  void setParser(EntityParser<GenericRecord> parser) {
    this.parser = parser;
  }

  @VisibleForTesting
  void setFailurePolicy(FailurePolicy failurePolicy) {
    this.failurePolicy = failurePolicy;
  }

  @Override
  public synchronized void stop() {
    counter.stop();

    try {
      // Close the writer and commit the transaction, but don't create a new
      // writer since we're stopping
      closeWriter();
      commitTransaction();
    } catch (EventDeliveryException ex) {
      rollbackTransaction();

      LOG.warn("Closing the writer failed: " + ex.getLocalizedMessage());
      LOG.debug("Exception follows.", ex);
      // We don't propogate the exception as the transaction would have been
      // rolled back and we can still finish stopping
    }

  // signal that this sink has stopped
    LOG.info("Stopped dataset sink: " + getName());
    super.stop();
  }

  @Override
  public Status process() throws EventDeliveryException {
    long processedEvents = 0;

    try {
      if (shouldRoll()) {
        closeWriter();
        commitTransaction();
        createWriter();
      }

      // The writer shouldn't be null at this point
      Preconditions.checkNotNull(writer,
          "Can't process events with a null writer. This is likely a bug.");
      Channel channel = getChannel();

      // Enter the transaction boundary if we haven't already
      enterTransaction(channel);

      for (; processedEvents < batchSize; processedEvents += 1) {
        Event event = channel.take();

        if (event == null) {
          // no events available in the channel
          break;
        }

        write(event);
      }

      // commit transaction
      if (commitOnBatch) {
        // Flush/sync before commiting. A failure here will result in rolling back
        // the transaction
        if (syncOnBatch && writer instanceof Syncable) {
          ((Syncable) writer).sync();
        } else if (writer instanceof Flushable) {
          ((Flushable) writer).flush();
        }
        boolean committed = commitTransaction();
        Preconditions.checkState(committed,
            "Tried to commit a batch when there was no transaction");
        committedBatch |= committed;
      }
    } catch (Throwable th) {
      // catch-all for any unhandled Throwable so that the transaction is
      // correctly rolled back.
      rollbackTransaction();

      if (commitOnBatch && committedBatch) {
        try {
          closeWriter();
        } catch (EventDeliveryException ex) {
          LOG.warn("Error closing writer there may be temp files that need to"
              + " be manually recovered: " + ex.getLocalizedMessage());
          LOG.debug("Exception follows.", ex);
        }
      } else {
        this.writer = null;
      }

      // handle the exception
      Throwables.propagateIfInstanceOf(th, Error.class);
      Throwables.propagateIfInstanceOf(th, EventDeliveryException.class);
      throw new EventDeliveryException(th);
    }

    if (processedEvents == 0) {
      counter.incrementBatchEmptyCount();
      return Status.BACKOFF;
    } else if (processedEvents < batchSize) {
      counter.incrementBatchUnderflowCount();
    } else {
      counter.incrementBatchCompleteCount();
    }

    counter.addToEventDrainSuccessCount(processedEvents);

    return Status.READY;
  }

  /**
   * Parse the event using the entity parser and write the entity to the dataset.
   *
   * @param event The event to write
   * @throws EventDeliveryException An error occurred trying to write to the
                                dataset that couldn't or shouldn't be
                                handled by the failure policy.
   */
  @VisibleForTesting
  void write(Event event) throws EventDeliveryException {
    try {
      this.entity = parser.parse(event, reuseEntity ? entity : null);
      this.bytesParsed += event.getBody().length;

      // writeEncoded would be an optimization in some cases, but HBase
      // will not support it and partitioned Datasets need to get partition
      // info from the entity Object. We may be able to avoid the
      // serialization round-trip otherwise.
      writer.write(entity);
    } catch (NonRecoverableEventException ex) {
      failurePolicy.handle(event, ex);
    } catch (DataFileWriter.AppendWriteException ex) {
      failurePolicy.handle(event, ex);
    } catch (RuntimeException ex) {
      Throwables.propagateIfInstanceOf(ex, EventDeliveryException.class);
      throw new EventDeliveryException(ex);
    }
  }

  /**
   * Create a new writer.
   *
   * This method also re-loads the dataset so updates to the configuration or
   * a dataset created after Flume starts will be loaded.
   *
   * @throws EventDeliveryException There was an error creating the writer.
   */
  @VisibleForTesting
  void createWriter() throws EventDeliveryException {
    // reset the commited flag whenever a new writer is created
    committedBatch = false;
    try {
      View<GenericRecord> view;

      view = privilegedExecutor.execute(
        new PrivilegedAction<Dataset<GenericRecord>>() {
          @Override
          public Dataset<GenericRecord> run() {
            return Datasets.load(datasetUri);
          }
        });

      DatasetDescriptor descriptor = view.getDataset().getDescriptor();
      Format format = descriptor.getFormat();
      Preconditions.checkArgument(allowedFormats().contains(format.getName()),
          "Unsupported format: " + format.getName());

      Schema newSchema = descriptor.getSchema();
      if (datasetSchema == null || !newSchema.equals(datasetSchema)) {
        this.datasetSchema = descriptor.getSchema();
        // dataset schema has changed, create a new parser
        parser = ENTITY_PARSER_FACTORY.newParser(datasetSchema, context);
      }

      this.reuseEntity = !(Formats.PARQUET.equals(format));

      // TODO: Check that the format implements Flushable after CDK-863
      // goes in. For now, just check that the Dataset is Avro format
      this.commitOnBatch = context.getBoolean(CONFIG_FLUSHABLE_COMMIT_ON_BATCH,
          DEFAULT_FLUSHABLE_COMMIT_ON_BATCH) && (Formats.AVRO.equals(format));

      // TODO: Check that the format implements Syncable after CDK-863
      // goes in. For now, just check that the Dataset is Avro format
      this.syncOnBatch = context.getBoolean(CONFIG_SYNCABLE_SYNC_ON_BATCH,
          DEFAULT_SYNCABLE_SYNC_ON_BATCH) && (Formats.AVRO.equals(format));

      this.datasetName = view.getDataset().getName();

      this.writer = view.newWriter();

      // Reset the last rolled time and the metrics
      this.lastRolledMillis = System.currentTimeMillis();
      this.bytesParsed = 0L;
    } catch (DatasetNotFoundException ex) {
      throw new EventDeliveryException("Dataset " + datasetUri + " not found."
          + " The dataset must be created before Flume can write to it.", ex);
    } catch (RuntimeException ex) {
      throw new EventDeliveryException("Error trying to open a new"
          + " writer for dataset " + datasetUri, ex);
    }
  }

  /**
   * Return true if the sink should roll the writer.
   *
   * Currently, this is based on time since the last roll or if the current
   * writer is null.
   *
   * @return True if and only if the sink should roll the writer
   */
  private boolean shouldRoll() {
    long currentTimeMillis = System.currentTimeMillis();
    long elapsedTimeSeconds = TimeUnit.MILLISECONDS.toSeconds(
        currentTimeMillis - lastRolledMillis);

    LOG.debug("Current time: {}, lastRolled: {}, diff: {} sec",
        new Object[] {currentTimeMillis, lastRolledMillis, elapsedTimeSeconds});

    return elapsedTimeSeconds >= rollIntervalSeconds || writer == null;
  }

  /**
   * Close the current writer.
   *
   * This method always sets the current writer to null even if close fails.
   * If this method throws an Exception, callers *must* rollback any active
   * transaction to ensure that data is replayed.
   *
   * @throws EventDeliveryException
   */
  @VisibleForTesting
  void closeWriter() throws EventDeliveryException {
    if (writer != null) {
      try {
        writer.close();

        long elapsedTimeSeconds = TimeUnit.MILLISECONDS.toSeconds(
            System.currentTimeMillis() - lastRolledMillis);
        LOG.info("Closed writer for {} after {} seconds and {} bytes parsed",
            new Object[]{datasetUri, elapsedTimeSeconds, bytesParsed});
      } catch (DatasetIOException ex) {
        throw new EventDeliveryException("Check HDFS permissions/health. IO"
            + " error trying to close the  writer for dataset " + datasetUri,
            ex);
      } catch (RuntimeException ex) {
        throw new EventDeliveryException("Error trying to close the  writer for"
            + " dataset " + datasetUri, ex);
      } finally {
        // If we failed to close the writer then we give up on it as we'll
        // end up throwing an EventDeliveryException which will result in
        // a transaction rollback and a replay of any events written during
        // the current transaction. If commitOnBatch is true, you can still
        // end up with orphaned temp files that have data to be recovered.
        this.writer = null;
        failurePolicy.close();
      }
    }
  }

  /**
   * Enter the transaction boundary. This will either begin a new transaction
   * if one didn't already exist. If we're already in a transaction boundary,
   * then this method does nothing.
   *
   * @param channel The Sink's channel
   * @throws EventDeliveryException There was an error starting a new batch
   *                                with the failure policy.
   */
  private void enterTransaction(Channel channel) throws EventDeliveryException {
    // There's no synchronization around the transaction instance because the
    // Sink API states "the Sink#process() call is guaranteed to only
    // be accessed  by a single thread". Technically other methods could be
    // called concurrently, but the implementation of SinkRunner waits
    // for the Thread running process() to end before calling stop()
    if (transaction == null) {
      this.transaction = channel.getTransaction();
      transaction.begin();
      failurePolicy = FAILURE_POLICY_FACTORY.newPolicy(context);
    }
  }

  /**
   * Commit and close the transaction.
   *
   * If this method throws an Exception the caller *must* ensure that the
   * transaction is rolled back. Callers can roll back the transaction by
   * calling {@link #rollbackTransaction()}.
   *
   * @return True if there was an open transaction and it was committed, false
   *         otherwise.
   * @throws EventDeliveryException There was an error ending the batch with
   *                                the failure policy.
   */
  @VisibleForTesting
  boolean commitTransaction() throws EventDeliveryException {
    if (transaction != null) {
      failurePolicy.sync();
      transaction.commit();
      transaction.close();
      this.transaction = null;
      return true;
    } else {
      return false;
    }
  }

  /**
   * Rollback the transaction. If there is a RuntimeException during rollback,
   * it will be logged but the transaction instance variable will still be
   * nullified.
   */
  private void rollbackTransaction() {
    if (transaction != null) {
      try {
        // If the transaction wasn't committed before we got the exception, we
        // need to rollback.
        transaction.rollback();
      } catch (RuntimeException ex) {
        LOG.error("Transaction rollback failed: " + ex.getLocalizedMessage());
        LOG.debug("Exception follows.", ex);
      } finally {
        transaction.close();
        this.transaction = null;
      }
    }
  }

  /**
   * Get the name of the dataset from the URI
   *
   * @param uri The dataset or view URI
   * @return The dataset name
   */
  private static String uriToName(URI uri) {
    return Registration.lookupDatasetUri(URI.create(
        uri.getRawSchemeSpecificPart())).second().get("dataset");
  }
}