/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.streams.hdfs;

import org.apache.streams.core.DatumStatus;
import org.apache.streams.core.StreamsDatum;

import com.google.common.util.concurrent.Uninterruptibles;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.FileStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.concurrent.TimeUnit;
import java.util.zip.GZIPInputStream;

/**
 * WebHdfsPersistReaderTask reads from HDFS on behalf of
 * {@link org.apache.streams.hdfs.WebHdfsPersistReader}.
 */
public class WebHdfsPersistReaderTask implements Runnable {

    private static final Logger LOGGER = LoggerFactory.getLogger(WebHdfsPersistReaderTask.class);

    private WebHdfsPersistReader reader;

    public WebHdfsPersistReaderTask(WebHdfsPersistReader reader) {
        this.reader = reader;
    }

    @Override
    public void run() {

        LOGGER.info("WebHdfsPersistReaderTask: files to process");
        for (FileStatus fileStatus : reader.status) {
            LOGGER.info(" " + fileStatus.getPath().getName());
        }

        for (FileStatus fileStatus : reader.status) {
            InputStream inputStream;
            InputStreamReader inputStreamReader;
            BufferedReader bufferedReader;
            // Skip directories and bookkeeping files such as _SUCCESS markers.
            if (fileStatus.isFile() && !fileStatus.getPath().getName().startsWith("_")) {
                // Infer compression from the file extension.
                HdfsWriterConfiguration.Compression compression = HdfsWriterConfiguration.Compression.NONE;
                if (fileStatus.getPath().getName().endsWith(".gz")) {
                    compression = HdfsWriterConfiguration.Compression.GZIP;
                }
                LOGGER.info("Started Processing: {} Encoding: {} Compression: {}",
                        fileStatus.getPath().getName(), reader.hdfsConfiguration.getEncoding(), compression.toString());
                try {
                    inputStream = reader.client.open(fileStatus.getPath());
                    if (compression.equals(HdfsWriterConfiguration.Compression.GZIP)) {
                        inputStream = new GZIPInputStream(inputStream);
                    }
                    inputStreamReader = new InputStreamReader(inputStream, reader.hdfsConfiguration.getEncoding());
                    bufferedReader = new BufferedReader(inputStreamReader);
                } catch (Exception ex) {
                    // Pass the throwable itself so the stack trace is logged; note that
                    // returning here aborts the task, leaving remaining files unprocessed.
                    LOGGER.error("Exception Opening {}", fileStatus.getPath(), ex);
                    return;
                }
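                /*
                 * Illustrative aside (not part of the original source): the chain
                 * built above is the standard java.io decorator pattern. A minimal
                 * standalone equivalent for a local gzipped file, assuming a
                 * hypothetical path "/tmp/example.json.gz" and UTF-8 encoding:
                 *
                 *   InputStream in = new FileInputStream("/tmp/example.json.gz");
                 *   in = new GZIPInputStream(in);
                 *   BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
                 *
                 * Only the outermost reader needs an explicit close();
                 * BufferedReader.close() closes the wrapped streams as well.
                 */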
                // Read until end of file: readLine() returns null at EOF, and a
                // null or empty line terminates the loop.
                String line = "";
                do {
                    try {
                        line = bufferedReader.readLine();
                        if (StringUtils.isNotEmpty(line)) {
                            reader.countersCurrent.incrementAttempt();
                            StreamsDatum entry = reader.lineReaderUtil.processLine(line);
                            if (entry != null) {
                                reader.write(entry);
                                reader.countersCurrent.incrementStatus(DatumStatus.SUCCESS);
                            } else {
                                LOGGER.warn("processLine failed");
                                reader.countersCurrent.incrementStatus(DatumStatus.FAIL);
                            }
                        }
                    } catch (Exception ex) {
                        LOGGER.warn("WebHdfsPersistReader readLine Exception", ex);
                        reader.countersCurrent.incrementStatus(DatumStatus.FAIL);
                    }
                } while (StringUtils.isNotEmpty(line));
                LOGGER.info("Finished Processing " + fileStatus.getPath().getName());
                try {
                    bufferedReader.close();
                } catch (Exception ex) {
                    LOGGER.error("WebHdfsPersistReader Exception", ex);
                }
            }
        }

        LOGGER.info("WebHdfsPersistReaderTask Finished");

        // Linger briefly before the thread exits, giving downstream consumers
        // time to drain what has been written.
        Uninterruptibles.sleepUninterruptibly(15, TimeUnit.SECONDS);
    }

}
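/*
 * Usage sketch (not part of the original source): this task is normally
 * created and scheduled by WebHdfsPersistReader itself, but a standalone
 * driver might look roughly like the following. The configuration values
 * and executor wiring here are illustrative assumptions, not fixed API.
 *
 *   HdfsReaderConfiguration config = new HdfsReaderConfiguration();  // populate host, port, path, etc.
 *   WebHdfsPersistReader reader = new WebHdfsPersistReader(config);
 *   reader.prepare(null);  // connects to WebHDFS and lists files into reader.status
 *   ExecutorService executor = Executors.newSingleThreadExecutor();
 *   executor.submit(new WebHdfsPersistReaderTask(reader));
 */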