package org.apache.hadoop.mapred; /** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hdfs.GeneralConstant; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.Map; import java.util.HashMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.ReflectionUtils; import org.apache.commons.logging.Log; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.mapred.GenWriterThread.DirectoryChecksum; @SuppressWarnings("deprecation") public class GenReaderThread extends GenThread implements GeneralConstant { private static final Log LOG = LogFactory.getLog(GenReaderThread.class); public static final String TEST_TYPE = "read"; // for reduce private long total_open_files = 0; private ArrayList<String> corrupt_dir = null; // Thread specific variables private long checksum; private boolean verifyChecksum = false; public long open_files = 0; public boolean isCorrupt = false; public DirectoryChecksum dc = new DirectoryChecksum(); public RunTimeConstants rtc = null; public GenReaderThread() { } public GenReaderThread(Configuration conf, Path p, long checksum, boolean verifyChecksum, RunTimeConstants rtc) throws IOException{ super(conf, p, null, rtc); this.rtc = rtc; this.checksum = checksum; this.verifyChecksum = verifyChecksum; } /** * Create a number of threads to generate read traffics * @param conf * @param key directory of files to read * @param value checksum file locaiton * @return * @throws IOException */ @Override public GenThread[] prepare(JobConf conf, Text key, Text value) throws IOException { this.rtc = new RunTimeConstants(); super.prepare(conf, key, value, rtc); Path basePath = new Path(key.toString()); LOG.info("base path is " + basePath); Path checksumPath = null; FileSystem fs = FileSystem.newInstance(conf); if (value.toString().length() != 0) { checksumPath = new Path(value.toString()); } HashMap<String, Long> checksumMap = null; boolean verifyChecksum = false; if (fs.exists(checksumPath)) { LOG.info("checksum path is " + checksumPath); verifyChecksum = true; checksumMap = new HashMap<String, Long>(); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, checksumPath, conf); Writable dir = (Writable) ReflectionUtils.newInstance( reader.getKeyClass(), conf); Writable checksum = (Writable) ReflectionUtils.newInstance( reader.getValueClass(), conf); while(reader.next(dir, checksum)) { LOG.info("dir: " + dir.toString() + " checksum: " + checksum); checksumMap.put( fs.makeQualified(new Path(dir.toString())).toUri().getPath(), Long.parseLong(checksum.toString())); } } catch(Exception e) { LOG.error(e); throw new IOException(e); } finally { IOUtils.closeStream(reader); } } FileStatus[] baseDirs = fs.listStatus(basePath); if (rtc.nthreads != baseDirs.length) { throw new IOException("Number of directory under " + basePath + "(" + baseDirs.length + ") doesn't match number of threads " + "(" + rtc.nthreads + ")."); } GenReaderThread[] threads = new GenReaderThread[(int)rtc.nthreads]; for (int i=0; i < rtc.nthreads; i++) { long checksum = 0; if (verifyChecksum) { String basePathStr = baseDirs[i].getPath().toUri().getPath(); checksum = checksumMap.get(basePathStr); } threads[i] = new GenReaderThread(conf, baseDirs[i].getPath(), checksum, verifyChecksum, rtc); } return threads; } @Override public Map<String, String> collectStats(JobConf conf, GenThread[] threads, long execTime) throws IOException { long total_opened_files = 0; String corruptFiles = ""; for (Thread rawThread: threads) { GenReaderThread thread = (GenReaderThread)rawThread; total_opened_files += thread.open_files; if (thread.isCorrupt) { corruptFiles += thread.inputPath.toString() + " "; } } LOG.info("Number of open files = " + total_opened_files); Map<String, String> stat = super.collectStats(conf, threads, execTime); stat.put("openfiles", String.valueOf(total_opened_files)); stat.put("corruptdirs", corruptFiles); return stat; } @Override public void run() { try { FileStatus[] files = fs.listStatus(inputPath); for (FileStatus file : files) { if (verifyChecksum) { dc.openFile(); } FSDataInputStream in = null; try { in = fs.open(file.getPath()); if (in.isUnderConstruction()) { open_files++; LOG.info("file " + file.getPath() + " is still open"); continue; } int size = 0; while (true) { size = in.read(buffer, 0, rtc.buffer_size); if (size <= 0) { break; } processed_size += size; if (verifyChecksum) { dc.getFileChecksum().update(buffer, 0, size); } } } catch (Exception e) { LOG.error("Error in reading file " + file.getPath(), e); this.errors.add(e); } finally { IOUtils.closeStream(in); } files_processed++; if (verifyChecksum) { dc.closeFile(); } } LOG.info("Directory " + inputPath + " is scanned with checksum " + dc.getDirectoryChecksum()); this.isCorrupt = open_files > 0; if (verifyChecksum) this.isCorrupt = this.isCorrupt || dc.getDirectoryChecksum() != checksum; } catch (Exception ioe) { LOG.error("Error:", ioe); this.errors.add(ioe); } } @Override public void reset() { total_open_files = 0; corrupt_dir = new ArrayList<String>(); } @Override public void analyze(Map<String, String> stat) throws IOException { total_open_files += Long.parseLong(stat.get("openfiles")); String[] files = stat.get("corruptdirs").split(" "); for (String file : files) { if (file != null && file.length() > 0) corrupt_dir.add(file); } } @Override public void output(FSDataOutputStream out) throws IOException { out.writeChars("Number of open files:\t\t\t" + total_open_files + "\n"); out.writeChars("Number of corrupt dirs:\t\t\t" + corrupt_dir.size() + "\n"); if (corrupt_dir.size() > 0) { out.writeChars("-----------------------------\n"); out.writeChars("Corrupt Dirs:\n"); out.writeChars("-----------------------------\n"); for (String file : corrupt_dir) { out.writeChars(file + "\n"); } } } }