/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nutch.indexer;

import java.io.*;
import java.util.Random;

import org.apache.lucene.store.*;
import org.apache.nutch.util.HadoopFSUtil;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.conf.Configuration;

/** Reads a Lucene index stored in DFS. */
public class FsDirectory extends Directory {

  private FileSystem fs;
  private Path directory;
  private int ioFileBufferSize;

  public FsDirectory(FileSystem fs, Path directory, boolean create,
                     Configuration conf) throws IOException {
    this.fs = fs;
    this.directory = directory;
    this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096);

    if (create) {
      create();
    }

    if (!fs.getFileStatus(directory).isDir())
      throw new IOException(directory + " not a directory");
  }

  /** Creates the directory if needed and removes any pre-existing files. */
  private void create() throws IOException {
    if (!fs.exists(directory)) {
      fs.mkdirs(directory);
    }

    if (!fs.getFileStatus(directory).isDir())
      throw new IOException(directory + " not a directory");

    // clear old files
    FileStatus[] fstats = fs.listStatus(directory,
        HadoopFSUtil.getPassAllFilter());
    Path[] files = HadoopFSUtil.getPaths(fstats);
    for (int i = 0; i < files.length; i++) {
      if (!fs.delete(files[i], false))
        throw new IOException("Cannot delete " + files[i]);
    }
  }

  public String[] list() throws IOException {
    FileStatus[] fstats = fs.listStatus(directory,
        HadoopFSUtil.getPassAllFilter());
    Path[] files = HadoopFSUtil.getPaths(fstats);
    if (files == null) return null;

    String[] result = new String[files.length];
    for (int i = 0; i < files.length; i++) {
      result[i] = files[i].getName();
    }
    return result;
  }

  public boolean fileExists(String name) throws IOException {
    return fs.exists(new Path(directory, name));
  }

  public long fileModified(String name) {
    throw new UnsupportedOperationException();
  }

  public void touchFile(String name) {
    throw new UnsupportedOperationException();
  }

  public long fileLength(String name) throws IOException {
    return fs.getFileStatus(new Path(directory, name)).getLen();
  }

  public void deleteFile(String name) throws IOException {
    if (!fs.delete(new Path(directory, name), false))
      throw new IOException("Cannot delete " + name);
  }

  public void renameFile(String from, String to) throws IOException {
    // DFS is currently broken when target already exists,
    // so we explicitly delete the target first.
    Path target = new Path(directory, to);
    if (fs.exists(target)) {
      fs.delete(target, false);
    }
    fs.rename(new Path(directory, from), target);
  }

  public IndexOutput createOutput(String name) throws IOException {
    Path file = new Path(directory, name);
    if (fs.exists(file) && !fs.delete(file, false))   // delete existing, if any
      throw new IOException("Cannot overwrite: " + file);

    return new DfsIndexOutput(file, this.ioFileBufferSize);
  }

  public IndexInput openInput(String name) throws IOException {
    return new DfsIndexInput(new Path(directory, name), this.ioFileBufferSize);
  }

  public Lock makeLock(final String name) {
    // Locking is not implemented over DFS; obtain() is a no-op that
    // always succeeds.
    return new Lock() {
      public boolean obtain() {
        return true;
      }
      public void release() {
      }
      public boolean isLocked() {
        throw new UnsupportedOperationException();
      }
      public String toString() {
        return "Lock@" + new Path(directory, name);
      }
    };
  }

  public synchronized void close() throws IOException {
    fs.close();
  }

  public String toString() {
    return this.getClass().getName() + "@" + directory;
  }

  private class DfsIndexInput extends BufferedIndexInput {

    /** Shared by clones. */
    private class Descriptor {
      public FSDataInputStream in;
      public long position;                     // cache of in.getPos()

      public Descriptor(Path file, int ioFileBufferSize) throws IOException {
        this.in = fs.open(file, ioFileBufferSize);
      }
    }

    private final Descriptor descriptor;
    private final long length;
    private boolean isClone;

    public DfsIndexInput(Path path, int ioFileBufferSize) throws IOException {
      descriptor = new Descriptor(path, ioFileBufferSize);
      length = fs.getFileStatus(path).getLen();
    }

    protected void readInternal(byte[] b, int offset, int len)
        throws IOException {
      // The descriptor (and hence the stream position) is shared by clones,
      // so reads are serialized and re-seek if another clone moved the stream.
      synchronized (descriptor) {
        long position = getFilePointer();
        if (position != descriptor.position) {
          descriptor.in.seek(position);
          descriptor.position = position;
        }
        int total = 0;
        do {
          int i = descriptor.in.read(b, offset + total, len - total);
          if (i == -1)
            throw new IOException("read past EOF");
          descriptor.position += i;
          total += i;
        } while (total < len);
      }
    }

    public void close() throws IOException {
      if (!isClone) {
        descriptor.in.close();
      }
    }

    protected void seekInternal(long position) {
      // handled in readInternal()
    }

    public long length() {
      return length;
    }

    protected void finalize() throws IOException {
      close();                                  // close the file
    }

    public Object clone() {
      DfsIndexInput clone = (DfsIndexInput)super.clone();
      clone.isClone = true;
      return clone;
    }
  }

  private class DfsIndexOutput extends BufferedIndexOutput {
    private FSDataOutputStream out;
    private RandomAccessFile local;
    private File localFile;

    public DfsIndexOutput(Path path, int ioFileBufferSize) throws IOException {
      // create a temporary local file and set it to delete on exit
      String randStr =
          Integer.toString(new Random().nextInt(Integer.MAX_VALUE));
      localFile = File.createTempFile("index_" + randStr, ".tmp");
      localFile.deleteOnExit();
      local = new RandomAccessFile(localFile, "rw");

      out = fs.create(path);
    }

    public void flushBuffer(byte[] b, int offset, int size)
        throws IOException {
      local.write(b, offset, size);
    }

    public void close() throws IOException {
      super.close();

      // transfer to dfs from local
      byte[] buffer = new byte[4096];
      local.seek(0);
      int read = -1;
      while ((read = local.read(buffer)) != -1) {
        out.write(buffer, 0, read);
      }
      out.close();
      local.close();
      // remove the local copy now rather than waiting for JVM exit
      localFile.delete();
    }

    public void seek(long pos) throws IOException {
      super.seek(pos);
      local.seek(pos);
    }

    public long length() throws IOException {
      return local.length();
    }
  }
}
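
// Example usage (a minimal sketch, not part of the original class): opening
// an IndexReader over an index stored in HDFS through FsDirectory. The index
// path below is hypothetical, and IndexReader.open(Directory) is the Lucene
// 2.x-era API this Nutch code compiles against; adjust for your versions of
// Lucene and Hadoop.
//
//   Configuration conf = new Configuration();
//   FileSystem fs = FileSystem.get(conf);
//   FsDirectory dir = new FsDirectory(
//       fs, new Path("/user/nutch/indexes/part-00000"), false, conf);
//   IndexReader reader = IndexReader.open(dir);
//   try {
//     System.out.println("maxDoc: " + reader.maxDoc());
//   } finally {
//     reader.close();
//     // Note: FsDirectory.close() closes the (possibly shared) FileSystem
//     // handle, so only call dir.close() when nothing else is using it.
//   }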