// JobIndexParse.java example
//
// Explorer
// mdrill-master
// - trunk
package com.alimama.mdrill.index;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


/**
 * Small helper around a Hadoop {@link FileSystem} for reading and writing the
 * marker files and directory listings used by the index job.
 *
 * <p>All text is written and read as UTF-8.
 */
public class JobIndexParse {
	/** Charset name shared by {@link #writeStr} and {@link #readFirstLineStr}. */
	private static final String CHARSET = "UTF-8";

	/**
	 * Creates a helper bound to the given file system.
	 *
	 * @param fs the file system every operation of this instance runs against
	 */
	public JobIndexParse(FileSystem fs) {
		this.fs = fs;
	}

	private final FileSystem fs;

	/**
	 * Replaces {@code file} with a new file holding {@code contents} encoded as
	 * UTF-8. Anything already present at that path is deleted recursively first.
	 *
	 * @param file     the path to (re)create
	 * @param contents the text to store
	 * @throws IOException if deleting, creating, writing or closing fails
	 */
	public void writeStr(Path file, String contents) throws IOException {
		if (fs.exists(file)) {
			fs.delete(file, true);
		}

		FSDataOutputStream out = fs.create(file);
		try {
			// Explicit charset: readFirstLineStr decodes as UTF-8, so encoding
			// with the platform default would corrupt non-ASCII content.
			out.write(contents.getBytes(CHARSET));
		} finally {
			// Always release the stream, even when the write fails.
			out.close();
		}
	}

	/**
	 * Lists the names of the immediate sub-directories of {@code dir}, skipping
	 * entries whose name starts with {@code "_"} or {@code "."} and the entry
	 * named {@code "index"}.
	 *
	 * @param dir the directory to scan
	 * @return the matching directory names; empty if {@code dir} does not exist
	 * @throws IOException if the file system cannot be queried
	 */
	public Set<String> readPartion(Path dir) throws IOException {
		Set<String> names = new HashSet<String>();
		if (!fs.exists(dir)) {
			return names;
		}

		FileStatus[] children = fs.listStatus(dir);
		if (children == null) {
			// NOTE(review): defensive guard - some FileSystem versions appear to
			// return null rather than throw when the path disappears after the
			// exists() check; confirm against the Hadoop version in use.
			return names;
		}

		for (FileStatus child : children) {
			if (!child.isDir()) {
				continue;
			}
			String name = child.getPath().getName();
			if (name.startsWith("_") || name.startsWith(".")
					|| name.equals("index")) {
				continue;
			}
			names.add(name);
		}
		return names;
	}

	/**
	 * Reads the first line of {@code file}, decoded as UTF-8.
	 *
	 * <p>This is a best-effort read: a missing file, an empty file, or any
	 * {@link IOException} yields an empty string instead of an exception.
	 *
	 * @param file the file to read
	 * @return the first line without its line terminator, or {@code ""} when
	 *         no line could be read
	 */
	public String readFirstLineStr(Path file) {
		String firstLine = "";
		try {
			if (fs.exists(file)) {
				FSDataInputStream raw = fs.open(file);
				try {
					BufferedReader reader = new BufferedReader(
							new InputStreamReader(raw, CHARSET));
					String line = reader.readLine();
					// readLine() returns null on an empty file; appending that to
					// a buffer would produce the literal text "null".
					if (line != null) {
						firstLine = line;
					}
				} finally {
					// Closing the underlying stream releases the reader as well.
					raw.close();
				}
			}
		} catch (IOException ignored) {
			// Deliberately best-effort: callers get whatever was read so far
			// (possibly "") rather than a failure.
		}
		return firstLine;
	}

	/**
	 * Builds the path obtained by appending {@code "_DistributedCache"} to
	 * {@code output}.
	 *
	 * @param output the base output location
	 * @return the derived path
	 */
	public Path distribute(String output) {
		return new Path(output + "_DistributedCache");
	}

	/**
	 * Builds the path obtained by appending {@code "_smallIndex"} to
	 * {@code output}.
	 *
	 * @param output the base output location
	 * @return the derived path
	 */
	public Path smallIndex(String output) {
		return new Path(output + "_smallIndex");
	}

}