package com.alimama.mdrill.adhoc;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
/**
 * Hive UDF that reports whether a value occurs as a line of a text file.
 *
 * <p>The file is read through the Hadoop {@link FileSystem} API. Each line is
 * trimmed and blank lines are skipped; the remaining lines form the lookup set.
 * A successfully loaded set is cached per path in a static map for the lifetime
 * of the JVM, so the file is read at most once per path unless loading fails.
 *
 * <p>NOTE(review): the static caches are not synchronized. This assumes
 * {@code evaluate} is not called concurrently within one JVM - confirm before
 * using the class from multi-threaded code.
 */
public class InHdfs_udf extends UDF {
    /** Charset used to decode the lookup file; {@code Text.toString()} decodes UTF-8 as well. */
    private static final String FILE_CHARSET = "UTF-8";

    /** Lookup sets that were loaded successfully, keyed by the file path string. */
    private static final Map<String, Set<String>> match = new HashMap<String, Set<String>>();

    /** Paths whose load failure has already been reported, to avoid one log line per row. */
    private static final Set<String> reportedFailures = new HashSet<String>();

    /**
     * Checks whether {@code d} is one of the (trimmed, non-blank) lines of {@code file}.
     *
     * @param d    value to look up; compared exactly against the trimmed lines
     * @param file path of the lookup file, resolved through the Hadoop file system API
     * @return {@code "ok"} if the value is present; {@code "-"} if it is absent, if
     *         {@code d} or {@code file} is {@code null}, if the file does not exist,
     *         or if the file could not be read
     */
    public Text evaluate(final Text d, String file) {
        if (d == null || file == null) {
            return new Text("-");
        }
        Set<String> set = match.get(file);
        if (set == null) {
            try {
                set = loadLines(file);
                // Only complete loads are cached; a missing file caches an empty set.
                match.put(file, set);
            } catch (IOException e) {
                // Best effort: treat an unreadable file as empty for this call and
                // leave it uncached so a later call retries the load.
                set = new HashSet<String>();
                if (reportedFailures.add(file)) {
                    System.err.println("InHdfs_udf: failed to read " + file + ": " + e);
                }
            }
        }
        return new Text(set.contains(d.toString()) ? "ok" : "-");
    }

    /**
     * Reads the trimmed, non-blank lines of {@code file} into a new set.
     *
     * @param file path of the file to read
     * @return the set of lines; empty if the file does not exist
     * @throws IOException if the file system cannot be reached or the file cannot be read
     */
    private static Set<String> loadLines(String file) throws IOException {
        Set<String> lines = new HashSet<String>();
        Configuration conf = new Configuration();
        Path p = new Path(file);
        // The FileSystem is intentionally not closed here: Hadoop may hand out a
        // shared, cached instance that other code in the JVM is still using.
        FileSystem fs = p.getFileSystem(conf);
        if (!fs.exists(p)) {
            return lines;
        }
        FSDataInputStream in = fs.open(p);
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(in, FILE_CHARSET));
            String raw;
            while ((raw = br.readLine()) != null) {
                String line = raw.trim();
                if (!line.isEmpty()) {
                    lines.add(line);
                }
            }
        } finally {
            // Closing the underlying stream releases the resource even when reading fails.
            in.close();
        }
        return lines;
    }
}