package com.alimama.mdrill.index;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.lucene.index.TermInfosWriter;

import com.alimama.mdrill.hdfsDirectory.FileSystemDirectory;
import com.alimama.mdrill.index.utils.DocumentConverter;
import com.alimama.mdrill.index.utils.HeartBeater;
import com.alimama.mdrill.index.utils.JobIndexPublic;
import com.alimama.mdrill.index.utils.ShardWriter;
import com.alimama.mdrill.utils.ZipUtils;

/**
 * Reducer that merges the per-map Lucene index directories (received as HDFS
 * paths) into a single shard, then publishes the optimized shard to the job
 * output directory, optionally zipping it first.
 */
public class IndexReducerMerge extends Reducer<IntWritable, Text, IntWritable, Text> {

    private HeartBeater heartBeater = null;
    private ShardWriter shardWriter = null;

    /** Final output path of this reducer's shard. */
    private String indexHdfsPath = null;
    /** Temporary working directory for the merged index. */
    private String tmpath = null;
    /** Temporary location for the zipped copy of the index. */
    private String tmpathzip = null;

    public DocumentConverter documentConverter;
    boolean isNotFdtMode = true;
    boolean isNotZip = false;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);
        Configuration conf = context.getConfiguration();

        // Table-mode flags: "@hdfs@" selects fdt (stored-fields-on-HDFS) mode,
        // "@nonzipout@" disables zipping the final shard.
        String tableMode = conf.get("mdrill.table.mode", "");
        isNotFdtMode = tableMode.indexOf("@hdfs@") < 0;
        isNotZip = tableMode.indexOf("@nonzipout@") >= 0;
        if (!isNotFdtMode) {
            TermInfosWriter.setSkipInterVal(16);
        }
        TermInfosWriter.setNotUseQuick(false);

        String fieldStrs = conf.get("higo.index.fields");
        String[] fieldslist = fieldStrs.split(",");
        this.documentConverter = new DocumentConverter(fieldslist, "solrconfig.xml", "schema.xml");

        // Keep the task attempt alive during long merges by reporting
        // progress from a background thread.
        heartBeater = new HeartBeater(context);
        heartBeater.needHeartBeat();

        shardWriter = initShardWriter(context);
        shardWriter.getDir().setAllowLinks(true);
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        try {
            // Finalize the shard, then move it from the temporary directory
            // to the final output path (zipped first unless zipping is
            // disabled or we are in fdt mode).
            shardWriter.addEmptyDoc();
            shardWriter.optimize();
            shardWriter.close();

            Configuration conf = context.getConfiguration();
            FileSystem fs = FileSystem.get(conf);
            if (isNotFdtMode && !isNotZip) {
                ZipUtils.zip(fs, tmpath, fs, tmpathzip);
                if (!fs.exists(new Path(indexHdfsPath))) {
                    fs.rename(new Path(tmpathzip), new Path(indexHdfsPath));
                }
            } else {
                if (!fs.exists(new Path(indexHdfsPath))) {
                    fs.rename(new Path(tmpath), new Path(indexHdfsPath));
                }
            }
        } catch (Throwable e) {
            throw new IOException(e);
        } finally {
            // Always stop the heartbeat thread, even if publishing the shard
            // fails; otherwise the background thread would leak.
            heartBeater.cancelHeartBeat();
            heartBeater.interrupt();
        }
    }

    private ShardWriter initShardWriter(Context context) throws IOException {
        String part_xxxxx = JobIndexPublic.getOutFileName(context, "part");
        Configuration conf = context.getConfiguration();
        FileSystem fs = FileSystem.get(conf);
        String outputdir = conf.get("mapred.output.dir");

        // Write into a UUID-suffixed temp directory so a retried task attempt
        // never collides with a previous attempt's partial output.
        String uuid = java.util.UUID.randomUUID().toString();
        indexHdfsPath = new Path(outputdir, part_xxxxx).toString();
        tmpath = new Path(outputdir + "/_tmpindex", part_xxxxx + "_" + uuid).toString();
        tmpathzip = new Path(outputdir + "/_tmpindex", part_xxxxx + "_zip_" + uuid).toString();
        return new ShardWriter(fs, tmpath, conf);
    }

    @Override
    protected void reduce(IntWritable key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        FileSystem fs = FileSystem.get(conf);

        // Each value is the HDFS path of an index produced by a map task;
        // merge every one of them into this reducer's shard.
        Iterator<Text> iterator = values.iterator();
        while (iterator.hasNext()) {
            Text path = iterator.next();
            shardWriter.process(new FileSystemDirectory(fs, new Path(path.toString()), false, conf));
            context.write(key, path);
        }
    }
}