package com.alimama.mdrill.index.utils; import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; import java.util.ArrayList; import java.util.HashMap; import org.apache.hadoop.mapreduce.Reducer.Context; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; public class DocumentList { private ArrayList<HashMap<String, String>> list ; private int count=0; public DocumentList() { list=new ArrayList<HashMap<String,String>>(); } public int add(DocumentMap res,String[] fields) { int cnt=res.setMap(this.list, fields); this.count+=cnt; return cnt; } public boolean isoversize() { return this.count>100; } public RamWriter toRamWriter(DocumentConverter documentConverter,Analyzer analyzer, Context context) throws IOException { ArrayList<Document> list=this.transDodument(documentConverter,context); RamWriter listform = new RamWriter(); listform.process(list, analyzer); listform.closeWriter(); return listform; } private static int debuglines=0; private ArrayList<Document> transDodument( DocumentConverter documentConverter, Context context) throws IOException { ArrayList<Document> doclist = new ArrayList<Document>(list.size()); for (HashMap<String, String> res : list) { try{ Document doc = documentConverter.convert(res); if(doc.getFields().size()<=0) { context.getCounter("higo", "skipdocument2").increment(1); }else{ doclist.add(doc); } }catch(org.apache.solr.common.SolrException e){ context.getCounter("higo", "skipdocument").increment(1); if(debuglines<100) { debuglines++; System.out.println("skipdocument: " + res.toString()+","+stringify_error(e)); } } } return doclist; } public static String stringify_error(Throwable error) { StringWriter result = new StringWriter(); PrintWriter printer = new PrintWriter(result); error.printStackTrace(printer); return result.toString(); } }