package ruc.irm.classification; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.util.HashMap; import java.util.Map; /** * 文档的特征 * * @author xiatian * */ public class Feature { /** 每个关键词在不同类别中出现的文档数量 */ private Map<String, Integer> docCountMap = new HashMap<String, Integer>(); /** 特征名称 */ private String name; public String getName() { return name; } public void setName(String name) { this.name = name; } public void incDocCount(String category){ if(docCountMap.containsKey(category)){ docCountMap.put(category, docCountMap.get(category)+1); }else{ docCountMap.put(category, 1); } } public int getDocCount(String category){ if(docCountMap.containsKey(category)){ return docCountMap.get(category); }else{ return 0; } } public void write(DataOutput out) throws IOException{ out.writeUTF(name==null?"":name); out.writeInt(docCountMap.size()); for(String category:docCountMap.keySet()){ out.writeUTF(category); out.writeInt(docCountMap.get(category)); } } public void readFields(DataInput in) throws IOException { this.name = in.readUTF(); docCountMap = new HashMap<String, Integer>(); int size = in.readInt(); for(int i=0; i<size; i++){ String category = in.readUTF(); int docCount = in.readInt(); docCountMap.put(category, docCount); } } public static Feature read(DataInput in) throws IOException{ Feature f = new Feature(); f.readFields(in); return f; } }