package edu.fudan.util; import gnu.trove.iterator.TIntFloatIterator; import gnu.trove.iterator.hash.TObjectHashIterator; import gnu.trove.map.hash.TIntFloatHashMap; import gnu.trove.map.hash.TObjectFloatHashMap; import gnu.trove.set.hash.TCharHashSet; import gnu.trove.set.hash.THashSet; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.TreeSet; /** * 常用集合操作 * @author xpqiu * */ public class MyCollection { /** * 由大到小排序 * @param map * @return 数组下标 */ public static int[] sort(TIntFloatHashMap tmap) { HashMap<Integer, Float> map = new HashMap<Integer, Float>(); TIntFloatIterator it = tmap.iterator(); while (it.hasNext()) { it.advance(); int id = it.key(); float val = it.value(); map.put(id, Math.abs(val)); } it = null; List<Entry> list = sort(map); int[] idx = new int[list.size()]; Iterator<Entry> it1 = list.iterator(); int i=0; while (it1.hasNext()) { Entry entry = it1.next(); idx[i++] = (Integer) entry.getKey(); } return idx; } /** * 由大到小排序 * @param map * @return */ public static List<Map.Entry> sort(Map map) { LinkedList<Map.Entry> list = new LinkedList<Map.Entry>(map.entrySet()); Collections.sort(list, new Comparator<Map.Entry>() { @Override public int compare(Entry o1,Entry o2) { // make sure the values implement Comparable return -((Comparable) o1.getValue()).compareTo(o2.getValue()); } }); return list; } public static void TSet2List(THashSet<String> newset, ArrayList<String> al) { TObjectHashIterator<String> it = newset.iterator(); while(it.hasNext()){ String s = it.next(); al.add(s); } } /** * 输出List<Entry>到文件 * @param entryList * @param file * @param b 是否输出值域 */ public static void write(List<Entry> entryList, String file, boolean b) { try { BufferedWriter bout = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(file), "UTF-8")); Iterator<Entry> it = entryList.iterator(); while (it.hasNext()) { Entry entry = it.next(); bout.write(entry.getKey().toString()); if (b) { bout.write("\t"); bout.write(entry.getValue().toString()); } bout.write("\n"); } bout.close(); } catch (Exception e) { } } /** * 将Map写到文件 * @param map * @throws IOException */ public static void write(Map map,String file) throws IOException { BufferedWriter bout = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(file), "UTF-8")); Iterator iterator = map.entrySet().iterator(); while (iterator.hasNext()) { Map.Entry entry = (Map.Entry)iterator.next(); String key = entry.getKey().toString(); String v = entry.getValue().toString(); bout.append(key); bout.append("\t"); bout.append(v); bout.newLine(); } bout.close(); } /** * 每行为一个字符集合 * @param path * @return * @throws IOException */ public static ArrayList<TCharHashSet> loadTCharHashSetArray(String path) throws IOException{ BufferedReader bfr; try { bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path),"utf8")); } catch (FileNotFoundException e) { System.out.print("没找到文件:"+path); return null; } ArrayList<TCharHashSet> setArray= new ArrayList<TCharHashSet>(); String line = null; int count=0; while ((line = bfr.readLine()) != null) { if(line.length()==0) continue; TCharHashSet set = new TCharHashSet(); for(int i=0;i<line.length();i++){ char c = line.charAt(i); if(c!='\t') set.add(c); } setArray.add(set); } bfr.close(); return setArray; } /** * 每行为一个或多个元素 * @param path * @param b true,每行为一个元素;false: 每行为多个元素 * @return * @throws IOException */ public static THashSet<String> loadTSet(String path,boolean b) throws IOException{ THashSet<String> dict = new THashSet<String>(); BufferedReader bfr; try { bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path),"utf8")); } catch (FileNotFoundException e) { System.out.print("没找到文件:"+path); return dict; } String line = null; while ((line = bfr.readLine()) != null) { if(line.length()==0) continue; if(b) dict.add(line); else{ String[] toks = line.split("\\s+"); for(String tok:toks) dict.add(tok); } } bfr.close(); return dict; } /** * 每行为一个元素 * @param path * @return * @throws IOException */ public static THashSet<String> loadTSet(String path) throws IOException{ return loadTSet(path,true); } /** * 去除重复的集合 * @param path * @return * @throws IOException */ public static void cleanSet(String path) throws IOException{ THashSet<String> set = loadTSet(path,true); write(set, path); } /** * 每行为一个元素 * @param path * @return * @throws IOException */ public static HashSet<String> loadSet(String path) throws IOException{ return loadSet(path, true); } /** * 每行为一个或多个元素 * @param path * @param b true,每行为一个元素;false: 每行为多个元素 * @return * @throws IOException */ public static HashSet<String> loadSet(String path,boolean b) throws IOException{ HashSet<String> dict = new HashSet<String>(); BufferedReader bfr; try { bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path),"utf8")); } catch (FileNotFoundException e) { System.out.print("没找到文件:"+path); return dict; } String line = null; while ((line = bfr.readLine()) != null) { if(line.length()==0) continue; if(b) dict.add(line); else{ String[] toks = line.split("\\s+"); for(String tok:toks) dict.add(tok); } } return dict; } /** * 每行为一个或多个元素 * @param path * @param b true,每行为一个元素;false: 每行为多个元素 * @return * @throws IOException */ public static Set<String> loadSet(Set<String> dict,String path,boolean b) throws IOException{ BufferedReader bfr; try { bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path),"utf8")); } catch (FileNotFoundException e) { System.out.print("没找到文件:"+path); return dict; } String line = null; while ((line = bfr.readLine()) != null) { if(line.length()==0) continue; if(b) dict.add(line); else{ String[] toks = line.split("\\s+"); for(String tok:toks) dict.add(tok); } } return dict; } public static TObjectFloatHashMap<String> loadTStringFloatMap(String path) throws IOException { TObjectFloatHashMap<String> dict = new TObjectFloatHashMap<String>(); BufferedReader bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path), "utf8")); String line = null; while ((line = bfr.readLine()) != null) { if (line.length() == 0) continue; int idx = line.lastIndexOf("\t"); dict.put(line.substring(0, idx), Float.parseFloat(line.substring(idx + 1))); } bfr.close(); return dict; } /** * 将文件读入到HashMap * @param path * @return * @throws IOException */ public static HashMap<String,String> loadStringStringMap(String path) throws IOException{ return loadStringStringMap(path,false); } /** * 将文件读入到HashMap * @param path * @param isRevert 是否颠倒顺序 * @return * @throws IOException */ public static HashMap<String,String> loadStringStringMap(String path,boolean isRevert) throws IOException{ HashMap<String,String> dict = new HashMap<String,String>(); BufferedReader bfr; try { bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path),"utf8")); } catch (FileNotFoundException e) { return dict; } String line = null; int count=0; while ((line = bfr.readLine()) != null) { if(line.length()==0) continue; int idx = line.lastIndexOf("\t"); if(isRevert) dict.put(line.substring(idx+1),line.substring(0,idx)); else dict.put(line.substring(0,idx), line.substring(idx+1)); } bfr.close(); return dict; } /** * 将文件读入到HashMap * @param path * @return * @throws IOException */ public static HashMap<String,Float> loadStringFloatMap(String path) throws IOException{ HashMap<String,Float> dict = new HashMap<String,Float>(); BufferedReader bfr; try { bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path),"utf8")); } catch (FileNotFoundException e) { return dict; } String line = null; int count=0; while ((line = bfr.readLine()) != null) { if(line.length()==0) continue; int idx = line.lastIndexOf("\t"); String key = line.substring(0,idx); String v = line.substring(idx+1); dict.put(key, Float.parseFloat(v)); } return dict; } /** * 从多文件中读入Map * @param sfiles * @return * @throws NumberFormatException * @throws IOException */ public static HashMap<String,Float> loadStringFloatMapInMultiFiles(String sfiles) throws NumberFormatException, IOException { HashMap<String, Float> map = new HashMap<String, Float>(); String[] files = sfiles.split(";"); for(String f:files){ BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f),"utf8")); String line; while ((line = br.readLine()) != null) { if(line.length()==0) continue; int idx = line.lastIndexOf("\t"); if(idx==-1) continue; String key = line.substring(0,idx); float v = Float.parseFloat(line.substring(idx+1)); if (map.containsKey(key)) { float tempV = map.get(key); map.put(key, v + tempV); } else map.put(key, v); } } return map; } public static void write(Iterable set, String file) { try { BufferedWriter bout = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(file), "UTF-8")); Iterator it = set.iterator(); while (it.hasNext()) { String entry = it.next().toString(); bout.write(entry); bout.write("\n"); } bout.close(); } catch (Exception e) { } } public static HashMap<String, HashSet<String>> loadMultiValueSetMap(String path) throws IOException { return loadMultiValueSetMap(new FileInputStream(path)); } public static HashMap<String, HashSet<String>> loadMultiValueSetMap(InputStream is) throws IOException { HashMap<String, HashSet<String>> dict = new HashMap<String, HashSet<String>>(); BufferedReader bfr; try { bfr = new BufferedReader(new InputStreamReader(is,"utf8")); } catch (Exception e) { return dict; } String line = null; while ((line = bfr.readLine()) != null) { if(line.length()==0) continue; String[] toks = line.split("\\s"); HashSet<String> v = dict.get(toks[0]); if(v==null){ v = new HashSet<String>(); } for(int i=1;i<toks.length;i++){ v.add(toks[i]); } dict.put(toks[0], v); } return dict; } public static HashMap<String, String[]> loadMultiValueMap(String path) throws IOException { return loadMultiValueMap(new FileInputStream(path)); } public static HashMap<String, String[]> loadMultiValueMap(InputStream is) throws IOException { HashMap<String, String[]> dict = new HashMap<String, String[]>(); BufferedReader bfr; try { bfr = new BufferedReader(new InputStreamReader(is,"utf8")); } catch (Exception e) { return dict; } String line = null; int count=0; while ((line = bfr.readLine()) != null) { if(line.length()==0) continue; String[] toks = line.split("\\s"); String[] v = Arrays.copyOfRange(toks, 1, toks.length); dict.put(toks[0], v); } return dict; } /** * 写多值Map,Map结构为HashMap<String, HashSet<String>> * @param map HashMap<String, HashSet<String>> * @param file * @see MyCollection#write(HashMap, String, boolean) */ public static void writeMultiValueMap(Map map, String file) { writeMultiValueMap(map, file,true,"\t"); } /** * 写多值Map,Map结构为HashMap<String, Collection<String>> * @param map HashMap<String, Collection<String>> * @param file */ public static void writeMultiValueMap(Map<String, Collection<String>> map, String file,boolean hasKey,String delim) { try { BufferedWriter bout = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(file), "UTF-8")); Iterator<Entry<String, Collection<String>>> it1 = map.entrySet().iterator(); while(it1.hasNext()){ Entry<String, Collection<String>> entry = it1.next(); if(hasKey){ bout.write(entry.getKey()); bout.write("\t"); } Collection<String> val = entry.getValue(); if(val==null){ if(it1.hasNext()) bout.write("\n"); continue; } Iterator<String> it = val.iterator(); while (it.hasNext()) { String en = it.next(); bout.write(en); if(it.hasNext()) bout.write(delim); } if(it1.hasNext()) bout.write("\n"); } bout.close(); } catch (Exception e) { System.err.println(e.toString()); e.printStackTrace(); } } /** * 写多值Map,Map结构为HashMap<String, HashSet<String>> * @param map HashMap<String, HashSet<String>> * @param file * @return * @throws IOException */ public static HashSet<HashSet<String>> loadSetSet(String file) throws IOException { HashSet<HashSet<String>> dict = new HashSet<HashSet<String>> (); BufferedReader bfr; try { bfr = new BufferedReader(new InputStreamReader(new FileInputStream(file),"utf8")); } catch (Exception e) { return dict; } String line = null; int count=0; while ((line = bfr.readLine()) != null) { if(line.length()==0) continue; HashSet<String> set = new HashSet<String>(); String[] toks = line.split("\\s"); for(String t:toks){ set.add(t); } dict.add(set); } return dict; } public static int isContain(THashSet<String> set, ArrayList<String> subwords) { int i = 0; for(String s: subwords){ if(set.contains(s)) i++; } return i; } public static int getLength(THashSet<String> set) { int i = 0; TObjectHashIterator<String> it = set.iterator(); while(it.hasNext()){ String s = it.next(); if(s.length()>i) i=s.length(); } return i; } /** * 从文件读入字符串数组 * @param file * @param delim 分隔符 * @return * @throws IOException */ public static ArrayList<String> loadList(String file,String delim) throws IOException { ArrayList<String> list= new ArrayList<String>(); BufferedReader bfr = new BufferedReader(new InputStreamReader(new FileInputStream(file),"utf8")); String line = null; while ((line = bfr.readLine()) != null) { if(line.length()==0) continue; if(delim!=null){ String[] toks = line.split(delim); for(String t:toks){ list.add(t); } }else{ list.add(line); } } bfr.close(); return list; } public static List<String> asList(String[] strs) { ArrayList<String> list= new ArrayList<String>(); for(int i=0;i<strs.length;i++) list.add(strs[i]); return list; } public static void writeMultiValueMap1(MultiValueMap<String, String> c2e, String c2ePath) { } }