package com.tistory.devyongsik.utils;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * Command-line utility that removes from the noun dictionary every word that
 * also appears in the custom dictionary, then rewrites the noun dictionary in
 * place with the remaining words sorted in natural {@link String} order, one
 * word per line.
 *
 * <p>Each line of a dictionary file is treated as one word, exactly as read
 * (no trimming; an empty line counts as the empty word). Repeated lines inside
 * the noun dictionary collapse to a single entry.
 */
public class NounDictionaryDuplWordRemover {

    /** Noun dictionary used when no paths are given on the command line. */
    private static final String DEFAULT_NOUN_PATH =
            "/Users/need4spd/Programming/Java/workspace/walkingword/src/com/tistory/devyongsik/analyzer/dictionary/noun.txt";

    /** Custom dictionary used when no paths are given on the command line. */
    private static final String DEFAULT_CUSTOM_PATH =
            "/Users/need4spd/Programming/Java/workspace/walkingword/src/com/tistory/devyongsik/analyzer/dictionary/custom.txt";

    /**
     * Runs the de-duplication.
     *
     * @param args either exactly two elements, {@code <noun file> <custom file>},
     *             or anything else, in which case the built-in default paths
     *             are used
     * @throws IOException if either dictionary cannot be read or the noun
     *                     dictionary cannot be rewritten
     */
    public static void main(String[] args) throws IOException {
        boolean pathsGiven = args != null && args.length == 2;
        File nounFile = new File(pathsGiven ? args[0] : DEFAULT_NOUN_PATH);
        File customNounFile = new File(pathsGiven ? args[1] : DEFAULT_CUSTOM_PATH);

        // Both files are read completely (and closed) before the noun file is
        // reopened for writing, so a read failure leaves it untouched.
        Set<String> nouns = readWords(nounFile);
        Set<String> customNouns = readWords(customNounFile);

        int dupCount = 0;
        for (String customNoun : customNouns) {
            // remove() reports whether the word was present, so no separate lookup is needed.
            if (nouns.remove(customNoun)) {
                dupCount++;
            }
        }
        System.out.println("dup count : " + dupCount);

        writeSorted(nounFile, nouns);
    }

    /** Reads every line of {@code file} into a set, closing the file even if reading fails. */
    private static Set<String> readWords(File file) throws IOException {
        Set<String> words = new HashSet<String>();
        // NOTE(review): decodes with the platform default charset, as before;
        // confirm this matches the encoding of the dictionary files.
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                words.add(line);
            }
        } finally {
            // Closing the outermost reader also closes the wrapped streams.
            reader.close();
        }
        return words;
    }

    /** Overwrites {@code file} with {@code words} in sorted order, each followed by {@code "\n"}. */
    private static void writeSorted(File file, Set<String> words) throws IOException {
        // Sort before opening the file so that it is truncated as late as possible.
        List<String> sorted = new ArrayList<String>(words);
        Collections.sort(sorted);

        // NOTE(review): encodes with the platform default charset, mirroring readWords.
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file, false)));
        try {
            for (String word : sorted) {
                writer.write(word);
                writer.write("\n");
            }
        } finally {
            // close() flushes buffered output and releases the file handle.
            writer.close();
        }
    }
}