package tv.dyndns.kishibe.qmaclone.server.relevance;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.List;
import java.util.Scanner;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import com.google.inject.Inject;
import tv.dyndns.kishibe.qmaclone.client.constant.Constant;
import tv.dyndns.kishibe.qmaclone.server.util.Downloader;
import tv.dyndns.kishibe.qmaclone.server.util.DownloaderException;
/**
 * {@link Dictionary} whose words come from the Niconico Pedia IME dictionary archive.
 *
 * <p>The zip archive is downloaded from {@link #NICO_VIDEO_DIC_IME_URL} into a local cache file
 * and downloaded again once the cached copy is older than {@link #MAX_CACHE_AGE_MILLIS}. Words
 * are taken from the second tab-separated column of the {@code nicoime_msime.txt} entry.
 */
public class NicoVideoDicImeDictionary implements Dictionary {
  private static final Logger logger = Logger.getLogger(NicoVideoDicImeDictionary.class.getName());
  private static final String NICO_VIDEO_DIC_IME_URL = "http://tkido.com/data/nicoime.zip";
  private static final File NICO_VIDEO_DIC_IME_FILE = new File(
      Constant.FILE_PATH_BASE + "qmaclone/nicoime.zip");
  /** Name of the archive entry that holds the tab-separated dictionary text. */
  private static final String DICTIONARY_ENTRY_NAME = "nicoime_msime.txt";
  /** Age after which the cached archive is considered stale: seven days. */
  private static final long MAX_CACHE_AGE_MILLIS = 7L * 24 * 60 * 60 * 1000;
  /** Emit a progress log line every time this many words have been collected. */
  private static final int PROGRESS_LOG_INTERVAL = 10000;

  private final Downloader downloader;

  /**
   * @param downloader used to fetch the dictionary archive; must not be {@code null}
   */
  @Inject
  public NicoVideoDicImeDictionary(Downloader downloader) {
    this.downloader = Preconditions.checkNotNull(downloader);
  }

  /**
   * Returns the words of the dictionary, downloading the archive first when the cached copy is
   * missing or stale.
   *
   * @return the collected words, or an empty list when an {@link IOException} occurs while
   *     obtaining or reading the archive
   */
  @Override
  public List<String> getWords() {
    try {
      ensureFile();
      return readFile();
    } catch (IOException e) {
      logger.log(Level.WARNING, "ニコニコ大百科IME辞書の取得に失敗しました", e);
      return Lists.newArrayList();
    }
  }

  /**
   * Makes sure a sufficiently recent copy of the archive exists in the cache location.
   *
   * @throws IOException if the download URL cannot be constructed
   */
  private void ensureFile() throws IOException {
    boolean cacheIsFresh = NICO_VIDEO_DIC_IME_FILE.isFile()
        && System.currentTimeMillis()
            < NICO_VIDEO_DIC_IME_FILE.lastModified() + MAX_CACHE_AGE_MILLIS;
    if (cacheIsFresh) {
      return;
    }

    try {
      downloader.downloadToFile(new URL(NICO_VIDEO_DIC_IME_URL), NICO_VIDEO_DIC_IME_FILE);
    } catch (DownloaderException e) {
      // Log with the cause attached so the failure can be diagnosed from the log alone.
      logger.log(Level.SEVERE, "ニコニコ大百科IME辞書のダウンロードに失敗しました", e);
      // Rethrown through Throwables.propagate, so this is not handled by the IOException catch
      // in getWords().
      throw Throwables.propagate(e);
    }
  }

  /**
   * Reads the dictionary entry from the cached archive and collects the normalized words.
   *
   * @return the words found in the second tab-separated column of each line
   * @throws IOException if the archive cannot be opened, the dictionary entry is missing, or
   *     reading the entry fails part-way through
   */
  private List<String> readFile() throws IOException {
    List<String> words = Lists.newArrayList();
    try (ZipFile zipFile = new ZipFile(NICO_VIDEO_DIC_IME_FILE)) {
      ZipEntry entry = zipFile.getEntry(DICTIONARY_ENTRY_NAME);
      if (entry == null) {
        // getEntry() returns null for a missing entry; fail with an IOException instead of
        // letting a NullPointerException escape past getWords().
        throw new IOException("Entry " + DICTIONARY_ENTRY_NAME + " not found in "
            + NICO_VIDEO_DIC_IME_FILE);
      }

      try (Scanner scanner = new Scanner(zipFile.getInputStream(entry), "utf-8")) {
        while (scanner.hasNextLine()) {
          String[] columns = scanner.nextLine().split("\t");
          if (columns.length < 2) {
            continue;
          }

          String word = normalizeWord(columns[1]);
          if (word.isEmpty()) {
            continue;
          }

          words.add(word);
          if (words.size() % PROGRESS_LOG_INTERVAL == 0) {
            logger.log(Level.INFO, "NicoVideoIme: " + words.size());
          }
        }

        // Scanner swallows read errors and simply reports "no more lines"; surface them so a
        // truncated word list is not mistaken for a complete one.
        IOException readError = scanner.ioException();
        if (readError != null) {
          throw readError;
        }
      }
    }
    return words;
  }

  /**
   * Normalizes a raw dictionary word: drops everything from the first {@code "("} onwards,
   * removes ASCII spaces, and trims surrounding whitespace.
   *
   * @param rawWord the word column as read from the dictionary file
   * @return the normalized word, possibly empty
   */
  private static String normalizeWord(String rawWord) {
    String word = rawWord;
    int parenIndex = word.indexOf("(");
    if (parenIndex >= 0) {
      word = word.substring(0, parenIndex);
    }
    return word.replace(" ", "").trim();
  }
}