package marubinotto.piggydb.spike;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.StringReader;
import java.net.URI;
import java.text.BreakIterator;
import java.util.StringTokenizer;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.SystemUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.oro.text.regex.MatchResult;
import org.apache.oro.text.regex.Pattern;
import org.apache.oro.text.regex.PatternCompiler;
import org.apache.oro.text.regex.PatternMatcher;
import org.apache.oro.text.regex.Perl5Compiler;
import org.apache.oro.text.regex.Perl5Matcher;
import org.junit.Test;
public class HodgepodgeSpike {
private PatternCompiler compiler = new Perl5Compiler();
private PatternMatcher matcher = new Perl5Matcher();
@Test
public void filePathAndUrl() throws Exception {
String userHome = System.getProperty("user.home");
System.out.println("userHome: " + userHome);
File userHomePath = new File(userHome);
System.out.println("userHomePath: " + userHomePath);
URI userHomeURI = userHomePath.toURI();
System.out.println("userHomeURI: " + userHomeURI);
File userHomePath2 = FileUtils.toFile(userHomeURI.toURL());
System.out.println("userHomePath2: " + userHomePath2);
}
@Test
public void fileToUrlWithEscaping() throws Exception {
File file = new File("/my docs/file.txt");
System.out.println("toURI: " + file.toURI());
// System.out.println("toURL: " + file.toURL());
}
@Test
public void youtubeUrl() throws Exception {
Pattern pattern = compiler.compile("youtube\\.com/watch\\?v=([^&\\s]+)");
boolean result = this.matcher.contains("http://jp.youtube.com/watch?v=iEL2grmKSnM&feature=related", pattern);
assertTrue(result);
MatchResult matchResult = matcher.getMatch();
System.out.println(matchResult.group(1));
}
@Test
public void tableRow() throws Exception {
Pattern pattern = compiler.compile("^(\\|\\|).+(\\|\\|\\s*)$");
boolean result = this.matcher.matches("||hoge||fuga||", pattern);
System.out.println(result);
}
@Test
public void match() throws Exception {
Pattern pattern = compiler.compile("^(hogehoge)$");
boolean result = this.matcher.contains("hogehoge", pattern);
System.out.println(result);
}
@Test
public void userHome() throws Exception {
System.out.println("IS_OS_WINDOWS: " + SystemUtils.IS_OS_WINDOWS);
System.out.println("user.home: " + System.getProperty("user.home"));
System.out.println("HOME: " + System.getenv("HOME"));
System.out.println("HOMEDRIVE: " + System.getenv("HOMEDRIVE"));
System.out.println("HOMEPATH: " + System.getenv("HOMEPATH"));
System.out.println("USERPROFILE: " + System.getenv("USERPROFILE"));
System.out.println("ALLUSERSPROFILE: " + System.getenv("ALLUSERSPROFILE"));
System.out.println("SYSTEMDRIVE: " + System.getenv("SYSTEMDRIVE"));
}
private static Analyzer analyzer = new CJKAnalyzer();
@Test
public void tokenize() throws Exception {
String text = "Piggydb is an easy-to-use Web application for building a personal knowledge repository.";
text = text + " Piggydbは個人向けの知識を管理するためのWebアプリケーションです。";
// Standard
System.out.println("Standard ----");
StringTokenizer tokenizer = new StringTokenizer(text, " \t\n\r\f+\"*%&/()=?'!,.;:-_#@|^~`{}[]");
while (tokenizer.hasMoreTokens()) {
System.out.println(" word: " + tokenizer.nextToken());
}
// N-gram
System.out.println("N-gram ----");
NGramTokenizer nGramTokenizer = new NGramTokenizer(new StringReader(text), 2, 2);
Token token = new Token();
while ((token = nGramTokenizer.next(token)) != null) {
System.out.println(" word: " + token.term());
}
// CJKAnalyzer
System.out.println("CJKAnalyzer ----");
TokenStream stream = analyzer.tokenStream("F", new StringReader(text));
token = new Token();
while ((token = stream.next(token)) != null) {
System.out.println(" word: " + token.term());
}
}
@Test
public void sentenceIterator() throws Exception {
printFirstSentence("");
printFirstSentence("without a period");
printFirstSentence("This is a pen. Hello world.");
printFirstSentence("日本語です。どうですか?");
}
private void printFirstSentence(String text) {
BreakIterator iterator = BreakIterator.getSentenceInstance();
iterator.setText(text);
int end = iterator.next();
if (end == BreakIterator.DONE) {
System.out.println("DONE");
}
else {
System.out.println(text.substring(0, end));
}
}
}