package org.wikipedia.miner.extract; import static org.junit.Assert.assertNotNull; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.InputStreamReader; import javax.xml.stream.FactoryConfigurationError; import javax.xml.stream.XMLStreamException; import org.junit.Before; import org.junit.Test; import org.simpleframework.xml.Serializer; import org.simpleframework.xml.core.Persister; import org.wikipedia.miner.extract.model.DumpPage; import org.wikipedia.miner.extract.model.DumpPageParser; import org.wikipedia.miner.extract.util.Languages; import org.wikipedia.miner.extract.util.Languages.Language; import org.wikipedia.miner.extract.util.SiteInfo; import org.wikipedia.miner.util.MarkupStripper; public class MarkupTestCase { private SiteInfo siteInfo ; private Language langConf ; private MarkupStripper stripper = new MarkupStripper() ; @Before public void init() throws FactoryConfigurationError, Exception { siteInfo = loadSiteInfo() ; langConf = loadLanguageConfig("simple") ; } @Test public void test() { assertNotNull(langConf) ; } private SiteInfo loadSiteInfo() throws Exception { ClassLoader classloader = Thread.currentThread().getContextClassLoader() ; return SiteInfo.load(classloader.getResourceAsStream("siteInfo.xml")) ; } private Language loadLanguageConfig(String langCode) throws Exception { Serializer serializer = new Persister(); File source = new File("../configs/languages.xml"); Languages languages = serializer.read(Languages.class, source); return languages.get(langCode) ; } public DumpPage loadPage(String fileName) throws IOException, XMLStreamException { DumpPageParser parser = new DumpPageParser(langConf, siteInfo) ; ClassLoader classloader = Thread.currentThread().getContextClassLoader() ; BufferedReader reader = new BufferedReader(new InputStreamReader(classloader.getResourceAsStream(fileName))); StringBuffer sb = new StringBuffer() ; String line ; while ((line = reader.readLine()) != null) { sb.append(line) ; sb.append("\n") ; } return parser.parsePage(sb.toString()) ; } public SiteInfo getSiteInfo() { return siteInfo; } public Language getLangConf() { return langConf; } public MarkupStripper getStripper() { return stripper; } }