package marytts.tools.dbselection; import java.io.File; import java.io.IOException; import java.net.URISyntaxException; import java.net.URL; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.fest.assertions.Assertions; import com.google.common.base.Charsets; import com.google.common.io.Files; import com.google.common.io.Resources; /** * Tests for WikipediaMarkupCleaner * * @author ingmar * */ public class WikipediaMarkupCleanerTest { private WikipediaMarkupCleaner wikiCleaner; private URL markupResource; @Rule public TemporaryFolder tempDir = new TemporaryFolder(); @Before public void setup() throws IOException { wikiCleaner = new WikipediaMarkupCleaner(); markupResource = Resources.getResource(getClass(), "Autorack.mediawiki"); } @Test public void testRemoveMarkup() throws IOException, URISyntaxException { // read markup from test resource String page = Resources.toString(markupResource, Charsets.UTF_8); // process to extract markup-less text String pageWithoutMarkup = wikiCleaner.removeMarkup(page).firstElement(); // write processed text to temp file File actualFile = tempDir.newFile("Autorack.txt"); Files.write(pageWithoutMarkup, actualFile, Charsets.UTF_8); // get expected text and compare with actual processed text URL expectedResource = Resources.getResource(getClass(), "Autorack.txt"); File expectedFile = new File(expectedResource.toURI()); Assertions.assertThat(actualFile).hasSameContentAs(expectedFile); } }