package hapax.test; import static org.junit.Assert.assertEquals; import org.junit.Test; import org.junit.runner.RunWith; import ch.akuhn.hapax.corpus.Terms; import ch.akuhn.hapax.index.TermDocumentMatrix; import ch.akuhn.util.Get; import ch.unibe.jexample.Given; import ch.unibe.jexample.JExample; @RunWith(JExample.class) public class DeerExample { public static final String SORTED = "[computer, eps, graph, human, interface, minors, response, survey, system, time, trees, user]"; public static final String[][] DATA = { { "c1", "Human machine interface for Lab ABC computer applications" }, { "c2", "A survey of user opinion of computer system response time" }, { "c3", "The EPS user interface management system" }, { "c4", "System and human system engineering testing of EPS" }, { "c5", "Relation of user-perceived response time to error measurement" }, { "m1", "The generation of random, binary, unordered trees" }, { "m2", "The intersection graph of paths in trees" }, { "m3", "Graph minors IV: Widths of trees and well-quasi-ordering" }, { "m4", "Graph minors: A survey" }}; @Test public TermDocumentMatrix makeTermDocumentMatrix() { TermDocumentMatrix tdm = new TermDocumentMatrix(); for (String[] each: DATA) { tdm.putDocument(each[0], new Terms(each[1])); } assertEquals(9, tdm.documentCount()); assertEquals(45, tdm.termCount()); return tdm; } @Test @Given("#makeTermDocumentMatrix") public TermDocumentMatrix rejectStopWords(final TermDocumentMatrix matrix) { TermDocumentMatrix tdm = matrix; tdm = tdm.toLowerCase(); assertEquals(9, tdm.documentCount()); assertEquals(42, tdm.termCount()); tdm = tdm.rejectHapaxes(); assertEquals(9, tdm.documentCount()); assertEquals(16, tdm.termCount()); tdm = tdm.toLowerCase().rejectStopwords(); assertEquals(9, tdm.documentCount()); assertEquals(12, tdm.termCount()); assertEquals(SORTED, Get.sorted(tdm.terms().elementSet()).toString()); return tdm; } // @Test // @Given("#rejectStopWords") // public void testImportExport(TermDocumentMatrix tdm) { // StringBuilder buf = new StringBuilder(); // tdm.storeOn(buf); // TermDocumentMatrix tdm2 = TermDocumentMatrix.readFrom(new Scanner(buf.toString())); // StringBuilder buf2 = new StringBuilder(); // tdm2.storeOn(buf2); // assertEquals(buf.toString(), buf2.toString()); // assertEquals(tdm.documentCount(), tdm2.documentCount()); // assertEquals(tdm.termCount(), tdm2.termCount()); // assertEquals(tdm.density(), tdm2.density(), Double.MIN_VALUE); // } }