/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.classifier.bayes;

import java.io.File;
import java.io.Writer;
import java.util.Iterator;

import com.google.common.base.Charsets;
import com.google.common.io.Closeables;
import com.google.common.io.Files;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.mahout.classifier.BayesFileFormatter;
import org.apache.mahout.common.iterator.FileLineIterable;
import org.apache.mahout.common.iterator.FileLineIterator;
import org.apache.mahout.common.MahoutTestCase;
import org.junit.Before;
import org.junit.Test;

/**
 * Tests {@link BayesFileFormatter#format} and {@link BayesFileFormatter#collapse}
 * against a small input directory that holds one file per entry of {@link #WORDS}.
 * Each input file is named after its word and contains only that word.
 */
public final class BayesFileFormatterTest extends MahoutTestCase {

  private static final String[] WORDS = {"dog", "cat", "fish", "snake", "zebra"};

  /** Directory populated in {@link #setUp()} with one single-word file per entry of WORDS. */
  private File input;
  /** Directory the formatter writes into; asserted to be empty at the start of each test. */
  private File out;

  /**
   * Creates the temporary input and output directories and writes one UTF-8 file
   * per word into the input directory (file name == file content == the word).
   */
  @Override
  @Before
  public void setUp() throws Exception {
    super.setUp();
    input = getTestTempDir("bayes/in");
    out = getTestTempDir("bayes/out");
    for (String word : WORDS) {
      File file = new File(input, word);
      Writer writer = Files.newWriter(file, Charsets.UTF_8);
      try {
        writer.write(word);
      } finally {
        Closeables.closeQuietly(writer);
      }
    }
  }

  /**
   * Runs {@code format} with the label "animal" and checks that the output directory
   * ends up with as many files as there are input words, each holding exactly one
   * line equal to {@code "animal" + '\t' + <output file name>}.
   */
  @Test
  public void test() throws Exception {
    Analyzer analyzer = new WhitespaceAnalyzer();
    File[] files = listFiles(out);
    assertEquals("files Size: " + files.length + " is not: " + 0, 0, files.length);
    BayesFileFormatter.format("animal", analyzer, input, Charsets.UTF_8, out);

    files = listFiles(out);
    // JUnit's contract is (message, expected, actual): the expected value goes first
    // so that a failure report names the right side as "expected".
    assertEquals("files Size: " + files.length + " is not: " + WORDS.length, WORDS.length, files.length);
    for (File file : files) {
      // There should be exactly one line in the file: the label, a tab, and then the
      // word, which is the same as the file's name because of how setUp() wrote the input.
      Iterator<String> it = new FileLineIterator(file);
      String line = it.next().trim();
      assertFalse(it.hasNext());
      String label = "animal" + '\t' + file.getName();
      assertEquals(line + ":::: is not equal to " + label + "::::", label, line);
    }
  }

  /**
   * Runs {@code collapse} with the label "animal" into a single file named "animal"
   * and checks that the output directory holds exactly one file whose lines all start
   * with the label and whose line count equals the number of input words.
   */
  @Test
  public void testCollapse() throws Exception {
    Analyzer analyzer = new WhitespaceAnalyzer();
    File[] files = listFiles(out);
    assertEquals("files Size: " + files.length + " is not: " + 0, 0, files.length);
    BayesFileFormatter.collapse("animal", analyzer, input, Charsets.UTF_8, new File(out, "animal"));

    files = listFiles(out);
    assertEquals("files Size: " + files.length + " is not: " + 1, 1, files.length);
    int count = 0;
    for (String line : new FileLineIterable(files[0])) {
      assertTrue("line does not start with label", line.startsWith("animal"));
      count++;
    }
    // Expected value first, actual second (see JUnit's assertEquals contract).
    assertEquals(count + " does not equal: " + WORDS.length, WORDS.length, count);
  }

  /**
   * Lists the entries of {@code dir}, failing the test with a readable message instead
   * of a NullPointerException when {@link File#listFiles()} returns {@code null}
   * (which it does when the path is not a directory or an I/O error occurs).
   */
  private static File[] listFiles(File dir) {
    File[] files = dir.listFiles();
    assertNotNull("listFiles() returned null for " + dir, files);
    return files;
  }
}