/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.mahout.text; import java.io.IOException; import java.util.Arrays; import java.util.Collections; import java.util.List; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.lucene.search.TermQuery; import org.apache.mahout.common.HadoopUtil; import org.apache.mahout.text.doc.MultipleFieldsDocument; import org.apache.mahout.text.doc.SingleFieldDocument; import org.junit.After; import org.junit.Before; import org.junit.Test; @Deprecated public class SequenceFilesFromLuceneStorageDriverTest extends AbstractLuceneStorageTest { private SequenceFilesFromLuceneStorageDriver driver; private LuceneStorageConfiguration lucene2SeqConf; private String idField; private List<String> fields; private Path seqFilesOutputPath; private Configuration conf; @Before public void before() throws Exception { conf = getConfiguration(); conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," + "org.apache.hadoop.io.serializer.WritableSerialization"); seqFilesOutputPath = new Path(getTestTempDirPath(), "seqfiles"); idField = SingleFieldDocument.ID_FIELD; fields = Collections.singletonList("field"); driver = new SequenceFilesFromLuceneStorageDriver() { @Override public LuceneStorageConfiguration newLucene2SeqConfiguration(Configuration configuration, List<Path> indexPaths, Path seqPath, String idField, List<String> fields) { lucene2SeqConf = new LuceneStorageConfiguration(configuration, indexPaths, seqPath, idField, fields); return lucene2SeqConf; } }; } @After public void after() throws IOException { HadoopUtil.delete(conf, seqFilesOutputPath); HadoopUtil.delete(conf, getIndexPath1()); } @Test public void testNewLucene2SeqConfiguration() { lucene2SeqConf = driver.newLucene2SeqConfiguration(conf, Collections.singletonList(new Path(getIndexPath1().toString())), seqFilesOutputPath, idField, fields); assertEquals(conf, lucene2SeqConf.getConfiguration()); assertEquals(Collections.singletonList(getIndexPath1()), lucene2SeqConf.getIndexPaths()); assertEquals(seqFilesOutputPath, lucene2SeqConf.getSequenceFilesOutputPath()); assertEquals(idField, lucene2SeqConf.getIdField()); assertEquals(fields, lucene2SeqConf.getFields()); } @Test public void testRun() throws Exception { List<MultipleFieldsDocument> docs = Collections.singletonList(new MultipleFieldsDocument("123", "test 1", "test 2", "test 3")); commitDocuments(getDirectory(getIndexPath1AsFile()), docs.get(0)); String queryField = "queryfield"; String queryTerm = "queryterm"; String maxHits = "500"; String field1 = "field1"; String field2 = "field2"; String[] args = { "-i", getIndexPath1AsFile().toString(), "-o", seqFilesOutputPath.toString(), "-id", idField, "-f", field1 + "," + field2, "-q", queryField + ":" + queryTerm, "-n", maxHits, "-xm", "sequential" }; driver.setConf(conf); driver.run(args); assertEquals(1, lucene2SeqConf.getIndexPaths().size()); assertEquals(getIndexPath1().toUri().getPath(), lucene2SeqConf.getIndexPaths().get(0).toUri().getPath()); assertEquals(seqFilesOutputPath, lucene2SeqConf.getSequenceFilesOutputPath()); assertEquals(idField, lucene2SeqConf.getIdField()); assertEquals(Arrays.asList(field1, field2), lucene2SeqConf.getFields()); assertTrue(lucene2SeqConf.getQuery() instanceof TermQuery); assertEquals(queryField, ((TermQuery) lucene2SeqConf.getQuery()).getTerm().field()); assertEquals(queryTerm, ((TermQuery) lucene2SeqConf.getQuery()).getTerm().text()); assertEquals(new Integer(maxHits), (Integer) lucene2SeqConf.getMaxHits()); } @Test public void testRunOptionalArguments() throws Exception { commitDocuments(getDirectory(getIndexPath1AsFile()), new SingleFieldDocument("1", "Mahout is cool")); commitDocuments(getDirectory(getIndexPath1AsFile()), new SingleFieldDocument("2", "Mahout is cool")); String[] args = { "-i", getIndexPath1AsFile().toString(), "-o", seqFilesOutputPath.toString(), "-id", idField, "-f", StringUtils.join(fields, SequenceFilesFromLuceneStorageDriver.SEPARATOR_FIELDS) }; driver.setConf(conf); driver.run(args); assertEquals(1, lucene2SeqConf.getIndexPaths().size()); assertEquals(getIndexPath1().toUri().getPath(), lucene2SeqConf.getIndexPaths().get(0).toUri().getPath()); assertEquals(seqFilesOutputPath, lucene2SeqConf.getSequenceFilesOutputPath()); assertEquals(idField, lucene2SeqConf.getIdField()); assertEquals(fields, lucene2SeqConf.getFields()); assertEquals(conf, lucene2SeqConf.getConfiguration()); assertEquals(SequenceFilesFromLuceneStorageDriver.DEFAULT_QUERY, lucene2SeqConf.getQuery()); assertEquals(SequenceFilesFromLuceneStorageDriver.DEFAULT_MAX_HITS, lucene2SeqConf.getMaxHits()); } @Test public void testRunInvalidQuery() throws Exception { commitDocuments(getDirectory(getIndexPath1AsFile()), new SingleFieldDocument("1", "Mahout is cool")); commitDocuments(getDirectory(getIndexPath1AsFile()), new SingleFieldDocument("2", "Mahout is cool")); String[] args = { "-i", getIndexPath1AsFile().toString(), "-o", seqFilesOutputPath.toString(), "-id", idField, "-f", StringUtils.join(fields, SequenceFilesFromLuceneStorageDriver.SEPARATOR_FIELDS), "-q", "invalid:query", "-xm", "sequential" }; driver.setConf(conf); driver.run(args); assertTrue(FileSystem.get(conf).exists(seqFilesOutputPath)); //shouldn't be any real files in the seq files out path } @Test public void testHelp() throws Exception { driver = new SequenceFilesFromLuceneStorageDriver(); driver.run(new String[]{"--help"}); } }