/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.benchmark.byTask.tasks; import java.io.BufferedReader; import java.io.InputStream; import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashSet; import java.util.Properties; import java.util.Set; import org.apache.commons.compress.compressors.CompressorStreamFactory; import org.apache.lucene.benchmark.BenchmarkTestCase; import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.feeds.DocMaker; import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.benchmark.byTask.utils.StreamUtils.Type; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; /** Tests the functionality of {@link WriteLineDocTask}. */ public class WriteLineDocTaskTest extends BenchmarkTestCase { // class has to be public so that Class.forName.newInstance() will work public static final class WriteLineDocMaker extends DocMaker { @Override public Document makeDocument() throws Exception { Document doc = new Document(); doc.add(new StringField(BODY_FIELD, "body", Field.Store.NO)); doc.add(new StringField(TITLE_FIELD, "title", Field.Store.NO)); doc.add(new StringField(DATE_FIELD, "date", Field.Store.NO)); return doc; } } // class has to be public so that Class.forName.newInstance() will work public static final class NewLinesDocMaker extends DocMaker { @Override public Document makeDocument() throws Exception { Document doc = new Document(); doc.add(new StringField(BODY_FIELD, "body\r\ntext\ttwo", Field.Store.NO)); doc.add(new StringField(TITLE_FIELD, "title\r\ntext", Field.Store.NO)); doc.add(new StringField(DATE_FIELD, "date\r\ntext", Field.Store.NO)); return doc; } } // class has to be public so that Class.forName.newInstance() will work public static final class NoBodyDocMaker extends DocMaker { @Override public Document makeDocument() throws Exception { Document doc = new Document(); doc.add(new StringField(TITLE_FIELD, "title", Field.Store.NO)); doc.add(new StringField(DATE_FIELD, "date", Field.Store.NO)); return doc; } } // class has to be public so that Class.forName.newInstance() will work public static final class NoTitleDocMaker extends DocMaker { @Override public Document makeDocument() throws Exception { Document doc = new Document(); doc.add(new StringField(BODY_FIELD, "body", Field.Store.NO)); doc.add(new StringField(DATE_FIELD, "date", Field.Store.NO)); return doc; } } // class has to be public so that Class.forName.newInstance() will work public static final class JustDateDocMaker extends DocMaker { @Override public Document makeDocument() throws Exception { Document doc = new Document(); doc.add(new StringField(DATE_FIELD, "date", Field.Store.NO)); return doc; } } // class has to be public so that Class.forName.newInstance() will work // same as JustDate just that this one is treated as legal public static final class LegalJustDateDocMaker extends DocMaker { @Override public Document makeDocument() throws Exception { Document doc = new Document(); doc.add(new StringField(DATE_FIELD, "date", Field.Store.NO)); return doc; } } // class has to be public so that Class.forName.newInstance() will work public static final class EmptyDocMaker extends DocMaker { @Override public Document makeDocument() throws Exception { return new Document(); } } // class has to be public so that Class.forName.newInstance() will work public static final class ThreadingDocMaker extends DocMaker { @Override public Document makeDocument() throws Exception { Document doc = new Document(); String name = Thread.currentThread().getName(); doc.add(new StringField(BODY_FIELD, "body_" + name, Field.Store.NO)); doc.add(new StringField(TITLE_FIELD, "title_" + name, Field.Store.NO)); doc.add(new StringField(DATE_FIELD, "date_" + name, Field.Store.NO)); return doc; } } private static final CompressorStreamFactory csFactory = new CompressorStreamFactory(); private PerfRunData createPerfRunData(Path file, boolean allowEmptyDocs, String docMakerName) throws Exception { Properties props = new Properties(); props.setProperty("doc.maker", docMakerName); props.setProperty("line.file.out", file.toAbsolutePath().toString()); props.setProperty("directory", "RAMDirectory"); // no accidental FS dir. if (allowEmptyDocs) { props.setProperty("sufficient.fields", ","); } if (docMakerName.equals(LegalJustDateDocMaker.class.getName())) { props.setProperty("line.fields", DocMaker.DATE_FIELD); props.setProperty("sufficient.fields", DocMaker.DATE_FIELD); } Config config = new Config(props); return new PerfRunData(config); } private void doReadTest(Path file, Type fileType, String expTitle, String expDate, String expBody) throws Exception { InputStream in = Files.newInputStream(file); switch(fileType) { case BZIP2: in = csFactory.createCompressorInputStream(CompressorStreamFactory.BZIP2, in); break; case GZIP: in = csFactory.createCompressorInputStream(CompressorStreamFactory.GZIP, in); break; case PLAIN: break; // nothing to do default: assertFalse("Unknown file type!",true); //fail, should not happen } try (BufferedReader br = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) { String line = br.readLine(); assertHeaderLine(line); line = br.readLine(); assertNotNull(line); String[] parts = line.split(Character.toString(WriteLineDocTask.SEP)); int numExpParts = expBody == null ? 2 : 3; assertEquals(numExpParts, parts.length); assertEquals(expTitle, parts[0]); assertEquals(expDate, parts[1]); if (expBody != null) { assertEquals(expBody, parts[2]); } assertNull(br.readLine()); } } static void assertHeaderLine(String line) { assertTrue("First line should be a header line",line.startsWith(WriteLineDocTask.FIELDS_HEADER_INDICATOR)); } /* Tests WriteLineDocTask with a bzip2 format. */ public void testBZip2() throws Exception { // Create a document in bz2 format. Path file = getWorkDir().resolve("one-line.bz2"); PerfRunData runData = createPerfRunData(file, false, WriteLineDocMaker.class.getName()); WriteLineDocTask wldt = new WriteLineDocTask(runData); wldt.doLogic(); wldt.close(); doReadTest(file, Type.BZIP2, "title", "date", "body"); } /* Tests WriteLineDocTask with a gzip format. */ public void testGZip() throws Exception { // Create a document in gz format. Path file = getWorkDir().resolve("one-line.gz"); PerfRunData runData = createPerfRunData(file, false, WriteLineDocMaker.class.getName()); WriteLineDocTask wldt = new WriteLineDocTask(runData); wldt.doLogic(); wldt.close(); doReadTest(file, Type.GZIP, "title", "date", "body"); } public void testRegularFile() throws Exception { // Create a document in regular format. Path file = getWorkDir().resolve("one-line"); PerfRunData runData = createPerfRunData(file, false, WriteLineDocMaker.class.getName()); WriteLineDocTask wldt = new WriteLineDocTask(runData); wldt.doLogic(); wldt.close(); doReadTest(file, Type.PLAIN, "title", "date", "body"); } public void testCharsReplace() throws Exception { // WriteLineDocTask replaced only \t characters w/ a space, since that's its // separator char. However, it didn't replace newline characters, which // resulted in errors in LineDocSource. Path file = getWorkDir().resolve("one-line"); PerfRunData runData = createPerfRunData(file, false, NewLinesDocMaker.class.getName()); WriteLineDocTask wldt = new WriteLineDocTask(runData); wldt.doLogic(); wldt.close(); doReadTest(file, Type.PLAIN, "title text", "date text", "body text two"); } public void testEmptyBody() throws Exception { // WriteLineDocTask threw away documents w/ no BODY element, even if they // had a TITLE element (LUCENE-1755). It should throw away documents if they // don't have BODY nor TITLE Path file = getWorkDir().resolve("one-line"); PerfRunData runData = createPerfRunData(file, false, NoBodyDocMaker.class.getName()); WriteLineDocTask wldt = new WriteLineDocTask(runData); wldt.doLogic(); wldt.close(); doReadTest(file, Type.PLAIN, "title", "date", null); } public void testEmptyTitle() throws Exception { Path file = getWorkDir().resolve("one-line"); PerfRunData runData = createPerfRunData(file, false, NoTitleDocMaker.class.getName()); WriteLineDocTask wldt = new WriteLineDocTask(runData); wldt.doLogic(); wldt.close(); doReadTest(file, Type.PLAIN, "", "date", "body"); } /** Fail by default when there's only date */ public void testJustDate() throws Exception { Path file = getWorkDir().resolve("one-line"); PerfRunData runData = createPerfRunData(file, false, JustDateDocMaker.class.getName()); WriteLineDocTask wldt = new WriteLineDocTask(runData); wldt.doLogic(); wldt.close(); try (BufferedReader br = Files.newBufferedReader(file, StandardCharsets.UTF_8)) { String line = br.readLine(); assertHeaderLine(line); line = br.readLine(); assertNull(line); } } public void testLegalJustDate() throws Exception { Path file = getWorkDir().resolve("one-line"); PerfRunData runData = createPerfRunData(file, false, LegalJustDateDocMaker.class.getName()); WriteLineDocTask wldt = new WriteLineDocTask(runData); wldt.doLogic(); wldt.close(); try (BufferedReader br = Files.newBufferedReader(file, StandardCharsets.UTF_8)) { String line = br.readLine(); assertHeaderLine(line); line = br.readLine(); assertNotNull(line); } } public void testEmptyDoc() throws Exception { Path file = getWorkDir().resolve("one-line"); PerfRunData runData = createPerfRunData(file, true, EmptyDocMaker.class.getName()); WriteLineDocTask wldt = new WriteLineDocTask(runData); wldt.doLogic(); wldt.close(); try (BufferedReader br = Files.newBufferedReader(file, StandardCharsets.UTF_8)) { String line = br.readLine(); assertHeaderLine(line); line = br.readLine(); assertNotNull(line); } } public void testMultiThreaded() throws Exception { Path file = getWorkDir().resolve("one-line"); PerfRunData runData = createPerfRunData(file, false, ThreadingDocMaker.class.getName()); final WriteLineDocTask wldt = new WriteLineDocTask(runData); Thread[] threads = new Thread[10]; for (int i = 0; i < threads.length; i++) { threads[i] = new Thread("t" + i) { @Override public void run() { try { wldt.doLogic(); } catch (Exception e) { throw new RuntimeException(e); } } }; } for (Thread t : threads) t.start(); for (Thread t : threads) t.join(); wldt.close(); Set<String> ids = new HashSet<>(); try (BufferedReader br = Files.newBufferedReader(file, StandardCharsets.UTF_8)) { String line = br.readLine(); assertHeaderLine(line); // header line is written once, no matter how many threads there are for (int i = 0; i < threads.length; i++) { line = br.readLine(); String[] parts = line.split(Character.toString(WriteLineDocTask.SEP)); assertEquals(3, parts.length); // check that all thread names written are the same in the same line String tname = parts[0].substring(parts[0].indexOf('_')); ids.add(tname); assertEquals(tname, parts[1].substring(parts[1].indexOf('_'))); assertEquals(tname, parts[2].substring(parts[2].indexOf('_'))); } // only threads.length lines should exist assertNull(br.readLine()); assertEquals(threads.length, ids.size()); } } }