/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.benchmark.byTask.feeds;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.Properties;

import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;

/**
 * Tests {@link EnwikiContentSource} against small in-memory XML inputs: a single page, two pages,
 * and two pages read repeatedly with the {@code content.source.forever} property enabled.
 */
public class EnwikiContentSourceTest extends LuceneTestCase {

  /** An EnwikiContentSource which works on a String and not files. */
  private static class StringableEnwikiSource extends EnwikiContentSource {

    private final String docs;

    public StringableEnwikiSource(String docs) {
      this.docs = docs;
    }

    /** Returns a fresh stream over the UTF-8 bytes of the configured string on every call. */
    @Override
    protected InputStream openInputStream() throws IOException {
      return new ByteArrayInputStream(docs.getBytes(StandardCharsets.UTF_8));
    }
  }

  /**
   * Asserts that {@code dd} is non-null and that its name, title, body and date equal the expected
   * values.
   */
  private void assertDocData(
      DocData dd, String expName, String expTitle, String expBody, String expDate)
      throws ParseException {
    assertNotNull(dd);
    assertEquals(expName, dd.getName());
    assertEquals(expTitle, dd.getTitle());
    assertEquals(expBody, dd.getBody());
    assertEquals(expDate, dd.getDate());
  }

  /** Asserts that requesting one more document from {@code stdm} throws NoMoreDataException. */
  private void assertNoMoreDataException(EnwikiContentSource stdm) throws Exception {
    expectThrows(NoMoreDataException.class, () -> stdm.getNextDocData(null));
  }

  /** First sample page: id 1, title "Title1", timestamp 2011-09-14T11:35:09Z. */
  private static final String PAGE1 =
      " <page>\r\n"
          + " <title>Title1</title>\r\n"
          + " <ns>0</ns>\r\n"
          + " <id>1</id>\r\n"
          + " <revision>\r\n"
          + " <id>11</id>\r\n"
          + " <parentid>111</parentid>\r\n"
          + " <timestamp>2011-09-14T11:35:09Z</timestamp>\r\n"
          + " <contributor>\r\n"
          + " <username>Mister1111</username>\r\n"
          + " <id>1111</id>\r\n"
          + " </contributor>\r\n"
          + " <minor />\r\n"
          + " <comment>/* Never mind */</comment>\r\n"
          + " <text>Some text 1 here</text>\r\n"
          + " </revision>\r\n"
          + " </page>\r\n";

  /** Second sample page: id 2, title "Title2", timestamp 2022-09-14T22:35:09Z. */
  private static final String PAGE2 =
      " <page>\r\n"
          + " <title>Title2</title>\r\n"
          + " <ns>0</ns>\r\n"
          + " <id>2</id>\r\n"
          + " <revision>\r\n"
          + " <id>22</id>\r\n"
          + " <parentid>222</parentid>\r\n"
          + " <timestamp>2022-09-14T22:35:09Z</timestamp>\r\n"
          + " <contributor>\r\n"
          + " <username>Mister2222</username>\r\n"
          + " <id>2222</id>\r\n"
          + " </contributor>\r\n"
          + " <minor />\r\n"
          + " <comment>/* Never mind */</comment>\r\n"
          + " <text>Some text 2 here</text>\r\n"
          + " </revision>\r\n"
          + " </page>\r\n";

  /** A single page yields exactly one document, after which the source reports no more data. */
  @Test
  public void testOneDocument() throws Exception {
    String docs = "<mediawiki>\r\n" + PAGE1 + "</mediawiki>";
    EnwikiContentSource source = createContentSource(docs, false);
    try {
      DocData dd = source.getNextDocData(new DocData());
      assertDocData(dd, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");

      assertNoMoreDataException(source);
    } finally {
      // Release the source even when an assertion above fails.
      source.close();
    }
  }

  /**
   * Builds a string-backed content source over {@code docs}.
   *
   * @param docs the XML text the source will read
   * @param forever value stored under the {@code content.source.forever} property
   * @return the configured source, after its inputs have been reset through a {@link DocMaker}
   */
  private EnwikiContentSource createContentSource(String docs, boolean forever)
      throws IOException {
    Properties props = new Properties();
    props.setProperty("print.props", "false");
    props.setProperty("content.source.forever", Boolean.toString(forever));
    Config config = new Config(props);

    EnwikiContentSource source = new StringableEnwikiSource(docs);
    source.setConfig(config);

    // doc-maker just for initiating content source inputs
    DocMaker docMaker = new DocMaker();
    docMaker.setConfig(config, source);
    docMaker.resetInputs();
    return source;
  }

  /** Two pages yield two documents in order, after which the source reports no more data. */
  @Test
  public void testTwoDocuments() throws Exception {
    String docs = "<mediawiki>\r\n" + PAGE1 + PAGE2 + "</mediawiki>";
    EnwikiContentSource source = createContentSource(docs, false);
    try {
      DocData dd1 = source.getNextDocData(new DocData());
      assertDocData(dd1, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");

      DocData dd2 = source.getNextDocData(new DocData());
      assertDocData(dd2, "2", "Title2", "Some text 2 here", "14-SEP-2022 22:35:09.000");

      assertNoMoreDataException(source);
    } finally {
      // Release the source even when an assertion above fails.
      source.close();
    }
  }

  /** With the forever flag on, the same two documents are returned on each of three passes. */
  @Test
  public void testForever() throws Exception {
    String docs = "<mediawiki>\r\n" + PAGE1 + PAGE2 + "</mediawiki>";
    EnwikiContentSource source = createContentSource(docs, true);
    try {
      // same documents several times
      for (int i = 0; i < 3; i++) {
        DocData dd1 = source.getNextDocData(new DocData());
        assertDocData(dd1, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");

        DocData dd2 = source.getNextDocData(new DocData());
        assertDocData(dd2, "2", "Title2", "Some text 2 here", "14-SEP-2022 22:35:09.000");
        // Don't test that NoMoreDataException is thrown, since the forever flag is turned on.
      }
    } finally {
      // Close in finally so a failed assertion does not skip the close of a forever-mode source.
      source.close();
    }
  }
}