/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.benchmark.byTask.tasks; import java.io.BufferedWriter; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.nio.file.Paths; import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.feeds.DocMaker; import org.apache.lucene.benchmark.byTask.utils.StreamUtils; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexableField; /** * A {@link WriteLineDocTask} which for Wikipedia input, will write category pages * to another file, while remaining pages will be written to the original file. * The categories file is derived from the original file, by adding a prefix "categories-". */ public class WriteEnwikiLineDocTask extends WriteLineDocTask { private final PrintWriter categoryLineFileOut; public WriteEnwikiLineDocTask(PerfRunData runData) throws Exception { super(runData); OutputStream out = StreamUtils.outputStream(categoriesLineFile(Paths.get(fname))); categoryLineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8), StreamUtils.BUFFER_SIZE)); writeHeader(categoryLineFileOut); } /** Compose categories line file out of original line file */ public static Path categoriesLineFile(Path f) { Path dir = f.toAbsolutePath().getParent(); String categoriesName = "categories-"+f.getFileName(); return dir.resolve(categoriesName); } @Override public void close() throws Exception { categoryLineFileOut.close(); super.close(); } @Override protected PrintWriter lineFileOut(Document doc) { IndexableField titleField = doc.getField(DocMaker.TITLE_FIELD); if (titleField!=null && titleField.stringValue().startsWith("Category:")) { return categoryLineFileOut; } return super.lineFileOut(doc); } }