/******************************************************************************* * Copyright (c) 2012, Directors of the Tyndale STEP Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * Neither the name of the Tyndale House, Cambridge (www.TyndaleHouse.com) * nor the names of its contributors may be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
******************************************************************************/
package com.tyndalehouse.step.core.data.create;

import static com.tyndalehouse.step.core.utils.StringUtils.isBlank;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.tyndalehouse.step.core.data.entities.impl.EntityIndexWriterImpl;
import com.tyndalehouse.step.core.data.loaders.AbstractClasspathBasedModuleLoader;
import com.tyndalehouse.step.core.exceptions.StepInternalException;

/**
 * Loads a line-based data file made of entries, each introduced by a separator line, into an entity index.
 * The original author's note describes it as a loader for the Easton Dictionary.
 * <p>
 * The file is read line by line:
 * <ul>
 * <li>a line ending with {@value #START_TOKEN} marks the start of a new entry; whatever has been accumulated
 * so far is saved through the writer;</li>
 * <li>a line starting with {@code '@'} is treated as a field: the text before the first tab gives the field
 * name, the text after it gives the value;</li>
 * <li>every other line is ignored.</li>
 * </ul>
 * 
 * @author chrisburrell
 * 
 */
public class HeadwordLineBasedLoader extends AbstractClasspathBasedModuleLoader {
    private static final Logger LOGGER = LoggerFactory.getLogger(HeadwordLineBasedLoader.class);
    private static final String START_TOKEN = "==============";

    /** A progress update is published each time this many entries have been started. */
    private static final int PROGRESS_INTERVAL = 5000;

    // state used during processing
    private int count;
    private final EntityIndexWriterImpl writer;

    /**
     * Loads up dictionary items
     * 
     * @param writer the lucene index writer
     * @param resourcePath the classpath to the data
     */
    public HeadwordLineBasedLoader(final EntityIndexWriterImpl writer, final String resourcePath) {
        super(resourcePath);
        this.writer = writer;
    }

    /**
     * Reads the source line by line, feeding each line to {@link #parseLine(String)}, then saves the entry
     * that was still being built when the end of the input was reached.
     * 
     * @param reader the source of the data; it is wrapped in a {@link BufferedReader} but not closed here
     * @param skipLines not used by this implementation: no lines are skipped
     * @throws StepInternalException if a line cannot be read from the source
     */
    @Override
    protected void parseFile(final Reader reader, int skipLines) {
        final BufferedReader bufferedReader = new BufferedReader(reader);

        try {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                parseLine(line);
            }
        } catch (final IOException e) {
            throw new StepInternalException("Unable to read a line from the source file ", e);
        }

        // save last article
        this.writer.save();

        LOGGER.info("Loaded [{}] entries.", this.count);
    }

    /**
     * Parses a line by setting the current state of this loader appropriately.
     * <p>
     * A line ending with the start token begins a new entry: the entry counter is incremented, the current
     * document is saved and, every {@value #PROGRESS_INTERVAL} entries, a progress update is published to
     * the main loader. The line is then always handed to {@link #parseField(String)}, which ignores
     * anything that is not a field line.
     * 
     * @param line the line that has been read from file
     */
    private void parseLine(final String line) {
        // deal with case where we are hitting a new word
        if (line.endsWith(START_TOKEN)) {
            this.count++;
            this.writer.save();

            if (this.count % PROGRESS_INTERVAL == 0) {
                super.getMainLoader().addUpdate("install_generic_progress", this.count);
            }
        }

        parseField(line);
    }

    /**
     * Parses a simple field line of the form {@code @<name><delimiter><TAB><value>} and adds the field to
     * the document currently being built by the writer.
     * <p>
     * Lines that are null, empty or do not start with {@code '@'} are silently ignored. Lines without a tab,
     * or without any text between the {@code '@'} and the tab, are logged as invalid. Fields whose value is
     * blank are skipped.
     * 
     * @param line the line content including field name and value
     */
    private void parseField(final String line) {
        if (line == null || line.length() == 0 || line.charAt(0) != '@') {
            // ignoring line
            return;
        }

        // Index 0 holds '@', so a usable tab sits at index 2 or later: a missing tab (-1) has no value, and a
        // tab at index 1 would produce an empty field name below.
        final int tabIndex = line.indexOf('\t');
        if (tabIndex < 2) {
            LOGGER.error("Invalid line was found in file: [{}]", line);
            return;
        }

        // The character immediately before the tab is dropped from the name.
        // NOTE(review): presumably a trailing delimiter such as '=' or ':' - confirm against the data files.
        final String fieldName = line.substring(0, tabIndex - 1);

        // tabIndex + 1 never exceeds line.length(), so this yields "" when the tab is the last character
        final String fieldValue = line.substring(tabIndex + 1);
        if (isBlank(fieldValue)) {
            // skipping empty field
            LOGGER.trace("Skipping empty field [{}] => [{}]", fieldName, fieldValue);
            return;
        }

        this.writer.addFieldToCurrentDocument(fieldName, fieldValue);
    }
}