package com.tyndalehouse.step.core.data.create;
import static com.tyndalehouse.step.core.utils.IOUtils.closeQuietly;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.tyndalehouse.step.core.data.entities.impl.EntityIndexWriterImpl;
import com.tyndalehouse.step.core.data.loaders.AbstractClasspathBasedModuleLoader;
/**
 * Loads specific (original-language) word forms from a comma-separated resource and writes each usable
 * line to an entity index as a document with a {@code strongNumber} and an {@code originalForm} field.
 *
 * @author chrisburrell
 *
 */
public class SpecificFormsLoader extends AbstractClasspathBasedModuleLoader {
    /** Number of processed lines between two progress updates. */
    private static final int PROGRESS_INTERVAL = 10000;
    private static final Logger LOGGER = LoggerFactory.getLogger(SpecificFormsLoader.class);
    private final EntityIndexWriterImpl writer;

    /**
     * @param writer to the index file
     * @param resourcePath the file
     */
    public SpecificFormsLoader(final EntityIndexWriterImpl writer, final String resourcePath) {
        super(resourcePath);
        this.writer = writer;
    }

    /**
     * Reads the resource line by line, ignoring the first {@code skipLines} lines, and hands every
     * remaining line to {@link #parseLine(String)}. A progress update is published every
     * {@value #PROGRESS_INTERVAL} processed lines. An {@link IOException} while reading is logged as a
     * warning and ends the load early; the reader is always closed.
     *
     * @param reader the source of the lines, closed by this method
     * @param skipLines the number of leading lines to ignore; zero or a negative value skips nothing
     */
    @Override
    protected void parseFile(final Reader reader, int skipLines) {
        final BufferedReader bufferedReader = new BufferedReader(reader);
        int leftOverSkip = skipLines;
        int lines = 0;
        try {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                // only count down while there is something left to skip, so the counter can never wrap
                if (leftOverSkip > 0) {
                    leftOverSkip--;
                    continue;
                }

                parseLine(line);

                // counts every processed line, including those parseLine decided not to index
                lines++;
                if (lines % PROGRESS_INTERVAL == 0) {
                    getMainLoader().addUpdate("install_original_word_forms_progress", lines);
                }
            }
        } catch (final IOException io) {
            LOGGER.warn(io.getMessage(), io);
        } finally {
            closeQuietly(bufferedReader);
        }
        LOGGER.info("Finished loading [{}] specific forms", lines);
    }

    /**
     * Splits a line on commas and stores the first two fields as one index document. Lines with fewer
     * than two fields, or whose first field contains a {@code |} character, are ignored.
     *
     * @param line the line
     */
    private void parseLine(final String line) {
        // a literal single-character separator splits exactly like the "[,]" character class
        final String[] split = line.split(",");
        if (split.length < 2 || split[0].contains("|")) {
            return;
        }

        this.writer.addFieldToCurrentDocument("strongNumber", split[0]);
        this.writer.addFieldToCurrentDocument("originalForm", split[1]);
        this.writer.save();
    }
}