package com.tyndalehouse.step.tools.esv.deprecated; import java.io.File; import java.io.IOException; import java.util.*; import java.util.regex.Pattern; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.TransformerFactoryConfigurationError; import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; import org.apache.commons.io.FileUtils; import org.crosswire.jsword.book.Book; import org.crosswire.jsword.book.Books; import org.crosswire.jsword.passage.NoSuchKeyException; import org.jdom2.Content; import org.jdom2.Element; import org.jdom2.JDOMException; import org.jdom2.input.SAXBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Reads in an xml file and tries to track progress with a tagging sheet, enriching where it can * * @deprecated * @author chrisburrell */ @Deprecated public class EsvOsisEnricher { private static Pattern PUNCTUATION = Pattern.compile("[,?!./\\-:;'\"—]+"); private static final Logger LOGGER = LoggerFactory.getLogger(EsvOsisEnricher.class); private final Set<String> canonicals = new HashSet<String>(); private Map<String, Deque<Word>> verseContent; public static void main(final String[] args) throws JDOMException, IOException, NoSuchKeyException, TransformerFactoryConfigurationError, TransformerException { new EsvOsisEnricher().process("c:\\Downloads\\esv-osis-sect.xml", "c:\\temp\\tagging.csv"); } private void process(final String osisFile, final String mappingFile) throws JDOMException, IOException, NoSuchKeyException, TransformerFactoryConfigurationError, TransformerException { init(mappingFile); final SAXBuilder builder = new SAXBuilder(); builder.setValidation(false); // final Document doc = builder.build(f); final StreamSource streamSource = new StreamSource(getClass().getResourceAsStream("/enrich.xsl")); final Transformer newTransformer = TransformerFactory.newInstance().newTransformer(streamSource); final StreamSource original = new StreamSource(getClass().getResourceAsStream("/esv2011.xml")); final StreamResult result = new StreamResult(new File("d:\\temp\\esv-out.xml")); newTransformer.setParameter("state", new EnricherState(this.verseContent)); newTransformer.transform(original, result); String s = FileUtils.readFileToString(new File("d:\\temp\\esv-out.xml")); s = s.replaceAll("#<#", "<"); s = s.replaceAll("#>#", ">"); FileUtils.write(new File("d:\\temp\\esv-out.xml"), s); // result.getWriter().close(); // final Element rootElement = doc.getRootElement(); // final Iterator<Content> content = rootElement.getDescendants(); // parseContent(content); } private void init(final String tagging) throws IOException, NoSuchKeyException { this.canonicals.add("q"); this.canonicals.add("divineName"); readMappingFile(new File(tagging)); } private void readMappingFile(final File tagging) throws IOException, NoSuchKeyException { final Book b = Books.installed().getBook("ESV-THE"); List<String> lines = FileUtils.readLines(tagging); this.verseContent = new HashMap<String, Deque<Word>>(); Deque<Word> currentVerseWords = null; String currentVerse = null; int lineNumber = 1; for (String lineText : lines) { String[] line = lineText.split("\\t"); LOGGER.trace("line: {}", line); Word w; if (line.length <= 1) { LOGGER.warn("Blank line found in file, line {}", lineNumber); continue; } else if (line.length == 2) { w = new Word(line[1]); } else { w = new Word(line[1], line[2]); } if (!line[0].equalsIgnoreCase(currentVerse)) { LOGGER.trace("New verse {}", line[0]); currentVerseWords = new LinkedList<Word>(); try { this.verseContent.put(b.getKey(getValidOsisRef(line[0])).getOsisID(), currentVerseWords); } catch (final NoSuchKeyException e) { LOGGER.warn("[{}]: Failure to resolve verse to OSIS reference.", line[0]); continue; } } LOGGER.trace("Adding word {}", w.getW()); currentVerseWords.add(w); currentVerse = line[0]; lineNumber++; } } private String getValidOsisRef(final String ref) { if (ref.startsWith("Sol")) { return ref.replace("Sol", "Song"); } return ref; } private void parseContent(final Iterator<Content> content) { boolean isVerse = false; String currentVerse = null; while (content.hasNext()) { final Content c = content.next(); if (c instanceof Element) { final Element element = (Element) c; if ("verse".equals(element.getName())) { currentVerse = element.getAttributeValue("sID"); if (currentVerse != null) { LOGGER.debug("Found opening verse marker"); isVerse = true; } if (element.getAttribute("eID") != null) { LOGGER.debug("Found end verse marker"); isVerse = false; } // processVerse(element, null); } else { if (isVerse) { processVerse(currentVerse, c); } } } else { // we're in between 2 verses if (isVerse) { processVerse(currentVerse, c); } // System.out.println(c); } } } private void processVerse(final String currentVerse, final Content c) { LOGGER.trace("Processing verse {}", currentVerse); final Deque<Word> queue = this.verseContent.get(currentVerse); if (queue == null || queue.size() == 0) { LOGGER.warn("[{}]: no matching data available, skipping", currentVerse); return; } } public static String reduce(final String s) { if (s == null) { return ""; } return PUNCTUATION.matcher(s).replaceAll(" ").replace(" ", " "); } }