package net.bible.service.format;
import net.bible.service.common.Logger;
import org.crosswire.jsword.book.Book;
import org.crosswire.jsword.book.OSISUtil;
import org.crosswire.jsword.passage.Key;
import org.crosswire.jsword.passage.Verse;
/** Tidy up verses fetched from the raw input stream.
* Ensure each verse has a verse tag and remove any extra tags that break the structure.
*
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's author.
*/
public class OSISVerseTidy {
private Book book;
private static final String VERSE_OPENING_TAG_START = "<"+OSISUtil.OSIS_ELEMENT_VERSE+" "+OSISUtil.OSIS_ATTR_OSISID+"='";
private static final String VERSE_OPENING_TAG_END = "'>";
// WEB has <l> tags that span verses so avoid errors by using empty verse tags
private static final String VERSE_CLOSING_TAG = "</"+OSISUtil.OSIS_ELEMENT_VERSE+">";
private static final Logger log = new Logger(OSISVerseTidy.class.getName());
/** Constructor
*
* @param book The currently selected book
*/
public OSISVerseTidy(Book book) {
this.book = book;
}
/** add verse number and do basic validation/fix of verse text
* @param key
* @param verseText
* @return
*/
public String tidy(Key key, String verseText) {
verseText = checkVerseText(key, verseText);
verseText = addVerseTag(key, verseText);
return verseText;
}
/** This hack is based on a hack in JSword.
* I suspect we need to start at the beginning of a chapter instead of verse 1 to fix this
* because NET seems to have a <div> before the first verse and </div> at the end
*
* @param key
* @return
*/
private String checkVerseText(Key key, String verseText) {
// FIXME(dms): this is a major HACK handling a problem with a badly
// encoded module.
//TODO NET appears to open <div> before the verse start and the closing </div> is after the verse start - need to sort later
if (book.getAbbreviation().startsWith("NET") ) //$NON-NLS-1$ //$NON-NLS-2$
{
if (verseText.contains("</div>") && !(verseText.contains("<div ") || verseText.contains("<div>")) ) {
log.debug("Fixing up NET div");
verseText = verseText.replaceAll("</div>", "");
}
// verseText = verseText.substring(0, verseText.length() - 6);
}
//TODO WEB appears to open <l> before the verse start and the closing </l> is after the verse start - need to sort later
// as below you can see that <lg> and <l> start before the verse and close in the verse
// I think teh default JSword module loader would probably strip all tags
// <lg>
// <l type="x-primary">
// <verse sID="Ps.1.1" osisID="Ps.1.1" />Blessed is the man who doesnt walk in the counsel of the wicked,</l>
// <l type="x-secondary">nor stand in the way of sinners,</l>
// <l type="x-secondary">nor sit in the seat of scoffers;</l>
// </lg>
if (book.getAbbreviation().startsWith("WEB") && key instanceof Verse ) //$NON-NLS-1$ //$NON-NLS-2$
{
if (((Verse)key).getVerse()==1) {
log.debug("start of WEB chapter");
if (verseText.indexOf("</l>") < verseText.indexOf("<l type=\"x-primary\">")) {
log.debug("adding <lg><l>");
verseText = "<lg><l type=\"x-primary\">"+verseText;
}
}
// if (StringUtils.countMatches(verseText, "<l>") < StringUtils.countMatches(verseText, "</l>") ) {
// log.debug("Fixing up WEB <l>");
// verseText = verseText.replaceFirst("</l>", "");
// }
// //TODO - really! how can a verse end with an opening tag
// if (verseText.endsWith("<l type=\"x-primary\"> ")) {
// verseText = verseText.substring(0, verseText.length() - "<l type=\"x-primary\">".length());
// }
}
return verseText;
}
/** Ensure each verse has the appropriate OSIS verse tag.
*
* @param verse
* @param plain
* @return
*/
private String addVerseTag(Key verse, String plain) {
String ret = plain;
if (!plain.contains("<"+OSISUtil.OSIS_ELEMENT_VERSE)) {
StringBuilder bldr = new StringBuilder();
bldr.append(VERSE_OPENING_TAG_START).append(verse.getOsisID()).append(VERSE_OPENING_TAG_END).append(plain).append(VERSE_CLOSING_TAG);
ret = bldr.toString();
}
return ret;
}
}