/**
* Distribution License:
* JSword is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License, version 2.1 as published by
* the Free Software Foundation. This program is distributed in the hope
* that it will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* The License is available on the internet at:
* http://www.gnu.org/copyleft/lgpl.html
* or by writing to:
* Free Software Foundation, Inc.
* 59 Temple Place - Suite 330
* Boston, MA 02111-1307, USA
*
* Copyright: 2005
* The copyright to this program is held by its authors.
*
* ID: $Id: OSISFilter.java 1966 2009-10-30 01:15:14Z dmsmith $
*/
package org.crosswire.jsword.book.filter.osis;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import org.crosswire.common.util.Logger;
import org.crosswire.common.xml.XMLUtil;
import org.crosswire.jsword.book.Book;
import org.crosswire.jsword.book.DataPolice;
import org.crosswire.jsword.book.OSISUtil;
import org.crosswire.jsword.book.filter.Filter;
import org.crosswire.jsword.passage.Key;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.xml.sax.InputSource;
/**
 * Filter that parses a string holding an OSIS XML fragment into OSIS (JDOM)
 * content. When the fragment is not well-formed the filter retries with
 * progressively more aggressive clean-ups: first the entities are cleaned,
 * and only if that still does not parse are all tags stripped.
 *
 * @see gnu.lgpl.License for license details.<br>
 *      The copyright to this program is held by its authors.
 * @author Joe Walker [joe at eireneh dot com]
 */
public class OSISFilter implements Filter {
    /**
     * Parse <code>plain</code> as an OSIS fragment and return its content.
     *
     * <p>
     * The attempts are, in order:
     * <ol>
     * <li>parse the text as it is (minus a trailing <code>&lt;/div&gt;</code>
     * for books whose initials start with "NET");</li>
     * <li>parse the text after <code>XMLUtil.cleanAllEntities</code>;</li>
     * <li>parse the text after <code>XMLUtil.cleanAllTags</code>;</li>
     * <li>give up and use a newly created OSIS paragraph element.</li>
     * </ol>
     * Any parse failure is reported to the DataPolice, even when a later
     * attempt succeeds.
     *
     * @param book
     *            the book the text comes from; its initials are used for the
     *            NET work-around and in the problem reports
     * @param key
     *            the key of the text; used in the problem reports
     * @param plain
     *            the raw OSIS fragment to parse
     * @return the content removed from the element that was finally obtained
     *
     * @see org.crosswire.jsword.book.filter.Filter#toOSIS(org.crosswire.jsword.book.Book,
     *      org.crosswire.jsword.passage.Key, java.lang.String)
     */
    public List toOSIS(Book book, Key key, String plain) {
        DataPolice.setKey(key);
        Element ele = null;
        Exception ex = null;
        String clean = plain;

        // FIXME(dms): this is a major HACK handling a problem with a badly
        // encoded module.
        if (book.getInitials().startsWith("NET") && plain.endsWith("</div>")) //$NON-NLS-1$ //$NON-NLS-2$
        {
            // Drop the 6 characters of the stray closing "</div>".
            clean = clean.substring(0, plain.length() - 6);
        }

        // First attempt: the text as delivered.
        try {
            ele = parse(clean);
        } catch (JDOMException e) {
            ex = e;
        } catch (IOException e) {
            ex = e;
        }

        // Second attempt: the text with its entities cleaned.
        if (ele == null) {
            clean = XMLUtil.cleanAllEntities(clean);

            try {
                ele = parse(clean);
            } catch (JDOMException e) {
                ex = e;
            } catch (IOException e) {
                ex = e;
            } finally {
                // Make sure that other places don't report this problem
                // NOTE(review): the key is only cleared on this path; when
                // the first parse succeeds it stays set. Confirm whether
                // that is intended.
                DataPolice.setKey(null);
            }
        }

        if (ex != null) {
            // Report the bad data even if cleaning the entities rescued it.
            DataPolice.report("Parse " + book.getInitials() + "(" + key.getName() + ") failed: " + ex.getMessage() + //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
                    "\non: " + plain); //$NON-NLS-1$

            // Third attempt: strip the tags, but only when there is still
            // nothing usable. Previously a successful second parse was
            // thrown away here and replaced by the tag-stripped text.
            if (ele == null) {
                ele = cleanTags(book, key, clean);
            }
        }

        // Last resort: fall back to a newly created paragraph element.
        if (ele == null) {
            ele = OSISUtil.factory().createP();
        }

        // Hand back the children without the element that housed them.
        return ele.removeContent();
    }

    /**
     * Create a copy of this filter using <code>Object.clone()</code>.
     *
     * @return the copy, or <code>null</code> if cloning is not supported
     *         (which trips an assertion when assertions are enabled)
     *
     * @see java.lang.Object#clone()
     */
    public Object clone() {
        try {
            return super.clone();
        } catch (CloneNotSupportedException e) {
            assert false : e;
        }
        return null;
    }

    /**
     * Last-ditch recovery: remove everything that looks like an XML tag and
     * try to parse what is left.
     *
     * @param book
     *            the book the text comes from; used in the warning message
     * @param key
     *            the key of the text; used in the warning message
     * @param plain
     *            the text that could not be parsed
     * @return the parsed wrapper element, or <code>null</code> if the
     *         stripped text could not be parsed either
     */
    private Element cleanTags(Book book, Key key, String plain) {
        // So just try to strip out all XML looking things
        String shawn = XMLUtil.cleanAllTags(plain);
        Exception ex = null;
        try {
            return parse(shawn);
        } catch (JDOMException e) {
            ex = e;
        } catch (IOException e) {
            ex = e;
        }

        // Only reached when one of the catch blocks above has set ex.
        log.warn("Could not fix " + book.getInitials() + "(" + key.getName() + ") by cleaning tags: " + ex.getMessage()); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
        return null;
    }

    /**
     * Parse a document fragment by wrapping it in a <code>div</code> root
     * element. If the string is invalid then we might want to have more than
     * one crack at parsing it, so failures are passed to the caller.
     *
     * @param plain
     *            the fragment to parse
     * @return the root <code>div</code> element that houses the parsed
     *         fragment
     * @throws JDOMException
     *             if the builder rejects the wrapped text
     * @throws IOException
     *             if reading the text fails
     */
    private Element parse(String plain) throws JDOMException, IOException {
        // create a root element to house our document fragment
        StringReader in = new StringReader("<div>" + plain + "</div>"); //$NON-NLS-1$ //$NON-NLS-2$
        InputSource is = new InputSource(in);
        SAXBuilder builder = new SAXBuilder();

        // MJD START
        // NOTE(review): a new builder is created on every call, so it is
        // unclear how much this setting gains here -- confirm.
        builder.setFastReconfigure(true);
        // MJD END

        Document doc = builder.build(is);
        Element div = doc.getRootElement();
        return div;
    }

    /**
     * The log stream
     */
    private static final Logger log = Logger.getLogger(OSISFilter.class);
}