package au.com.acpfg.xml.reader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.Reader;
import org.knime.base.node.util.BufferedFileReader;
import org.knime.core.data.DataCell;
import org.knime.core.data.DataType;
import org.knime.core.data.DataValue.UtilityFactory;
import org.knime.core.data.def.StringCell;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLFilterImpl;
import org.xml.sax.helpers.XMLReaderFactory;
/**
 * A data cell holding XML. The XML is either kept in memory as a string, or, when
 * the cell was built from a file larger than a size threshold, kept as a reference
 * (absolute path) to that file and loaded on demand.
 */
public class XMLCell extends DataCell {
    /**
     * Serial ID
     */
    private static final long serialVersionUID = -8748457485351154080L;

    /*
     * Internal state which must be serialised.
     */
    /** Flag supplied by the creator: the content is only part of a document. */
    private boolean m_is_fragment;
    /** Flag supplied by the creator: the content is well-formed without modification. */
    private boolean m_is_well_formed;
    /** When true, {@link #m_xml} holds a file path rather than XML content. */
    private boolean m_is_ref;
    /** Overloaded field: either a filename (large XML) or the XML content itself. */
    private String m_xml;

    /**
     * Maximum XML size to store in the cell, otherwise the cell keeps a reference and loads the
     * file on demand. That is the only practical way to keep track of 100MB XML files where the
     * in-core tree is larger due to overheads.
     */
    public static final long MAX_INCORE_XML_SIZE = (50 * 1024); // 50KB

    /**
     * The maximum number of lines which will be displayed for a reference XMLCell;
     * does NOT apply to XMLCells where !m_is_ref.
     */
    private static final int MAX_CELL_LINES = 10; // first 10 lines in table cell with '...' to signify more to come

    /**
     * Convenience access to the data type of this cell class.
     */
    public static final DataType TYPE = DataType.getType(XMLCell.class);

    /**
     * Ensure correct instantiation for XML cells
     */
    public static final UtilityFactory UTILITY = new XMLUtilityFactory();

    /**
     * Creates a cell from a file using {@link #MAX_INCORE_XML_SIZE} as the in-core threshold.
     *
     * @param input_file the XML file to wrap
     * @throws IOException if the file must be read into memory and cannot be read
     */
    public XMLCell(File input_file) throws IOException {
        this(input_file, MAX_INCORE_XML_SIZE);
    }

    /**
     * Constructor which supports a per-object XML size to keep XML (string) in-core.
     * Files longer than <code>max_xml_size</code> bytes are kept by reference (only the
     * absolute path is stored); smaller files are read fully into memory. The fragment
     * and well-formed flags are left <code>false</code> by this constructor.
     *
     * @param input_file the XML file to wrap
     * @param max_xml_size largest file length (in bytes) which is still loaded in-core
     * @throws IOException if the file must be read into memory and cannot be read
     */
    public XMLCell(File input_file, long max_xml_size) throws IOException {
        super();
        if (input_file.length() > max_xml_size) {
            // Large document: remember where it lives and do NOT read it. Falling through
            // to the read below would replace the path with the content while the cell
            // still claims to be a reference.
            m_is_ref = true;
            m_xml = input_file.getAbsolutePath();
            return;
        }

        m_is_ref = false;
        // Clamp the initial capacity: the length is a long and callers may pass a huge threshold.
        int capacity = (int) Math.min(input_file.length(), (long) Integer.MAX_VALUE);
        StringBuilder content = new StringBuilder(capacity);
        FileInputStream in = new FileInputStream(input_file);
        BufferedFileReader rdr = null;
        try {
            rdr = BufferedFileReader.createNewReader(in);
            String line;
            while ((line = rdr.readLine()) != null) {
                // readLine() strips the line terminator; put one back so that tokens and
                // text which were on separate lines are not silently merged.
                content.append(line).append('\n');
            }
        } finally {
            // Always release the file handle, even when reading fails.
            if (rdr != null) {
                rdr.close();
            } else {
                in.close();
            }
        }
        m_xml = content.toString();
    }

    /**
     * Creates an in-core cell from XML text.
     *
     * @param xml the XML content
     * @param is_frag only part of a document, ie. no XML declaration?
     * @param is_well_formed syntactically valid XML without ANY modifications?
     */
    public XMLCell(String xml, boolean is_frag, boolean is_well_formed) {
        super();
        m_xml = xml;
        m_is_fragment = is_frag;
        m_is_well_formed = is_well_formed;
        m_is_ref = false;
    }

    /**
     * Creates an in-core cell from XML text which is not flagged as well-formed.
     *
     * @param xml the XML content
     * @param is_fragment whether the content is only part of a document
     */
    public XMLCell(String xml, boolean is_fragment) {
        this(xml, is_fragment, false);
    }

    /**
     * Creates an in-core cell from XML text, flagged as a fragment and not as well-formed.
     *
     * @param xml the XML content
     */
    public XMLCell(String xml) {
        this(xml, true, false);
    }

    /**
     * Two cells are equal when they hold the same content (or the same file reference)
     * and the same flags. This keeps equality consistent with {@link #hashCode()},
     * which is derived from the content rather than from object identity.
     *
     * @param dc the cell to compare with
     * @return true if both cells carry the same state
     */
    @Override
    protected boolean equalsDataCell(DataCell dc) {
        if (this == dc) {
            return true;
        }
        if (!(dc instanceof XMLCell)) {
            return false;
        }
        XMLCell other = (XMLCell) dc;
        if (m_is_ref != other.m_is_ref
                || m_is_fragment != other.m_is_fragment
                || m_is_well_formed != other.m_is_well_formed) {
            return false;
        }
        return (m_xml == null) ? (other.m_xml == null) : m_xml.equals(other.m_xml);
    }

    /**
     * @return the hash of the stored string (content or file path), or 0 if there is none
     */
    @Override
    public int hashCode() {
        return (m_xml == null) ? 0 : m_xml.hashCode();
    }

    /**
     * @return true if this cell stores a path to the XML file instead of the XML itself
     */
    public boolean isReference() {
        return m_is_ref;
    }

    /**
     * Returns a file reference to the XML content within this cell. If the amount of
     * XML is large, this will be a reference to the data source - but callers must not rely on
     * this behavior. For in-core cells the content is written, unmodified, to a new temporary
     * file which is registered for deletion when the JVM exits.
     *
     * @return a file containing the XML of this cell
     * @throws IOException if the temporary file cannot be created or completely written
     * @throws SAXException declared for callers; not thrown by the current implementation
     */
    public File asFile() throws IOException, SAXException {
        if (m_is_ref) {
            return new File(m_xml);
        }
        // small XML documents are not kept by reference... so...
        File temp_file = java.io.File.createTempFile("xml-temp", ".xml");
        // Callers cannot tell a temp file from the original source, so they cannot be
        // expected to delete it; make sure it does not outlive the JVM.
        temp_file.deleteOnExit();
        boolean written = false;
        PrintWriter pw = null;
        try {
            OutputStream os = new FileOutputStream(temp_file);
            pw = new PrintWriter(os);
            pw.print(m_xml);
            // PrintWriter never throws on write errors; checkError() flushes and reports them.
            if (pw.checkError()) {
                throw new IOException("Unable to write XML to temporary file: " + temp_file.getAbsolutePath());
            }
            written = true;
        } finally {
            if (pw != null) {
                pw.close();
            }
            if (!written) {
                // do not leave a partial document behind
                temp_file.delete();
            }
        }
        return temp_file;
    }

    /**
     * Returns a printable version of the XML. No namespace processing is applied here.
     * For in-core cells this is the complete content. For reference cells only the first
     * {@link #MAX_CELL_LINES} lines of the file are returned (joined without line breaks),
     * followed by "..." when the file has more lines. If the file cannot be read, "?" is
     * returned.
     */
    @Override
    public String toString() {
        if (!m_is_ref) {
            return m_xml;
        }
        FileInputStream in = null;
        BufferedFileReader rdr = null;
        try {
            in = new FileInputStream(new File(m_xml));
            rdr = BufferedFileReader.createNewReader(in);
            StringBuilder preview = new StringBuilder();
            String line;
            int cnt = 0;
            while ((line = rdr.readLine()) != null && cnt++ < MAX_CELL_LINES) {
                preview.append(line);
            }
            // line is non-null only when the loop stopped at the line limit, ie. there is more to come
            if (line != null) {
                preview.append("...");
            }
            return preview.toString();
        } catch (Exception e) {
            // deliberate best effort: rendering a cell must never fail
            return "?";
        } finally {
            try {
                if (rdr != null) {
                    rdr.close();
                } else if (in != null) {
                    in.close();
                }
            } catch (IOException ignored) {
                // nothing useful can be done if closing a read-only stream fails
            }
        }
    }

    /**
     * Returns an XML reader for parsing XML. When <code>strip_ns</code> is set, the reader
     * is wrapped in a <code>MyNSRemover</code> (presumably removing namespaces during
     * <code>parse()</code> - see that class).
     * NOTE(review): the reader is created with default settings; if untrusted XML is
     * parsed with it, consider disabling DTDs/external entities - confirm with callers.
     *
     * @param strip_ns whether to wrap the reader in the namespace remover
     * @return the reader, wrapped if <code>strip_ns</code> is true
     * @throws SAXException if no XML reader can be created
     */
    public XMLReader getReader(boolean strip_ns) throws SAXException {
        XMLReader rf = XMLReaderFactory.createXMLReader();
        if (strip_ns) {
            return new MyNSRemover(rf);
        } else {
            return rf;
        }
    }
}