package ecologylab.bigsemantics.cyberneko;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import org.apache.xerces.xni.parser.XMLDocumentFilter;
import org.cyberneko.html.filters.Writer;
import org.cyberneko.html.parsers.DOMParser;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import ecologylab.bigsemantics.html.dom.IDOMProvider;
/**
 * Wraps the CyberNeko HTML DOM parser for use as a DOM provider.
 * <p>
 * One {@link DOMParser} instance is created in the constructor and reused by every
 * {@code parseDOM} call; this class adds no synchronization around it.
 *
 * @author agh8154
 */
public class CybernekoWrapper implements IDOMProvider
{
  /** SAX feature id that switches namespace processing on or off. */
  private static final String NAMESPACES_FEATURE = "http://xml.org/sax/features/namespaces";

  /** CyberNeko property id controlling the case of reported element names. */
  private static final String ELEMENT_NAMES_PROPERTY = "http://cyberneko.org/html/properties/names/elems";

  /** CyberNeko property id holding the array of document filters appended to the pipeline. */
  private static final String FILTERS_PROPERTY = "http://cyberneko.org/html/properties/filters";

  /** Character encoding used when the parsed document is echoed to an output stream. */
  private static final String OUTPUT_ENCODING = "UTF-8";

  /** The wrapped parser; configured once in the constructor and reused for every parse. */
  DOMParser parser;

  /**
   * Creates the wrapped parser with namespace processing disabled and the element-name
   * property set to {@code "lower"}.
   *
   * @throws IllegalStateException
   *           if the parser rejects either setting
   */
  public CybernekoWrapper()
  {
    parser = new DOMParser();
    try
    {
      parser.setFeature(NAMESPACES_FEATURE, false);
      parser.setProperty(ELEMENT_NAMES_PROPERTY, "lower");
    }
    catch (SAXException e)
    {
      // Covers both SAXNotRecognizedException and SAXNotSupportedException. Continuing with a
      // parser that silently ignored its configuration would only hide the problem.
      throw new IllegalStateException("Could not configure the CyberNeko DOM parser", e);
    }
  }

  /**
   * Parses the content of a byte stream into a DOM document.
   *
   * @param inputStream
   *          the stream to read the markup from
   * @param out
   *          if non-null, the stream the parser's writer filter is attached to; may be null
   * @return the document held by the parser after parsing
   * @throws IOException
   *           if reading fails or the parser reports a SAX error
   */
  @Override
  public Document parseDOM(InputStream inputStream, OutputStream out) throws IOException
  {
    return parseDOM(new InputSource(inputStream), out);
  }

  /**
   * Parses the content of a character reader into a DOM document.
   *
   * @param reader
   *          the reader to read the markup from
   * @param out
   *          if non-null, the stream the parser's writer filter is attached to; may be null
   * @return the document held by the parser after parsing
   * @throws IOException
   *           if reading fails or the parser reports a SAX error
   */
  @Override
  public Document parseDOM(Reader reader, OutputStream out) throws IOException
  {
    return parseDOM(new InputSource(reader), out);
  }

  /**
   * Shared implementation behind both public {@code parseDOM} overloads.
   *
   * @param input
   *          the SAX input source to parse
   * @param out
   *          if non-null, a writer filter bound to this stream is installed for the parse
   * @return the document held by the parser after parsing
   * @throws IOException
   *           if reading fails, the output encoding is unsupported, or the parser reports a
   *           SAX error (the SAX exception is attached as the cause)
   */
  private Document parseDOM(InputSource input, OutputStream out) throws IOException
  {
    XMLDocumentFilter[] filters;
    if (out != null)
    {
      // The filter has to be constructed around 'out'; a filter built with the no-argument
      // constructor has no way of reaching the caller's stream.
      filters = new XMLDocumentFilter[] { new Writer(out, OUTPUT_ENCODING) };
    }
    else
    {
      // The parser is reused between calls, so the filter chain is reset explicitly;
      // otherwise a writer installed by an earlier call would still be active.
      filters = new XMLDocumentFilter[0];
    }

    try
    {
      parser.setProperty(FILTERS_PROPERTY, filters);
      parser.parse(input);
    }
    catch (SAXException e)
    {
      throw new IOException("CyberNeko failed to parse the document", e);
    }
    return parser.getDocument();
  }

  /**
   * Rewrites the case of the characters in an XPath expression that this method's heuristic
   * classifies as element-name characters; every other character is copied unchanged.
   * <p>
   * NOTE(review): despite the method name, the matched characters are converted with
   * {@link Character#toUpperCase(char)}. Presumably this matches the case of the element
   * names in the DOM produced by the wrapped parser -- TODO confirm.
   * <p>
   * Heuristic, as implemented:
   * <ul>
   * <li>A name run is considered open at index 0, and is (re)opened by a letter that directly
   * follows {@code '/'} or {@code ':'} unless the token is flagged as an axis/function.</li>
   * <li>Any character that is not a letter or digit closes the current name run.</li>
   * <li>A token after {@code '/'} or {@code ':'} is flagged when the first character after
   * its first character that is neither a letter/digit nor {@code '/'} is {@code ':'},
   * {@code '('} or {@code '-'}, and no {@code '/'} precedes that character.</li>
   * </ul>
   * NOTE(review): the flag is only cleared when a {@code ':'} is encountered, so a flagged
   * token that is not followed by {@code ':'} leaves it set for later steps -- confirm that
   * this is intended.
   *
   * @param xpath
   *          the XPath expression to convert; must not be null
   * @return the converted expression
   */
  @Override
  public String xPathTagNamesToLower(String xpath)
  {
    StringBuilder newXpath = new StringBuilder();
    boolean isTagName = true;
    boolean isAxis = false;
    for (int i = 0; i < xpath.length(); i++)
    {
      char c = xpath.charAt(i);
      char specialChar = '\0';
      // True when the previous character is a step or axis separator.
      boolean afterSeparator = i > 0 && (xpath.charAt(i - 1) == '/' || xpath.charAt(i - 1) == ':');

      if (!Character.isLetterOrDigit(c))
      {
        isTagName = false;
      }
      else if (afterSeparator)
      {
        // Look ahead for the first character that is neither alphanumeric nor '/'.
        String restOfXpath = xpath.substring(i + 1);
        int slash = restOfXpath.indexOf('/');
        int specialCharIndex = 0;
        for (int j = 0; j < restOfXpath.length(); j++)
        {
          char ahead = restOfXpath.charAt(j);
          if (!Character.isLetterOrDigit(ahead) && ahead != '/')
          {
            specialCharIndex = j;
            break;
          }
        }
        // Index 0 doubles as "nothing found", so a special character directly after the
        // current character is ignored here.
        if (specialCharIndex != 0)
        {
          specialChar = restOfXpath.charAt(specialCharIndex);
          if ((specialChar == ':' || specialChar == '(' || specialChar == '-')
              && (slash == -1 || slash > specialCharIndex))
          {
            isAxis = true;
          }
        }
      }

      if (c == ':' && specialChar != '(')
      {
        isAxis = false;
      }
      if (afterSeparator && Character.isLetter(c) && !isAxis)
      {
        isTagName = true;
      }

      if (isTagName)
      {
        newXpath.append(Character.toUpperCase(c));
      }
      else
      {
        newXpath.append(c);
      }
    }
    return newXpath.toString();
  }

  /**
   * Currently a no-op: the argument is ignored.
   *
   * @param b
   *          ignored
   */
  @Override
  public void setQuiet(boolean b)
  {
    // Not implemented for this provider.
  }

  /**
   * Currently a no-op: the argument is ignored.
   *
   * @param b
   *          ignored
   */
  @Override
  public void setShowWarnings(boolean b)
  {
    // Not implemented for this provider.
  }
}