package org.openxdm.xcap.common.xml;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.xerces.util.XML11Char;
import org.openxdm.xcap.common.error.InternalServerErrorException;
import org.openxdm.xcap.common.error.NotUTF8ConflictException;
import org.openxdm.xcap.common.error.NotValidXMLFragmentConflictException;
import org.openxdm.xcap.common.error.NotWellFormedConflictException;
import org.openxdm.xcap.common.error.NotXMLAttributeValueConflictException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import com.sun.syndication.io.XmlReader;
/**
 * Static helpers for validating the XML-related pieces of a request: QNames,
 * attribute values, UTF-8 content and well-formed documents/fragments.
 */
public class XMLValidator {

    /** Digits allowed in a hexadecimal character reference ({@code &#x...;}). */
    private static final Pattern HEX_DIGITS = Pattern.compile("[0-9a-fA-F]+");

    /** Digits allowed in a decimal character reference ({@code &#...;}). */
    private static final Pattern DECIMAL_DIGITS = Pattern.compile("[0-9]+");

    /** Characters removed by {@link #weaklyEquals(String, String)} before comparing. */
    private static final Pattern LINE_NOISE = Pattern.compile("[\n\t\r\f]");

    /**
     * Tells whether the specified string is a qualified name: either a single
     * NCName, or two NCNames (prefix and local part) separated by exactly one
     * colon.
     *
     * @param name the candidate name; {@code null} is treated as not valid
     * @return {@code true} if the name is a valid QName, {@code false} otherwise
     */
    public static boolean isQName(String name) {
        if (name == null) {
            return false;
        }
        int colon = name.indexOf(':');
        if (colon < 0) {
            return XML11Char.isXML11ValidNCName(name);
        }
        // Split on the first colon only, keeping empty parts: String.split(":")
        // would silently drop a trailing empty part and accept "a:". A second
        // colon ends up in the local part, which then fails the NCName check.
        return XML11Char.isXML11ValidNCName(name.substring(0, colon))
                && XML11Char.isXML11ValidNCName(name.substring(colon + 1));
    }

    /**
     * Validates that the specified string is a valid XML attribute value.
     * The XML specification defines an attribute value as:
     *
     * <pre>
     * AttValue  ::= '"' ([^&lt;&amp;"] | Reference)* '"' | "'" ([^&lt;&amp;'] | Reference)* "'"
     * Reference ::= EntityRef | CharRef
     * EntityRef ::= '&amp;' Name ';'
     * CharRef   ::= '&amp;#' [0-9]+ ';' | '&amp;#x' [0-9a-fA-F]+ ';'
     * </pre>
     *
     * NOTE: the specified string does not come with its surrounding " or ',
     * so the delimiter in use is unknown and both quote characters are
     * rejected.
     *
     * @param value the attribute value, without surrounding quotes
     * @throws NotXMLAttributeValueConflictException if the value is
     *             {@code null}, contains {@code <}, {@code "} or {@code '},
     *             or contains a {@code &} that does not start a well-formed
     *             entity or character reference
     */
    public static void checkAttValue(String value)
            throws NotXMLAttributeValueConflictException {
        if (value == null) {
            throw new NotXMLAttributeValueConflictException();
        }
        // Single left-to-right scan. References are validated in place rather
        // than stripped out, so removing one reference can never splice the
        // surrounding text into something that looks like another reference.
        int length = value.length();
        int pos = 0;
        while (pos < length) {
            char c = value.charAt(pos);
            if (c == '<' || c == '"' || c == '\'') {
                throw new NotXMLAttributeValueConflictException();
            }
            if (c != '&') {
                pos++;
                continue;
            }
            // '&' must open a reference that is closed by ';'
            int end = value.indexOf(';', pos + 1);
            if (end < 0 || !isReferenceBody(value.substring(pos + 1, end))) {
                throw new NotXMLAttributeValueConflictException();
            }
            pos = end + 1;
        }
    }

    /**
     * Checks the text found between the '&' and ';' of a reference.
     *
     * @param body the reference text, without the leading '&' and trailing ';'
     * @return {@code true} if body is "#x" plus hex digits, "#" plus decimal
     *         digits, or a valid XML name
     */
    private static boolean isReferenceBody(String body) {
        if (body.startsWith("#x")) {
            return HEX_DIGITS.matcher(body.substring(2)).matches();
        }
        if (body.startsWith("#")) {
            return DECIMAL_DIGITS.matcher(body.substring(1)).matches();
        }
        return XML11Char.isXML11ValidName(body);
    }

    /**
     * Reads the whole stream and decodes its bytes as UTF-8, rejecting
     * malformed input instead of replacing it.
     *
     * @param is the stream to read; it is consumed but not closed
     * @return the decoded content
     * @throws NotUTF8ConflictException if the bytes are not valid UTF-8
     * @throws InternalServerErrorException if reading the stream fails
     */
    public static String getUTF8String(InputStream is)
            throws NotUTF8ConflictException, InternalServerErrorException {
        // collect all bytes of the stream
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        byte[] chunk = new byte[1024];
        try {
            int read;
            // -1 is the end-of-stream sentinel of InputStream.read(byte[])
            while ((read = is.read(chunk)) != -1) {
                bytes.write(chunk, 0, read);
            }
        } catch (IOException e) {
            throw new InternalServerErrorException(e.getMessage());
        }
        // a fresh decoder reports malformed input instead of substituting it
        CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
        try {
            return decoder.decode(ByteBuffer.wrap(bytes.toByteArray())).toString();
        } catch (java.nio.charset.CharacterCodingException e) {
            throw new NotUTF8ConflictException();
        }
    }

    /**
     * Parses the content of the reader as a namespace-aware XML document.
     *
     * @param reader the source of the document
     * @return the parsed document
     * @throws NotWellFormedConflictException if the parser rejects the content
     * @throws InternalServerErrorException if reading fails or no parser can
     *             be created
     */
    public static Document getWellFormedDocument(Reader reader)
            throws NotWellFormedConflictException, InternalServerErrorException {
        try {
            return newDocumentBuilder().parse(new InputSource(reader));
        } catch (SAXException e) {
            throw new NotWellFormedConflictException();
        } catch (IOException e) {
            throw new InternalServerErrorException(e.getMessage());
        } catch (ParserConfigurationException e) {
            throw new InternalServerErrorException(e.getMessage());
        }
    }

    /**
     * Parses the content of the reader as a namespace-aware XML document and
     * returns its root element.
     *
     * @param reader the source of the fragment
     * @return the document element of the parsed content
     * @throws NotValidXMLFragmentConflictException if the parser rejects the
     *             content
     * @throws InternalServerErrorException if reading fails or no parser can
     *             be created
     */
    public static Element getWellFormedDocumentFragment(Reader reader)
            throws NotValidXMLFragmentConflictException,
            InternalServerErrorException {
        try {
            Document dummyDocument = newDocumentBuilder().parse(
                    new InputSource(reader));
            return dummyDocument.getDocumentElement();
        } catch (SAXException e) {
            throw new NotValidXMLFragmentConflictException();
        } catch (IOException e) {
            throw new InternalServerErrorException(e.getMessage());
        } catch (ParserConfigurationException e) {
            throw new InternalServerErrorException(e.getMessage());
        }
    }

    /**
     * Creates a namespace-aware parser that does not resolve external
     * entities or load external DTDs, since the parsed content is supplied by
     * clients (XXE protection).
     */
    private static DocumentBuilder newDocumentBuilder()
            throws ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        disableFeature(factory,
                "http://xml.org/sax/features/external-general-entities");
        disableFeature(factory,
                "http://xml.org/sax/features/external-parameter-entities");
        disableFeature(factory,
                "http://apache.org/xml/features/nonvalidating/load-external-dtd");
        return factory.newDocumentBuilder();
    }

    /**
     * Turns the specified parser feature off, on a best-effort basis.
     */
    private static void disableFeature(DocumentBuilderFactory factory,
            String feature) {
        try {
            factory.setFeature(feature, false);
        } catch (ParserConfigurationException ignored) {
            // The installed parser does not know this feature. Parsing worked
            // without it before, so keep going rather than failing requests.
        }
    }

    /**
     * Wraps the stream in an {@link XmlReader} and returns it only if the
     * encoding reported by the reader is UTF-8.
     *
     * @param is the stream holding the XML content
     * @return a reader over the stream
     * @throws NotUTF8ConflictException if the reported encoding is not
     *             "UTF-8", or the reader fails with an "Invalid encoding"
     *             error
     * @throws InternalServerErrorException on any other failure while
     *             creating the reader
     */
    public static Reader getUTF8Reader(InputStream is)
            throws NotUTF8ConflictException, InternalServerErrorException {
        try {
            XmlReader reader = new XmlReader(is);
            // constant first: a missing encoding is simply "not UTF-8"
            if ("UTF-8".equals(reader.getEncoding())) {
                // encoding ok, return reader
                return reader;
            }
        } catch (Exception e) {
            // The failure is classified by its message so that XmlReader can
            // stay unmodified and be updated when needed. The message may be
            // absent, which is not an encoding problem.
            String message = e.getMessage();
            if (message != null && message.startsWith("Invalid encoding")) {
                throw new NotUTF8ConflictException();
            } else {
                throw new InternalServerErrorException(message);
            }
        }
        // encoding not ok, throw exception
        throw new NotUTF8ConflictException();
    }

    /**
     * Compares two XML strings ignoring leading/trailing whitespace and every
     * line feed, tab, carriage return and form feed. Spaces inside the
     * strings are significant.
     *
     * @param xml1 the first string, must not be {@code null}
     * @param xml2 the second string, must not be {@code null}
     * @return {@code true} if both strings are equal once cleaned
     */
    public static boolean weaklyEquals(String xml1, String xml2) {
        String clean1 = LINE_NOISE.matcher(xml1.trim()).replaceAll("");
        String clean2 = LINE_NOISE.matcher(xml2.trim()).replaceAll("");
        return clean1.equals(clean2);
    }
}