package jp.aonir.fuzzyxml.internal;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class FuzzyXMLUtil {
/** XML */
private static Pattern encoding = Pattern.compile("<\\?xml\\s+[^\\?>]*?encoding\\s*=\\s*\"(.*?)\"[^\\?>]*?\\?>");
private static Pattern script = Pattern.compile("(<script.*?>)(.*?)(</script>)", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
private static Pattern woTag = Pattern.compile("<(/*)(wo|webobject)(s*[^>]*)>", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
private static Pattern whiteSpace = Pattern.compile("([\\ \\t\\r\\n])");
/**
* <script ... > </script>
*
* @param source XML
* @return
*/
public static String escapeScript(String source) {
StringBuffer sb = new StringBuffer();
int lastIndex = 0;
Matcher matcher = script.matcher(source);
while (matcher.find()) {
if (matcher.start() != 0) {
sb.append(source.substring(lastIndex, matcher.start()));
}
sb.append(matcher.group(1));
String group2 = matcher.group(2);
//sb.append("<!--");
group2 = woTag.matcher(group2).replaceAll("[WOOPEN]$1$2$3[WOCLOSE]");
group2 = group2.replaceAll("<", " ");
group2 = group2.replaceAll(">", " ");
group2 = group2.replaceAll("\\[WOOPEN\\]", "<");
group2 = group2.replaceAll("\\[WOCLOSE\\]", ">");
sb.append(group2);
/*
for (int i = 0; i < group2.length(); i++) {
sb.append(" ");
}
*/
//sb.append("-->");
sb.append(matcher.group(3));
lastIndex = matcher.end();
}
if (lastIndex < source.length()) {
sb.append(source.substring(lastIndex));
}
return sb.toString();
}
/**
* @param source
* @return
*/
public static String escapeString(String source) {
StringBuffer sb = new StringBuffer();
int flag = 0;
boolean tag = false;
boolean escape = false;
for (int i = 0; i < source.length(); i++) {
char c = source.charAt(i);
if (tag) {
// MS: I took out escaping .. This is potentially a really sketchy thing to do, but it
// was breaking attributes like numberformat = "\$#,##0.00"
// Q: Added back in but handle escaping differently now
if ((flag == 1 || flag == 2) && c == '\\') {
escape = true;
continue;
}
else if (flag == 0 && c == '"') {
flag = 1;
}
else if (flag == 1 && c == '"') {
if (!escape) {
flag = 0;
} else {
sb.append('\\');
}
escape = false;
}
else if (flag == 0 && c == '\'') {
flag = 2;
}
else if (flag == 2 && c == '\'') {
if (!escape) {
flag = 0;
} else {
sb.append('\\');
}
escape = false;
}
else if ((flag == 1 || flag == 2)) {
sb.append(' ');
if (escape) {
sb.append(' ');
escape = false;
}
continue;
}
else if (flag == 0 && c == '>') {
tag = false;
}
}
else if (c == '<') {
tag = true;
}
sb.append(c);
}
return sb.toString();
}
/**
* HTML/JSP/XML
* <ul>
* <li><!-- --></li>
* </ul>
*
* @param source XML
* @param contentsOnly true
* false
* @return
*/
public static String comment2space(String source, boolean contentsOnly) {
int index = 0;
int last = 0;
StringBuffer sb = new StringBuffer();
while ((index = source.indexOf("<!--", last)) != -1) {
int end = source.indexOf("-->", index);
if (end != -1) {
sb.append(source.substring(last, index));
int length = end - index + 3;
if (contentsOnly) {
sb.append("<!--");
length = length - 7;
}
int i = 0;
Matcher woMatcher = woTag.matcher(source.substring(index + 4, end));
while (woMatcher.find()) {
int woTagStart = woMatcher.start();
int woTagEnd = woMatcher.end();
for (; woTagStart > i; woTagStart --) {
sb.append(" ");
}
sb.append(woMatcher.group());
i = woTagEnd;
}
for (; i < length; i++) {
sb.append(" ");
}
if (contentsOnly) {
sb.append("-->");
}
}
else {
break;
}
last = end + 3;
}
if (last != source.length()) {
sb.append(source.substring(last));
}
return sb.toString();
}
public static String cdata2space(String source, boolean contentsOnly) {
int index = 0;
int last = 0;
StringBuffer sb = new StringBuffer();
while ((index = source.indexOf("<![CDATA[", last)) != -1) {
int end = source.indexOf("]]>", index);
if (end != -1) {
sb.append(source.substring(last, index));
int length = end - index + 3;
if (contentsOnly) {
sb.append("<![CDATA[");
length = length - 12;
}
for (int i = 0; i < length; i++) {
sb.append(" ");
}
if (contentsOnly) {
sb.append("]]>");
}
}
else {
break;
}
last = end + 3;
}
if (last != source.length()) {
sb.append(source.substring(last));
}
return sb.toString();
}
public static String doctype2space(String source, boolean contentsOnly) {
int index = 0;
int last = 0;
StringBuffer sb = new StringBuffer();
while ((index = source.indexOf("<!DOCTYPE", last)) != -1) {
sb.append(source.substring(last, index));
if (contentsOnly) {
sb.append("<!DOCTYPE");
}
else {
sb.append(" ");
}
boolean flag = false;
for (index = index + 9; index < source.length(); index++) {
char c = source.charAt(index);
if (c == '[') {
flag = true;
}
if (flag == true && c == ']') {
flag = false;
}
if (flag == false && c == '>') {
if (contentsOnly) {
sb.append('>');
}
else {
sb.append(' ');
}
break;
}
sb.append(" ");
}
last = index + 1;
}
if (last != source.length()) {
sb.append(source.substring(last));
}
return sb.toString();
}
public static String processing2space(String source, boolean contentsOnly) {
int index = 0;
int last = 0;
StringBuffer sb = new StringBuffer();
while ((index = source.indexOf("<?", last)) != -1) {
int end = source.indexOf("?>", index);
if (end != -1) {
sb.append(source.substring(last, index));
int length = end - index + 2;
if (contentsOnly) {
sb.append("<?");
length = length - 4;
}
for (int i = 0; i < length; i++) {
sb.append(" ");
}
if (contentsOnly) {
sb.append("?>");
}
}
else {
break;
}
last = end + 2;
}
if (last != source.length()) {
sb.append(source.substring(last));
}
return sb.toString();
}
public static String scriptlet2space(String source, boolean contentsOnly) {
int index = 0;
int last = 0;
StringBuffer sb = new StringBuffer();
while ((index = source.indexOf("<%", last)) != -1) {
int end = source.indexOf("%>", index);
if (end != -1) {
sb.append(source.substring(last, index));
int length = end - index + 2;
if (contentsOnly) {
sb.append("<%");
length = length - 4;
}
for (int i = 0; i < length; i++) {
sb.append(" ");
}
if (contentsOnly) {
sb.append("%>");
}
}
else {
break;
}
last = end + 2;
}
if (last != source.length()) {
sb.append(source.substring(last));
}
return sb.toString();
}
public static byte[] readStream(InputStream in) throws IOException {
ByteArrayOutputStream out = new ByteArrayOutputStream();
try {
int len = 0;
byte[] buf = new byte[1024 * 8];
while ((len = in.read(buf)) != -1) {
out.write(buf, 0, len);
}
out.close();
}
finally {
in.close();
}
byte[] result = out.toByteArray();
return result;
}
public static String escape(String value, boolean isHTML) {
Entities entities = null;
if (isHTML) {
entities = Entities.HTML40;
}
else {
entities = Entities.XML;
}
StringBuffer buf = new StringBuffer(value.length() * 2);
int i;
for (i = 0; i < value.length(); ++i) {
char ch = value.charAt(i);
String entityName = entities.entityName(ch);
if (entityName == null) {
buf.append(ch);
}
else {
buf.append('&');
buf.append(entityName);
buf.append(';');
}
}
return buf.toString();
}
public static String decode(String value, boolean isHTML) {
Entities entities = null;
if (isHTML) {
entities = Entities.HTML40;
}
else {
entities = Entities.XML;
}
StringBuffer buf = new StringBuffer(value.length());
int i;
for (i = 0; i < value.length(); ++i) {
char ch = value.charAt(i);
if (ch == '&') {
int semi = value.indexOf(';', i + 1);
if (semi == -1) {
buf.append(ch);
continue;
}
String entityName = value.substring(i + 1, semi);
int entityValue;
if (entityName.length() == 0) {
entityValue = -1;
}
else if (entityName.charAt(0) == '#') {
if (entityName.length() == 1) {
entityValue = -1;
}
else {
char charAt1 = entityName.charAt(1);
try {
if (charAt1 == 'x' || charAt1 == 'X') {
entityValue = Integer.valueOf(entityName.substring(2), 16).intValue();
}
else {
entityValue = Integer.parseInt(entityName.substring(1));
}
}
catch (NumberFormatException ex) {
entityValue = -1;
}
}
}
else {
entityValue = entities.entityValue(entityName);
}
if (entityValue == -1) {
buf.append('&');
buf.append(entityName);
buf.append(';');
}
else {
buf.append((char) (entityValue));
}
i = semi;
}
else {
buf.append(ch);
}
}
return buf.toString();
}
public static String escapeCDATA(String value) {
value = value.replaceAll(">", ">");
return value;
}
public static String getEncoding(byte[] bytes) {
String str = new String(bytes);
Matcher matcher = encoding.matcher(str);
if (matcher.find()) {
return matcher.group(1);
}
return null;
}
public static int getSpaceIndex(String value) {
Matcher matcher = whiteSpace.matcher(value);
if (matcher.find()) {
return matcher.start(1);
}
return -1;
}
public static boolean isWhitespace(char c) {
if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
return true;
}
return false;
}
public static boolean isAllUppercase(String str) {
for (int i = str.length() - 1; i >= 0; i --) {
if (!Character.isUpperCase(str.charAt(i))) {
return false;
}
}
return true;
}
public static boolean isAllWhitescape(String str) {
for (int i = 0; i < str.length(); i++) {
if (!Character.isWhitespace(str.charAt(i))) {
return false;
}
}
return true;
}
public static String blockIndent(RenderContext renderContext, String text) {
StringBuffer indent = new StringBuffer();
String value = text;
if (value.trim().length() == 0)
return value;
renderContext.appendIndent(indent);
Pattern pattern = Pattern.compile("^[^\\s]", Pattern.MULTILINE);
while (value.length() > 0 && !pattern.matcher(value).find()) {
value = value.replaceAll("^[ \t]", "").replaceAll("\n[ \t]", "\n");
}
value = value.replaceAll("(\n)(.+)", "$1" + indent + "$2");
return value;
}
}