package es.alvsanand.webpage.common;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import com.google.appengine.api.datastore.Text;
public class XMLUtils {
private final static Map<String, String> AUTO_CLOSED_HTML_TAGS;
static {
AUTO_CLOSED_HTML_TAGS = new HashMap<String, String>();
AUTO_CLOSED_HTML_TAGS.put("<br([^>]*)>", "<br/>");
AUTO_CLOSED_HTML_TAGS.put("<hr([^>]*)>", "<hr/>");
}
private final static Map<String, String> HTML_XML_ENTITIES;
static {
HTML_XML_ENTITIES = new HashMap<String, String>();
HTML_XML_ENTITIES.put("& ", "& ");
HTML_XML_ENTITIES.put(""", """);
HTML_XML_ENTITIES.put("&", "&");
HTML_XML_ENTITIES.put("<", "<");
HTML_XML_ENTITIES.put(">", ">");
HTML_XML_ENTITIES.put(" ", " ");
HTML_XML_ENTITIES.put("¡", "¡");
HTML_XML_ENTITIES.put("¢", "¢");
HTML_XML_ENTITIES.put("£", "£");
HTML_XML_ENTITIES.put("¤", "¤");
HTML_XML_ENTITIES.put("¥", "¥");
HTML_XML_ENTITIES.put("¦", "¦");
HTML_XML_ENTITIES.put("§", "§");
HTML_XML_ENTITIES.put("¨", "¨");
HTML_XML_ENTITIES.put("©", "©");
HTML_XML_ENTITIES.put("ª", "ª");
HTML_XML_ENTITIES.put("«", "«");
HTML_XML_ENTITIES.put("¬", "¬");
HTML_XML_ENTITIES.put("", "");
HTML_XML_ENTITIES.put("®", "®");
HTML_XML_ENTITIES.put("¯", "¯");
HTML_XML_ENTITIES.put("°", "°");
HTML_XML_ENTITIES.put("±", "±");
HTML_XML_ENTITIES.put("²", "²");
HTML_XML_ENTITIES.put("³", "³");
HTML_XML_ENTITIES.put("´", "´");
HTML_XML_ENTITIES.put("µ", "µ");
HTML_XML_ENTITIES.put("¶", "¶");
HTML_XML_ENTITIES.put("·", "·");
HTML_XML_ENTITIES.put("¸", "¸");
HTML_XML_ENTITIES.put("¹", "¹");
HTML_XML_ENTITIES.put("º", "º");
HTML_XML_ENTITIES.put("»", "»");
HTML_XML_ENTITIES.put("¼", "¼");
HTML_XML_ENTITIES.put("½", "½");
HTML_XML_ENTITIES.put("¾", "¾");
HTML_XML_ENTITIES.put("¿", "¿");
HTML_XML_ENTITIES.put("À", "À");
HTML_XML_ENTITIES.put("Á", "Á");
HTML_XML_ENTITIES.put("Â", "Â");
HTML_XML_ENTITIES.put("Ã", "Ã");
HTML_XML_ENTITIES.put("Ä", "Ä");
HTML_XML_ENTITIES.put("Å", "Å");
HTML_XML_ENTITIES.put("Æ", "Æ");
HTML_XML_ENTITIES.put("Ç", "Ç");
HTML_XML_ENTITIES.put("È", "È");
HTML_XML_ENTITIES.put("É", "É");
HTML_XML_ENTITIES.put("Ê", "Ê");
HTML_XML_ENTITIES.put("Ë", "Ë");
HTML_XML_ENTITIES.put("Ì", "Ì");
HTML_XML_ENTITIES.put("Í", "Í");
HTML_XML_ENTITIES.put("Î", "Î");
HTML_XML_ENTITIES.put("Ï", "Ï");
HTML_XML_ENTITIES.put("Ð", "Ð");
HTML_XML_ENTITIES.put("Ñ", "Ñ");
HTML_XML_ENTITIES.put("Ò", "Ò");
HTML_XML_ENTITIES.put("Ó", "Ó");
HTML_XML_ENTITIES.put("Ô", "Ô");
HTML_XML_ENTITIES.put("Õ", "Õ");
HTML_XML_ENTITIES.put("Ö", "Ö");
HTML_XML_ENTITIES.put("×", "×");
HTML_XML_ENTITIES.put("Ø", "Ø");
HTML_XML_ENTITIES.put("Ù", "Ù");
HTML_XML_ENTITIES.put("Ú", "Ú");
HTML_XML_ENTITIES.put("Û", "Û");
HTML_XML_ENTITIES.put("Ü", "Ü");
HTML_XML_ENTITIES.put("Ý", "Ý");
HTML_XML_ENTITIES.put("Þ", "Þ");
HTML_XML_ENTITIES.put("ß", "ß");
HTML_XML_ENTITIES.put("à", "à");
HTML_XML_ENTITIES.put("á", "á");
HTML_XML_ENTITIES.put("â", "â");
HTML_XML_ENTITIES.put("ã", "ã");
HTML_XML_ENTITIES.put("ä", "ä");
HTML_XML_ENTITIES.put("å", "å");
HTML_XML_ENTITIES.put("æ", "æ");
HTML_XML_ENTITIES.put("ç", "ç");
HTML_XML_ENTITIES.put("è", "è");
HTML_XML_ENTITIES.put("é", "é");
HTML_XML_ENTITIES.put("ê", "ê");
HTML_XML_ENTITIES.put("ë", "ë");
HTML_XML_ENTITIES.put("ì", "ì");
HTML_XML_ENTITIES.put("í", "í");
HTML_XML_ENTITIES.put("î", "î");
HTML_XML_ENTITIES.put("ï", "ï");
HTML_XML_ENTITIES.put("ð", "ð");
HTML_XML_ENTITIES.put("ñ", "ñ");
HTML_XML_ENTITIES.put("ò", "ò");
HTML_XML_ENTITIES.put("ó", "ó");
HTML_XML_ENTITIES.put("ô", "ô");
HTML_XML_ENTITIES.put("õ", "õ");
HTML_XML_ENTITIES.put("ö", "ö");
HTML_XML_ENTITIES.put("÷", "÷");
HTML_XML_ENTITIES.put("ø", "ø");
HTML_XML_ENTITIES.put("ù", "ù");
HTML_XML_ENTITIES.put("ú", "ú");
HTML_XML_ENTITIES.put("û", "û");
HTML_XML_ENTITIES.put("ü", "ü");
HTML_XML_ENTITIES.put("ý", "ý");
HTML_XML_ENTITIES.put("þ", "þ");
HTML_XML_ENTITIES.put("ÿ", "ÿ");
HTML_XML_ENTITIES.put("€", "€");
}
public static String getFullArticleData(Text articleData){
if(articleData==null){
return null;
}
String dataValue = articleData.getValue();
if(dataValue==null){
return null;
}
return repareText(dataValue.replaceFirst(Globals.ARTICLE_DATA_DELIMITER_REGEXP, ""));
}
public static String getResumeArticleData(Text articleData){
if(articleData==null){
return null;
}
String dataValue = articleData.getValue();
if(dataValue==null){
return null;
}
int lasResumeLastCharacterPosition = dataValue.indexOf(Globals.ARTICLE_DATA_DELIMITER);
if(lasResumeLastCharacterPosition>-1){
return repareText(dataValue.substring(0, lasResumeLastCharacterPosition));
}
else{
return repareText(dataValue);
}
}
public static String repareText(String text){
return convertHTMLToXMLEntities(repareAutoClosedHTMLTags(text));
}
private static String convertHTMLToXMLEntities(String text){
StringBuilder stringBuilderText = new StringBuilder(text);
for(String htmlEntity: HTML_XML_ENTITIES.keySet()){
replaceString(stringBuilderText, htmlEntity.toLowerCase(), HTML_XML_ENTITIES.get(htmlEntity));
replaceString(stringBuilderText, htmlEntity.toUpperCase(), HTML_XML_ENTITIES.get(htmlEntity));
}
return stringBuilderText.toString();
}
private static String repareAutoClosedHTMLTags(final String text){
String textAux = new String(text);
for(String tag: AUTO_CLOSED_HTML_TAGS.keySet()){
textAux = textAux.replaceAll(tag, AUTO_CLOSED_HTML_TAGS.get(tag));
}
return textAux;
}
private static StringBuilder replaceString(StringBuilder text, String search, String replace) {
int fromIndex = 0;
int start = text.indexOf(search, fromIndex);
if (start == -1) {
return text;
}
if (replace.length() > 0) {
int end = 0;
int endAdjust = (search.length() - replace.length());
do {
end = (start + replace.length()) + endAdjust;
text.replace(start, end, replace);
fromIndex = end;
} while ((start = text.indexOf(search, fromIndex)) != -1);
} else {
do {
text.delete(start, search.length());
fromIndex = start + replace.length();
} while ((start = text.indexOf(search, fromIndex)) != -1);
}
return text;
}
}