package es.alvsanand.webpage.common; import java.util.HashMap; import java.util.Map; import com.google.appengine.api.datastore.Text; public class XMLUtils { private final static Map<String, String> AUTO_CLOSED_HTML_TAGS; static { AUTO_CLOSED_HTML_TAGS = new HashMap<String, String>(); AUTO_CLOSED_HTML_TAGS.put("<br([^>]*)>", "<br/>"); AUTO_CLOSED_HTML_TAGS.put("<hr([^>]*)>", "<hr/>"); } private final static Map<String, String> HTML_XML_ENTITIES; static { HTML_XML_ENTITIES = new HashMap<String, String>(); HTML_XML_ENTITIES.put("& ", "& "); HTML_XML_ENTITIES.put(""", """); HTML_XML_ENTITIES.put("&", "&"); HTML_XML_ENTITIES.put("<", "<"); HTML_XML_ENTITIES.put(">", ">"); HTML_XML_ENTITIES.put(" ", " "); HTML_XML_ENTITIES.put("¡", "¡"); HTML_XML_ENTITIES.put("¢", "¢"); HTML_XML_ENTITIES.put("£", "£"); HTML_XML_ENTITIES.put("¤", "¤"); HTML_XML_ENTITIES.put("¥", "¥"); HTML_XML_ENTITIES.put("¦", "¦"); HTML_XML_ENTITIES.put("§", "§"); HTML_XML_ENTITIES.put("¨", "¨"); HTML_XML_ENTITIES.put("©", "©"); HTML_XML_ENTITIES.put("ª", "ª"); HTML_XML_ENTITIES.put("«", "«"); HTML_XML_ENTITIES.put("¬", "¬"); HTML_XML_ENTITIES.put("­", "­"); HTML_XML_ENTITIES.put("®", "®"); HTML_XML_ENTITIES.put("¯", "¯"); HTML_XML_ENTITIES.put("°", "°"); HTML_XML_ENTITIES.put("±", "±"); HTML_XML_ENTITIES.put("²", "²"); HTML_XML_ENTITIES.put("³", "³"); HTML_XML_ENTITIES.put("´", "´"); HTML_XML_ENTITIES.put("µ", "µ"); HTML_XML_ENTITIES.put("¶", "¶"); HTML_XML_ENTITIES.put("·", "·"); HTML_XML_ENTITIES.put("¸", "¸"); HTML_XML_ENTITIES.put("¹", "¹"); HTML_XML_ENTITIES.put("º", "º"); HTML_XML_ENTITIES.put("»", "»"); HTML_XML_ENTITIES.put("¼", "¼"); HTML_XML_ENTITIES.put("½", "½"); HTML_XML_ENTITIES.put("¾", "¾"); HTML_XML_ENTITIES.put("¿", "¿"); HTML_XML_ENTITIES.put("À", "À"); HTML_XML_ENTITIES.put("Á", "Á"); HTML_XML_ENTITIES.put("Â", "Â"); HTML_XML_ENTITIES.put("Ã", "Ã"); HTML_XML_ENTITIES.put("Ä", "Ä"); HTML_XML_ENTITIES.put("Å", "Å"); HTML_XML_ENTITIES.put("Æ", "Æ"); HTML_XML_ENTITIES.put("Ç", "Ç"); HTML_XML_ENTITIES.put("È", "È"); HTML_XML_ENTITIES.put("É", "É"); HTML_XML_ENTITIES.put("Ê", "Ê"); HTML_XML_ENTITIES.put("Ë", "Ë"); HTML_XML_ENTITIES.put("Ì", "Ì"); HTML_XML_ENTITIES.put("Í", "Í"); HTML_XML_ENTITIES.put("Î", "Î"); HTML_XML_ENTITIES.put("Ï", "Ï"); HTML_XML_ENTITIES.put("Ð", "Ð"); HTML_XML_ENTITIES.put("Ñ", "Ñ"); HTML_XML_ENTITIES.put("Ò", "Ò"); HTML_XML_ENTITIES.put("Ó", "Ó"); HTML_XML_ENTITIES.put("Ô", "Ô"); HTML_XML_ENTITIES.put("Õ", "Õ"); HTML_XML_ENTITIES.put("Ö", "Ö"); HTML_XML_ENTITIES.put("×", "×"); HTML_XML_ENTITIES.put("Ø", "Ø"); HTML_XML_ENTITIES.put("Ù", "Ù"); HTML_XML_ENTITIES.put("Ú", "Ú"); HTML_XML_ENTITIES.put("Û", "Û"); HTML_XML_ENTITIES.put("Ü", "Ü"); HTML_XML_ENTITIES.put("Ý", "Ý"); HTML_XML_ENTITIES.put("Þ", "Þ"); HTML_XML_ENTITIES.put("ß", "ß"); HTML_XML_ENTITIES.put("à", "à"); HTML_XML_ENTITIES.put("á", "á"); HTML_XML_ENTITIES.put("â", "â"); HTML_XML_ENTITIES.put("ã", "ã"); HTML_XML_ENTITIES.put("ä", "ä"); HTML_XML_ENTITIES.put("å", "å"); HTML_XML_ENTITIES.put("æ", "æ"); HTML_XML_ENTITIES.put("ç", "ç"); HTML_XML_ENTITIES.put("è", "è"); HTML_XML_ENTITIES.put("é", "é"); HTML_XML_ENTITIES.put("ê", "ê"); HTML_XML_ENTITIES.put("ë", "ë"); HTML_XML_ENTITIES.put("ì", "ì"); HTML_XML_ENTITIES.put("í", "í"); HTML_XML_ENTITIES.put("î", "î"); HTML_XML_ENTITIES.put("ï", "ï"); HTML_XML_ENTITIES.put("ð", "ð"); HTML_XML_ENTITIES.put("ñ", "ñ"); HTML_XML_ENTITIES.put("ò", "ò"); HTML_XML_ENTITIES.put("ó", "ó"); HTML_XML_ENTITIES.put("ô", "ô"); HTML_XML_ENTITIES.put("õ", "õ"); HTML_XML_ENTITIES.put("ö", "ö"); HTML_XML_ENTITIES.put("÷", "÷"); HTML_XML_ENTITIES.put("ø", "ø"); HTML_XML_ENTITIES.put("ù", "ù"); HTML_XML_ENTITIES.put("ú", "ú"); HTML_XML_ENTITIES.put("û", "û"); HTML_XML_ENTITIES.put("ü", "ü"); HTML_XML_ENTITIES.put("ý", "ý"); HTML_XML_ENTITIES.put("þ", "þ"); HTML_XML_ENTITIES.put("ÿ", "ÿ"); HTML_XML_ENTITIES.put("€", "€"); } public static String getFullArticleData(Text articleData){ if(articleData==null){ return null; } String dataValue = articleData.getValue(); if(dataValue==null){ return null; } return repareText(dataValue.replaceFirst(Globals.ARTICLE_DATA_DELIMITER_REGEXP, "")); } public static String getResumeArticleData(Text articleData){ if(articleData==null){ return null; } String dataValue = articleData.getValue(); if(dataValue==null){ return null; } int lasResumeLastCharacterPosition = dataValue.indexOf(Globals.ARTICLE_DATA_DELIMITER); if(lasResumeLastCharacterPosition>-1){ return repareText(dataValue.substring(0, lasResumeLastCharacterPosition)); } else{ return repareText(dataValue); } } public static String repareText(String text){ return convertHTMLToXMLEntities(repareAutoClosedHTMLTags(text)); } private static String convertHTMLToXMLEntities(String text){ StringBuilder stringBuilderText = new StringBuilder(text); for(String htmlEntity: HTML_XML_ENTITIES.keySet()){ replaceString(stringBuilderText, htmlEntity.toLowerCase(), HTML_XML_ENTITIES.get(htmlEntity)); replaceString(stringBuilderText, htmlEntity.toUpperCase(), HTML_XML_ENTITIES.get(htmlEntity)); } return stringBuilderText.toString(); } private static String repareAutoClosedHTMLTags(final String text){ String textAux = new String(text); for(String tag: AUTO_CLOSED_HTML_TAGS.keySet()){ textAux = textAux.replaceAll(tag, AUTO_CLOSED_HTML_TAGS.get(tag)); } return textAux; } private static StringBuilder replaceString(StringBuilder text, String search, String replace) { int fromIndex = 0; int start = text.indexOf(search, fromIndex); if (start == -1) { return text; } if (replace.length() > 0) { int end = 0; int endAdjust = (search.length() - replace.length()); do { end = (start + replace.length()) + endAdjust; text.replace(start, end, replace); fromIndex = end; } while ((start = text.indexOf(search, fromIndex)) != -1); } else { do { text.delete(start, search.length()); fromIndex = start + replace.length(); } while ((start = text.indexOf(search, fromIndex)) != -1); } return text; } }