package net.sf.jabref.imports;

import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import net.sf.jabref.export.layout.LayoutFormatter;

/**
 * Layout formatter that turns a fragment of HTML into plain/LaTeX-style text.
 *
 * <p>Processing happens in three steps:
 * <ol>
 *   <li>short HTML tags ({@code <...>}) are removed;</li>
 *   <li>a fixed set of named and numeric character references is replaced by
 *       a textual equivalent (see the tables below);</li>
 *   <li>the result is trimmed.</li>
 * </ol>
 * Character references that are not in the tables are left in the text
 * unchanged.
 *
 * @author alver
 */
public class HTMLConverter implements LayoutFormatter {

    /**
     * Upper bound (exclusive) on the distance between a {@code '<'} and its
     * closing {@code '>'} for the span to be treated as a tag and removed.
     */
    private static final int MAX_TAG_LENGTH = 30;

    /**
     * Matches one character reference: group 1 is the numeric code without
     * the leading {@code #} (decimal digits, or {@code x}/{@code X} followed
     * by hex digits); group 2 is the entity name. Exactly one of the two
     * groups is non-null for any match.
     */
    private static final Pattern ENTITY =
            Pattern.compile("&(?:#([xX][0-9a-fA-F]+|\\d+)|([A-Za-z][A-Za-z0-9]*));");

    /** Named entities (without {@code &} and {@code ;}) and their replacement text. */
    private static final Map<String, String> NAMED_SYMBOLS = new HashMap<String, String>();

    /** Numeric code points and their replacement text. */
    private static final Map<Integer, String> NUMERIC_SYMBOLS = new HashMap<Integer, String>();

    static {
        NAMED_SYMBOLS.put("ldquo", "``");
        NAMED_SYMBOLS.put("rdquo", "''");
        // NOTE(review): the single-quote entities map to the same doubled
        // sequences as the double-quote entities, as in the previous table;
        // confirm whether a single ` and ' were intended.
        NAMED_SYMBOLS.put("lsquo", "``");
        NAMED_SYMBOLS.put("rsquo", "''");
        NAMED_SYMBOLS.put("nbsp", " ");
        NAMED_SYMBOLS.put("quot", "\"");
        NAMED_SYMBOLS.put("amp", "&");
        NAMED_SYMBOLS.put("lt", "<");
        NAMED_SYMBOLS.put("gt", ">");

        NUMERIC_SYMBOLS.put(Integer.valueOf(37), "%");
        NUMERIC_SYMBOLS.put(Integer.valueOf(38), "&");
        // 916 is U+0394, the capital Delta.
        NUMERIC_SYMBOLS.put(Integer.valueOf(916), "$\\Delta$");
        NUMERIC_SYMBOLS.put(Integer.valueOf(956), "$\\mu$");
        NUMERIC_SYMBOLS.put(Integer.valueOf(8208), "-");
        NUMERIC_SYMBOLS.put(Integer.valueOf(8211), "--");
        NUMERIC_SYMBOLS.put(Integer.valueOf(8212), "---");
        NUMERIC_SYMBOLS.put(Integer.valueOf(8217), "'");
    }

    /**
     * Strips short HTML tags from {@code text}, replaces the known character
     * references and trims the result.
     *
     * @param text the HTML fragment to convert; may be {@code null}
     * @return the converted, trimmed text, or {@code null} if {@code text}
     *         is {@code null}
     */
    public String format(String text) {
        if (text == null) {
            return null;
        }
        return decodeEntities(stripTags(text)).trim();
    }

    /**
     * Copies {@code text} while leaving out every span recognised by
     * {@link #readTag(String, int)}. A {@code '<'} that does not start such a
     * span is copied like any other character.
     */
    private static String stripTags(String text) {
        StringBuilder out = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c != '<') {
                out.append(c);
                continue;
            }
            int end = readTag(text, i);
            if (end == i) {
                // Not a tag (no '>' nearby): keep the literal '<'.
                out.append(c);
            } else {
                // Skip everything up to and including the closing '>'.
                i = end;
            }
        }
        return out.toString();
    }

    /**
     * Replaces every known character reference in {@code text} in a single
     * left-to-right pass, so text produced by one replacement is never
     * decoded a second time.
     */
    private static String decodeEntities(String text) {
        Matcher m = ENTITY.matcher(text);
        StringBuffer out = new StringBuffer(text.length());
        while (m.find()) {
            String replacement;
            if (m.group(1) != null) {
                replacement = decodeNumeric(m.group(1));
            } else {
                replacement = NAMED_SYMBOLS.get(m.group(2));
            }
            if (replacement == null) {
                // Unknown reference: keep it exactly as written.
                replacement = m.group();
            }
            // quoteReplacement: '$' and '\' in the LaTeX replacements must be
            // taken literally, not as group references/escapes.
            m.appendReplacement(out, Matcher.quoteReplacement(replacement));
        }
        m.appendTail(out);
        return out.toString();
    }

    /**
     * Looks up the replacement for a numeric character reference.
     *
     * @param code the reference without {@code &#} and {@code ;}, e.g.
     *             {@code "37"} or {@code "x25"}
     * @return the replacement text, or {@code null} if the code is not in the
     *         table or does not fit in an {@code int}
     */
    private static String decodeNumeric(String code) {
        int num;
        try {
            char first = code.charAt(0);
            if (first == 'x' || first == 'X') {
                num = Integer.parseInt(code.substring(1), 16);
            } else {
                // Explicit radix: a leading zero must not be read as octal.
                num = Integer.parseInt(code, 10);
            }
        } catch (NumberFormatException e) {
            // Value too large for an int; treat like any unknown reference.
            return null;
        }
        String replacement = NUMERIC_SYMBOLS.get(Integer.valueOf(num));
        if (replacement == null) {
            System.err.println("HTML escaped char not converted " + code + ": " + Integer.toString(num));
        }
        return replacement;
    }

    /**
     * Finds the end of a tag whose opening {@code '<'} is at {@code position}.
     *
     * @param text     the text being scanned
     * @param position index of the {@code '<'} character
     * @return the index of the closing {@code '>'} if one exists less than
     *         {@link #MAX_TAG_LENGTH} characters after {@code position};
     *         otherwise {@code position} itself
     */
    private static int readTag(String text, int position) {
        int index = text.indexOf('>', position);
        if ((index > position) && (index - position < MAX_TAG_LENGTH)) {
            return index;
        }
        return position;
    }
}