/* ******************************************************************************
 * Copyright (c) 2006-2012 XMind Ltd. and others.
 * 
 * This file is a part of XMind 3. XMind releases 3 and
 * above are dual-licensed under the Eclipse Public License (EPL),
 * which is available at http://www.eclipse.org/legal/epl-v10.html
 * and the GNU Lesser General Public License (LGPL), 
 * which is available at http://www.gnu.org/licenses/lgpl.html
 * See http://www.xmind.net/license.html for details.
 * 
 * Contributors:
 *     XMind Ltd. - initial API and implementation
 *******************************************************************************/
package org.xmind.ui.util;

import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers for decoding character references embedded in text.
 * <p>
 * The class cannot be instantiated; use {@link #unescape(String)}.
 */
public class EncodingUtils {

    /**
     * Named entities understood by {@link #unescape(String)}, keyed by the
     * entity name without the surrounding <code>&amp;</code> and
     * <code>;</code>.
     */
    private static final Map<String, String> ENTITIES = new HashMap<String, String>(
            5);

    static {
        ENTITIES.put("lt", "<"); //$NON-NLS-1$ //$NON-NLS-2$
        ENTITIES.put("gt", ">"); //$NON-NLS-1$ //$NON-NLS-2$
        ENTITIES.put("amp", "&"); //$NON-NLS-1$ //$NON-NLS-2$
        ENTITIES.put("apos", "'"); //$NON-NLS-1$ //$NON-NLS-2$
        ENTITIES.put("quot", "\""); //$NON-NLS-1$ //$NON-NLS-2$
    }

    /**
     * Matches one <code>&amp;name;</code> reference and captures the name.
     * The name may not contain '&amp;', so a stray ampersand earlier in the
     * text cannot swallow a genuine reference that follows it.
     */
    private static final Pattern ESCAPER = Pattern.compile("&([^&;]+);"); //$NON-NLS-1$

    /** Matches a run of whitespace; compiled once instead of on every call. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+"); //$NON-NLS-1$

    private EncodingUtils() {
    }

    /**
     * Replaces character references in the given text with the characters
     * they denote, then collapses every run of whitespace into a single
     * space.
     * <p>
     * Recognised references are the named entities <code>lt</code>,
     * <code>gt</code>, <code>amp</code>, <code>apos</code> and
     * <code>quot</code>, decimal references (<code>&amp;#65;</code>) and
     * hexadecimal references (<code>&amp;#x41;</code>). Anything else that
     * looks like a reference (an unknown name, an unparsable number or a
     * number that is not a valid Unicode code point) is copied to the result
     * unchanged.
     * <p>
     * Whitespace is collapsed after decoding, so a whitespace character
     * produced by a reference (for example <code>&amp;#10;</code>) is
     * collapsed as well.
     * 
     * @param text
     *            the text to decode; must not be <code>null</code>
     * @return the decoded text with whitespace runs collapsed
     * @throws NullPointerException
     *             if <code>text</code> is <code>null</code>
     */
    public static String unescape(String text) {
        StringBuffer buffer = new StringBuffer(text.length());
        Matcher matcher = ESCAPER.matcher(text);
        while (matcher.find()) {
            String unescaped = decode(matcher.group(1));
            if (unescaped == null) {
                // Not something we can decode: keep the reference verbatim.
                unescaped = matcher.group();
            }
            // appendReplacement interprets '$' and '\' in its argument, so
            // the literal text has to be quoted first.
            matcher.appendReplacement(buffer,
                    Matcher.quoteReplacement(unescaped));
        }
        matcher.appendTail(buffer);
        return WHITESPACE.matcher(buffer).replaceAll(" "); //$NON-NLS-1$
    }

    /**
     * Decodes the body of a single reference (the text between
     * <code>&amp;</code> and <code>;</code>).
     * 
     * @param entity
     *            the reference body, e.g. <code>lt</code>, <code>#65</code>
     *            or <code>#x41</code>
     * @return the decoded text, or <code>null</code> if the body is not a
     *         known entity name or a valid numeric reference
     */
    private static String decode(String entity) {
        if (entity.length() > 1 && entity.charAt(0) == '#') {
            boolean hex = entity.length() > 2 && entity.charAt(1) == 'x';
            int charCode;
            try {
                charCode = hex ? Integer.parseInt(entity.substring(2), 16)
                        : Integer.parseInt(entity.substring(1), 10);
            } catch (NumberFormatException e) {
                return null;
            }
            if (!Character.isValidCodePoint(charCode)) {
                return null;
            }
            // toChars yields a surrogate pair for code points above U+FFFF,
            // which a plain (char) cast would truncate.
            return new String(Character.toChars(charCode));
        }
        return ENTITIES.get(entity);
    }
}