/*
* Copyright (C) 2009 eXo Platform SAS.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.exoplatform.services.jcr.impl.util;
import org.exoplatform.services.log.ExoLogger;
import org.exoplatform.services.log.Log;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Created by The eXo Platform SAS.
*
* @author <a href="mailto:geaz@users.sourceforge.net">Gennady Azarenkov</a>
* @version $Id: StringConverter.java 11907 2008-03-13 15:36:21Z ksm $
*/
public class StringConverter
{
   /** Message of the exception thrown when a string does not start with a known encoded sequence. */
   private static final String ILLEGAL_DNCHAR = "StringConverter: empty string for denormalization to char";

   /** Pattern on an encoded character */
   private static final Pattern ENCODE_PATTERN = Pattern.compile("_x\\p{XDigit}{4}_");

   /** Number of characters in an encoded character sequence such as {@code _x0020_}. */
   private static final int ENCODE_CHARS = 7;

   /**
    * Encoded sequences understood by the denormalization methods. The entry at index {@code k}
    * decodes to {@code DECODED_CHARS[k]}. No entry is a prefix of another one, so the lookup
    * order does not influence the result.
    *
    * Historical note on {@code _x0009_}: decoding it was reported to cause 4 failures in the TCK.
    * If it is not decoded, the text keeps the "_x0009_" value instead of "\t". The failures were
    * attributed to the checkImportSimpleXMLTree method of DocumentViewImportTest, whose check
    * <pre>
    * if (!propVal.equals(encodedAttributeValue) || !propVal.equals(encodedAttributeValue))
    * {
    *    fail("Value " + encodedAttributeValue + " of attribute " + decodedAttributeName
    *       + " is not correctly imported.");
    * }
    * </pre>
    * requires propVal to be equal to the encoded version of the value. The sequence is
    * nevertheless decoded here.
    */
   private static final String[] ENCODED_SEQUENCES =
      {"&lt;", "&gt;", "&amp;", "&quot;", "&apos;", "_x000D_", "_x000A_", "_x0009_", "_x0020_", "_x005f_"};

   /** Characters the entries of {@code ENCODED_SEQUENCES} decode to, index by index. */
   private static final char[] DECODED_CHARS = {'<', '>', '&', '"', '\'', '\r', '\n', '\t', ' ', '_'};

   /**
    * Result of decoding one encoded sequence: the decoded character together with the number of
    * input characters the encoded form occupied.
    */
   private static final class DNChar
   {
      private final char dnChar;

      private final int dnLength;

      public DNChar(char dnChar, int dnLength)
      {
         this.dnChar = dnChar;
         this.dnLength = dnLength;
      }

      public char getDnChar()
      {
         return dnChar;
      }

      public int getDnLength()
      {
         return dnLength;
      }
   }

   /**
    * Normalizes the given string character by character using
    * {@link #normalizeChar(char, boolean)}.
    * <p>
    * An underscore is only passed to {@code normalizeChar} when it starts a sequence that looks
    * like an encoded character ({@code _xHHHH_}); any other underscore is copied unchanged.
    *
    * @param s the string to normalize, may be {@code null}
    * @param canonical whether whitespace characters and escaped underscores are written in the
    *          {@code _xHHHH_} form
    * @return the normalized string; an empty string when {@code s} is {@code null}
    */
   public static String normalizeString(String s, boolean canonical)
   {
      if (s == null)
      {
         return "";
      }
      int len = s.length();
      StringBuilder strBuf = new StringBuilder(len);
      for (int i = 0; i < len; i++)
      {
         char c = s.charAt(i);
         if (c == '_' && !startsEncodedSequence(s, i))
         {
            // a plain underscore cannot be mistaken for an encoded character, keep it as is
            strBuf.append(c);
         }
         else
         {
            strBuf.append(normalizeChar(c, canonical));
         }
      }
      return strBuf.toString();
   }

   /**
    * Tells whether the {@code ENCODE_CHARS} characters of {@code s} starting at {@code offset}
    * form an encoded character sequence.
    */
   private static boolean startsEncodedSequence(String s, int offset)
   {
      // ">=" so that a sequence ending exactly at the end of the string is recognized too
      return s.length() - offset >= ENCODE_CHARS
         && ENCODE_PATTERN.matcher(s).region(offset, offset + ENCODE_CHARS).matches();
   }

   /**
    * Reverses {@link #normalizeString(String, boolean)}: every known encoded sequence is
    * replaced by the character it stands for, every other character is copied unchanged.
    *
    * @param s the string to denormalize, may be {@code null}
    * @return the denormalized string; an empty string when {@code s} is {@code null}
    */
   public static String denormalizeString(String s)
   {
      if (s == null)
      {
         return "";
      }
      int len = s.length();
      StringBuilder strBuf = new StringBuilder(len);
      int i = 0;
      while (i < len)
      {
         DNChar dnc = decodeAt(s, i);
         if (dnc == null)
         {
            // not an encoded sequence: copy the character itself
            strBuf.append(s.charAt(i));
            i++;
         }
         else
         {
            strBuf.append(dnc.getDnChar());
            i += dnc.getDnLength();
         }
      }
      return strBuf.toString();
   }

   /**
    * Normalizes the given character.
    * <p>
    * The characters {@code <}, {@code >}, {@code &}, {@code "} and {@code '} are always replaced
    * by {@code &lt;}, {@code &gt;}, {@code &amp;}, {@code &quot;} and {@code &apos;}. Carriage
    * return, line feed, tab, space and underscore are replaced by their {@code _xHHHH_} form
    * only when {@code canonical} is {@code true}. Any other character is returned unchanged.
    *
    * @param c the character to normalize
    * @param canonical whether the {@code _xHHHH_} replacements are applied
    * @return the normalized form of {@code c}
    */
   public static String normalizeChar(char c, boolean canonical)
   {
      switch (c)
      {
         case '<' :
            return "&lt;";
         case '>' :
            return "&gt;";
         case '&' :
            return "&amp;";
         case '"' :
            return "&quot;";
         case '\'' :
            return "&apos;";
         default :
            break;
      }
      if (canonical)
      {
         switch (c)
         {
            case '\r' :
               return "_x000D_";
            case '\n' :
               return "_x000A_";
            case '\t' :
               return "_x0009_";
            case ' ' :
               return "_x0020_";
            case '_' :
               return "_x005f_";
            default :
               break;
         }
      }
      // default: print the char itself
      return String.valueOf(c);
   }

   /**
    * Decodes the encoded sequence found at the beginning of the given string.
    *
    * @param string a string starting with one of the known encoded sequences
    * @return the character the leading sequence stands for
    * @throws IllegalArgumentException if the string does not start with a known encoded sequence
    */
   public static char denormalizeChar(String string)
   {
      return denormalize(string).getDnChar();
   }

   /**
    * Decodes the encoded sequence found at the beginning of the given string.
    *
    * @param string a string starting with one of the known encoded sequences
    * @return the decoded character and the length of its encoded form
    * @throws IllegalArgumentException if the string does not start with a known encoded sequence
    */
   private static DNChar denormalize(String string)
   {
      DNChar dnc = decodeAt(string, 0);
      if (dnc == null)
      {
         throw new IllegalArgumentException(ILLEGAL_DNCHAR);
      }
      return dnc;
   }

   /**
    * Looks for a known encoded sequence at the given position.
    *
    * @param string the string to inspect
    * @param offset the index at which the encoded sequence is expected to start
    * @return the decoded character and the length of its encoded form, or {@code null} when no
    *         known encoded sequence starts at {@code offset}
    */
   private static DNChar decodeAt(String string, int offset)
   {
      for (int k = 0; k < ENCODED_SEQUENCES.length; k++)
      {
         String encoded = ENCODED_SEQUENCES[k];
         if (string.startsWith(encoded, offset))
         {
            return new DNChar(DECODED_CHARS[k], encoded.length());
         }
      }
      return null;
   }
}