package nars.io;
import java.lang.reflect.Field;
import java.nio.CharBuffer;
import java.text.DecimalFormat;
import java.text.Format;
import java.util.HashMap;
import java.util.Map;
/**
* Utilities for process Text & String input/output, ex: encoding/escaping and decoding/unescaping Terms
*/
public class Texts {
//TODO find more appropriate symbol mapping
//TODO escape any mapped characters if they appear in input during encoding
//http://www.ssec.wisc.edu/~tomw/java/unicode.html
public final static Map<Character,Character> escapeMap = new HashMap(256);
public final static Map<Character,Character> escapeMapReverse = new HashMap(256);
static {
char[][] escapings = new char[][] {
{':', '\u25B8'},
{' ', '\u2581'},
{'%', '\u25B9'},
{'#', '\u25BA'},
{'&', '\u25BB'},
{'?', '\u25FF'},
{'/', '\u279A'},
{'=', '\u25BD'},
{';', '\u25BE'},
{'-', '\u25BF'},
{'.', '\u00B8'},
{'<', '\u25B4'},
{'>', '\u25B5'},
{'[', '\u25B6'},
{']', '\u25B7'},
{'$', '\u25B3'}
};
for (final char[] pair : escapings) {
Character existing = escapeMap.put(pair[0], pair[1]);
if (existing!=null) {
System.err.println("escapeMap has duplicate key: " + pair[0] + " can not apply to both " + existing + " and " + pair[1] );
System.exit(1);
}
}
//generate reverse mapping
for (Map.Entry<Character, Character> e : escapeMap.entrySet())
escapeMapReverse.put(e.getValue(), e.getKey());
}
public static final Field sbval;
public static final Field val;
//Add reflection for String value access
static {
Field sv = null, sbv = null;
try {
sv = String.class.getDeclaredField("value");
//o = String.class.getDeclaredField("offset");
sbv = StringBuilder.class.getSuperclass().getDeclaredField("value");
sv.setAccessible(true);
sbv.setAccessible(true);
//o.setAccessible(true);
} catch (Exception ex) {
ex.printStackTrace();
System.exit(1);
}
val = sv;
sbval = sbv;
}
protected static StringBuilder escape(CharSequence s, boolean unescape, boolean useQuotes) {
StringBuilder b = new StringBuilder(s.length());
final Map<Character,Character> map = unescape ? escapeMapReverse : escapeMap;
boolean inQuotes = !useQuotes;
char lastChar = 0;
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
if (c == Symbols.QUOTE) {
b.append(Symbols.QUOTE);
if (useQuotes) {
if (lastChar != '\\')
inQuotes = !inQuotes;
}
continue;
}
if (!inQuotes) {
b.append(c);
continue;
}
Character d = map.get(c);
if (d == null)
d = c;
b.append(d);
if (unescape)
lastChar = d;
else
lastChar = c;
}
return b;
}
/** returns an escaped representation for input. ranges that begin and end with Symbols.QUOTE are escaped, otherwise the string is not modified.
*/
public static StringBuilder escape(CharSequence s) {
return escape(s, false, true);
}
/** returns an unescaped represntation of input */
public static StringBuilder unescape(CharSequence s) {
return escape(s, true, true);
}
//
// public static String enterm(String s) {
// return s.replaceAll(":", "\u25B8")
// .replaceAll(" ", "\u2581")
//
// .replaceAll(">", "\u25B5") //TODO find a different unicode char
// .replaceAll("[", "\u25B6") //TODO find a different unicode char
// .replaceAll("]", "\u25B7") //TODO find a different unicode char
// .replaceAll("$", "\u25B8") //TODO find a different unicode char
// .replaceAll("%", "\u25B9") //TODO find a different unicode char
// .replaceAll("#", "\u25BA") //TODO find a different unicode char
// .replaceAll("&", "\u25BB") //TODO find a different unicode char
// .replaceAll("\\?", "\u25FF") //TODO find a different unicode char
// .replaceAll("/", "\u279A") //TODO find a different unicode char
// .replaceAll("=", "\u25BD") //TODO find a different unicode char
// .replaceAll(";", "\u25BE") //TODO find a different unicode char
// .replaceAll("-", "\u25BF")
// .replaceAll("\\.", "\u00B8") //TODO find a different unicode char
// ;
//
// }
//
/** escapeLiteral does not involve quotes. this can be used to escape characters directly.*/
public static StringBuilder escapeLiteral(CharSequence s) {
return escape(s, false, false);
}
/** unescapeLiteral does not involve quotes. this can be used to unescape characters directly.*/
public static StringBuilder unescapeLiteral(CharSequence s) {
return escape(s, true, false);
}
/**
* Warning: don't modify the return char[] because it will beinconsistent with s.hashCode()
* @param String to invade
* @return the private char[] field in String class
*/
public static char[] getCharArray(String s) {
try {
return (char[]) val.get(s);
} catch (Exception ex) {
ex.printStackTrace();
}
return null;
}
public static char[] getCharArray(StringBuilder s) {
try {
return (char[]) sbval.get(s);
} catch (Exception ex) {
ex.printStackTrace();
}
return null;
}
/*
public static void main(String[] args) {
String s = "Immutable";
String t = "Notreally";
mutate(s, t);
StdOut.println(t);
// strings are interned so this doesn't even print "Immutable" (!)
StdOut.println("Immutable");
}
*/
/**
* @author http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Java
*/
public static int levenshteinDistance(final CharSequence a, final CharSequence b) {
int len0 = a.length() + 1;
int len1 = b.length() + 1;
int[] cost = new int[len0];
int[] newcost = new int[len0];
for (int i = 0; i < len0; i++) {
cost[i] = i;
}
for (int j = 1; j < len1; j++) {
newcost[0] = j;
final char bj = b.charAt(j - 1);
for (int i = 1; i < len0; i++) {
int match = (a.charAt(i - 1) == bj) ? 0 : 1;
int cost_replace = cost[i - 1] + match;
int cost_insert = cost[i] + 1;
int cost_delete = newcost[i - 1] + 1;
int c = cost_insert;
if (cost_delete < c) c = cost_delete;
if (cost_replace < c) c = cost_replace;
newcost[i] = c;
}
int[] swap = cost;
cost = newcost;
newcost = swap;
}
return cost[len0 - 1];
}
/** Change the first min(|s|, |t|) characters of s to t
TODO must reset the hashcode field
TODO this is untested and probably not yet functional
*/
public static void overwrite(String s, String t) {
try {
char[] value = (char[]) val.get(s);
for (int i = 0; i < Math.min(s.length(), t.length()); i++) {
value[i] = t.charAt(i);
}
} catch (Exception ex) {
ex.printStackTrace();
}
}
/** Half-way between a String and a Rope; concatenates a list of strings into an immutable CharSequence which is either:
* If a component is null, it is ignored.
* if total non-null components is 0, returns null
* if total non-null components is 1, returns that component.
* if the combined length <= maxLen, creates a StringBuilder appending them all.
* if the combined length > maxLen, creates a Rope appending them all.
*
* TODO do not allow a StringBuilder to appear in output, instead wrap in CharArrayRope
*/
public static CharSequence yarn(final int maxLen, final CharSequence... components) {
int totalLen = 0;
int total = 0;
CharSequence lastNonNull = null;
for (final CharSequence s : components) {
if (s != null) {
totalLen += s.length();
total++;
lastNonNull = s;
}
}
if (total == 0) {
return null;
}
if (total == 1) {
return lastNonNull.toString();
}
StringBuilder sb = new StringBuilder(totalLen);
for (final CharSequence s : components) {
if (s != null) {
sb.append(s);
}
}
return Texts.sequence(sb);
}
public static boolean containsChar(final CharSequence n, final char c) {
if (n instanceof String)
return ((String)n).indexOf(c)!=-1;
final int l = n.length();
for (int i = 0; i < l; i++)
if (n.charAt(i) == c)
return true;
return false;
}
/**
* wraps a StringBuilder in CharArrayRope for use as a general purpose immutable CharSequence.
* StringBuilder lacks hashCode and other support that CharArrayRope provides.
* CharArrayRope can use the StringBuilder's underlying char[] directly without copy.
*/
public static CharSequence sequence(StringBuilder b) {
return b; //new CharArrayRope(b);
}
final static Format fourDecimal = new DecimalFormat("0.0000");
public static final String n4(final float x) { return fourDecimal.format(x); }
final static Format twoDecimal = new DecimalFormat("0.00");
public static final String n2Slow(final float x) { return twoDecimal.format(x); }
public static long thousandths(final float d) {
return (long) ((d * 1000f + 0.5f));
}
public static long hundredths(final float d) {
return (long) ((d * 100f + 0.5f));
}
public static final CharSequence n2(final float x) {
if ((x < 0) || (x > 1.0f))
throw new RuntimeException("Invalid value for Texts.n2");
int hundredths = (int)hundredths(x);
switch (hundredths) {
//some common values
case 100: return "1.00";
case 99: return "0.99";
case 90: return "0.90";
case 0: return "0.00";
}
if (hundredths > 9) {
int tens = hundredths/10;
return new String(new char[] {
'0', '.', (char)('0' + tens), (char)('0' + hundredths%10)
});
}
else {
return new String(new char[] {
'0', '.', '0', (char)('0' + hundredths)
});
}
}
final static Format oneDecimal = new DecimalFormat("0.0");
public static final String n1(final float x) { return oneDecimal.format(x); }
public static int compareTo(final CharSequence s, final CharSequence t) {
if ((s instanceof String) && (t instanceof String)) {
return ((String)s).compareTo((String)t);
}
else if ((s instanceof CharBuffer) && (t instanceof CharBuffer)) {
return ((CharBuffer)s).compareTo((CharBuffer)t);
}
int i = 0;
final int sl = s.length();
final int tl = t.length();
while (i < sl && i < tl) {
char a = s.charAt(i);
char b = t.charAt(i);
int diff = a - b;
if (diff != 0)
return diff;
i++;
}
return sl - tl;
}
public static CharSequence n2(final double p) {
return n2((float)p);
}
// /** fast append to CharBuffer */
// public final static CharBuffer append(final CharBuffer c, final CharSequence s) {
// if (s instanceof CharBuffer) {
//
// c.append((CharBuffer)s);
// return c;
// }
// else if (s instanceof String) {
// //c.put(getCharArray((String)s), 0, s.length());
// return c.append(s);
// }
// else {
// return c.append(s);
// }
// }
// public final static CharBuffer append(final CharBuffer c, final CharBuffer s) {
// return c.put(s);
// }
// public final static CharBuffer append(final CharBuffer c, final String s) {
// return c.put(getCharArray(s));
// }
// public final static CharBuffer append(final CharBuffer b, final char c) {
// return b.put(c);
// }
}