/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.albite.lang;
import java.util.Vector;
/**
* Basic text processing tools
*
* @author albus
*/
public final class TextTools {
/** Private and empty: the class only exposes static helpers, so outside code cannot instantiate it. */
private TextTools() {}
//#if !(TinyMode || TinyModeExport || LightMode || LightModeExport)
/**
 * Prepares a word for lookup in a dictionary.
 * <p>
 * First, every character that {@code AlbiteCharacter.isLetterOrDigit}
 * rejects is stripped from both sides of the text.
 * <p>
 * If what remains starts with a digit, the word is treated as a number:
 * <ul>
 * <li>a minus sign (ASCII hyphen-minus, U+2212 MINUS SIGN or U+2013 EN DASH)
 *     standing immediately before the first digit is kept;</li>
 * <li>trailing characters other than digits, '.' and ',' are dropped, as they
 *     most probably show the unit of a physical quantity (98.6F, -273.15F);</li>
 * <li>every ',' is replaced by '.' and every minus variant by '-', so that
 *     the result is easier to feed to {@code Double.parseDouble()}.</li>
 * </ul>
 *
 * @param buffer input character buffer
 * @param pos index in {@code buffer} of the first character of the text
 * @param len number of characters of the text
 * @return a new String with the cleaned-up word; empty if the text holds
 *         no letter or digit at all
 */
public static String prepareForDict(
        final char[] buffer, final int pos, final int len) {

    int l = pos;
    int r = pos + len - 1;

    /* Strip everything that is not a letter or a digit from the left... */
    while (l <= r && !AlbiteCharacter.isLetterOrDigit(buffer[l])) {
        l++;
    }

    /* ...and from the right. */
    while (r >= l && !AlbiteCharacter.isLetterOrDigit(buffer[r])) {
        r--;
    }

    /* Nothing left, or an ordinary word: return it as it is. */
    if (l > r || !Character.isDigit(buffer[l])) {
        return new String(buffer, l, r - l + 1);
    }

    /*
     * The word is a number. The stripping above has thrown away a
     * leading minus sign together with the rest of the punctuation,
     * so put it back if it directly precedes the first digit
     * (and still lies within the given text range).
     */
    if (l > pos && isMinusSign(buffer[l - 1])) {
        l--;
    }

    /*
     * Remove the unit from the right. The loop is bound to stop at the
     * first digit of the number at the latest.
     */
    while (r >= l
            && buffer[r] != '.'
            && buffer[r] != ','
            && !Character.isDigit(buffer[r])) {
        r--;
    }

    /*
     * A dedicated buffer makes it easy to swap the characters that
     * Double.parseDouble() would not accept.
     */
    final char[] b2 = new char[r - l + 1];
    for (int i = 0; i < b2.length; i++) {
        final char c = buffer[l + i];
        if (c == ',') {
            b2[i] = '.';
        } else if (isMinusSign(c)) {
            b2[i] = '-';
        } else {
            b2[i] = c;
        }
    }

    return new String(b2);
}

/**
 * Tells whether the character is one of the minus signs accepted in
 * front of a number: ASCII hyphen-minus, U+2212 or U+2013.
 */
private static boolean isMinusSign(final char c) {
    return c == '-' || c == '\u2212' || c == '\u2013';
}
/**
 * Compares two character ranges, char value by char value.
 * <p>
 * The ranges are compared over their common length; if they agree there,
 * the shorter range sorts first.
 *
 * @param c1 array holding the first range
 * @param c1Offset index of the first character of the first range
 * @param c1Len length of the first range
 * @param c2 array holding the second range
 * @param c2Offset index of the first character of the second range
 * @param c2Len length of the second range
 * @return -1 if the first range sorts before the second one,
 *         1 if it sorts after it, 0 if both hold the same text
 */
public static int compareCharArrays(
        final char[] c1, final int c1Offset, final int c1Len,
        final char[] c2, final int c2Offset, final int c2Len) {

    /* Only the part that both ranges have can be compared char by char */
    final int common = (c1Len < c2Len) ? c1Len : c2Len;

    int idx = 0;
    while (idx < common) {
        final int diff = c1[c1Offset + idx] - c2[c2Offset + idx];
        if (diff != 0) {
            /* First mismatch decides the order */
            return (diff < 0) ? -1 : 1;
        }
        idx++;
    }

    /* All common chars are the same, so the length decides */
    if (c1Len == c2Len) {
        return 0;
    }
    return (c1Len < c2Len) ? -1 : 1;
}
/**
 * Tells whether two character ranges hold exactly the same text.
 * <p>
 * Ranges of different lengths are never equal, which keeps this method
 * in agreement with {@code compareCharArrays} returning 0.
 *
 * @param c1 array holding the first range
 * @param c1Offset index of the first character of the first range
 * @param c1Len length of the first range
 * @param c2 array holding the second range
 * @param c2Offset index of the first character of the second range
 * @param c2Len length of the second range
 * @return true if both ranges have the same length and the same characters
 */
public static boolean equalsCharArrays(
        final char[] c1, final int c1Offset, final int c1Len,
        final char[] c2, final int c2Offset, final int c2Len) {

    /*
     * Without this check a range that is merely a prefix of the other
     * one would be reported as equal, and a shorter second range could
     * be read past its end.
     */
    if (c1Len != c2Len) {
        return false;
    }

    for (int i = 0; i < c1Len; i++) {
        if (c1[i + c1Offset] != c2[i + c2Offset]) {
            /* found a mismatch */
            return false;
        }
    }

    return true;
}
/**
 * Binary search for a word in an array of words.
 * <p>
 * The words are compared using {@code compareCharArrays}, so the array
 * is expected to be sorted in ascending order according to that method.
 *
 * @param haystack the words to search in
 * @param key the word to look for
 * @return the index of the word, if found; otherwise
 *         {@code -(insertion point) - 1}, where the insertion point is
 *         the index at which the search ended. The result is therefore
 *         negative whenever the word is missing, even if the insertion
 *         point is zero.
 */
public static int binarySearch(final char[][] haystack, final char[] key) {
    int lo = 0;
    int hi = haystack.length;

    /* The search window is [lo, hi) */
    while (lo < hi) {
        final int mid = lo + ((hi - lo) / 2);
        final char[] candidate = haystack[mid];

        final int order = compareCharArrays(
                key, 0, key.length,
                candidate, 0, candidate.length);

        if (order < 0) {
            /* key sorts before the candidate: continue in the lower half */
            hi = mid;
        } else if (order > 0) {
            /* key sorts after the candidate: continue in the upper half */
            lo = mid + 1;
        } else {
            return mid;
        }
    }

    /*
     * Not found. Shifting the negated index by one keeps the result
     * below zero, so it cannot be mistaken for a hit at index zero.
     */
    return -(lo + 1);
}
//endif
// public static void toLowerCase(
// final char[] ch, final int off, final int len) {
//
// for (int i = 0; i < len; i++) {
// ch[off + i] = Character.toLowerCase(ch[off + i]);
// }
// }
//#endif
/**
 * Splits a string at every occurrence of any of the given separators.
 * <p>
 * The separators themselves are not included in the result. Adjacent
 * separators, as well as a separator at the very start or end of the
 * string, produce empty strings, so the result always holds one element
 * more than there are separators in the input.
 *
 * @param string the text to split
 * @param separators the characters to split at
 * @return a Vector of Strings, in the order they appear in the input
 */
public static Vector split(final String string, final char[] separators) {
    final int length = string.length();
    final Vector parts = new Vector();

    /* Start of the piece that is currently being collected */
    int from = 0;

    for (int hit = indexOf(string, separators, from);
            hit != -1 && hit < length;
            hit = indexOf(string, separators, from)) {

        parts.addElement(string.substring(from, hit));
        from = hit + 1;
    }

    /* Whatever follows the last separator (possibly nothing) */
    parts.addElement(string.substring(from));
    return parts;
}
/**
 * Finds the first position, at or after {@code start}, of a character
 * that is equal to any of the given needles.
 *
 * @param string the text to search in
 * @param needles the characters to look for
 * @param start index to start searching from
 * @return the index of the first matching character,
 *         or -1 if none of the needles is found
 */
public static int indexOf(
        final String string, final char[] needles, final int start) {

    final int end = string.length();

    int at = start;
    while (at < end) {
        final char current = string.charAt(at);

        /* Any needle will do, so the order they are tried in is irrelevant */
        int n = needles.length;
        while (--n >= 0) {
            if (needles[n] == current) {
                return at;
            }
        }

        at++;
    }

    return -1;
}
}