package com.plugtree.solrmeter;
import java.io.UnsupportedEncodingException;
import java.util.List;
import com.plugtree.solrmeter.model.FileUtils;
import com.plugtree.solrmeter.model.SolrMeterConfiguration;
public class FileUtilsTest extends BaseTestCase {
private String[] utfCodes = new String[] {"C3A2",
"C3AA",
"C3AE",
"C3B4",
"C3BB",
"C3A1",
"C3A9",
"C3AD",
"C3B3",
"C3BA",
"C3A0",
"C3A8",
"C3AC",
"C3B2",
"C3B9",
"C3A4",
"C3AB",
"C3AF",
"C3B6",
"C3BC",
"C3A3",
"C3B5",
"C3B1",
"C3A7",
"C387",
"C3B0",
"C390",
"C398",
"C3B8",
"C3A5",
"C385",
"C3A6",
"C386",
"C39F",
"C2BF",
"C2A1"};
public void testLoadStringsFromFileInternationalCharactersUtf8() throws UnsupportedEncodingException {
List<String> queries = FileUtils.loadStringsFromFile("internationalQueries.txt");
compareAll(queries);
}
public void testLoadStringsFromFileInternationalCharactersIso88591() throws UnsupportedEncodingException {
SolrMeterConfiguration.setProperty("files.charset", "ISO-8859-1");
List<String> queries = FileUtils.loadStringsFromFile("internationalQueriesISO_8859_1.txt");
compareAll(queries);
}
/*
* Created after http://code.google.com/p/solrmeter/issues/detail?id=103
*/
public void testWindowsInitialBOMIssue() throws UnsupportedEncodingException {
SolrMeterConfiguration.setProperty("files.charset", "UTF-8");
List<String> queries = FileUtils.loadStringsFromFile("windowsBOMProblem.txt");
assertEquals("The file only has one query", 1, queries.size());
assertEquals("numNights:[7 TO 14] AND numAdults:2 AND tourDate:[2012-08-09T01:00:00.000Z TO 2012-08-31T01:00:00.000Z]", queries.get(0));
queries = FileUtils.loadStringsFromFile("regularUTF-8.txt");
assertEquals("The file only has one query", 1, queries.size());
assertEquals("numNights:[7 TO 14] AND numAdults:2 AND tourDate:[2012-08-09T01:00:00.000Z TO 2012-08-31T01:00:00.000Z]", queries.get(0));
}
private void compareAll(List<String> queries)
throws UnsupportedEncodingException {
int i = 0;
for (String s:queries) {
assertEquals("Failed for character " + utfCodes[i],utfCodes[i], byteArrayToHexString(s.getBytes("UTF-8")));
i++;
}
}
private String byteArrayToHexString(byte in[]) {
byte ch = 0x00;
int i = 0;
if (in == null || in.length <= 0)
return null;
String pseudo[] = {"0", "1", "2",
"3", "4", "5", "6", "7", "8",
"9", "A", "B", "C", "D", "E",
"F"};
StringBuffer out = new StringBuffer(in.length * 2);
while (i < in.length) {
ch = (byte) (in[i] & 0xF0); // Strip off high nibble
ch = (byte) (ch >>> 4); // shift the bits down
ch = (byte) (ch & 0x0F); // must do this is high order bit is on!
out.append(pseudo[ (int) ch]); // convert the nibble to a String Character
ch = (byte) (in[i] & 0x0F); // Strip off low nibble
out.append(pseudo[ (int) ch]); // convert the nibble to a String Character
i++;
}
String rslt = new String(out);
return rslt;
}
}