/*************************************************************************** * Copyright 2010 Global Biodiversity Information Facility Secretariat * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. ***************************************************************************/ package org.gbif.io; import org.gbif.utils.file.FileUtils; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import org.apache.commons.lang3.text.StrTokenizer; import org.junit.Test; public class StrTokenizerPerformance { private long test(StrTokenizer tokenizer, File source) throws IOException { FileInputStream fis = new FileInputStream(source); InputStreamReader reader = new InputStreamReader(fis, "utf8"); BufferedReader br = new BufferedReader(reader); // keep track of time while iterating long start = System.currentTimeMillis(); String row = br.readLine(); while (row != null) { tokenizer.reset(row); String[] columns = tokenizer.getTokenArray(); row = br.readLine(); } long dur = System.currentTimeMillis() - start; br.close(); return dur; } @Test public void testCharVsStringPerformance() throws IOException { File source = FileUtils.getClasspathFile("irmng.tail"); // test CHAR StrTokenizer tokenizer = new StrTokenizer(); tokenizer.setDelimiterChar('\t'); tokenizer.setEmptyTokenAsNull(true); tokenizer.setIgnoreEmptyTokens(false); long time = test(tokenizer, source); System.out.println(time + " milliseconds for CHAR based tokenizer."); // test STRING tokenizer = new StrTokenizer(); tokenizer.setDelimiterString("\t"); tokenizer.setEmptyTokenAsNull(true); time = test(tokenizer, source); System.out.println(time + " milliseconds for STRING based tokenizer."); } }