/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.metamodel.csv;
import java.io.File;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

import junit.framework.TestCase;

import org.apache.metamodel.DataContext;
import org.apache.metamodel.data.DataSet;
import org.apache.metamodel.data.Row;
import org.apache.metamodel.query.Query;
import org.apache.metamodel.query.SelectItem;
import org.apache.metamodel.schema.Table;
/**
 * Memory/performance-oriented tests that stream a large generated CSV file
 * through a {@link CsvDataContext} and verify that every row is read intact.
 */
public class CsvBigFileMemoryTest extends TestCase {

    private final int hugeFileRows = 3000;
    private final int hugeFileCols = 2000;

    /**
     * Returns the large CSV fixture, generating it on first use.
     *
     * @return a file with {@code hugeFileRows} rows of {@code hugeFileCols} columns
     */
    private File getHugeFile() {
        final File file = new File("target/huge_csv.csv");
        if (!file.exists()) {
            final ExampleDataGenerator exampleDataGenerator = new ExampleDataGenerator(hugeFileRows, hugeFileCols);
            exampleDataGenerator.createFile(file);
        }
        return file;
    }

    /**
     * Runs a performance test based on the data created by the
     * ExampleDataCreator utility. Each row is handed to a worker pool that
     * checks the column count; the test fails if any row is malformed.
     *
     * @see ExampleDataGenerator
     * @throws Exception if reading or awaiting the workers fails
     */
    public void testHugeFile() throws Exception {
        final File file = getHugeFile();

        final long timeAtStart = System.currentTimeMillis();
        System.out.println("time at start: " + timeAtStart);

        // Single header line, no quote/escape handling.
        final DataContext dc = new CsvDataContext(file, new CsvConfiguration(1, false, false));
        final Table t = dc.getDefaultSchema().getTables()[0];

        final long timeAfterDataContext = System.currentTimeMillis();
        System.out.println("time after DataContext: " + timeAfterDataContext);

        final Query q = new Query().select(t.getColumns()).from(t);
        final DataSet ds = dc.executeQuery(q);

        final long timeAfterQuery = System.currentTimeMillis();
        System.out.println("time after query: " + timeAfterQuery);

        final CountDownLatch countDown = new CountDownLatch(hugeFileRows);
        final AtomicBoolean success = new AtomicBoolean(true);
        final ExecutorService executorService = Executors.newFixedThreadPool(30);
        try {
            while (ds.next()) {
                final Row row = ds.getRow();
                executorService.submit(new Runnable() {
                    @Override
                    public void run() {
                        if (hugeFileCols != row.getValues().length) {
                            System.out.println("Weird row: " + row);
                            success.set(false);
                        }
                        countDown.countDown();
                    }
                });
            }

            // Bounded await: if the dataset yields fewer rows than expected
            // the latch would never reach zero — fail instead of hanging the
            // build forever.
            assertTrue("Timed out waiting for all rows to be verified",
                    countDown.await(5, TimeUnit.MINUTES));
            assertTrue(success.get());
        } finally {
            // Always release resources, even when an assertion above fails;
            // otherwise the 30-thread pool and the open dataset would leak.
            ds.close();
            executorService.shutdown();
        }

        final long timeAfterDataSet = System.currentTimeMillis();
        System.out.println("time after dataSet: " + timeAfterDataSet);

        final long totalTime = timeAfterDataSet - timeAfterDataContext;
        System.out.println("Total time to process large file: " + totalTime + " millis");

        // results with old impl: [13908, 13827, 14577]. Total= 42312
        // results with new impl: [9052, 9200, 8193]. Total= 26445

        if (!file.delete()) {
            file.deleteOnExit();
        }
    }

    /**
     * Verifies that an approximated COUNT(*) on the huge file returns the
     * expected (file-size based) estimate without scanning all rows.
     */
    public void testApproximatedCountHugeFile() throws Exception {
        final DataContext dc = new CsvDataContext(getHugeFile());
        final Table table = dc.getDefaultSchema().getTables()[0];
        final Query q = dc.query().from(table).selectCount().toQuery();
        final SelectItem selectItem = q.getSelectClause().getItem(0);
        selectItem.setFunctionApproximationAllowed(true);

        final DataSet ds = dc.executeQuery(q);
        try {
            assertTrue(ds.next());
            final Object[] values = ds.getRow().getValues();
            assertEquals(1, values.length);
            assertEquals(3332, ((Long) ds.getRow().getValue(selectItem)).intValue());
            assertEquals(3332, ((Long) values[0]).intValue());
            assertFalse(ds.next());
        } finally {
            // Close the dataset even on assertion failure (the original
            // version never closed it at all).
            ds.close();
        }
    }
}