package com.ldbc.driver.workloads.ldbc.snb.interactive.performance; import com.google.common.base.Charsets; import com.google.common.collect.Lists; import com.ldbc.driver.Operation; import com.ldbc.driver.csv.charseeker.BufferedCharSeeker; import com.ldbc.driver.csv.charseeker.CharSeeker; import com.ldbc.driver.csv.charseeker.Extractors; import com.ldbc.driver.csv.charseeker.Mark; import com.ldbc.driver.csv.charseeker.Readables; import com.ldbc.driver.csv.charseeker.ThreadAheadReadable; import com.ldbc.driver.csv.simple.SimpleCsvFileReader; import com.ldbc.driver.generator.CsvEventStreamReaderBasicCharSeeker; import com.ldbc.driver.generator.CsvEventStreamReaderBasicCharSeeker.EventDecoder; import com.ldbc.driver.generator.GeneratorFactory; import com.ldbc.driver.generator.RandomDataGeneratorFactory; import com.ldbc.driver.runtime.coordination.CompletionTimeException; import com.ldbc.driver.temporal.SystemTimeSource; import com.ldbc.driver.temporal.TemporalUtil; import com.ldbc.driver.temporal.TimeSource; import com.ldbc.driver.workloads.ldbc.snb.interactive.LdbcSnbInteractiveWorkloadConfiguration; import com.ldbc.driver.workloads.ldbc.snb.interactive.Query10EventStreamReader; import com.ldbc.driver.workloads.ldbc.snb.interactive.Query11EventStreamReader; import com.ldbc.driver.workloads.ldbc.snb.interactive.Query12EventStreamReader; import com.ldbc.driver.workloads.ldbc.snb.interactive.Query13EventStreamReader; import com.ldbc.driver.workloads.ldbc.snb.interactive.Query14EventStreamReader; import com.ldbc.driver.workloads.ldbc.snb.interactive.Query1EventStreamReader; import com.ldbc.driver.workloads.ldbc.snb.interactive.Query2EventStreamReader; import com.ldbc.driver.workloads.ldbc.snb.interactive.Query3EventStreamReader; import com.ldbc.driver.workloads.ldbc.snb.interactive.Query4EventStreamReader; import com.ldbc.driver.workloads.ldbc.snb.interactive.Query5EventStreamReader; import com.ldbc.driver.workloads.ldbc.snb.interactive.Query6EventStreamReader; import 
com.ldbc.driver.workloads.ldbc.snb.interactive.Query7EventStreamReader; import com.ldbc.driver.workloads.ldbc.snb.interactive.Query8EventStreamReader; import com.ldbc.driver.workloads.ldbc.snb.interactive.Query9EventStreamReader; import com.ldbc.driver.workloads.ldbc.snb.interactive.WriteEventStreamReaderCharSeeker; import com.ldbc.driver.workloads.ldbc.snb.interactive.WriteEventStreamReaderRegex; import org.junit.Ignore; import org.junit.Test; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; import java.io.Reader; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import static java.lang.String.format; public class InteractiveEventStreamReaderPerformanceTest { private static final TemporalUtil TEMPORAL_UTIL = new TemporalUtil(); TimeSource timeSource = new SystemTimeSource(); DecimalFormat numberFormatter = new DecimalFormat( "###,###,###,##0.00" ); //1 threads 99,010,827.00 operations in 01:04.351.000 (m:s.ms.us) = 1,538,605.88 op/ms //2 threads 198,021,654.00 operations in 01:05.156.000 (m:s.ms.us) = 3,039,192.92 op/ms //3 threads 297,032,481.00 operations in 01:13.117.000 (m:s.ms.us) = 4,062,427.08 op/ms //4 threads 396,043,308.00 operations in 01:21.316.000 (m:s.ms.us) = 4,870,422.89 op/ms //4 396,043,308.00 operations in 01:17.399.000 (m:s.ms.us) = 5,116,904.71 op/ms //5 495,054,135.00 operations in 01:27.476.000 (m:s.ms.us) = 5,659,313.81 op/ms //6 594,064,962.00 operations in 01:39.579.000 (m:s.ms.us) = 5,965,765.49 op/ms @Ignore @Test public void multiThreadedMultiPartitionParserPerformanceTest() throws FileNotFoundException, InterruptedException, CompletionTimeException { List<File> updateStreams = Lists.newArrayList( new File( 
"/Users/alexaverbuch/IdeaProjects/ldbc_snb_workload_interactive_neo4j/ldbc_driver/sample_data" + "/sf30_001-with-replica-streams/updateStream_forum_0.csv" ), new File( "/Users/alexaverbuch/IdeaProjects/ldbc_snb_workload_interactive_neo4j/ldbc_driver/sample_data" + "/sf30_001-with-replica-streams/updateStream_forum_1.csv" ), new File( "/Users/alexaverbuch/IdeaProjects/ldbc_snb_workload_interactive_neo4j/ldbc_driver/sample_data" + "/sf30_001-with-replica-streams/updateStream_forum_2.csv" ), new File( "/Users/alexaverbuch/IdeaProjects/ldbc_snb_workload_interactive_neo4j/ldbc_driver/sample_data" + "/sf30_001-with-replica-streams/updateStream_forum_3.csv" ), new File( "/Users/alexaverbuch/IdeaProjects/ldbc_snb_workload_interactive_neo4j/ldbc_driver/sample_data" + "/sf30_001-with-replica-streams/updateStream_forum_4.csv" ), new File( "/Users/alexaverbuch/IdeaProjects/ldbc_snb_workload_interactive_neo4j/ldbc_driver/sample_data" + "/sf30_001-with-replica-streams/updateStream_forum_5.csv" ) ); int bufferSize = 2 * 1024 * 1024; List<UpdateStreamReadingThread> updateStreamReadingThreads = new ArrayList<>(); CountDownLatch readyLatch = new CountDownLatch( updateStreams.size() ); CountDownLatch startLatch = new CountDownLatch( 1 ); CountDownLatch stopLatch = new CountDownLatch( updateStreams.size() ); for ( File updateStream : updateStreams ) { CharSeeker charSeeker = new BufferedCharSeeker( Readables.wrap( new InputStreamReader( new FileInputStream( updateStream ), Charsets.UTF_8 ) ), bufferSize ); int columnDelimiter = '|'; Extractors extractors = new Extractors( ';', ',' ); UpdateStreamReadingThread updateStreamReadingThread = new UpdateStreamReadingThread( readyLatch, startLatch, stopLatch, WriteEventStreamReaderCharSeeker.create( charSeeker, extractors, columnDelimiter ) ); updateStreamReadingThread.start(); updateStreamReadingThreads.add( updateStreamReadingThread ); } readyLatch.await(); startLatch.countDown(); long startTime = timeSource.nowAsMilli(); stopLatch.await(); 
long finishTime = timeSource.nowAsMilli(); long operationCount = 0; for ( UpdateStreamReadingThread updateStreamReadingThread : updateStreamReadingThreads ) { operationCount += updateStreamReadingThread.count(); } double throughput = 1000 * (double) operationCount / (finishTime - startTime); System.out.println( format( "%s operations in %s = %s op/ms", numberFormatter.format( operationCount ), new TemporalUtil().milliDurationToString( finishTime - startTime ), numberFormatter.format( throughput ) ) ); } private static class UpdateStreamReadingThread extends Thread { private final CountDownLatch readyLatch; private final CountDownLatch startLatch; private final CountDownLatch stopLatch; private final Iterator<Operation> updateStreamReader; private long count = 0; private UpdateStreamReadingThread( CountDownLatch readyLatch, CountDownLatch startLatch, CountDownLatch stopLatch, Iterator<Operation> updateStreamReader ) { this.readyLatch = readyLatch; this.startLatch = startLatch; this.stopLatch = stopLatch; this.updateStreamReader = updateStreamReader; } @Override public void run() { readyLatch.countDown(); try { startLatch.await(); } catch ( InterruptedException e ) { // do nothing } while ( updateStreamReader.hasNext() ) { updateStreamReader.next(); count++; } stopLatch.countDown(); } public long count() { return count; } } @Ignore @Test public void newReadParamsParsingPerformanceTest() throws IOException { GeneratorFactory gf = new GeneratorFactory( new RandomDataGeneratorFactory( 42l ) ); File parentStreamsDir = new File( "/Users/alexaverbuch/IdeaProjects/scale_factor_streams/new_read_params/sf10_partitions_01/" ); File paramsFile = new File( parentStreamsDir, "snb/interactive/query_1_param.txt" ); EventDecoder<Object[]> decoder = new Query1EventStreamReader.Query1Decoder(); int bufferSize = 2 * 1024 * 1024; { // warm up file system doBufferedReaderPerformanceTest( paramsFile, bufferSize ); } long limit = 100000000; int repetitions = 4; { long lines = 0; long 
startTimeAsMilli = timeSource.nowAsMilli(); for ( int i = 0; i < repetitions; i++ ) { SimpleCsvFileReader readOperation1FileReader = new SimpleCsvFileReader( paramsFile, LdbcSnbInteractiveWorkloadConfiguration.PIPE_SEPARATOR_REGEX ); Iterator<String[]> csvStreamReader = gf.limit( gf.repeating( readOperation1FileReader ), limit ); lines += readingStreamPerformanceTest( csvStreamReader ); readOperation1FileReader.close(); } long endTimeAsMilli = timeSource.nowAsMilli(); long durationAsMilli = (endTimeAsMilli - startTimeAsMilli) / repetitions; lines = lines / repetitions; double linesPerSecond = Math.round( ((double) lines / durationAsMilli) * 1000l ); System.out.println( format( "%s took %s to read %s line: %s lines/s", SimpleCsvFileReader.class.getSimpleName(), TEMPORAL_UTIL.milliDurationToString( durationAsMilli ), numberFormatter.format( lines ), numberFormatter.format( linesPerSecond ) ) ); } { long lines = 0; long startTimeAsMilli = timeSource.nowAsMilli(); for ( int i = 0; i < repetitions; i++ ) { CharSeeker charSeeker = new BufferedCharSeeker( Readables.wrap( new FileReader( paramsFile ) ), bufferSize ); Extractors extractors = new Extractors( ';', ',' ); Mark mark = new Mark(); int columnDelimiter = '|'; // skip headers - this file has 2 columns per row charSeeker.seek( mark, new int[]{columnDelimiter} ); charSeeker.seek( mark, new int[]{columnDelimiter} ); Iterator<Object[]> operation1StreamWithoutTimes = gf.limit( gf.repeating( new CsvEventStreamReaderBasicCharSeeker<>( charSeeker, extractors, mark, decoder, columnDelimiter ) ), limit ); lines += readingStreamPerformanceTest( operation1StreamWithoutTimes ); charSeeker.close(); } long endTimeAsMilli = timeSource.nowAsMilli(); long durationAsMilli = (endTimeAsMilli - startTimeAsMilli) / repetitions; lines = lines / repetitions; double linesPerSecond = Math.round( ((double) lines / durationAsMilli) * 1000l ); System.out.println( format( "%s took %s to read %s line: %s lines/s", 
CsvEventStreamReaderBasicCharSeeker.class.getSimpleName(), TEMPORAL_UTIL.milliDurationToString( durationAsMilli ), numberFormatter.format( lines ), numberFormatter.format( linesPerSecond ) ) ); } { long lines = 0; long startTimeAsMilli = timeSource.nowAsMilli(); for ( int i = 0; i < repetitions; i++ ) { CharSeeker charSeeker = new BufferedCharSeeker( Readables.wrap( new FileReader( paramsFile ) ), bufferSize ); Extractors extractors = new Extractors( ';', ',' ); Mark mark = new Mark(); int columnDelimiter = '|'; // skip headers - this file has 2 columns per row charSeeker.seek( mark, new int[]{columnDelimiter} ); charSeeker.seek( mark, new int[]{columnDelimiter} ); Iterator<Object[]> query1Parameters = gf.limit( gf.repeating( new CsvEventStreamReaderBasicCharSeeker<>( charSeeker, extractors, mark, decoder, columnDelimiter ) ), limit ); Iterator<Operation> query1OperationsWithoutTimes = new Query1EventStreamReader( query1Parameters ); lines += readingStreamPerformanceTest( query1OperationsWithoutTimes ); charSeeker.close(); } long endTimeAsMilli = timeSource.nowAsMilli(); long durationAsMilli = (endTimeAsMilli - startTimeAsMilli) / repetitions; lines = lines / repetitions; double linesPerSecond = Math.round( ((double) lines / durationAsMilli) * 1000l ); System.out.println( format( "%s took %s to read %s line: %s lines/s", Query1EventStreamReader.class.getSimpleName(), TEMPORAL_UTIL.milliDurationToString( durationAsMilli ), numberFormatter.format( lines ), numberFormatter.format( linesPerSecond ) ) ); } { long lines = 0; long startTimeAsMilli = timeSource.nowAsMilli(); for ( int i = 0; i < repetitions; i++ ) { CharSeeker charSeeker = new BufferedCharSeeker( Readables.wrap( new FileReader( paramsFile ) ), bufferSize ); Extractors extractors = new Extractors( ';', ',' ); Mark mark = new Mark(); int columnDelimiter = '|'; // skip headers - this file has 2 columns per row charSeeker.seek( mark, new int[]{columnDelimiter} ); charSeeker.seek( mark, new 
int[]{columnDelimiter} ); Iterator<Object[]> query1Parameters = gf.limit( gf.repeating( new CsvEventStreamReaderBasicCharSeeker<>( charSeeker, extractors, mark, decoder, columnDelimiter ) ), limit ); Iterator<Operation> query1OperationsWithTimes = gf.assignStartTimes( gf.incrementing( 0l, 1l ), new Query1EventStreamReader( query1Parameters ) ); lines += readingStreamPerformanceTest( query1OperationsWithTimes ); charSeeker.close(); } long endTimeAsMilli = timeSource.nowAsMilli(); long durationAsMilli = (endTimeAsMilli - startTimeAsMilli) / repetitions; lines = lines / repetitions; double linesPerSecond = Math.round( ((double) lines / durationAsMilli) * 1000l ); System.out.println( format( "%s took %s to read %s line: %s lines/s", Query1EventStreamReader.class.getSimpleName(), TEMPORAL_UTIL.milliDurationToString( durationAsMilli ), numberFormatter.format( lines ), numberFormatter.format( linesPerSecond ) ) ); } } @Ignore @Test public void newParseAndMergeAllReadOperationStreamsPerformanceTest() throws IOException { GeneratorFactory gf = new GeneratorFactory( new RandomDataGeneratorFactory( 42l ) ); File parentStreamsDir = new File( "/Users/alexaverbuch/IdeaProjects/scale_factor_streams/new_read_params/sf10_partitions_01/" ); long limit = 100000000; int bufferSize = 2 * 1024 * 1024; int repetitions = 1; Extractors extractors = new Extractors( ';', ',' ); int columnDelimiter = '|'; { long lines = 0; long startTimeAsMilli = timeSource.nowAsMilli(); for ( int i = 0; i < repetitions; i++ ) { EventDecoder<Object[]> decoder1 = new Query1EventStreamReader.Query1Decoder(); CharSeeker charSeeker1 = new BufferedCharSeeker( Readables.wrap( new FileReader( new File( parentStreamsDir, "snb/interactive/query_1_param.txt" ) ) ), bufferSize ); Mark mark1 = new Mark(); // skip headers charSeeker1.seek( mark1, new int[]{columnDelimiter} ); charSeeker1.seek( mark1, new int[]{columnDelimiter} ); Iterator<Operation> query1OperationsWithTimes = gf.assignStartTimes( gf.incrementing( 0l, 1l 
), new Query1EventStreamReader( gf.repeating( new CsvEventStreamReaderBasicCharSeeker<>( charSeeker1, extractors, mark1, decoder1, columnDelimiter ) ) ) ); EventDecoder<Object[]> decoder2 = new Query2EventStreamReader.Query2Decoder(); CharSeeker charSeeker2 = new BufferedCharSeeker( Readables.wrap( new FileReader( new File( parentStreamsDir, "snb/interactive/query_2_param.txt" ) ) ), bufferSize ); Mark mark2 = new Mark(); // skip headers charSeeker2.seek( mark2, new int[]{columnDelimiter} ); charSeeker2.seek( mark2, new int[]{columnDelimiter} ); Iterator<Operation> query2OperationsWithTimes = gf.assignStartTimes( gf.incrementing( 0l, 1l ), new Query2EventStreamReader( gf.repeating( new CsvEventStreamReaderBasicCharSeeker<>( charSeeker2, extractors, mark2, decoder2, columnDelimiter ) ) ) ); EventDecoder<Object[]> decoder3 = new Query3EventStreamReader.Query3Decoder(); CharSeeker charSeeker3 = new BufferedCharSeeker( Readables.wrap( new FileReader( new File( parentStreamsDir, "snb/interactive/query_3_param.txt" ) ) ), bufferSize ); Mark mark3 = new Mark(); // skip headers charSeeker3.seek( mark3, new int[]{columnDelimiter} ); charSeeker3.seek( mark3, new int[]{columnDelimiter} ); charSeeker3.seek( mark3, new int[]{columnDelimiter} ); charSeeker3.seek( mark3, new int[]{columnDelimiter} ); charSeeker3.seek( mark3, new int[]{columnDelimiter} ); Iterator<Operation> query3OperationsWithTimes = gf.assignStartTimes( gf.incrementing( 0l, 1l ), new Query3EventStreamReader( gf.repeating( new CsvEventStreamReaderBasicCharSeeker<>( charSeeker3, extractors, mark3, decoder3, columnDelimiter ) ) ) ); EventDecoder<Object[]> decoder4 = new Query4EventStreamReader.Query4Decoder(); CharSeeker charSeeker4 = new BufferedCharSeeker( Readables.wrap( new FileReader( new File( parentStreamsDir, "snb/interactive/query_4_param.txt" ) ) ), bufferSize ); Mark mark4 = new Mark(); // skip headers charSeeker4.seek( mark4, new int[]{columnDelimiter} ); charSeeker4.seek( mark4, new 
int[]{columnDelimiter} ); charSeeker4.seek( mark4, new int[]{columnDelimiter} ); Iterator<Operation> query4OperationsWithTimes = gf.assignStartTimes( gf.incrementing( 0l, 1l ), new Query4EventStreamReader( gf.repeating( new CsvEventStreamReaderBasicCharSeeker<>( charSeeker4, extractors, mark4, decoder4, columnDelimiter ) ) ) ); EventDecoder<Object[]> decoder5 = new Query5EventStreamReader.Query5Decoder(); CharSeeker charSeeker5 = new BufferedCharSeeker( Readables.wrap( new FileReader( new File( parentStreamsDir, "snb/interactive/query_5_param.txt" ) ) ), bufferSize ); Mark mark5 = new Mark(); // skip headers charSeeker5.seek( mark5, new int[]{columnDelimiter} ); charSeeker5.seek( mark5, new int[]{columnDelimiter} ); Iterator<Operation> query5OperationsWithTimes = gf.assignStartTimes( gf.incrementing( 0l, 1l ), new Query5EventStreamReader( gf.repeating( new CsvEventStreamReaderBasicCharSeeker<>( charSeeker5, extractors, mark5, decoder5, columnDelimiter ) ) ) ); EventDecoder<Object[]> decoder6 = new Query6EventStreamReader.Query6Decoder(); CharSeeker charSeeker6 = new BufferedCharSeeker( Readables.wrap( new FileReader( new File( parentStreamsDir, "snb/interactive/query_6_param.txt" ) ) ), bufferSize ); Mark mark6 = new Mark(); // skip headers charSeeker6.seek( mark6, new int[]{columnDelimiter} ); charSeeker6.seek( mark6, new int[]{columnDelimiter} ); Iterator<Operation> query6OperationsWithTimes = gf.assignStartTimes( gf.incrementing( 0l, 1l ), new Query6EventStreamReader( gf.repeating( new CsvEventStreamReaderBasicCharSeeker<>( charSeeker6, extractors, mark6, decoder6, columnDelimiter ) ) ) ); EventDecoder<Object[]> decoder7 = new Query7EventStreamReader.Query7Decoder(); CharSeeker charSeeker7 = new BufferedCharSeeker( Readables.wrap( new FileReader( new File( parentStreamsDir, "snb/interactive/query_7_param.txt" ) ) ), bufferSize ); Mark mark7 = new Mark(); // skip headers charSeeker7.seek( mark7, new int[]{columnDelimiter} ); Iterator<Operation> 
query7OperationsWithTimes = gf.assignStartTimes( gf.incrementing( 0l, 1l ), new Query7EventStreamReader( gf.repeating( new CsvEventStreamReaderBasicCharSeeker<>( charSeeker7, extractors, mark7, decoder7, columnDelimiter ) ) ) ); EventDecoder<Object[]> decoder8 = new Query8EventStreamReader.Query8Decoder(); CharSeeker charSeeker8 = new BufferedCharSeeker( Readables.wrap( new FileReader( new File( parentStreamsDir, "snb/interactive/query_8_param.txt" ) ) ), bufferSize ); Mark mark8 = new Mark(); // skip headers charSeeker8.seek( mark8, new int[]{columnDelimiter} ); Iterator<Operation> query8OperationsWithTimes = gf.assignStartTimes( gf.incrementing( 0l, 1l ), new Query8EventStreamReader( gf.repeating( new CsvEventStreamReaderBasicCharSeeker<>( charSeeker8, extractors, mark8, decoder8, columnDelimiter ) ) ) ); EventDecoder<Object[]> decoder9 = new Query9EventStreamReader.Query9Decoder(); CharSeeker charSeeker9 = new BufferedCharSeeker( Readables.wrap( new FileReader( new File( parentStreamsDir, "snb/interactive/query_9_param.txt" ) ) ), bufferSize ); Mark mark9 = new Mark(); // skip headers charSeeker9.seek( mark9, new int[]{columnDelimiter} ); charSeeker9.seek( mark9, new int[]{columnDelimiter} ); Iterator<Operation> query9OperationsWithTimes = gf.assignStartTimes( gf.incrementing( 0l, 1l ), new Query9EventStreamReader( gf.repeating( new CsvEventStreamReaderBasicCharSeeker<>( charSeeker9, extractors, mark9, decoder9, columnDelimiter ) ) ) ); EventDecoder<Object[]> decoder10 = new Query10EventStreamReader.Query10Decoder(); CharSeeker charSeeker10 = new BufferedCharSeeker( Readables.wrap( new FileReader( new File( parentStreamsDir, "snb/interactive/query_10_param.txt" ) ) ), bufferSize ); Mark mark10 = new Mark(); // skip headers charSeeker10.seek( mark10, new int[]{columnDelimiter} ); charSeeker10.seek( mark10, new int[]{columnDelimiter} ); Iterator<Operation> query10OperationsWithTimes = gf.assignStartTimes( gf.incrementing( 0l, 1l ), new Query10EventStreamReader( 
gf.repeating( new CsvEventStreamReaderBasicCharSeeker<>( charSeeker10, extractors, mark10, decoder10, columnDelimiter ) ) ) ); EventDecoder<Object[]> decoder11 = new Query11EventStreamReader.Query11Decoder(); CharSeeker charSeeker11 = new BufferedCharSeeker( Readables.wrap( new FileReader( new File( parentStreamsDir, "snb/interactive/query_11_param.txt" ) ) ), bufferSize ); Mark mark11 = new Mark(); // skip headers charSeeker11.seek( mark11, new int[]{columnDelimiter} ); charSeeker11.seek( mark11, new int[]{columnDelimiter} ); charSeeker11.seek( mark11, new int[]{columnDelimiter} ); Iterator<Operation> query11OperationsWithTimes = gf.assignStartTimes( gf.incrementing( 0l, 1l ), new Query11EventStreamReader( gf.repeating( new CsvEventStreamReaderBasicCharSeeker<>( charSeeker11, extractors, mark11, decoder11, columnDelimiter ) ) ) ); EventDecoder<Object[]> decoder12 = new Query12EventStreamReader.Query12Decoder(); CharSeeker charSeeker12 = new BufferedCharSeeker( Readables.wrap( new FileReader( new File( parentStreamsDir, "snb/interactive/query_12_param.txt" ) ) ), bufferSize ); Mark mark12 = new Mark(); // skip headers charSeeker12.seek( mark12, new int[]{columnDelimiter} ); charSeeker12.seek( mark12, new int[]{columnDelimiter} ); Iterator<Operation> query12OperationsWithTimes = gf.assignStartTimes( gf.incrementing( 0l, 1l ), new Query12EventStreamReader( gf.repeating( new CsvEventStreamReaderBasicCharSeeker<>( charSeeker12, extractors, mark12, decoder12, columnDelimiter ) ) ) ); EventDecoder<Object[]> decoder13 = new Query13EventStreamReader.Query13Decoder(); CharSeeker charSeeker13 = new BufferedCharSeeker( Readables.wrap( new FileReader( new File( parentStreamsDir, "snb/interactive/query_13_param.txt" ) ) ), bufferSize ); Mark mark13 = new Mark(); // skip headers charSeeker13.seek( mark13, new int[]{columnDelimiter} ); charSeeker13.seek( mark13, new int[]{columnDelimiter} ); Iterator<Operation> query13OperationsWithTimes = gf.assignStartTimes( gf.incrementing( 
0l, 1l ), new Query13EventStreamReader( gf.repeating( new CsvEventStreamReaderBasicCharSeeker<>( charSeeker13, extractors, mark13, decoder13, columnDelimiter ) ) ) ); EventDecoder<Object[]> decoder14 = new Query14EventStreamReader.Query14Decoder(); CharSeeker charSeeker14 = new BufferedCharSeeker( Readables.wrap( new FileReader( new File( parentStreamsDir, "snb/interactive/query_14_param.txt" ) ) ), bufferSize ); Mark mark14 = new Mark(); // skip headers charSeeker14.seek( mark14, new int[]{columnDelimiter} ); charSeeker14.seek( mark14, new int[]{columnDelimiter} ); Iterator<Operation> query14OperationsWithTimes = gf.assignStartTimes( gf.incrementing( 0l, 1l ), new Query14EventStreamReader( gf.repeating( new CsvEventStreamReaderBasicCharSeeker<>( charSeeker14, extractors, mark14, decoder14, columnDelimiter ) ) ) ); lines += readingStreamPerformanceTest( gf.limit( gf.mergeSortOperationsByTimeStamp( query1OperationsWithTimes, query2OperationsWithTimes, query3OperationsWithTimes, query4OperationsWithTimes, query5OperationsWithTimes, query6OperationsWithTimes, query7OperationsWithTimes, query8OperationsWithTimes, query9OperationsWithTimes, query10OperationsWithTimes, query11OperationsWithTimes, query12OperationsWithTimes, query13OperationsWithTimes, query14OperationsWithTimes ), limit ) ); charSeeker1.close(); charSeeker2.close(); charSeeker3.close(); charSeeker4.close(); charSeeker5.close(); charSeeker6.close(); charSeeker7.close(); charSeeker8.close(); charSeeker9.close(); charSeeker10.close(); charSeeker11.close(); charSeeker12.close(); charSeeker13.close(); charSeeker14.close(); } long endTimeAsMilli = timeSource.nowAsMilli(); long durationAsMilli = (endTimeAsMilli - startTimeAsMilli) / repetitions; lines = lines / repetitions; double linesPerSecond = Math.round( ((double) lines / durationAsMilli) * 1000l ); System.out.println( format( "%s took %s to read %s line: %s lines/s", "Merged", TEMPORAL_UTIL.milliDurationToString( durationAsMilli ), numberFormatter.format( 
lines ), numberFormatter.format( linesPerSecond ) ) ); } } @Ignore @Test public void forumCsvUpdateStreamReadingRegexParserProfileTest() throws IOException, InterruptedException { Thread.sleep( 30000 ); File parentStreamsDir = new File( "/Users/alexaverbuch/IdeaProjects/scale_factor_streams/" ); File forumUpdateStream = new File( parentStreamsDir, "sf10_partitions_01/updateStream_0_0_forum.csv" ); long lines = 0; long startTimeAsMilli = timeSource.nowAsMilli(); SimpleCsvFileReader simpleCsvFileReader = new SimpleCsvFileReader( forumUpdateStream, SimpleCsvFileReader.DEFAULT_COLUMN_SEPARATOR_REGEX_STRING ); Iterator<Operation> writeEventStreamReader = WriteEventStreamReaderRegex.create( simpleCsvFileReader ); lines += readingStreamPerformanceTest( writeEventStreamReader ); simpleCsvFileReader.close(); long endTimeAsMilli = timeSource.nowAsMilli(); long durationAsMilli = (endTimeAsMilli - startTimeAsMilli); System.out.println( format( "%s took %s to read %s line: %s lines/s", WriteEventStreamReaderRegex.class.getSimpleName(), TEMPORAL_UTIL.milliDurationToString( durationAsMilli ), numberFormatter.format( lines ), numberFormatter.format( (double) lines / TimeUnit.MILLISECONDS.toSeconds( durationAsMilli ) ) ) ); } @Ignore @Test public void forumCsvUpdateStreamReadingCharSeekerParserProfileTest() throws IOException, InterruptedException { Thread.sleep( 30000 ); File parentStreamsDir = new File( "/Users/alexaverbuch/IdeaProjects/scale_factor_streams/current/" ); File forumUpdateStream = new File( parentStreamsDir, "sf10_partitions_01/updateStream_0_0_forum.csv" ); int MB = 1024 * 1024; int bufferSize = 2 * MB; long lines = 0; long startTimeAsMilli = timeSource.nowAsMilli(); CharSeeker charSeeker = new BufferedCharSeeker( Readables.wrap( new InputStreamReader( new FileInputStream( forumUpdateStream ), Charsets.UTF_8 ) ), bufferSize ); int columnDelimiter = '|'; Extractors extractors = new Extractors( ';', ',' ); Iterator<Operation> writeEventStreamReader = 
WriteEventStreamReaderCharSeeker.create( charSeeker, extractors, columnDelimiter ); lines += readingStreamPerformanceTest( writeEventStreamReader ); charSeeker.close(); long endTimeAsMilli = timeSource.nowAsMilli(); long durationAsMilli = (endTimeAsMilli - startTimeAsMilli); System.out.println( format( "%s took %s to read %s line: %s lines/s", WriteEventStreamReaderCharSeeker.class.getSimpleName() + "-" + bufferSize, TEMPORAL_UTIL.milliDurationToString( durationAsMilli ), numberFormatter.format( lines ), numberFormatter.format( (double) lines / TimeUnit.MILLISECONDS.toSeconds( durationAsMilli ) ) ) ); } @Ignore @Test public void personCsvUpdateStreamReadingRegexParserProfileTest() throws IOException, InterruptedException { Thread.sleep( 30000 ); File parentStreamsDir = new File( "/Users/alexaverbuch/IdeaProjects/scale_factor_streams/" ); File forumUpdateStream = new File( parentStreamsDir, "sf10_partitions_01/updateStream_0_0_person.csv" ); long lines = 0; long startTimeAsMilli = timeSource.nowAsMilli(); SimpleCsvFileReader simpleCsvFileReader = new SimpleCsvFileReader( forumUpdateStream, SimpleCsvFileReader.DEFAULT_COLUMN_SEPARATOR_REGEX_STRING ); Iterator<Operation> writeEventStreamReader = WriteEventStreamReaderRegex.create( simpleCsvFileReader ); lines += readingStreamPerformanceTest( writeEventStreamReader ); simpleCsvFileReader.close(); long endTimeAsMilli = timeSource.nowAsMilli(); long durationAsMilli = (endTimeAsMilli - startTimeAsMilli); System.out.println( format( "%s took %s to read %s line: %s lines/s", WriteEventStreamReaderRegex.class.getSimpleName(), TEMPORAL_UTIL.milliDurationToString( durationAsMilli ), numberFormatter.format( lines ), numberFormatter.format( (double) lines / TimeUnit.MILLISECONDS.toSeconds( durationAsMilli ) ) ) ); } @Ignore @Test public void personCsvUpdateStreamReadingCharSeekerParserProfileTest() throws IOException, InterruptedException { Thread.sleep( 30000 ); File parentStreamsDir = new File( 
"/Users/alexaverbuch/IdeaProjects/scale_factor_streams/" ); File forumUpdateStream = new File( parentStreamsDir, "sf10_partitions_01/updateStream_0_0_person.csv" ); int MB = 1024 * 1024; int bufferSize = 2 * MB; long lines = 0; long startTimeAsMilli = timeSource.nowAsMilli(); CharSeeker charSeeker = new BufferedCharSeeker( Readables.wrap( new InputStreamReader( new FileInputStream( forumUpdateStream ), Charsets.UTF_8 ) ), bufferSize ); int columnDelimiter = '|'; Extractors extractors = new Extractors( ';', ',' ); Iterator<Operation> writeEventStreamReader = WriteEventStreamReaderCharSeeker.create( charSeeker, extractors, columnDelimiter ); lines += readingStreamPerformanceTest( writeEventStreamReader ); charSeeker.close(); long endTimeAsMilli = timeSource.nowAsMilli(); long durationAsMilli = (endTimeAsMilli - startTimeAsMilli); System.out.println( format( "%s took %s to read %s line: %s lines/s", WriteEventStreamReaderCharSeeker.class.getSimpleName() + "-" + bufferSize, TEMPORAL_UTIL.milliDurationToString( durationAsMilli ), numberFormatter.format( lines ), numberFormatter.format( (double) lines / TimeUnit.MILLISECONDS.toSeconds( durationAsMilli ) ) ) ); } @Ignore @Test public void forumCsvUpdateStreamReadingPerformanceTest() throws IOException { // File parentStreamsDir = new File("/Users/alexaverbuch/IdeaProjects/scale_factor_streams/"); // File forumUpdateStream = new File(parentStreamsDir, "sf10_partitions_01/updateStream_0_0_forum.csv"); File parentStreamsDir = new File( "/Users/alexaverbuch/hadoopTempDir/output/social_network/" ); File forumUpdateStream = new File( parentStreamsDir, "snb/interactive/updateStream_0_0_forum.csv" ); { // warm up file system SimpleCsvFileReader simpleCsvFileReader = new SimpleCsvFileReader( forumUpdateStream, SimpleCsvFileReader.DEFAULT_COLUMN_SEPARATOR_REGEX_STRING ); readingStreamPerformanceTest( simpleCsvFileReader ); simpleCsvFileReader.close(); } int repetitions = 2; { int bufferSize = 2 * 1024 * 1024; long lines = 0; long 
startTimeAsMilli = timeSource.nowAsMilli(); for ( int i = 0; i < repetitions; i++ ) { lines += doBufferedReaderPerformanceTest( forumUpdateStream, bufferSize ); } long endTimeAsMilli = timeSource.nowAsMilli(); long durationAsMilli = (endTimeAsMilli - startTimeAsMilli) / repetitions; lines = lines / repetitions; System.out.println( format( "%s took %s to read %s line: %s lines/s", BufferedReader.class.getSimpleName() + "-" + bufferSize, TEMPORAL_UTIL.milliDurationToString( durationAsMilli ), numberFormatter.format( lines ), numberFormatter .format( (double) lines / TimeUnit.MILLISECONDS.toSeconds( durationAsMilli ) ) ) ); } { int bufferSize = 2 * 1024 * 1024; long lines = 0; long startTimeAsMilli = timeSource.nowAsMilli(); for ( int i = 0; i < repetitions; i++ ) { lines += doCharSeekerPerformanceTest( forumUpdateStream, bufferSize ); } long endTimeAsMilli = timeSource.nowAsMilli(); long durationAsMilli = (endTimeAsMilli - startTimeAsMilli) / repetitions; lines = lines / repetitions; System.out.println( format( "%s took %s to read %s line: %s lines/s", CharSeeker.class.getSimpleName() + "-" + bufferSize, TEMPORAL_UTIL.milliDurationToString( durationAsMilli ), numberFormatter.format( lines ), numberFormatter .format( (double) lines / TimeUnit.MILLISECONDS.toSeconds( durationAsMilli ) ) ) ); } { int bufferSize = 2 * 1024 * 1024; long lines = 0; long startTimeAsMilli = timeSource.nowAsMilli(); for ( int i = 0; i < repetitions; i++ ) { lines += doThreadedCharSeekerPerformanceTest( forumUpdateStream, bufferSize ); } long endTimeAsMilli = timeSource.nowAsMilli(); long durationAsMilli = (endTimeAsMilli - startTimeAsMilli) / repetitions; lines = lines / repetitions; System.out.println( format( "%s took %s to read %s line: %s lines/s", CharSeeker.class.getSimpleName() + "-" + ThreadAheadReadable.class.getSimpleName() + "-" + bufferSize, TEMPORAL_UTIL.milliDurationToString( durationAsMilli ), numberFormatter.format( lines ), numberFormatter .format( (double) lines / 
TimeUnit.MILLISECONDS.toSeconds( durationAsMilli ) ) ) ); } { long lines = 0; long startTimeAsMilli = timeSource.nowAsMilli(); for ( int i = 0; i < repetitions; i++ ) { SimpleCsvFileReader simpleCsvFileReader = new SimpleCsvFileReader( forumUpdateStream, SimpleCsvFileReader.DEFAULT_COLUMN_SEPARATOR_REGEX_STRING ); lines += readingStreamPerformanceTest( simpleCsvFileReader ); simpleCsvFileReader.close(); } long endTimeAsMilli = timeSource.nowAsMilli(); long durationAsMilli = (endTimeAsMilli - startTimeAsMilli) / repetitions; lines = lines / repetitions; System.out.println( format( "%s took %s to read %s line: %s lines/s", SimpleCsvFileReader.class.getSimpleName(), TEMPORAL_UTIL.milliDurationToString( durationAsMilli ), numberFormatter.format( lines ), numberFormatter .format( (double) lines / TimeUnit.MILLISECONDS.toSeconds( durationAsMilli ) ) ) ); } { long lines = 0; long startTimeAsMilli = timeSource.nowAsMilli(); for ( int i = 0; i < repetitions; i++ ) { SimpleCsvFileReader simpleCsvFileReader = new SimpleCsvFileReader( forumUpdateStream, SimpleCsvFileReader.DEFAULT_COLUMN_SEPARATOR_REGEX_STRING ); Iterator<Operation> writeEventStreamReader = WriteEventStreamReaderRegex.create( simpleCsvFileReader ); lines += readingStreamPerformanceTest( writeEventStreamReader ); simpleCsvFileReader.close(); } long endTimeAsMilli = timeSource.nowAsMilli(); long durationAsMilli = (endTimeAsMilli - startTimeAsMilli) / repetitions; lines = lines / repetitions; System.out.println( format( "%s took %s to read %s line: %s lines/s", WriteEventStreamReaderRegex.class.getSimpleName(), TEMPORAL_UTIL.milliDurationToString( durationAsMilli ), numberFormatter.format( lines ), numberFormatter .format( (double) lines / TimeUnit.MILLISECONDS.toSeconds( durationAsMilli ) ) ) ); } int MB = 1024 * 1024; List<Integer> bufferSizes = Lists.newArrayList( 1 * MB, 2 * MB, 4 * MB, 8 * MB, 16 * MB ); for ( int bufferSize : bufferSizes ) { { long lines = 0; long startTimeAsMilli = timeSource.nowAsMilli(); 
for ( int i = 0; i < repetitions; i++ ) { CharSeeker charSeeker = new BufferedCharSeeker( Readables .wrap( new InputStreamReader( new FileInputStream( forumUpdateStream ), Charsets.UTF_8 ) ), bufferSize ); int columnDelimiter = '|'; Extractors extractors = new Extractors( ';', ',' ); Iterator<Operation> writeEventStreamReader = WriteEventStreamReaderCharSeeker.create( charSeeker, extractors, columnDelimiter ); lines += readingStreamPerformanceTest( writeEventStreamReader ); charSeeker.close(); } long endTimeAsMilli = timeSource.nowAsMilli(); long durationAsMilli = (endTimeAsMilli - startTimeAsMilli) / repetitions; lines = lines / repetitions; System.out.println( format( "%s took %s to read %s line: %s lines/s", WriteEventStreamReaderCharSeeker.class.getSimpleName() + "-" + bufferSize, TEMPORAL_UTIL.milliDurationToString( durationAsMilli ), numberFormatter.format( lines ), numberFormatter .format( (double) lines / TimeUnit.MILLISECONDS.toSeconds( durationAsMilli ) ) ) ); } { long lines = 0; long startTimeAsMilli = timeSource.nowAsMilli(); for ( int i = 0; i < repetitions; i++ ) { Reader reader = new InputStreamReader( new FileInputStream( forumUpdateStream ), Charsets.UTF_8 ); CharSeeker charSeeker = new BufferedCharSeeker( ThreadAheadReadable.threadAhead( Readables.wrap( reader ), bufferSize ), bufferSize ); int columnDelimiter = '|'; Extractors extractors = new Extractors( ';', ',' ); Iterator<Operation> writeEventStreamReader = WriteEventStreamReaderCharSeeker.create( charSeeker, extractors, columnDelimiter ); lines += readingStreamPerformanceTest( writeEventStreamReader ); charSeeker.close(); } long endTimeAsMilli = timeSource.nowAsMilli(); long durationAsMilli = (endTimeAsMilli - startTimeAsMilli) / repetitions; lines = lines / repetitions; System.out.println( format( "%s took %s to read %s line: %s lines/s", WriteEventStreamReaderCharSeeker.class.getSimpleName() + "-" + ThreadAheadReadable.class.getSimpleName() + "-" + bufferSize, 
TEMPORAL_UTIL.milliDurationToString( durationAsMilli ), numberFormatter.format( lines ), numberFormatter .format( (double) lines / TimeUnit.MILLISECONDS.toSeconds( durationAsMilli ) ) ) ); } } }

    /**
     * Exhausts {@code parser} and reports how many elements it produced.
     * Every element is fetched with {@code next()} and then discarded.
     *
     * @param parser iterator to drain
     * @return number of elements returned by {@code parser}
     * @throws FileNotFoundException not thrown by this body; retained in the signature so existing
     * callers keep compiling unchanged
     */
    public long readingStreamPerformanceTest( Iterator<?> parser ) throws FileNotFoundException
    {
        long lines = 0;
        while ( parser.hasNext() )
        {
            parser.next();
            lines++;
        }
        return lines;
    }

    /**
     * Counts the lines of {@code forumUpdateStream} by seeking through its {@code '|'}-delimited
     * fields with a {@link BufferedCharSeeker}.
     * <p>
     * The file is decoded as UTF-8, the same way the other reader benchmarks in this class open it,
     * instead of with the platform default charset.
     *
     * @param forumUpdateStream file to read
     * @param bufferSize buffer size handed to the {@link BufferedCharSeeker}
     * @return number of fields at which the seeker reported end of line
     * @throws IOException if the file cannot be opened, read or closed
     */
    public long doCharSeekerPerformanceTest( File forumUpdateStream, int bufferSize ) throws IOException
    {
        Reader reader = new InputStreamReader( new FileInputStream( forumUpdateStream ), Charsets.UTF_8 );
        CharSeeker seeker = new BufferedCharSeeker( Readables.wrap( reader ), bufferSize );
        return countLinesAndClose( seeker );
    }

    /**
     * Same measurement as {@link #doCharSeekerPerformanceTest(File, int)}, except that the readable
     * is first wrapped with {@code ThreadAheadReadable.threadAhead(..)} using the same buffer size.
     * <p>
     * The file is decoded as UTF-8 rather than with the platform default charset.
     *
     * @param forumUpdateStream file to read
     * @param bufferSize buffer size handed to both the thread-ahead wrapper and the seeker
     * @return number of fields at which the seeker reported end of line
     * @throws IOException if the file cannot be opened, read or closed
     */
    public long doThreadedCharSeekerPerformanceTest( File forumUpdateStream, int bufferSize ) throws IOException
    {
        Reader reader = new InputStreamReader( new FileInputStream( forumUpdateStream ), Charsets.UTF_8 );
        CharSeeker seeker = new BufferedCharSeeker(
                ThreadAheadReadable.threadAhead( Readables.wrap( reader ), bufferSize ), bufferSize );
        return countLinesAndClose( seeker );
    }

    /**
     * Counts the lines of {@code forumUpdateStream} using {@link BufferedReader#readLine()}.
     * <p>
     * The file is decoded as UTF-8 rather than with the platform default charset.
     *
     * @param forumUpdateStream file to read
     * @param bufferSize buffer size handed to the {@link BufferedReader}
     * @return number of lines returned by {@code readLine()}
     * @throws IOException if the file cannot be opened, read or closed
     */
    public long doBufferedReaderPerformanceTest( File forumUpdateStream, int bufferSize ) throws IOException
    {
        Reader reader = new InputStreamReader( new FileInputStream( forumUpdateStream ), Charsets.UTF_8 );
        try
        {
            BufferedReader bufferedReader = new BufferedReader( reader, bufferSize );
            long lines = 0;
            while ( null != bufferedReader.readLine() )
            {
                lines++;
            }
            return lines;
        }
        finally
        {
            // Closing the underlying reader releases the file even when the BufferedReader
            // constructor itself rejects the buffer size or a read fails part-way through.
            reader.close();
        }
    }

    /**
     * Seeks through every {@code '|'}-delimited field of {@code seeker}, extracts each one as a
     * string, and counts the fields at which the mark reports end of line.
     * The seeker is always closed, including when seeking or extracting throws.
     */
    private static long countLinesAndClose( CharSeeker seeker ) throws IOException
    {
        try
        {
            long lines = 0;
            Mark mark = new Mark();
            int[] delimiters = new int[]{'|'};
            Extractors extractors = new Extractors( ';', ',' );
            while ( seeker.seek( mark, delimiters ) )
            {
                // The extracted value is discarded on purpose: only the cost of extraction matters.
                seeker.extract( mark, extractors.string() ).value();
                if ( mark.isEndOfLine() )
                {
                    lines++;
                }
            }
            return lines;
        }
        finally
        {
            seeker.close();
        }
    }
}