package com.ldbc.driver;
import com.google.common.collect.Iterators;
import com.google.common.collect.PeekingIterator;
import com.ldbc.driver.control.DriverConfiguration;
import com.ldbc.driver.control.LoggingService;
import com.ldbc.driver.control.LoggingServiceFactory;
import com.ldbc.driver.generator.GeneratorFactory;
import com.ldbc.driver.util.Tuple;
import com.ldbc.driver.util.Tuple3;
import com.ldbc.driver.workloads.ClassNameWorkloadFactory;
import com.ldbc.driver.workloads.WorkloadFactory;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;
import static java.lang.String.format;
public class WorkloadStreams
{
// Definition of the asynchronous stream; stays null until setAsynchronousStream() is called
private WorkloadStreamDefinition asynchronousStream = null;
// Definitions of the blocking streams, in the order they were added through addBlockingStream()
private List<WorkloadStreamDefinition> blockingStreams = new ArrayList<>();
/**
 * Builds a new {@link WorkloadStreams} whose streams are the streams of {@code originalWorkloadStreams},
 * shifted in time and time-compressed.
 * <p>
 * The earliest scheduled start time among the first operations of all streams (asynchronous and blocking,
 * dependency and non-dependency) is used as the reference point. Each stream is handed to
 * {@code gf.timeOffsetAndCompress} with a start time of {@code newStartTimeAsMilli} plus that stream's
 * original lead over the reference point, multiplied by {@code compressionRatio} and rounded to the
 * nearest millisecond. Empty streams get a lead of zero. If every stream is empty the reference point
 * falls back to {@code newStartTimeAsMilli}.
 * <p>
 * The first element of every non-empty original stream is read (peeked) by this method, and the original
 * iterators are wrapped rather than copied.
 *
 * @param originalWorkloadStreams streams to offset and compress
 * @param newStartTimeAsMilli     start time, in milliseconds, given to the earliest stream
 * @param compressionRatio        factor applied to the lead of each stream and passed on to
 *                                {@code gf.timeOffsetAndCompress}
 * @param gf                      generator factory used to wrap the streams
 * @return a new workload streams instance; operation types and child operation generators are taken over
 * unchanged from the original definitions
 * @throws WorkloadException declared for callers; propagated from the wrapped calls if they raise it
 */
public static WorkloadStreams timeOffsetAndCompressWorkloadStreams(
        WorkloadStreams originalWorkloadStreams,
        long newStartTimeAsMilli,
        double compressionRatio,
        GeneratorFactory gf ) throws WorkloadException
{
    WorkloadStreamDefinition asyncStream = originalWorkloadStreams.asynchronousStream();

    /*
     * Find earliest scheduled start time from across all streams.
     * Streams are peeked in a fixed order: async dependency, async non-dependency, then for every
     * blocking stream its dependency followed by its non-dependency operations.
     */
    long minScheduledStartTimeAsMilli = Long.MAX_VALUE;

    PeekingIterator<Operation> asyncDependencyOperations =
            Iterators.peekingIterator( asyncStream.dependencyOperations() );
    minScheduledStartTimeAsMilli =
            earlierOfMinAndFirstScheduledStartTime( asyncDependencyOperations, minScheduledStartTimeAsMilli );

    PeekingIterator<Operation> asyncNonDependencyOperations =
            Iterators.peekingIterator( asyncStream.nonDependencyOperations() );
    minScheduledStartTimeAsMilli =
            earlierOfMinAndFirstScheduledStartTime( asyncNonDependencyOperations, minScheduledStartTimeAsMilli );

    List<WorkloadStreamDefinition> blockingStreams = originalWorkloadStreams.blockingStreamDefinitions();
    List<PeekingIterator<Operation>> blockingDependencyOperations = new ArrayList<>( blockingStreams.size() );
    List<PeekingIterator<Operation>> blockingNonDependencyOperations = new ArrayList<>( blockingStreams.size() );
    for ( WorkloadStreamDefinition blockingStream : blockingStreams )
    {
        PeekingIterator<Operation> dependencyOperations =
                Iterators.peekingIterator( blockingStream.dependencyOperations() );
        minScheduledStartTimeAsMilli =
                earlierOfMinAndFirstScheduledStartTime( dependencyOperations, minScheduledStartTimeAsMilli );
        blockingDependencyOperations.add( dependencyOperations );

        PeekingIterator<Operation> nonDependencyOperations =
                Iterators.peekingIterator( blockingStream.nonDependencyOperations() );
        minScheduledStartTimeAsMilli =
                earlierOfMinAndFirstScheduledStartTime( nonDependencyOperations, minScheduledStartTimeAsMilli );
        blockingNonDependencyOperations.add( nonDependencyOperations );
    }

    // every stream was empty: there is no earliest operation, so use the requested start time as reference
    if ( Long.MAX_VALUE == minScheduledStartTimeAsMilli )
    {
        minScheduledStartTimeAsMilli = newStartTimeAsMilli;
    }

    /*
     * Find how far ahead of earliest scheduled start time each stream is when it starts.
     * All leads are computed before any stream is handed to the generator factory.
     */
    long asyncDependencyLeadAsMilli = compressedLeadOverMinimum(
            asyncDependencyOperations, minScheduledStartTimeAsMilli, compressionRatio );
    long asyncNonDependencyLeadAsMilli = compressedLeadOverMinimum(
            asyncNonDependencyOperations, minScheduledStartTimeAsMilli, compressionRatio );
    long[] blockingDependencyLeadsAsMilli = new long[blockingDependencyOperations.size()];
    for ( int i = 0; i < blockingDependencyLeadsAsMilli.length; i++ )
    {
        blockingDependencyLeadsAsMilli[i] = compressedLeadOverMinimum(
                blockingDependencyOperations.get( i ), minScheduledStartTimeAsMilli, compressionRatio );
    }
    long[] blockingNonDependencyLeadsAsMilli = new long[blockingNonDependencyOperations.size()];
    for ( int i = 0; i < blockingNonDependencyLeadsAsMilli.length; i++ )
    {
        blockingNonDependencyLeadsAsMilli[i] = compressedLeadOverMinimum(
                blockingNonDependencyOperations.get( i ), minScheduledStartTimeAsMilli, compressionRatio );
    }

    /*
     * copy unbounded streams to new workload streams instance, applying offset and time compression
     */
    WorkloadStreams timeOffsetAndCompressedWorkloadStreams = new WorkloadStreams();
    timeOffsetAndCompressedWorkloadStreams.setAsynchronousStream(
            asyncStream.dependentOperationTypes(),
            asyncStream.dependencyOperationTypes(),
            gf.timeOffsetAndCompress(
                    asyncDependencyOperations,
                    newStartTimeAsMilli + asyncDependencyLeadAsMilli,
                    compressionRatio
            ),
            gf.timeOffsetAndCompress(
                    asyncNonDependencyOperations,
                    newStartTimeAsMilli + asyncNonDependencyLeadAsMilli,
                    compressionRatio
            ),
            asyncStream.childOperationGenerator()
    );
    for ( int i = 0; i < blockingStreams.size(); i++ )
    {
        WorkloadStreamDefinition blockingStream = blockingStreams.get( i );
        timeOffsetAndCompressedWorkloadStreams.addBlockingStream(
                blockingStream.dependentOperationTypes(),
                blockingStream.dependencyOperationTypes(),
                gf.timeOffsetAndCompress(
                        blockingDependencyOperations.get( i ),
                        newStartTimeAsMilli + blockingDependencyLeadsAsMilli[i],
                        compressionRatio
                ),
                gf.timeOffsetAndCompress(
                        blockingNonDependencyOperations.get( i ),
                        newStartTimeAsMilli + blockingNonDependencyLeadsAsMilli[i],
                        compressionRatio
                ),
                blockingStream.childOperationGenerator()
        );
    }
    return timeOffsetAndCompressedWorkloadStreams;
}

// Returns the smaller of currentMinAsMilli and the scheduled start time of the stream's first operation;
// an empty stream leaves currentMinAsMilli unchanged. The first operation is peeked, not consumed.
private static long earlierOfMinAndFirstScheduledStartTime(
        PeekingIterator<Operation> stream,
        long currentMinAsMilli )
{
    if ( !stream.hasNext() )
    {
        return currentMinAsMilli;
    }
    return Math.min( currentMinAsMilli, stream.peek().scheduledStartTimeAsMilli() );
}

// Returns the compressed distance (rounded to whole milliseconds) between the stream's first scheduled
// start time and minScheduledStartTimeAsMilli, or 0 for an empty stream.
private static long compressedLeadOverMinimum(
        PeekingIterator<Operation> stream,
        long minScheduledStartTimeAsMilli,
        double compressionRatio )
{
    if ( !stream.hasNext() )
    {
        return 0L;
    }
    long firstAsMilli = stream.peek().scheduledStartTimeAsMilli();
    return Math.round( (firstAsMilli - minScheduledStartTimeAsMilli) * compressionRatio );
}
/**
 * Convenience overload that instantiates workloads through a {@link ClassNameWorkloadFactory} built from
 * {@code configuration.workloadClassName()} and otherwise delegates to the overload that takes a
 * {@link WorkloadFactory}.
 *
 * @return (workload_streams, workload, minimum_timestamp)
 */
public static Tuple3<WorkloadStreams,Workload,Long> createNewWorkloadWithOffsetAndLimitedWorkloadStreams(
        DriverConfiguration configuration,
        GeneratorFactory gf,
        boolean returnStreamsWithDbConnector,
        long offset,
        long limit,
        LoggingServiceFactory loggingServiceFactory ) throws WorkloadException, IOException
{
    // all arguments are forwarded untouched; only the factory is derived from the configuration
    return createNewWorkloadWithOffsetAndLimitedWorkloadStreams(
            new ClassNameWorkloadFactory( configuration.workloadClassName() ),
            configuration,
            gf,
            returnStreamsWithDbConnector,
            offset,
            limit,
            loggingServiceFactory
    );
}
/**
 * Creates a workload whose streams skip the first {@code offset} operations and are limited to the
 * following {@code limit} operations, counted across all streams.
 * <p>
 * Works in two passes. In the first pass a throw-away workload is created and its streams are scanned
 * with {@link #fromAmongAllRetrieveTopCountFromOffset} to find, per stream, how many operations to skip
 * and how many to keep. That workload is always closed, also when the scan fails. In the second pass a
 * fresh workload is created, each of its streams is advanced by its skip count and wrapped with its limit.
 * The second workload is returned open; closing it is left to the caller.
 *
 * @param workloadFactory              creates the two workload instances
 * @param configuration                passed to {@code Workload.init} of both instances
 * @param gf                           generator factory used to retrieve, consume and limit streams
 * @param returnStreamsWithDbConnector forwarded to {@code Workload.streams} for the returned streams
 *                                     (the scan pass always passes {@code false})
 * @param offset                       number of operations to skip
 * @param limit                        number of operations to retain after the offset
 * @param loggingServiceFactory        used by the scan for progress logging
 * @return (workload_streams, workload, minimum_timestamp)
 */
public static Tuple3<WorkloadStreams,Workload,Long> createNewWorkloadWithOffsetAndLimitedWorkloadStreams(
        WorkloadFactory workloadFactory,
        DriverConfiguration configuration,
        GeneratorFactory gf,
        boolean returnStreamsWithDbConnector,
        long offset,
        long limit,
        LoggingServiceFactory loggingServiceFactory ) throws WorkloadException, IOException
{
    // ================================
    // ====== Calculate Limits ========
    // ================================
    long[] startForStream;
    long[] limitForStream;
    long minimumTimeStamp;

    // get workload
    Workload workload = workloadFactory.createWorkload();
    workload.init( configuration );
    try
    {
        // retrieve unbounded streams
        boolean hasDbConnected = false;
        WorkloadStreams unlimitedWorkloadStreams = workload.streams( gf, hasDbConnected );
        WorkloadStreamDefinition asyncStream = unlimitedWorkloadStreams.asynchronousStream();

        // index layout shared with the second pass:
        // [0] async dependency, [1] async non-dependency, [2i+2] / [2i+3] blocking stream i
        List<Iterator<Operation>> streams = new ArrayList<>();
        List<ChildOperationGenerator> childOperationGenerators = new ArrayList<>();
        streams.add( asyncStream.dependencyOperations() );
        childOperationGenerators.add( asyncStream.childOperationGenerator() );
        streams.add( asyncStream.nonDependencyOperations() );
        childOperationGenerators.add( asyncStream.childOperationGenerator() );
        for ( WorkloadStreamDefinition stream : unlimitedWorkloadStreams.blockingStreamDefinitions() )
        {
            streams.add( stream.dependencyOperations() );
            childOperationGenerators.add( stream.childOperationGenerator() );
            streams.add( stream.nonDependencyOperations() );
            childOperationGenerators.add( stream.childOperationGenerator() );
        }

        // stream through streams once, to calculate how many operations are needed from each,
        // to get operation_count in total
        Tuple3<long[],long[],Long> limitsAndMinimumsForStream =
                WorkloadStreams.fromAmongAllRetrieveTopCountFromOffset(
                        streams,
                        offset,
                        limit,
                        childOperationGenerators,
                        loggingServiceFactory
                );
        startForStream = limitsAndMinimumsForStream._1();
        limitForStream = limitsAndMinimumsForStream._2();
        minimumTimeStamp = limitsAndMinimumsForStream._3();
    }
    finally
    {
        // the scan workload is only needed for counting; release it even if the scan failed
        workload.close();
    }

    // ================================
    // ====== Create Limited Streams ==
    // ================================
    WorkloadStreams workloadStreams = new WorkloadStreams();
    // reinitialize workload, so it can be streamed through from the beginning
    workload = workloadFactory.createWorkload();
    workload.init( configuration );
    // retrieve unbounded streams
    WorkloadStreams unlimitedWorkloadStreams = workload.streams( gf, returnStreamsWithDbConnector );
    WorkloadStreamDefinition unlimitedAsyncStream = unlimitedWorkloadStreams.asynchronousStream();
    List<WorkloadStreamDefinition> unlimitedBlockingStreams = unlimitedWorkloadStreams.blockingStreamDefinitions();

    // advance to offsets
    gf.consume( unlimitedAsyncStream.dependencyOperations(), startForStream[0] );
    gf.consume( unlimitedAsyncStream.nonDependencyOperations(), startForStream[1] );
    for ( int i = 0; i < unlimitedBlockingStreams.size(); i++ )
    {
        gf.consume( unlimitedBlockingStreams.get( i ).dependencyOperations(), startForStream[i * 2 + 2] );
        gf.consume( unlimitedBlockingStreams.get( i ).nonDependencyOperations(), startForStream[i * 2 + 3] );
    }

    // copy unbounded streams to new workload streams instance, from offsets, applying limits
    workloadStreams.setAsynchronousStream(
            unlimitedAsyncStream.dependentOperationTypes(),
            unlimitedAsyncStream.dependencyOperationTypes(),
            gf.limit( unlimitedAsyncStream.dependencyOperations(), limitForStream[0] ),
            gf.limit( unlimitedAsyncStream.nonDependencyOperations(), limitForStream[1] ),
            unlimitedAsyncStream.childOperationGenerator()
    );
    for ( int i = 0; i < unlimitedBlockingStreams.size(); i++ )
    {
        WorkloadStreamDefinition unlimitedBlockingStream = unlimitedBlockingStreams.get( i );
        workloadStreams.addBlockingStream(
                unlimitedBlockingStream.dependentOperationTypes(),
                unlimitedBlockingStream.dependencyOperationTypes(),
                gf.limit( unlimitedBlockingStream.dependencyOperations(), limitForStream[i * 2 + 2] ),
                gf.limit( unlimitedBlockingStream.nonDependencyOperations(), limitForStream[i * 2 + 3] ),
                unlimitedBlockingStream.childOperationGenerator()
        );
    }

    return Tuple.tuple3(
            workloadStreams,
            workload,
            minimumTimeStamp
    );
}
/**
 * Walks the given streams in merged time stamp order and works out, per stream, how many operations
 * belong to the first {@code offset} operations (to be skipped) and how many belong to the
 * {@code limit} operations that follow (to be retained).
 * <p>
 * In every step the stream whose head has the smallest {@code timeStamp()} is chosen (the lowest index
 * wins ties), its per-stream counter is incremented by one, and the running total is incremented by one
 * plus the number of child operations its {@link ChildOperationGenerator} (if not {@code null}) produces
 * for that operation. Child operations count towards {@code offset} / {@code limit} but not towards the
 * per-stream counters, so the running total may overshoot the requested value. A phase ends early when
 * all streams are exhausted. Stream heads read but not counted in the offset phase are carried over into
 * the run phase.
 * <p>
 * The given iterators are consumed by this method.
 *
 * @param streams                  operation streams to scan
 * @param offset                   number of operations to skip
 * @param limit                    number of operations to count after the offset
 * @param childOperationGenerators generator for each stream, by index; entries may be {@code null}
 * @param loggingServiceFactory    provides the logging service used for progress messages
 * @return (start_per_stream, end_per_stream, minimum_timestamp), where minimum_timestamp is the smallest
 * time stamp of any operation counted in the run phase, or {@code Long.MAX_VALUE} if none was counted
 * @throws WorkloadException if a stream head has a time stamp or a dependency time stamp of -1
 */
public static Tuple3<long[],long[],Long> fromAmongAllRetrieveTopCountFromOffset(
        List<Iterator<Operation>> streams,
        long offset,
        long limit,
        List<ChildOperationGenerator> childOperationGenerators,
        LoggingServiceFactory loggingServiceFactory ) throws WorkloadException
{
    LoggingService loggingService =
            loggingServiceFactory.loggingServiceFor( WorkloadStreams.class.getSimpleName() );
    final DecimalFormat numberFormat = new DecimalFormat( "###,###,###,###,###" );
    // a progress line is written each time the running total reaches the next multiple of this value
    final long progressLogInterval = 1000000;

    // last operation retrieved (which has not yet been counted) from each stream; entries start as null
    Operation[] streamHeads = new Operation[streams.size()];

    // ================================================
    // ===== advance to start point of each stream =====
    // ================================================

    // count of operations to skip in that particular stream; entries start as 0
    long[] kForStreamOffset = new long[streams.size()];
    long kSoFarOffset = 0;
    long nextOffsetProgressLogAt = progressLogInterval;
    while ( kSoFarOffset < offset )
    {
        int indexOfMin = indexOfEarliestStreamHead( streams, streamHeads );
        if ( -1 == indexOfMin )
        {
            // iterators are empty, nothing left to retrieve
            break;
        }
        kForStreamOffset[indexOfMin] = kForStreamOffset[indexOfMin] + 1;
        kSoFarOffset = kSoFarOffset + 1
                       + countChildOperations( childOperationGenerators.get( indexOfMin ), streamHeads[indexOfMin] );
        streamHeads[indexOfMin] = null;

        // threshold rather than "% interval == 0": child operations can make the total jump over a multiple
        if ( kSoFarOffset >= nextOffsetProgressLogAt )
        {
            loggingService.info(
                    format(
                            "Scanned %s of %s - OFFSET\r",
                            numberFormat.format( kSoFarOffset ),
                            numberFormat.format( offset )
                    )
            );
            nextOffsetProgressLogAt = (kSoFarOffset / progressLogInterval + 1) * progressLogInterval;
        }
    }
    loggingService.info(
            format(
                    "Scanned %s of %s - OFFSET",
                    numberFormat.format( kSoFarOffset ),
                    numberFormat.format( offset )
            )
    );

    // ================================================
    // ===== calculate end points for each stream =====
    // ================================================

    long minimumTimeStamp = Long.MAX_VALUE;
    // count of operations to retrieve from that particular stream; entries start as 0
    long[] kForStreamRun = new long[streams.size()];
    long kSoFarRun = 0;
    long nextRunProgressLogAt = progressLogInterval;
    while ( kSoFarRun < limit )
    {
        int indexOfMin = indexOfEarliestStreamHead( streams, streamHeads );
        if ( -1 == indexOfMin )
        {
            // iterators are empty, nothing left to retrieve
            break;
        }
        // the chosen head is the minimum over all current heads, so tracking it alone yields the
        // minimum time stamp seen across every head inspected in this phase
        long chosenTimeStampAsMilli = streamHeads[indexOfMin].timeStamp();
        if ( chosenTimeStampAsMilli < minimumTimeStamp )
        {
            minimumTimeStamp = chosenTimeStampAsMilli;
        }
        kForStreamRun[indexOfMin] = kForStreamRun[indexOfMin] + 1;
        kSoFarRun = kSoFarRun + 1
                    + countChildOperations( childOperationGenerators.get( indexOfMin ), streamHeads[indexOfMin] );
        streamHeads[indexOfMin] = null;

        if ( kSoFarRun >= nextRunProgressLogAt )
        {
            loggingService.info(
                    format( "Scanned %s of %s - RUN\r",
                            numberFormat.format( kSoFarRun ),
                            numberFormat.format( limit )
                    )
            );
            nextRunProgressLogAt = (kSoFarRun / progressLogInterval + 1) * progressLogInterval;
        }
    }
    loggingService.info(
            format(
                    "Scanned %s of %s - RUN",
                    numberFormat.format( kSoFarRun ),
                    numberFormat.format( limit )
            )
    );

    return Tuple.tuple3(
            kForStreamOffset,
            kForStreamRun,
            minimumTimeStamp
    );
}

// Fills every empty slot of streamHeads from its stream (if the stream has more elements), validates all
// heads, and returns the index of the head with the smallest time stamp, or -1 if there is no such head.
private static int indexOfEarliestStreamHead(
        List<Iterator<Operation>> streams,
        Operation[] streamHeads ) throws WorkloadException
{
    long minAsMilli = Long.MAX_VALUE;
    int indexOfMin = -1;
    for ( int i = 0; i < streams.size(); i++ )
    {
        if ( null == streamHeads[i] )
        {
            if ( !streams.get( i ).hasNext() )
            {
                // stream exhausted and no pending head: nothing to consider for this stream
                continue;
            }
            streamHeads[i] = streams.get( i ).next();
        }
        long streamHeadTimeStampAsMilli = streamHeads[i].timeStamp();
        if ( -1 == streamHeadTimeStampAsMilli )
        {
            throw new WorkloadException(
                    format( "Operation must have time stamp\n%s", streamHeads[i] ) );
        }
        if ( -1 == streamHeads[i].dependencyTimeStamp() )
        {
            throw new WorkloadException(
                    format( "Operation must have dependency time stamp\n%s", streamHeads[i] ) );
        }
        // strict comparison: on equal time stamps the stream with the lower index is kept
        if ( streamHeadTimeStampAsMilli < minAsMilli )
        {
            minAsMilli = streamHeadTimeStampAsMilli;
            indexOfMin = i;
        }
    }
    return indexOfMin;
}

// Counts the child operations the generator produces for the given parent operation by chaining
// nextOperation() calls until it returns null; returns 0 when there is no generator.
private static long countChildOperations(
        ChildOperationGenerator childOperationGenerator,
        Operation parentOperation ) throws WorkloadException
{
    if ( null == childOperationGenerator )
    {
        return 0;
    }
    // no operation is executed during the scan, so there is never a result to pass on
    final Object result = null;
    long childOperationCount = 0;
    Operation operation = parentOperation;
    double state = childOperationGenerator.initialState();
    while ( null != (operation = childOperationGenerator
            .nextOperation( state, operation, result, operation.scheduledStartTimeAsMilli(), 0L )) )
    {
        childOperationCount = childOperationCount + 1;
        state = childOperationGenerator.updateState( state, operation.type() );
    }
    return childOperationCount;
}
/**
 * Returns the asynchronous stream definition.
 * <p>
 * When no asynchronous stream has been set, a new placeholder definition is created on every call: it has
 * empty operation type sets, empty operation iterators and no child operation generator.
 *
 * @return the configured definition, or a fresh empty one; never {@code null}
 */
public WorkloadStreamDefinition asynchronousStream()
{
    if ( null == asynchronousStream )
    {
        Set<Class<? extends Operation>> noDependentOperationTypes = new HashSet<Class<? extends Operation>>();
        Set<Class<? extends Operation>> noDependencyOperationTypes = new HashSet<Class<? extends Operation>>();
        Iterator<Operation> noDependencyOperations = Collections.<Operation>emptyIterator();
        Iterator<Operation> noNonDependencyOperations = Collections.<Operation>emptyIterator();
        return new WorkloadStreamDefinition(
                noDependentOperationTypes,
                noDependencyOperationTypes,
                noDependencyOperations,
                noNonDependencyOperations,
                null
        );
    }
    return asynchronousStream;
}
/**
 * Replaces the asynchronous stream with a new definition built from the given parts.
 *
 * @param dependentOperationTypes  dependent operation types of the stream
 * @param dependencyOperationTypes dependency operation types of the stream
 * @param dependencyOperations     iterator over the stream's dependency operations
 * @param nonDependencyOperations  iterator over the stream's non-dependency operations
 * @param childOperationGenerator  child operation generator of the stream
 */
public void setAsynchronousStream( Set<Class<? extends Operation>> dependentOperationTypes,
        Set<Class<? extends Operation>> dependencyOperationTypes,
        Iterator<Operation> dependencyOperations,
        Iterator<Operation> nonDependencyOperations,
        ChildOperationGenerator childOperationGenerator )
{
    WorkloadStreamDefinition replacement = new WorkloadStreamDefinition(
            dependentOperationTypes,
            dependencyOperationTypes,
            dependencyOperations,
            nonDependencyOperations,
            childOperationGenerator
    );
    this.asynchronousStream = replacement;
}
/**
 * Returns the blocking stream definitions in the order they were added.
 * <p>
 * The returned list is the internal list itself, not a copy.
 *
 * @return the list of blocking stream definitions; never {@code null}
 */
public List<WorkloadStreamDefinition> blockingStreamDefinitions()
{
    return this.blockingStreams;
}
/**
 * Appends a new blocking stream definition built from the given parts.
 *
 * @param dependentOperationTypes  dependent operation types of the stream
 * @param dependencyOperationTypes dependency operation types of the stream
 * @param dependencyOperations     iterator over the stream's dependency operations
 * @param nonDependencyOperations  iterator over the stream's non-dependency operations
 * @param childOperationGenerator  child operation generator of the stream
 */
public void addBlockingStream( Set<Class<? extends Operation>> dependentOperationTypes,
        Set<Class<? extends Operation>> dependencyOperationTypes,
        Iterator<Operation> dependencyOperations,
        Iterator<Operation> nonDependencyOperations,
        ChildOperationGenerator childOperationGenerator )
{
    blockingStreams.add(
            new WorkloadStreamDefinition(
                    dependentOperationTypes,
                    dependencyOperationTypes,
                    dependencyOperations,
                    nonDependencyOperations,
                    childOperationGenerator
            )
    );
}
/**
 * Merges all operation streams of the given workload streams into a single iterator by handing them to
 * {@code gf.mergeSortOperationsByTimeStamp}. Child operation generators are not consulted.
 * <p>
 * Streams are passed in this order: for every blocking stream its dependency then its non-dependency
 * operations, followed by the asynchronous dependency and non-dependency operations.
 *
 * @param gf              generator factory performing the merge
 * @param workloadStreams streams to merge
 * @return the iterator produced by the generator factory
 */
public static Iterator<Operation> mergeSortedByStartTimeExcludingChildOperationGenerators( GeneratorFactory gf,
        WorkloadStreams workloadStreams )
{
    List<Iterator<Operation>> streamsToMerge = new ArrayList<>();
    // blocking streams first ...
    List<WorkloadStreamDefinition> blockingDefinitions = workloadStreams.blockingStreamDefinitions();
    for ( int i = 0; i < blockingDefinitions.size(); i++ )
    {
        WorkloadStreamDefinition blockingDefinition = blockingDefinitions.get( i );
        streamsToMerge.add( blockingDefinition.dependencyOperations() );
        streamsToMerge.add( blockingDefinition.nonDependencyOperations() );
    }
    // ... then the asynchronous stream
    streamsToMerge.add( workloadStreams.asynchronousStream().dependencyOperations() );
    streamsToMerge.add( workloadStreams.asynchronousStream().nonDependencyOperations() );
    // generic arrays cannot be created, hence the raw Iterator[]
    Iterator[] streamsAsArray = streamsToMerge.toArray( new Iterator[streamsToMerge.size()] );
    return gf.mergeSortOperationsByTimeStamp( streamsAsArray );
}
/**
 * Immutable holder for the parts of one workload stream: its dependent and dependency operation types,
 * its dependency and non-dependency operation iterators, and its child operation generator.
 * <p>
 * Any part may be passed as {@code null}. The type and iterator accessors substitute an empty set or an
 * empty iterator in that case, while the child operation generator is returned as is.
 */
public static class WorkloadStreamDefinition
{
    private final Set<Class<? extends Operation>> dependentOperationTypes;
    private final Set<Class<? extends Operation>> dependencyOperationTypes;
    private final Iterator<Operation> dependencyOperations;
    private final Iterator<Operation> nonDependencyOperations;
    private final ChildOperationGenerator childOperationGenerator;

    /**
     * Stores the given parts without copying or validating them.
     */
    public WorkloadStreamDefinition( Set<Class<? extends Operation>> dependentOperationTypes,
            Set<Class<? extends Operation>> dependencyOperationTypes,
            Iterator<Operation> dependencyOperations,
            Iterator<Operation> nonDependencyOperations,
            ChildOperationGenerator childOperationGenerator )
    {
        this.dependentOperationTypes = dependentOperationTypes;
        this.dependencyOperationTypes = dependencyOperationTypes;
        this.dependencyOperations = dependencyOperations;
        this.nonDependencyOperations = nonDependencyOperations;
        this.childOperationGenerator = childOperationGenerator;
    }

    /**
     * @return the dependency operations iterator, or an empty iterator if none was provided
     */
    public Iterator<Operation> dependencyOperations()
    {
        if ( null == dependencyOperations )
        {
            return Collections.<Operation>emptyIterator();
        }
        return dependencyOperations;
    }

    /**
     * @return the non-dependency operations iterator, or an empty iterator if none was provided
     */
    public Iterator<Operation> nonDependencyOperations()
    {
        if ( null == nonDependencyOperations )
        {
            return Collections.<Operation>emptyIterator();
        }
        return nonDependencyOperations;
    }

    /**
     * @return the dependent operation types, or a new empty set if none were provided
     */
    public Set<Class<? extends Operation>> dependentOperationTypes()
    {
        if ( null == dependentOperationTypes )
        {
            return new HashSet<Class<? extends Operation>>();
        }
        return dependentOperationTypes;
    }

    /**
     * @return the dependency operation types, or a new empty set if none were provided
     */
    public Set<Class<? extends Operation>> dependencyOperationTypes()
    {
        if ( null == dependencyOperationTypes )
        {
            return new HashSet<Class<? extends Operation>>();
        }
        return dependencyOperationTypes;
    }

    /**
     * @return the child operation generator exactly as provided; may be {@code null}
     */
    public ChildOperationGenerator childOperationGenerator()
    {
        return childOperationGenerator;
    }
}
}