package org.gbif.checklistbank.neo.traverse; import org.gbif.checklistbank.cli.model.UsageFacts; import org.gbif.checklistbank.neo.UsageDao; import com.google.common.base.Preconditions; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; import it.unimi.dsi.fastutil.longs.LongSet; import org.neo4j.graphdb.Node; import org.neo4j.graphdb.Path; import org.neo4j.graphdb.traversal.Evaluation; import org.neo4j.graphdb.traversal.Evaluator; /** * Marks appropriate points in the taxonomic tree where concurrent processing can start. * At present, families are marked. This could be improved based on the checklist, * or could take account of the current depth (path.length()), to avoid marking many * unplaced families. */ public class ChunkingEvaluator implements Evaluator { private UsageDao dao; private int chunkSize; private int minChunkSize; private LongSet chunkIds = new LongOpenHashSet(); public ChunkingEvaluator(UsageDao dao, int minChunkSize, int chunkSize) { Preconditions.checkArgument(minChunkSize < chunkSize, "Minimum chunk size needs to be smaller then the chunk size"); Preconditions.checkArgument(minChunkSize >= 0, "Minimum chunk size needs to be positive"); Preconditions.checkArgument(chunkSize > 0, "Chunk size needs to be at least 1"); this.chunkSize = chunkSize; this.dao = dao; this.minChunkSize = minChunkSize; } @Override public Evaluation evaluate(Path path) { Node n = path.endNode(); UsageFacts facts = dao.readFacts(n.getId()); int size = facts == null ? -1 : facts.metrics.getNumDescendants() + facts.metrics.getNumSynonyms(); if (size > minChunkSize && (size < chunkSize || size - facts.metrics.getNumChildren() < minChunkSize)) { chunkIds.add(n.getId()); return Evaluation.INCLUDE_AND_PRUNE; } else { return Evaluation.INCLUDE_AND_CONTINUE; } } public boolean isChunk(long nodeId) { return chunkIds.contains(nodeId); } }