package org.juxtasoftware.service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.json.simple.JSONObject;
import org.juxtasoftware.Constants;
import org.juxtasoftware.dao.AlignmentDao;
import org.juxtasoftware.dao.ComparisonSetDao;
import org.juxtasoftware.dao.JuxtaAnnotationDao;
import org.juxtasoftware.diff.Comparison;
import org.juxtasoftware.diff.DiffCollator;
import org.juxtasoftware.diff.DiffCollatorConfiguration;
import org.juxtasoftware.diff.Difference;
import org.juxtasoftware.diff.DifferenceStore;
import org.juxtasoftware.diff.Token;
import org.juxtasoftware.diff.TokenSource;
import org.juxtasoftware.diff.TranspositionSource;
import org.juxtasoftware.diff.impl.SimpleTokenComparator;
import org.juxtasoftware.model.Alignment;
import org.juxtasoftware.model.Alignment.AlignedAnnotation;
import org.juxtasoftware.model.AlignmentConstraint;
import org.juxtasoftware.model.CollatorConfig;
import org.juxtasoftware.model.CollatorConfig.HyphenationFilter;
import org.juxtasoftware.model.ComparisonSet;
import org.juxtasoftware.model.JuxtaAnnotation;
import org.juxtasoftware.model.QNameFilter;
import org.juxtasoftware.model.Witness;
import org.juxtasoftware.util.BackgroundTaskSegment;
import org.juxtasoftware.util.BackgroundTaskStatus;
import org.juxtasoftware.util.QNameFilters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.config.BeanDefinition;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Service;
import com.google.common.collect.Sets;
import eu.interedition.text.Annotation;
import eu.interedition.text.Name;
import eu.interedition.text.NameRepository;
import eu.interedition.text.Range;
import eu.interedition.text.rdbms.RelationalAnnotation;
@Service
@Scope(BeanDefinition.SCOPE_PROTOTYPE)
public class ComparisonSetCollator extends DiffCollator {
// Supplies the QName filters used for token lookup and for transposition lookup.
@Autowired private QNameFilters filters;
// Loads the witnesses of a comparison set and persists its status changes.
@Autowired private ComparisonSetDao setDao;
// Handed to the token source and used to create gap annotation records.
@Autowired private JuxtaAnnotationDao annotationDao;
// Lists existing transposition alignments and bulk-creates new alignments.
@Autowired private AlignmentDao alignmentDao;
// Resolves the add/delete, change and gap names used when storing differences.
@Autowired private NameRepository nameRepository;
// Number of buffered differences that triggers a flush of the in-memory store.
@Autowired private Integer collationBatchSize;
// Witnesses of the set currently being collated; assigned at the start of collate().
private List<Witness> witnessList;
// The set currently being collated; assigned at the start of collate().
private ComparisonSet comparisonSet;
// Logger obtained by the shared name in Constants.WS_LOGGER_NAME rather than by class.
private static final Logger LOG = LoggerFactory.getLogger( Constants.WS_LOGGER_NAME );
/**
 * Collates every unordered pair of witnesses in the given comparison set.
 * <p>
 * The set is flagged as COLLATING before any work starts. Each pair is passed to the
 * three-argument {@code collate} overload (not defined in this class) and the buffered
 * differences are flushed as soon as that pair is done. When all pairs succeed the set
 * is flagged as COLLATED. An OutOfMemoryError or any Exception raised while working
 * through the pairs is logged and the set is flagged as ERROR; it is not rethrown.
 *
 * @param comparisonSet the set whose witnesses are collated
 * @param config collation settings, exposed to the collator through an adapter
 * @param taskStatus receives progress notes and one increment per collated pair
 * @throws IOException declared on the signature; failures inside the pair loop are
 *         caught and logged instead of propagating
 */
public void collate(ComparisonSet comparisonSet, CollatorConfig config, BackgroundTaskStatus taskStatus) throws IOException {
    // keep hold of the key data used in the collation; the nested adapter and store read these fields
    this.comparisonSet = comparisonSet;
    this.witnessList = this.setDao.getWitnesses(comparisonSet);
    persistSetStatus(ComparisonSet.Status.COLLATING);

    // de-duplicated working copy of the witnesses, frozen into a list so pairs can be walked by index
    final List<Witness> pending = new ArrayList<Witness>(new HashSet<Witness>(this.witnessList));
    final int witnessCount = pending.size();
    final CollatorConfigAdapter adapter = new CollatorConfigAdapter(config);

    // n witnesses produce n*(n-1)/2 unordered pairs; that is the size of the progress segment
    final int pairCount = (witnessCount * (witnessCount - 1)) / 2;
    final BackgroundTaskSegment progress = taskStatus.add(1, new BackgroundTaskSegment(pairCount));
    taskStatus.setNote("Collating SET " + JSONObject.escape(comparisonSet.getName()));
    LOG.info("Collating " + comparisonSet);

    try {
        for (int baseIdx = 0; baseIdx < witnessCount; baseIdx++) {
            final Witness base = pending.get(baseIdx);
            // only pair the base with witnesses after it, so each pair is collated once
            for (int otherIdx = baseIdx + 1; otherIdx < witnessCount; otherIdx++) {
                final Witness witness = pending.get(otherIdx);
                taskStatus.setNote(base.getJsonName() + " vs. " + witness.getJsonName());
                LOG.info("Collating: " + base + " vs. " + witness);
                collate(adapter, base, witness);
                // flush whatever is still buffered for this pair
                adapter.getDifferenceStore().save();
                progress.incrementValue();
            }
        }
        persistSetStatus(ComparisonSet.Status.COLLATED);
    } catch ( OutOfMemoryError oom ) {
        LOG.error("Not enough memory to collate "+this.comparisonSet);
        persistSetStatus(ComparisonSet.Status.ERROR);
    } catch ( Exception e ) {
        LOG.error("Collation of "+this.comparisonSet+" FAILED",e);
        persistSetStatus(ComparisonSet.Status.ERROR);
    }
}

/**
 * Sets the given status on the set being collated and writes the set back through the DAO.
 */
private void persistSetStatus(ComparisonSet.Status status) throws IOException {
    this.comparisonSet.setStatus(status);
    this.setDao.update(this.comparisonSet);
}
/**
 * Presents a {@link CollatorConfig} to the diff collator and the tokenizer, and acts as
 * the transposition source for the comparison set currently being collated.
 * Differences produced through this adapter are collected by a {@link MemoryDiffStore}.
 */
private class CollatorConfigAdapter implements DiffCollatorConfiguration, TokenizerConfiguration, TranspositionSource {
    private final CollatorConfig config;
    private final Comparator<Token> tokenComparator;
    private final TokenSource tokenSource;
    private final MemoryDiffStore memAlignStore = new MemoryDiffStore();

    private CollatorConfigAdapter(CollatorConfig config) {
        this.config = config;
        this.tokenComparator = new SimpleTokenComparator();
        // the token source reads its tokenizer settings back from this adapter
        this.tokenSource = new RepositoryTokenSource(
            this,
            ComparisonSetCollator.this.comparisonSet.getId(),
            ComparisonSetCollator.this.annotationDao,
            ComparisonSetCollator.this.filters.getTokensFilter() );
    }

    @Override
    public TokenSource getTokenSource() {
        return this.tokenSource;
    }

    @Override
    public TranspositionSource getTranspositionSource() {
        // this adapter answers transposition queries itself
        return this;
    }

    @Override
    public Comparator<Token> getTokenComparator() {
        return this.tokenComparator;
    }

    @Override
    public DifferenceStore getDifferenceStore() {
        return this.memAlignStore;
    }

    @Override
    public boolean isFilterWhitespace() {
        return this.config.isFilterWhitespace();
    }

    @Override
    public boolean isFilterPunctuation() {
        return this.config.isFilterPunctuation();
    }

    @Override
    public boolean isFilterCase() {
        return this.config.isFilterCase();
    }

    @Override
    public HyphenationFilter getHyphenationFilter() {
        return this.config.getHyphenationFilter();
    }

    /**
     * Collects the transposition alignments that apply to the given comparison.
     * Alignments are listed for the base witness using the transpositions filter; one is
     * kept when it has an annotation on the comparison's witness that either overlaps one
     * of the comparison's witness ranges or, when the comparison has no witness ranges,
     * simply exists.
     *
     * @param collation the comparison; its base and witness are cast to {@link Witness}
     * @return one two-element annotation set per matching transposition
     */
    @Override
    public Set<Set<Annotation>> transpositionsIn(Comparison collation) throws IOException {
        // the comparands here are always instances of a juxta Witness
        final Witness baseWitness = (Witness) collation.getBase();
        final Witness otherWitness = (Witness) collation.getWitness();

        // query every transposition recorded for the base witness in this set
        final AlignmentConstraint query =
            new AlignmentConstraint(ComparisonSetCollator.this.comparisonSet, baseWitness.getId());
        query.setFilter( ComparisonSetCollator.this.filters.getTranspositionsFilter() );

        final Set<Set<Annotation>> found = Sets.newHashSet();
        for ( Alignment candidate : ComparisonSetCollator.this.alignmentDao.list(query) ) {
            if ( appliesTo(candidate, otherWitness, collation) ) {
                found.add( toAnnotations(candidate.getAnnotations()) );
            }
        }
        return found;
    }

    /**
     * Tells whether the alignment has an annotation on the given witness that falls
     * inside the compared ranges. With no compared ranges, any annotation on the witness
     * is enough.
     */
    private boolean appliesTo(Alignment alignment, Witness witness, Comparison collation) throws IOException {
        for ( AlignedAnnotation member : alignment.getAnnotations() ) {
            if ( member.getWitnessId().equals(witness.getId()) ) {
                if ( collation.getWitnessRanges().isEmpty() ) {
                    return true;
                }
                for ( Range compared : collation.getWitnessRanges() ) {
                    if ( compared.hasOverlapWith(member.getRange()) ) {
                        return true;
                    }
                }
            }
        }
        return false;
    }

    /**
     * Converts aligned annotations into relational annotations bound to the text of the
     * witness they belong to. Entries whose witness is not part of the current witness
     * list are skipped.
     *
     * @throws RuntimeException when the conversion does not produce exactly two annotations
     */
    private Set<Annotation> toAnnotations( List<AlignedAnnotation> data ) {
        final Set<Annotation> converted = new HashSet<Annotation>();
        for ( AlignedAnnotation aligned : data ) {
            final Witness owner = ownerOf(aligned);
            if ( owner != null ) {
                converted.add( new RelationalAnnotation(
                    owner.getText(), aligned.getQName(), aligned.getRange(), null, aligned.getId()) );
            }
        }
        if ( converted.size() != 2 ) {
            throw new RuntimeException("BAD THINGS");
        }
        return converted;
    }

    /**
     * Finds the first witness in the current list whose id matches the annotation's
     * witness id; yields null when there is none.
     */
    private Witness ownerOf( AlignedAnnotation aligned ) {
        for ( Witness candidate : ComparisonSetCollator.this.witnessList ) {
            if ( candidate.getId().equals(aligned.getWitnessId()) ) {
                return candidate;
            }
        }
        return null;
    }
}
/**
* An alignment store that caches in-progress collation alignemts in
* memory. Once a threshold of alignments have been collected, they are
* dumped in bulk to the db.
*
* @author loufoster
*
*/
private final class MemoryDiffStore implements DifferenceStore {
protected List<Difference> differences = new LinkedList<Difference>();
protected Name addDelName;
protected Name changeName;
protected Name gapName;
public MemoryDiffStore() {
this.addDelName = nameRepository.get(Constants.ADD_DEL_NAME);
this.changeName = nameRepository.get(Constants.CHANGE_NAME);
this.gapName = nameRepository.get(Constants.GAP_NAME);
}
@Override
public void add(Difference aignment) throws IOException {
this.differences.add(aignment);
if ( differences.size() >= collationBatchSize ) {
save();
}
}
private Witness findWitness(Annotation a) {
for ( Witness w : ComparisonSetCollator.this.witnessList) {
if ( w.getText().equals(a.getText()) ) {
return w;
}
}
LOG.error("No witness found for annotaion "+a.toString());
return null;
}
private JuxtaAnnotation toJxGapAnno( Annotation a) {
Witness w= findWitness(a);
return new JuxtaAnnotation(comparisonSet.getId(), w, this.gapName, a.getRange());
}
@Override
public void save() throws IOException {
LOG.info("Writing " + this.differences.size() +" differences");
Map<Range, JuxtaAnnotation> baseGaps = new HashMap<Range, JuxtaAnnotation>();
Map<Range, JuxtaAnnotation> witGaps = new HashMap<Range, JuxtaAnnotation>();
List<Alignment> alignments = new ArrayList<Alignment>(this.differences.size());
while ( this.differences.size() > 0 ) {
Difference diff = this.differences.remove(0);
// grab base anno and convert it into a jx anno. If the anno is a gap
// ge sure a record for it is created once
final Annotation base = diff.getBase();
JuxtaAnnotation jxBase = null;
if (base.getName().equals(GAP_NAME)) {
if ( baseGaps.containsKey( base.getRange()) == false) {
jxBase = toJxGapAnno(base);
Long id = annotationDao.create(jxBase);
jxBase.setId( id );
baseGaps.put(base.getRange(), jxBase);
} else {
jxBase = baseGaps.get(base.getRange());
}
} else {
jxBase = (JuxtaAnnotation)base;
}
// grab witness anno and convert it into a jx anno. If the anno is a gap
// ge sure a record for it is created once
final Annotation wit = diff.getWitness();
JuxtaAnnotation jxWit = null;
if (wit.getName().equals(GAP_NAME)) {
if ( witGaps.containsKey( wit.getRange()) == false) {
jxWit = toJxGapAnno(wit);
Long id = annotationDao.create(jxWit);
jxWit.setId( id );
witGaps.put(wit.getRange(), jxWit);
} else {
jxWit = witGaps.get(wit.getRange());
}
} else {
jxWit = (JuxtaAnnotation)wit;
}
// create an aligment with the converted/created annotations
Name name = this.changeName;
if ( diff.getType().equals(Difference.Type.ADD_DEL)) {
name = this.addDelName;
}
Alignment align = new Alignment(comparisonSet.getId(), diff.getGroup(), name,
jxBase, jxWit, diff.getEditDistance());
alignments.add( align );
}
// create the batch of alignments
int created =0;
try {
created = ComparisonSetCollator.this.alignmentDao.create(alignments);
} catch (Exception e) {
LOG.error("Error creating alignments", e);
}
if ( created != alignments.size() ) {
LOG.error("Unable to create entries for all alignments. Expected count: "
+alignments.size()+", Actual: "+created);
}
// wipe out the cached data to be ready for the next round
alignments.clear();
alignments = null;
this.differences.clear();
}
}
}