/*
* Copyright (C) 2013-2015, VistaTEC or third-party contributors as indicated
* by the @author tags or express copyright attribution statements applied by
* the authors. All third-party contributions are distributed under license by
* VistaTEC.
*
* This file is part of Ocelot.
*
* Ocelot is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Ocelot is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, write to:
*
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301
* USA
*
* Also, see the full LGPL text here: <http://www.gnu.org/copyleft/lesser.html>
*/
package com.vistatec.ocelot.xliff.okapi;
import com.vistatec.ocelot.its.model.LanguageQualityIssue;
import com.vistatec.ocelot.its.model.Provenance;
import com.vistatec.ocelot.its.model.okapi.OkapiProvenance;
import com.vistatec.ocelot.segment.model.OcelotSegment;
import com.vistatec.ocelot.segment.model.SegmentVariant;
import com.vistatec.ocelot.xliff.XLIFFWriter;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Namespaces;
import net.sf.okapi.common.annotation.AltTranslation;
import net.sf.okapi.common.annotation.AltTranslationsAnnotation;
import net.sf.okapi.common.annotation.GenericAnnotation;
import net.sf.okapi.common.annotation.GenericAnnotationType;
import net.sf.okapi.common.annotation.ITSLQIAnnotations;
import net.sf.okapi.common.annotation.ITSProvenanceAnnotations;
import net.sf.okapi.common.annotation.XLIFFTool;
import net.sf.okapi.common.encoder.EncoderManager;
import net.sf.okapi.common.filters.IFilter;
import net.sf.okapi.common.query.MatchType;
import net.sf.okapi.common.resource.DocumentPart;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.Property;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.common.skeleton.GenericSkeleton;
import net.sf.okapi.common.skeleton.ISkeletonWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.vistatec.ocelot.config.UserProvenance;
import com.vistatec.ocelot.events.ProvenanceAddEvent;
import com.vistatec.ocelot.events.api.OcelotEventQueue;
import com.vistatec.ocelot.segment.model.okapi.Note;
import com.vistatec.ocelot.segment.model.okapi.OkapiSegment;
import com.vistatec.ocelot.segment.model.okapi.TextContainerVariant;
/**
 * Write out XLIFF files using Okapi's skeleton writer.
 * Handles synchronization between workbench Segments and the Okapi Event list
 * retrieved from the {@link OkapiXLIFF12Parser}.
 *
 * <p>Segment edits are pushed into the parser's Okapi events through
 * {@link #updateSegment(OcelotSegment)} and {@link #updateNotes(OcelotSegment)};
 * {@link #save(File)} then serializes the full event list as UTF-8.
 */
public class OkapiXLIFF12Writer implements XLIFFWriter {
    private static final Logger LOG = LoggerFactory.getLogger(OkapiXLIFF12Writer.class);

    // HACK fix for OC-21. As of M23, the XLIFF Filter doesn't properly manage
    // ITS namespace insertion for all cases, so we insert it into the <xliff>
    // element if one isn't already present.
    private static final Pattern XLIFF_ELEMENT_PATTERN = Pattern.compile("(.*<xliff)([^>]*)(>.*)");
    // The URI is quoted so that its '.' characters are matched literally.
    private static final Pattern ITS_NAMESPACE_PATTERN =
            Pattern.compile("xmlns(:[^=]+)?=\"" + Pattern.quote(Namespaces.ITS_NS_URI) + "\"");

    private final OkapiXLIFF12Parser parser;
    private final UserProvenance userProvenance;
    private final OcelotEventQueue eventQueue;

    // Set once the <xliff> element has been seen in a document part, so later
    // document parts are passed through without being scanned again.
    private boolean foundXliffElement = false;

    /**
     * @param xliffParser parser holding the Okapi events, filter and languages
     *                    of the document being edited
     * @param userProvenance provenance values of the current user, added to
     *                       segments that do not carry them yet
     * @param eventQueue queue on which {@link ProvenanceAddEvent}s are posted
     */
    public OkapiXLIFF12Writer(OkapiXLIFF12Parser xliffParser,
            UserProvenance userProvenance, OcelotEventQueue eventQueue) {
        this.parser = xliffParser;
        this.userProvenance = userProvenance;
        this.eventQueue = eventQueue;
    }

    /**
     * @return the parser this writer reads events from
     */
    public OkapiXLIFF12Parser getParser() {
        return this.parser;
    }

    /**
     * Synchronize the Okapi text unit backing the segment with the segment's
     * current LQI, provenance and target. Failures to locate a text unit are
     * logged and the call returns without changes.
     *
     * @param seg segment to synchronize; must be an {@link OkapiSegment}
     */
    @Override
    public void updateSegment(OcelotSegment seg) {
        OkapiSegment okapiSeg = (OkapiSegment) seg;
        ITextUnit textUnit = findTextUnit(okapiSeg);
        if (textUnit == null) {
            return;
        }

        // LQI and provenance stand-off markup share the same reference id.
        String rwRef = "RW" + okapiSeg.getSegmentNumber();
        updateITSLQIAnnotations(textUnit, okapiSeg, rwRef);

        ITSProvenanceAnnotations provAnns = addOcelotProvenance(okapiSeg);
        if (provAnns.getAllAnnotations().size() > 0) {
            textUnit.setProperty(new Property(Property.ITS_PROV,
                    " its:provenanceRecordsRef=\"#" + rwRef + "\""));
            provAnns.setData(rwRef);
            textUnit.setAnnotation(provAnns);
        }

        if (okapiSeg.hasOriginalTarget()) {
            // Make sure the Okapi Event is aware that the target has changed.
            textUnit.setTarget(LocaleId.fromString(parser.getTargetLang()),
                    unwrap(okapiSeg.getTarget()));
            updateOriginalTarget(okapiSeg);
        }
    }

    /**
     * Look up the Okapi text unit associated with a segment.
     *
     * @param okapiSeg segment whose event number is used for the lookup
     * @return the text unit, or {@code null} (after logging an error) when no
     *         event exists for the segment or the event is not a text unit
     */
    private ITextUnit findTextUnit(OkapiSegment okapiSeg) {
        Event event = getParser().getSegmentEvent(okapiSeg.eventNum);
        if (event == null) {
            LOG.error("Failed to find Okapi Event associated with segment #{}",
                    okapiSeg.getSegmentNumber());
            return null;
        }
        if (!event.isTextUnit()) {
            LOG.error("Event associated with Segment was not an Okapi TextUnit!");
            LOG.error("Failed to update event for segment #{}", okapiSeg.getSegmentNumber());
            return null;
        }
        return event.getTextUnit();
    }

    /**
     * Build the provenance annotations for a segment from its provenance
     * records. If none of the records matches the current user's reviewer
     * person, reviewer organization and provenance reference, and the user
     * provenance is not empty, a record for the user is appended and a
     * {@link ProvenanceAddEvent} is posted on the event queue.
     *
     * @param seg segment whose provenance is converted
     * @return the annotations for all records, including any added user record
     */
    ITSProvenanceAnnotations addOcelotProvenance(OcelotSegment seg) {
        ITSProvenanceAnnotations provAnns = new ITSProvenanceAnnotations();
        for (Provenance prov : seg.getProvenance()) {
            String revPerson = prov.getRevPerson();
            String revOrg = prov.getRevOrg();
            String provRef = prov.getProvRef();
            provAnns.add(new GenericAnnotation(GenericAnnotationType.PROV,
                    GenericAnnotationType.PROV_PERSON, prov.getPerson(),
                    GenericAnnotationType.PROV_ORG, prov.getOrg(),
                    GenericAnnotationType.PROV_TOOL, prov.getTool(),
                    GenericAnnotationType.PROV_REVPERSON, revPerson,
                    GenericAnnotationType.PROV_REVORG, revOrg,
                    GenericAnnotationType.PROV_REVTOOL, prov.getRevTool(),
                    GenericAnnotationType.PROV_PROVREF, provRef));

            // Check for existing Ocelot annotation.
            boolean matchesUser = Objects.equals(revPerson, userProvenance.getRevPerson())
                    && Objects.equals(revOrg, userProvenance.getRevOrg())
                    && Objects.equals(provRef, userProvenance.getProvRef());
            if (matchesUser) {
                seg.setOcelotProvenance(true);
            }
        }

        if (!seg.hasOcelotProvenance() && !userProvenance.isEmpty()) {
            GenericAnnotation provGA = new GenericAnnotation(GenericAnnotationType.PROV,
                    GenericAnnotationType.PROV_REVPERSON, userProvenance.getRevPerson(),
                    GenericAnnotationType.PROV_REVORG, userProvenance.getRevOrg(),
                    GenericAnnotationType.PROV_PROVREF, userProvenance.getProvRef());
            provAnns.add(provGA);
            Provenance ocelotProv = new OkapiProvenance(provGA);
            eventQueue.post(new ProvenanceAddEvent(ocelotProv, seg, true));
        }
        return provAnns;
    }

    /**
     * Replace the text unit's LQI annotations with the segment's current LQI
     * list, then clear the LQI property/annotation on the source and target
     * containers.
     *
     * @param tu text unit to update
     * @param seg segment supplying the LQI records
     * @param rwRef reference id used in the {@code its:locQualityIssuesRef}
     *              attribute and as the annotation data
     */
    void updateITSLQIAnnotations(ITextUnit tu, OcelotSegment seg, String rwRef) {
        ITSLQIAnnotations lqiAnns = new ITSLQIAnnotations();
        for (LanguageQualityIssue lqi : seg.getLQI()) {
            lqiAnns.add(new GenericAnnotation(GenericAnnotationType.LQI,
                    GenericAnnotationType.LQI_TYPE, lqi.getType(),
                    GenericAnnotationType.LQI_COMMENT, lqi.getComment(),
                    GenericAnnotationType.LQI_SEVERITY, lqi.getSeverity(),
                    GenericAnnotationType.LQI_ENABLED, lqi.isEnabled()));
        }

        if (lqiAnns.size() > 0) {
            tu.setProperty(new Property(Property.ITS_LQI,
                    " its:locQualityIssuesRef=\"#" + rwRef + "\""));
            tu.setAnnotation(lqiAnns);
        } else {
            // No issues left: blank out the reference attribute.
            tu.setProperty(new Property(Property.ITS_LQI, ""));
            tu.setAnnotation(null);
        }
        lqiAnns.setData(rwRef);

        removeITSLQITextUnitSourceAnnotations(tu, seg);
        removeITSLQITextUnitTargetAnnotations(tu, seg);
    }

    /**
     * @param v variant expected to be a {@link TextContainerVariant}
     * @return the Okapi text container wrapped by the variant
     */
    private TextContainer unwrap(SegmentVariant v) {
        return ((TextContainerVariant) v).getTextContainer();
    }

    /**
     * Blank the LQI property and annotation on the segment's source container
     * and set that container as the text unit's source.
     *
     * @param tu text unit receiving the source
     * @param seg segment supplying the source container
     */
    void removeITSLQITextUnitSourceAnnotations(ITextUnit tu, OcelotSegment seg) {
        TextContainer tc = unwrap(seg.getSource());
        tc.setProperty(new Property(Property.ITS_LQI, ""));
        tc.setAnnotation(null);
        tu.setSource(tc);
    }

    /**
     * Blank the LQI property and annotation on the text unit's single target.
     * When the text unit has no target yet, the segment's target is set for
     * the parser's target language instead. More than one target locale is
     * not supported and only logs a warning.
     *
     * @param tu text unit to update
     * @param seg segment supplying the target when the text unit has none
     */
    void removeITSLQITextUnitTargetAnnotations(ITextUnit tu, OcelotSegment seg) {
        Set<LocaleId> targetLocales = tu.getTargetLocales();
        if (targetLocales.isEmpty()) {
            tu.setTarget(LocaleId.fromString(parser.getTargetLang()),
                    unwrap(seg.getTarget()));
        } else if (targetLocales.size() == 1) {
            LocaleId tgt = targetLocales.iterator().next();
            TextContainer tgtTC = tu.getTarget(tgt);
            tgtTC.setProperty(new Property(Property.ITS_LQI, ""));
            tgtTC.setAnnotation(null);
            tu.setTarget(tgt, tgtTC);
        } else {
            LOG.warn("Only 1 target locale in text-unit is currently supported");
        }
    }

    /**
     * Add an alt-trans containing the original target, unless the parser
     * already reports an original target for the segment's target container.
     *
     * @param seg - Segment edited
     */
    public void updateOriginalTarget(OcelotSegment seg) {
        TextContainer segTarget = unwrap(seg.getTarget());
        TextContainer segSource = unwrap(seg.getSource());
        TextContainer segOriTarget = unwrap(seg.getOriginalTarget());

        if (getParser().retrieveOriginalTarget(segTarget) != null) {
            return;
        }

        AltTranslation rwbAltTrans = new AltTranslation(
                LocaleId.fromString(parser.getSourceLang()),
                LocaleId.fromString(parser.getTargetLang()), null,
                segSource.getUnSegmentedContentCopy(),
                segOriTarget.getUnSegmentedContentCopy(),
                MatchType.EXACT, 100, "Ocelot");
        rwbAltTrans.setTool(new XLIFFTool("Ocelot", "Ocelot"));

        // Reuse the target's existing alt-trans annotation when there is one.
        AltTranslationsAnnotation altTrans = segTarget.getAnnotation(AltTranslationsAnnotation.class);
        if (altTrans == null) {
            altTrans = new AltTranslationsAnnotation();
        }
        altTrans.add(rwbAltTrans);
        segTarget.setAnnotation(altTrans);
    }

    /**
     * Serialize the parser's events to the given file as UTF-8.
     *
     * @param source file to write; addressed by its absolute path
     * @throws IOException if the file cannot be opened or written
     */
    @Override
    public void save(File source) throws UnsupportedEncodingException, FileNotFoundException, IOException {
        saveEvents(parser.getFilter(), parser.getSegmentEvents(),
                source.getAbsolutePath(), LocaleId.fromString(parser.getTargetLang()));
    }

    /**
     * Write the segment's Ocelot note into the NOTE property of its text unit,
     * or remove that property when the segment has no Ocelot note. Failures to
     * locate a text unit are logged and the call returns without changes.
     *
     * @param seg segment whose note changed; must be an {@link OkapiSegment}
     */
    @Override
    public void updateNotes(OcelotSegment seg) {
        OkapiSegment okapiSeg = (OkapiSegment) seg;
        ITextUnit textUnit = findTextUnit(okapiSeg);
        if (textUnit == null) {
            return;
        }

        Note note = seg.getNotes().getOcelotNote();
        if (note == null) {
            // The note has been removed, so we should clear the content
            textUnit.removeProperty(Property.NOTE);
            return;
        }

        String noteText = note.getContent();
        Property prop = textUnit.getProperty(Property.NOTE);
        if (prop == null) {
            textUnit.setProperty(new Property(Property.NOTE, noteText));
        } else {
            prop.setValue(noteText);
        }
        LOG.info("Updated note for {} to '{}'", seg.getTuId(), noteText);
    }

    /**
     * Run every event through the filter's skeleton writer and write the
     * concatenated result to {@code output} as UTF-8. The document is built
     * fully in memory first, so the output file is only opened once all events
     * have been processed.
     *
     * @param filter filter supplying the skeleton writer and encoder manager
     * @param events events to serialize, in document order
     * @param output path of the file to write
     * @param locId locale passed to the skeleton writer at document start
     * @throws IOException if the file cannot be opened or written
     */
    private void saveEvents(IFilter filter, List<Event> events, String output, LocaleId locId)
            throws UnsupportedEncodingException, FileNotFoundException, IOException {
        StringBuilder tmp = new StringBuilder();
        ISkeletonWriter skelWriter = filter.createSkeletonWriter();
        EncoderManager encoderManager = filter.getEncoderManager();
        try {
            for (Event event : events) {
                switch (event.getEventType()) {
                case START_DOCUMENT:
                    tmp.append(skelWriter.processStartDocument(locId, "UTF-8", null, encoderManager,
                            event.getStartDocument()));
                    break;
                case END_DOCUMENT:
                    tmp.append(skelWriter.processEndDocument(event.getEnding()));
                    break;
                case START_SUBDOCUMENT:
                    tmp.append(skelWriter.processStartSubDocument(event.getStartSubDocument()));
                    break;
                case END_SUBDOCUMENT:
                    tmp.append(skelWriter.processEndSubDocument(event.getEnding()));
                    break;
                case TEXT_UNIT:
                    tmp.append(skelWriter.processTextUnit(event.getTextUnit()));
                    break;
                case DOCUMENT_PART:
                    tmp.append(skelWriter.processDocumentPart(
                            preprocessDocumentPart(event.getDocumentPart())));
                    break;
                case START_GROUP:
                    tmp.append(skelWriter.processStartGroup(event.getStartGroup()));
                    break;
                case END_GROUP:
                    tmp.append(skelWriter.processEndGroup(event.getEnding()));
                    break;
                case START_SUBFILTER:
                    tmp.append(skelWriter.processStartSubfilter(event.getStartSubfilter()));
                    break;
                case END_SUBFILTER:
                    tmp.append(skelWriter.processEndSubfilter(event.getEndSubfilter()));
                    break;
                default:
                    break;
                }
            }
        } finally {
            // Release the skeleton writer even when an event fails to process.
            skelWriter.close();
        }

        // try-with-resources flushes and closes the file even if write() throws.
        try (Writer outputFile = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(output), "UTF-8"))) {
            outputFile.write(tmp.toString());
        }
    }

    /**
     * Ensure the {@code <xliff>} element carries an ITS namespace declaration.
     * The first document part whose skeleton contains {@code <xliff} is
     * inspected; if its attributes do not declare the ITS namespace, the
     * declaration is inserted after the existing attributes. All other
     * skeleton text is left untouched.
     *
     * @param dp document part about to be written
     * @return the same document part, possibly with a replaced skeleton
     */
    private DocumentPart preprocessDocumentPart(DocumentPart dp) {
        if (foundXliffElement || dp.getSkeleton() == null) {
            return dp;
        }
        String origSkel = dp.getSkeleton().toString();
        Matcher m = XLIFF_ELEMENT_PATTERN.matcher(origSkel);
        if (!m.find()) {
            return dp;
        }
        foundXliffElement = true;

        // If we found the namespace, we don't need to change anything
        if (ITS_NAMESPACE_PATTERN.matcher(m.group(2)).find()) {
            return dp;
        }

        // '.' in the element pattern does not cross line terminators, so the
        // match may cover only part of the skeleton. Insert into the original
        // text rather than rebuilding it from the groups, which would drop
        // everything outside the match.
        String nsDeclaration = " xmlns:" + Namespaces.ITS_NS_PREFIX
                + "=\"" + Namespaces.ITS_NS_URI + "\" ";
        StringBuilder sb = new StringBuilder(origSkel);
        sb.insert(m.end(2), nsDeclaration);
        dp.setSkeleton(new GenericSkeleton(sb.toString()));
        return dp;
    }
}