package org.juxtasoftware.service.importer.jxt;
import static org.juxtasoftware.service.importer.jxt.Util.isContainedIn;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import javax.xml.stream.XMLStreamException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.json.simple.JSONObject;
import org.juxtasoftware.Constants;
import org.juxtasoftware.dao.AlignmentDao;
import org.juxtasoftware.dao.CacheDao;
import org.juxtasoftware.dao.ComparisonSetDao;
import org.juxtasoftware.dao.JuxtaAnnotationDao;
import org.juxtasoftware.dao.JuxtaXsltDao;
import org.juxtasoftware.dao.SourceDao;
import org.juxtasoftware.dao.WitnessDao;
import org.juxtasoftware.dao.WorkspaceDao;
import org.juxtasoftware.model.Alignment;
import org.juxtasoftware.model.Alignment.AlignedAnnotation;
import org.juxtasoftware.model.CollatorConfig;
import org.juxtasoftware.model.ComparisonSet;
import org.juxtasoftware.model.JuxtaAnnotation;
import org.juxtasoftware.model.JuxtaXslt;
import org.juxtasoftware.model.Source;
import org.juxtasoftware.model.Source.Type;
import org.juxtasoftware.model.Witness;
import org.juxtasoftware.model.Workspace;
import org.juxtasoftware.service.ComparisonSetCollator;
import org.juxtasoftware.service.SourceTransformer;
import org.juxtasoftware.service.Tokenizer;
import org.juxtasoftware.service.importer.ImportService;
import org.juxtasoftware.service.importer.JuxtaXsltFactory;
import org.juxtasoftware.service.importer.XmlTemplateParser;
import org.juxtasoftware.service.importer.XmlTemplateParser.TemplateInfo;
import org.juxtasoftware.service.importer.jxt.JxtRevisionExtractor.RevisionOccurrence;
import org.juxtasoftware.service.importer.jxt.ManifestParser.SourceInfo;
import org.juxtasoftware.service.importer.jxt.MovesParser.JxtMoveInfo;
import org.juxtasoftware.util.BackgroundTaskSegment;
import org.juxtasoftware.util.BackgroundTaskStatus;
import org.juxtasoftware.util.NamespaceExtractor;
import org.juxtasoftware.util.NamespaceExtractor.NamespaceInfo;
import org.juxtasoftware.util.NamespaceExtractor.XmlType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.config.BeanDefinition;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Service;
import org.xml.sax.SAXException;
import com.google.common.base.Preconditions;
import com.google.common.io.ByteStreams;
import com.google.common.io.Closeables;
import com.google.common.io.Files;
import eu.interedition.text.Name;
import eu.interedition.text.NameRepository;
/**
* Service used to import JXT files from the desktop juxta. It supports only the
* latest version of juxta desktop (1.6.5). There is no support for target xpath
* in the parse templates. For imports of documents using the juxta-document template,
* the biblio data is stripped out to help compensate for this.
*
* @author loufoster
*
*/
@Service
@Scope(BeanDefinition.SCOPE_PROTOTYPE)
public class JxtImportServiceImpl implements ImportService<InputStream> {
// Parsers for the three descriptor files expected inside an unzipped JXT
// archive: manifest.xml, moves.xml and templates.xml.
@Autowired private ManifestParser manifestParser;
@Autowired private MovesParser movesParser;
@Autowired private XmlTemplateParser templateParser;
// Builds a JuxtaXslt from parsed template info for XML sources.
@Autowired private JuxtaXsltFactory xsltFactory;
// Persistence for sources and their XSLTs.
@Autowired private SourceDao sourceDao;
@Autowired private JuxtaXsltDao xsltDao;
// Used to look up the transposition annotation name.
@Autowired private NameRepository nameRepo;
// Cached data for the set is cleared through this DAO before an overwrite.
@Autowired private CacheDao cacheDao;
// Turns a source (optionally with an XSLT) into a witness.
@Autowired private SourceTransformer transformer;
@Autowired private ComparisonSetDao setDao;
@Autowired private WitnessDao witnessDao;
// Tokenization and collation are the last two steps of the import.
@Autowired private Tokenizer tokenizer;
@Autowired private ComparisonSetCollator collator;
// Persistence for the annotations and alignments created for imported moves.
@Autowired private JuxtaAnnotationDao annotationDao;
@Autowired private AlignmentDao alignmentDao;
@Autowired private WorkspaceDao workspaceDao;
// Per-import state; all four are assigned at the start of doImport.
// ws is the workspace looked up from the set's workspace id.
private Workspace ws;
private ComparisonSet set;
private BackgroundTaskStatus taskStatus;
// Progress segment that is incremented once per import step.
private BackgroundTaskSegment taskSegment;
// Logger registered under the name held in Constants.WS_LOGGER_NAME.
protected static final Logger LOG = LoggerFactory.getLogger( Constants.WS_LOGGER_NAME );
/**
 * Import a JXT file into the given comparison set. The archive is inflated
 * into a temporary directory, its manifest, moves and templates are parsed,
 * the set is (re)populated with the resulting witnesses, transpositions are
 * added and finally the set is tokenized and collated. The temporary
 * directory is removed when the import ends, whether it succeeded or failed.
 * <p>
 * The original note on this method states that JXT files from juxta versions
 * older than 1.6 cause it to throw.
 *
 * @param set comparison set that receives the imported data
 * @param jxtIs stream over the zipped JXT content; it is read but not closed here
 * @param status tracker that receives progress notes and a progress segment
 * @throws Exception if any step of the import fails
 */
@Override
public void doImport(final ComparisonSet set,final InputStream jxtIs, BackgroundTaskStatus status) throws Exception {
    final File sessionDataDir = Files.createTempDir();
    try {
        // stuff key data into class members. This happens inside the try so
        // the temp directory is removed even if the workspace lookup fails
        this.set = set;
        this.taskStatus = status;
        this.ws = this.workspaceDao.find(this.set.getWorkspaceId());

        // segment the status tracker to match the steps in the import process:
        // unzip, parse manifest, parse moves, parse templates, populate set,
        // add transpositions, tokenize, collate
        final int importSteps = 8;
        this.taskSegment = this.taskStatus.add(1, new BackgroundTaskSegment( importSteps ));

        // unzip the jxt file into a temp directory
        this.taskStatus.setNote("Inflating JXT data");
        unzip(sessionDataDir, jxtIs);
        this.taskSegment.incrementValue();

        // parse the manifest found in the unzipped
        // directory into a list of source data
        LOG.info("Parsing manifest");
        List<SourceInfo> sources = parseManifest(sessionDataDir);

        // parse out any transpositions
        LOG.info("Parsing moves");
        List<JxtMoveInfo> moves = parseMoves(sessionDataDir);

        // Grab the associated templates and parse them out into a map.
        // this map is held internally in the template parser.
        LOG.info("Parsing templates");
        parseTemplates(sessionDataDir);

        // combine all of this data into the comparison set
        LOG.info("Create set");
        prepareSet( );
        populateSet( sources, moves );

        // add in the transpositions!
        addTranspositions( moves );

        // tokenize and collate
        CollatorConfig cfg = this.setDao.getCollatorConfig(this.set);
        this.set.setStatus(ComparisonSet.Status.COLLATING);
        tokenize( cfg );
        collate( cfg );
        this.taskStatus.setNote("Import successful");
    } finally {
        try {
            FileUtils.deleteDirectory(sessionDataDir);
        } catch (IOException e) {
            // cleanup is best-effort and must not mask the import result,
            // but a leftover directory should at least be visible in the log
            LOG.warn("Unable to delete temporary JXT import directory "+sessionDataDir, e);
        }
    }
}
/**
 * Clear any existing content out of the comparison set so the import can
 * overwrite it. When the set has no witnesses this does nothing. Otherwise
 * the set's witnesses and cache entries are removed, and each witness is
 * deleted along with its source and XSLT.
 *
 * @throws RuntimeException if any of the per-witness deletes fails; the
 *         underlying failure is attached as the cause
 */
private void prepareSet() {
    // grab all witnesses associated with this set.
    // If there are none, there is nothing more to do
    List<Witness> witnesses = this.setDao.getWitnesses(this.set);
    if ( witnesses.isEmpty() ) {
        return;
    }

    // clear out all prior data (NOTE: delete all witnesses will also clear out all
    // alignment and annotation data )
    this.setDao.deleteAllWitnesses(this.set);
    this.cacheDao.deleteAll(this.set.getId());
    try {
        for (Witness witness : witnesses) {
            // look up the dependents first; they are found via the witness record
            Source s = this.sourceDao.find(this.ws.getId(), witness.getSourceId());
            JuxtaXslt xslt = this.xsltDao.find(witness.getXsltId());
            this.witnessDao.delete(witness);
            this.sourceDao.delete(s);
            this.xsltDao.delete(xslt);
        }
    } catch (Exception e) {
        // keep the original failure attached so the real reason is not lost
        throw new RuntimeException("Unable to overwrite set; witnesses are in use in another set.", e);
    }
}
/**
 * Turn the moves parsed from the JXT file into manual transposition
 * alignments. One annotation is created per witness named in a move, and
 * the annotations of a move are linked together in a single alignment. If a
 * witness title cannot be found in the set, an error is logged and no
 * alignments are created.
 *
 * @param moves the moves parsed from moves.xml
 */
private void addTranspositions(List<JxtMoveInfo> moves) {
    taskStatus.setNote("Adding Transpositions");
    final Name transpositionName = nameRepo.get(Constants.TRANSPOSITION_NAME) ;
    LOG.info("Adding Transpositions");

    final List<Alignment> alignments = new ArrayList<Alignment>();
    for ( final JxtMoveInfo moveInfo : moves ) {
        final Alignment alignment = new Alignment();
        alignment.setComparisonSetId(this.set.getId());
        alignment.setManual();
        alignment.setName( transpositionName );

        for ( final String witnessTitle : moveInfo.getWitnessTitles() ) {
            final Witness target = witnessDao.find(this.set, witnessTitle);
            if ( null == target ) {
                // give up on all moves; the step still counts as done
                LOG.error("Unable to import moves. Witness "+witnessTitle+" not found");
                taskSegment.incrementValue();
                return;
            }

            final JuxtaAnnotation annotation = new JuxtaAnnotation(
                this.set.getId(), target, transpositionName, moveInfo.getWitnessRange(witnessTitle) );
            annotation.setManual();
            final Long annotationId = annotationDao.create(annotation);
            alignment.addAnnotation(
                new AlignedAnnotation(annotation.getName(), target.getId(), annotationId, annotation.getRange()) );
        }
        alignments.add(alignment);
    }

    alignmentDao.create(alignments);
    taskSegment.incrementValue();
}
/**
 * Read manifest.xml from the unzipped session directory and return the
 * sources it describes.
 *
 * @param sessionDataDir directory holding the unzipped JXT content
 * @return the sources reported by the manifest parser
 * @throws Exception if manifest.xml is missing or cannot be parsed
 */
private List<SourceInfo> parseManifest( File sessionDataDir) throws Exception {
    taskStatus.setNote("Parsing manifest");
    final File manifestFile = new File(sessionDataDir, "manifest.xml");
    if (manifestFile.isFile()) {
        manifestParser.parse(this.set, sessionDataDir, manifestFile);
        final List<SourceInfo> parsed = manifestParser.getSources();
        taskSegment.incrementValue();
        return parsed;
    }
    throw new IOException("No manifest.xml");
}
/**
 * Read moves.xml from the unzipped session directory and return the moves
 * it describes.
 *
 * @param sessionDataDir directory holding the unzipped JXT content
 * @return the moves produced by the moves parser
 * @throws Exception if moves.xml is missing or cannot be parsed
 */
private List<JxtMoveInfo> parseMoves( File sessionDataDir) throws Exception {
    taskStatus.setNote("Parsing moves");
    final File movesXml = new File(sessionDataDir, "moves.xml");
    if (movesXml.isFile()) {
        final List<JxtMoveInfo> parsed = movesParser.parse(this.set, movesXml);
        taskSegment.incrementValue();
        return parsed;
    }
    throw new IOException("No moves.xml");
}
/**
 * Parse out all parse templates from the templates.xml file. The parsed
 * templates are held by the template parser and looked up from there later.
 *
 * @param sessionDataDir directory holding the unzipped JXT content
 * @throws Exception if templates.xml is missing or cannot be parsed
 */
private void parseTemplates( File sessionDataDir ) throws Exception {
    this.taskStatus.setNote("Parsing templates");
    final File templates = new File(sessionDataDir, "templates.xml");
    if (!templates.isFile()) {
        throw new IOException("No templates.xml");
    }

    // always release the file handle, even when parsing fails; an open
    // handle can also prevent the temp directory from being deleted
    final FileInputStream templateStream = new FileInputStream(templates);
    try {
        this.templateParser.parse( templateStream );
    } finally {
        IOUtils.closeQuietly(templateStream);
    }
    this.taskSegment.incrementValue();
}
/**
 * Create a source and a witness for every entry of the manifest and add the
 * resulting witnesses to the comparison set. XML sources get an XSLT built
 * from their parse template (including revision exclusions); all other
 * sources are transformed without an XSLT.
 *
 * @param sources source descriptions parsed from the manifest
 * @param moves moves parsed from the JXT file; not used by this method
 * @throws Exception if a source cannot be created or transformed
 */
private void populateSet( List<SourceInfo> sources, List<JxtMoveInfo> moves ) throws Exception {
    final Set<Witness> imported = new HashSet<Witness>();
    taskStatus.setNote("Adding witnesses to comparison set");

    for ( final SourceInfo info : sources ) {
        // the file extension decides the source type: .xml is XML, anything else is text
        final String fileName = JSONObject.escape(info.getSrcFile().getName());
        taskStatus.setNote("Adding raw source document: "+fileName);
        final int dot = fileName.lastIndexOf('.');
        final boolean isXml = dot > -1 && fileName.substring(dot).equalsIgnoreCase(".xml");
        final Source.Type contentType = isXml ? Source.Type.XML : Source.Type.TXT;

        // create the juxta source
        final Source source = createSource(info, contentType);

        taskStatus.setNote("Transform raw "+fileName+" into witness");
        final Long witnessId;
        if ( !isXml ) {
            // plain text needs no XSLT; transform it straight into a witness
            witnessId = transformer.transform(source, null, source.getName());
        } else {
            // a single namespace found in the document replaces the blank default
            final Set<NamespaceInfo> found = NamespaceExtractor.extract( sourceDao.getContentReader(source) );
            NamespaceInfo namespace = NamespaceInfo.createBlankNamespace();
            if ( found.size() == 1 ) {
                namespace = found.iterator().next();
                final XmlType xmlType = NamespaceExtractor.determineXmlType( sourceDao.getContentReader(source) );
                if ( xmlType.equals(XmlType.TEI)) {
                    namespace.setDefaultPrefix("tei");
                }
            }

            // build the XSLT from the parse template and record revision exclusions
            final TemplateInfo template = templateParser.findTemplateInfo(info.getTemplateGuid());
            final JuxtaXslt xslt = xsltFactory.createFromTemplateInfo(
                source.getWorkspaceId(), info.getTitle(), template, namespace);
            addRevisonExclusions(source, xslt, namespace, info.getAcceptedRevsions() );
            witnessId = transformer.transform(source, xslt, info.getTitle());
        }

        imported.add( witnessDao.find(witnessId) );
    }

    taskStatus.setNote("Create comparison set");
    setDao.addWitnesses(this.set, imported);
    setDao.update(this.set);
    taskSegment.incrementValue();
}
/**
 * Add revision exclusions to the XSLT and persist the updated XSLT text.
 * With no accepted revisions, the add and addSpan tags are excluded
 * globally. Otherwise the excluded revisions are extracted from the source
 * content and each one is added as a single exclusion.
 *
 * @param source source whose content is scanned for revisions
 * @param xslt XSLT that receives the exclusions
 * @param namespace namespace used to prefix the excluded tag names
 * @param acceptedRevsions accepted revisions recorded for the source
 * @throws SAXException declared for failures while extracting revisions
 * @throws IOException declared for failures while reading content
 */
private void addRevisonExclusions(Source source, JuxtaXslt xslt, NamespaceInfo namespace, List<Integer> acceptedRevsions) throws SAXException, IOException {
    if ( !acceptedRevsions.isEmpty() ) {
        // extract the exclusion info and add single exclusions to the XSLT
        final JxtRevisionExtractor revisionExtractor = new JxtRevisionExtractor();
        revisionExtractor.extract( sourceDao.getContentReader(source), acceptedRevsions);
        for ( final RevisionOccurrence excluded : revisionExtractor.getExcludedRevisions() ) {
            final String tagName = namespace.addNamespacePrefix(excluded.getTagName());
            xslt.addSingleExclusion( tagName, excluded.getOccurrence() );
        }
    } else {
        // nothing accepted: exclude every add and addSpan tag. The deletes remain
        xslt.addGlobalExclusion( namespace.addNamespacePrefix("add") );
        xslt.addGlobalExclusion( namespace.addNamespacePrefix("addSpan") );
    }

    final String xsltText = xslt.getXslt();
    xsltDao.update(xslt.getId(), new StringReader(xsltText));
}
/**
 * Create a source in the current workspace from the file referenced by the
 * source info. The file is read as UTF-8. If the title is already taken in
 * the workspace, a unique name is generated and written back to the source
 * info.
 *
 * @param srcInfo manifest data for the source; its title may be updated
 * @param contentType type to record for the new source
 * @return the newly created source, as looked up after creation
 * @throws FileNotFoundException if the source file cannot be opened
 * @throws IOException declared for I/O failures while reading the content
 * @throws XMLStreamException declared for XML failures while creating the source
 */
private Source createSource(SourceInfo srcInfo, Type contentType) throws FileNotFoundException, IOException, XMLStreamException {
    String name = srcInfo.getTitle();
    if ( this.sourceDao.exists(this.ws, name)) {
        name = this.sourceDao.makeUniqueName(this.ws, name);
        srcInfo.setTitle(name);
    }

    final FileInputStream fis = new FileInputStream(srcInfo.getSrcFile());
    InputStreamReader isr = null;
    final Long srcId;
    try {
        isr = new InputStreamReader(fis, "UTF-8");
        srcId = this.sourceDao.create(this.ws, name, contentType, isr);
    } finally {
        // release the file handle on failure too, not only on success
        IOUtils.closeQuietly(isr);
        IOUtils.closeQuietly(fis);
    }
    return this.sourceDao.find(this.ws.getId(), srcId);
}
/**
 * Run the tokenizer over the comparison set being imported and mark the
 * tokenize step as complete.
 *
 * @param cfg collator configuration passed to the tokenizer
 * @throws IOException declared for tokenizer failures
 */
private void tokenize( CollatorConfig cfg ) throws IOException {
    taskStatus.setNote("Tokenizing comparison set");
    tokenizer.tokenize(this.set, cfg, taskStatus);
    taskSegment.incrementValue();
}
/**
 * Run the collator over the comparison set being imported and mark the
 * collate step as complete.
 *
 * @param cfg collator configuration passed to the collator
 * @throws IOException declared for collator failures
 */
private void collate( CollatorConfig cfg ) throws IOException {
    taskStatus.setNote("Collating comparison set");
    collator.collate(this.set, cfg, taskStatus);
    taskSegment.incrementValue();
}
/**
 * Inflate the zipped JXT stream into the target directory. Every entry must
 * resolve to a location accepted by the isContainedIn check against the
 * target directory. Directory entries are skipped; missing parent
 * directories are created as files are written. The zip stream is not
 * closed here.
 *
 * @param to directory that receives the extracted files
 * @param jxtIs stream over the zipped JXT content
 * @throws IOException if reading the archive or writing a file fails
 */
private void unzip(File to, InputStream jxtIs) throws IOException {
    final ZipInputStream archive = new ZipInputStream(jxtIs);
    for (ZipEntry item = archive.getNextEntry(); item != null; item = archive.getNextEntry()) {
        final File target = new File(to, item.getName());
        Preconditions.checkArgument(isContainedIn(to, target));
        if (item.isDirectory()) {
            continue;
        }

        final File targetDir = target.getParentFile();
        if (!targetDir.isDirectory()) {
            targetDir.mkdirs();
        }
        Preconditions.checkState(targetDir.isDirectory());

        final FileOutputStream out = new FileOutputStream(target);
        try {
            ByteStreams.copy(archive, out);
        } finally {
            Closeables.close(out, false);
        }
    }
}
}