package org.juxtasoftware.service.importer.ps;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.springframework.beans.factory.config.BeanDefinition;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Component;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX handler that scans a TEI parallel segmentation document for
 * {@code listWit} / {@code witness} elements and collects one
 * {@link PsWitnessInfo} per {@code witness}, in document order.
 *
 * <p>The parser is stateful: witnesses found by successive calls to
 * {@link #parse(Reader)} on the same instance accumulate in the same list.
 *
 * @author loufoster
 */
@Component
@Scope(BeanDefinition.SCOPE_PROTOTYPE)
public class WitnessParser extends DefaultHandler {

    /** Shared factory; configured once as non-validating and not namespace aware. */
    private static final SAXParserFactory parserFactory;

    /** Witnesses collected so far, in the order their end tags were seen. */
    private final List<PsWitnessInfo> witnesses = new ArrayList<PsWitnessInfo>();

    /**
     * One entry per currently open {@code listWit}. The entry is the effective
     * group id for witnesses directly inside that list and may be {@code null}.
     */
    private final Stack<String> groupIdStack = new Stack<String>();

    /** Witness whose element is currently open, or {@code null} when outside a witness. */
    private PsWitnessInfo currWitness = null;

    /** Raw character data gathered for the currently open witness. */
    private final StringBuilder currDesc = new StringBuilder();

    static {
        parserFactory = SAXParserFactory.newInstance();
        parserFactory.setNamespaceAware(false);
        parserFactory.setValidating(false);
    }

    /**
     * Parses the TEI source and records every witness it declares.
     *
     * @param teiReader character stream containing the TEI XML
     * @throws ParserConfigurationException if a SAX parser cannot be created
     * @throws SAXException if the XML cannot be parsed
     * @throws IOException if the reader fails
     */
    public void parse( Reader teiReader ) throws ParserConfigurationException, SAXException, IOException {
        SAXParser parser = parserFactory.newSAXParser();
        parser.parse( new InputSource(teiReader), this );
    }

    /**
     * Short-circuits DTD lookups by handing the parser an empty document for any
     * external entity whose system identifier mentions {@code .dtd}.
     *
     * @param publicId public identifier of the entity; not inspected
     * @param systemId system identifier of the entity
     * @return an empty source for DTD references, otherwise {@code null} so the
     *         parser falls back to its default resolution
     */
    @Override
    public InputSource resolveEntity(String publicId, String systemId) throws IOException, SAXException {
        // The SAX contract orders the arguments (publicId, systemId); the DTD
        // location is carried by the system identifier.
        if ( systemId != null && systemId.contains(".dtd") ) {
            return new InputSource(new StringReader(""));
        }
        return null;
    }

    /**
     * @return the live list of witnesses collected so far
     */
    public List<PsWitnessInfo> getWitnesses() {
        return this.witnesses;
    }

    /**
     * Tracks entry into {@code listWit} groups and starts a new witness record
     * on each {@code witness} element.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if ( "listWit".equals(qName) ) {
            String groupId = getId(attributes);
            if ( groupId == null && !this.groupIdStack.empty() ) {
                // A list without its own id inherits the enclosing list's id.
                groupId = this.groupIdStack.peek();
            }
            // Always push exactly one entry per listWit so that the pop in
            // endElement stays balanced and never discards an outer list's id.
            this.groupIdStack.push( groupId );
        } else if ( "witness".equals(qName) ) {
            this.currWitness = new PsWitnessInfo( getId(attributes) );
            if ( !this.groupIdStack.empty() ) {
                this.currWitness.groupId = this.groupIdStack.peek();
            }
        }
    }

    /**
     * Reads the element id, preferring {@code xml:id} and falling back to {@code id}.
     *
     * @return the id value, or {@code null} when neither attribute is present
     */
    private String getId(Attributes attributes) {
        String id = attributes.getValue("xml:id");
        if ( id == null ) {
            id = attributes.getValue("id");
        }
        return id;
    }

    /**
     * Leaves the current {@code listWit} group, or finalizes the open witness:
     * its collected text is trimmed, whitespace runs are collapsed to single
     * spaces, and the record is appended to the result list.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if ( "listWit".equals(qName) ) {
            if ( !this.groupIdStack.empty() ) {
                this.groupIdStack.pop();
            }
        } else if ( "witness".equals(qName) ) {
            if ( this.currWitness == null ) {
                // No open witness (e.g. an inner witness already closed it);
                // nothing to record.
                return;
            }
            // Collapse every run of tabs, newlines and spaces into one space.
            String text = this.currDesc.toString().trim().replaceAll("[\\t\\n ]+", " ");
            this.currWitness.description = text;
            this.witnesses.add( this.currWitness );
            this.currWitness = null;
            this.currDesc.setLength(0);
        }
    }

    /**
     * Accumulates character data, but only while a witness element is open.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if ( this.currWitness != null ) {
            this.currDesc.append( ch, start, length );
        }
    }

    /**
     * Basic data gathered from a single {@code witness} element and the
     * {@code listWit} that contains it.
     */
    public static class PsWitnessInfo {
        private final String id;
        private String groupId;
        private String description;

        private PsWitnessInfo( final String id ) {
            this.id = id;
        }

        /**
         * @return {@code true} when a non-empty group id was assigned
         */
        public boolean hasGroupAlias() {
            return this.groupId != null && this.groupId.length() > 0;
        }

        /** @return the witness id attribute value, possibly {@code null} */
        public String getId() {
            return id;
        }

        /** @return the id of the containing witness list, possibly {@code null} */
        public String getGroupId() {
            return groupId;
        }

        /** @return the whitespace-normalized text content of the witness element */
        public String getDescription() {
            return description;
        }

        /**
         * @return {@code "id : description"} when a description exists,
         *         otherwise just the id
         */
        public String getName() {
            if ( this.description != null && this.description.length() > 0 ) {
                return this.id + " : " + this.description;
            }
            return this.id;
        }

        @Override
        public String toString() {
            return "PsWitnessInfo [id=" + id + ", description=" + description + "]";
        }
    }
}