/*
* Copyright (C) 2014 Jan Pokorsky
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package cz.cas.lib.proarc.common.ocr;
import com.yourmediashelf.fedora.generated.management.DatastreamProfile;
import cz.cas.lib.proarc.common.export.mets.MetsLSResolver;
import cz.cas.lib.proarc.common.fedora.DigitalObjectException;
import cz.cas.lib.proarc.common.fedora.FedoraObject;
import cz.cas.lib.proarc.common.fedora.FoxmlUtils;
import cz.cas.lib.proarc.common.fedora.XmlStreamEditor;
import java.io.IOException;
import java.net.URI;
import javax.xml.XMLConstants;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import org.xml.sax.SAXException;
/**
 * Static helpers for the ALTO data stream of a digital object: the data stream
 * profile, validation of ALTO content against the bundled XML schema and
 * import of validated content into a {@link FedoraObject}.
 *
 * @author Jan Pokorsky
 */
public final class AltoDatastream {

    /** Data stream ID. */
    public static final String ALTO_ID = "ALTO";
    /** Data stream label. */
    public static final String ALTO_LABEL = "ALTO for this object";
    /** Format URI of the data stream; the ALTO v2 namespace. */
    public static final String ALTO_FORMAT_URI = "http://www.loc.gov/standards/alto/ns-v2#";

    /** Class path location of the XML schema used to validate ALTO content. */
    private static final String ALTO_SCHEMA_PATH = "/xml/alto-v2.1.xsd";

    /** Lazily built schema; read and written only inside {@link #getSchema()}. */
    private static Schema altoSchema;

    /**
     * Creates the profile of the ALTO data stream.
     *
     * @return a managed data stream profile built from {@link #ALTO_ID},
     *         {@link #ALTO_FORMAT_URI} and {@link #ALTO_LABEL}
     */
    public static DatastreamProfile altoProfile() {
        return FoxmlUtils.managedProfile(ALTO_ID, ALTO_FORMAT_URI, ALTO_LABEL);
    }

    /**
     * Validates the ALTO content and writes it to the ALTO data stream
     * of a fedora object.
     *
     * @param fo fedora object
     * @param altoUri location of the ALTO XML to import
     * @param msg log message
     * @throws DigitalObjectException the content cannot be read, is not valid
     *         ALTO, or the write fails
     */
    public static void importAlto(FedoraObject fo, URI altoUri, String msg) throws DigitalObjectException {
        boolean valid;
        // Only the validation call is guarded. Throwing the "missing expected
        // ALTO version" error from inside the try block would let the handler
        // below catch and re-wrap it, hiding its message behind the bare URI.
        try {
            valid = isAlto(altoUri);
        } catch (IOException | SAXException | RuntimeException ex) {
            // Runtime failures are wrapped as well so that callers keep
            // receiving every validation problem as a DigitalObjectException.
            throw new DigitalObjectException(fo.getPid(), altoUri.toASCIIString(), ex);
        }
        if (!valid) {
            throw new DigitalObjectException(fo.getPid(),
                    String.format("%s: missing expected ALTO version: %s",
                            altoUri.toASCIIString(), AltoDatastream.ALTO_FORMAT_URI),
                    null);
        }
        XmlStreamEditor editor = fo.getEditor(altoProfile());
        editor.write(altoUri, editor.getLastModified(), msg);
    }

    /**
     * Checks whether URI content contains proper ALTO data by validating it
     * against the ALTO schema.
     * <p>Invalid content is reported with an exception, not with {@code false}.
     *
     * @param alto URI of the content to validate
     * @return {@code true} if the validation passes
     * @throws IOException the content cannot be read
     * @throws SAXException the content is not valid or the schema cannot be built
     */
    static boolean isAlto(URI alto) throws IOException, SAXException {
        // NOTE(review): the validator runs with default parser settings. If ALTO
        // files may come from untrusted sources, consider restricting external
        // DTD/entity access on the validator.
        getSchema().newValidator().validate(new StreamSource(alto.toASCIIString()));
        return true;
    }

    /**
     * Gets the ALTO XML schema. The schema is built on the first call and
     * the same instance is returned afterwards.
     * <p>The method is synchronized so that concurrent first calls build
     * the schema only once and always see a fully initialized instance.
     *
     * @return the schema
     * @throws SAXException the schema resource is missing or cannot be parsed
     */
    public static synchronized Schema getSchema() throws SAXException {
        if (altoSchema == null) {
            // getResource returns null for a missing resource; fail with a clear
            // message instead of a NullPointerException inside newSchema.
            java.net.URL schemaUrl = AltoDatastream.class.getResource(ALTO_SCHEMA_PATH);
            if (schemaUrl == null) {
                throw new SAXException("Missing ALTO schema resource: " + ALTO_SCHEMA_PATH);
            }
            SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
            schemaFactory.setResourceResolver(MetsLSResolver.getInstance());
            altoSchema = schemaFactory.newSchema(schemaUrl);
        }
        return altoSchema;
    }
}