/*
 * Copyright 2010
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.tudarmstadt.ukp.dkpro.core.io.xmi;

import static org.apache.commons.io.IOUtils.closeQuietly;

import java.io.IOException;
import java.io.InputStream;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.impl.XmiCasDeserializer;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.MimeTypeCapability;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.xml.sax.SAXException;

import de.tudarmstadt.ukp.dkpro.core.api.io.ResourceCollectionReaderBase;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.MimeTypes;
import de.tudarmstadt.ukp.dkpro.core.api.resources.CompressionUtils;

/**
 * Reader for UIMA XMI files.
 * <p>
 * Each resource delivered by the base class is passed through
 * {@link CompressionUtils#getInputStream} and then deserialized into the CAS using
 * {@link XmiCasDeserializer}.
 */
@MimeTypeCapability({MimeTypes.APPLICATION_VND_XMI_XML, MimeTypes.APPLICATION_X_UIMA_XMI})
@TypeCapability(
        outputs={
                "de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData"})
public class XmiReader
    extends ResourceCollectionReaderBase
{
    /**
     * In lenient mode, unknown types are ignored and do not cause an exception to be thrown.
     */
    public static final String PARAM_LENIENT = "lenient";
    @ConfigurationParameter(name=PARAM_LENIENT, mandatory=true, defaultValue="false")
    private boolean lenient;

    /**
     * Loads the next XMI resource into the given CAS.
     * <p>
     * The CAS is first initialized from the resource via {@code initCas}, then the XMI content
     * is deserialized into it, honoring the {@link #PARAM_LENIENT} setting. Finally, if
     * {@code getLanguage()} returns a non-null value, it replaces the document language of the
     * CAS.
     *
     * @param aCAS
     *            the CAS to fill with the content of the next resource.
     * @throws IOException
     *             if the resource cannot be read, or if the XMI cannot be parsed (the
     *             underlying {@link SAXException} is attached as the cause).
     * @throws CollectionException
     *             declared by the overridden method; not thrown directly by this
     *             implementation.
     */
    @Override
    public void getNext(CAS aCAS)
        throws IOException, CollectionException
    {
        Resource res = nextFile();
        initCas(aCAS, res);

        // The raw stream is tracked separately from the (possibly wrapping) stream returned by
        // CompressionUtils: if the wrapping call itself throws, the raw stream would otherwise
        // be left open because no reference to it would remain.
        InputStream raw = null;
        InputStream is = null;
        try {
            raw = res.getInputStream();
            is = CompressionUtils.getInputStream(res.getLocation(), raw);
            XmiCasDeserializer.deserialize(is, aCAS, lenient);

            // Override language using PARAM_LANG if that is set
            if (getLanguage() != null) {
                aCAS.setDocumentLanguage(getLanguage());
            }
        }
        catch (SAXException e) {
            throw new IOException(e);
        }
        finally {
            // Close the outermost stream that was actually obtained; fall back to the raw
            // stream when wrapping failed. closeQuietly tolerates null.
            closeQuietly(is != null ? is : raw);
        }
    }
}