package eu.europeana.cloud.service.dps.storm.transform.text.edm;
import eu.europeana.cloud.service.dps.storm.transform.text.MethodsEnumeration;
import eu.europeana.cloud.service.dps.storm.transform.text.TextExtractor;
import eu.europeana.corelib.definitions.jibx.RDF;
import eu.europeana.corelib.edm.utils.SolrConstructor;
import java.io.InputStream;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.HashMap;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.codehaus.jackson.map.ObjectMapper;
import org.jibx.runtime.BindingDirectory;
import org.jibx.runtime.IBindingFactory;
import org.jibx.runtime.IUnmarshallingContext;
import org.jibx.runtime.JiBXException;
/**
* JIBX text extractor for EDM files.
* @author Pavel Kefurt <Pavel.Kefurt@gmail.com>
*/
public class JibxExtractor implements TextExtractor
{
private static final Logger LOGGER = LoggerFactory.getLogger(JibxExtractor.class);
private static IBindingFactory bfact;
static
{
try
{
//Should be placed in a static block for performance reasons
bfact = BindingDirectory.getFactory(RDF.class);
}
catch(JiBXException ex)
{
LOGGER.error("Cannot create the JibX factory because: "+ex.getMessage());
}
}
@Override
public String extractText(InputStream is)
{
if(is == null)
{
return null;
}
try
{
IUnmarshallingContext uctx = bfact.createUnmarshallingContext();
RDF rdf = (RDF)uctx.unmarshalDocument(is, null);
//excract data from EDM
Map<String, Object> res = new HashMap<>();
SolrInputDocument solrDoc = new SolrConstructor().constructSolrDocument(rdf);
for(Map.Entry<String, SolrInputField> field: solrDoc.entrySet())
{
Object o = field.getValue().getValue();
if(o != null)
{
res.put(field.getKey(), o);
}
}
return new ObjectMapper().writeValueAsString(res);
}
catch (JiBXException | IOException ex)
{
LOGGER.warn("Cannot convert EDM to string because: "+ex.getMessage());
}
catch (InstantiationException | IllegalAccessException ex) //builder wants these two exceptions (NetBeans not)
{
LOGGER.warn("Cannot convert EDM to string because: "+ex.getMessage());
}
catch(NullPointerException ex) //wrong EDM file
{
LOGGER.warn("Cannot convert EDM to string because: not valid EDM");
return null;
}
return null;
}
@Override
public MethodsEnumeration getExtractionMethod()
{
return EdmExtractionMethods.JIBX_EXTRACTOR;
}
@Override
public Map<String, String> getExtractedMetadata()
{
return null;
}
@Override
public String getRepresentationName()
{
return "edm_as_json";
}
}