/**
* P4Indexer.java
* Author: Philip Kahle (philip.kahle@uibk.ac.at)
*
* This file is part of PrestoPRIME Preservation Platform (P4).
*
* Copyright (C) 2009-2012 University of Innsbruck, Austria
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package eu.prestoprime.search;
import it.eurix.archtools.data.DataException;
import it.eurix.archtools.data.model.DIP;
import it.eurix.archtools.data.model.DIP.DCField;
import it.eurix.archtools.data.model.IPException;
import it.eurix.archtools.data.model.InformationPackage;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import javax.xml.bind.JAXBException;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import eu.prestoprime.search.util.P4IndexObject;
public class P4Indexer extends AbstractPOJOIndexer {
public P4Indexer() {
LOGGER.info("Instance of Indexer was created.");
}
public boolean indexIP(InformationPackage ip) {
boolean success = false;
try {
P4IndexObject iObject = createIndexObject(ip);
if (iObject != null) {
success = super.addObjectToIndex(iObject);
}
} catch (IPException e) {
success = false;
LOGGER.error("Indexing IP " + ip.getId() + " failed!");
LOGGER.error(e.getMessage());
}
return success;
}
/**
* Helper method that gets all DublinCore data of an IP from the DB and
* builds a P4IndexObject
*
* @param ip
* an Information Package object, i.e. SIP, AIP, DIP
* @return a P4IndexObject ready to be committed to the Solr Index
* @throws JAXBException
* @throws DataException
* @throws IPException
*/
public P4IndexObject createIndexObject(InformationPackage ip) throws IPException {
P4IndexObject object = new P4IndexObject();
if (ip != null) {
object.setId(ip.getId());
if (ip.getCreateDate() != null) {
LOGGER.debug("Adding create date: " + ip.getCreateDate().getTime().toGMTString());
object.setCreateDate(ip.getCreateDate().getTime());
LOGGER.debug("Added create date...");
} else {
LOGGER.warn("DIP " + ip.getId() + " has no create date!");
}
LOGGER.debug("Retrieving DCFields...");
Map<String, List<String>> dcRec = ip.getDCFields();
LOGGER.debug("Retrieved DCFields " + dcRec);
if (dcRec != null && !dcRec.isEmpty()) {
LOGGER.debug("Adding DC record: " + dcRec.get(DCField.identifier.toString()));
addDcRecord(object, dcRec);
} else {
LOGGER.warn("DIP " + ip.getId() + " has no dc record!");
}
// Convert DcDate Strings like '2012-10-15' to java.util.Date
object.setDcDate(convertToDates(dcRec.get(DCField.date.toString())));
// set user annotations
if (ip instanceof DIP) {
DIP dip = (DIP) ip;
List<Node> userAnnotNodes = dip.getMDResourceAsDOM("usermd");
if (userAnnotNodes != null && !userAnnotNodes.isEmpty()) {
String[] keywordsToAdd = extractKeywords(userAnnotNodes);
if (keywordsToAdd != null && keywordsToAdd.length > 0) {
LOGGER.debug("Adding " + keywordsToAdd.length + " user annotations to index object.");
object.setUserAnnot(keywordsToAdd);
}
}
}
// extract techMd
object.setAspect(getTechMdProp(ip, "aspect_ratio"));
object.setCodec(getTechMdProp(ip, "video_codec"));
object.setDuration(getTechMdFloatProp(ip, "duration"));
object.setDimensions(getTechMdIntProp(ip, "width"), getTechMdIntProp(ip, "height"));
} else {
object = null;
}
return object;
}
private Date[] convertToDates(List<String> list) {
// 2012-08-09T00:00:00Z - 2012-10-15
ArrayList<Date> dateArr = new ArrayList<>(list.size());
String inFormat = SearchConstants.getString("dcDateFormat");
if (inFormat == null) {
// default:
inFormat = "yyyy-MM-dd";
}
DateFormat formatter = new SimpleDateFormat(inFormat);
Date date;
for (String s : list) {
try {
date = formatter.parse(s);
dateArr.add(date);
} catch (ParseException e) {
LOGGER.warn("Invalid date in DcDate = " + s);
}
}
return (Date[]) dateArr.toArray(new Date[0]);
}
/**
* for each Node apply XPath to retrieve userAnnot and then add to index
*
* @param userAnnotNodes
* @return
*/
public String[] extractKeywords(List<Node> userAnnotNodes) {
List<String> keywordsToAdd = new ArrayList<>();
for (Node n : userAnnotNodes) {
if (n.getChildNodes() != null && n.getChildNodes().getLength() > 0) {
Element e = (Element) n.getChildNodes().item(0);
NodeList keywords = e.getElementsByTagName("mpeg7:Keyword");
if (keywords.getLength() > 0) {
String keyword = "";
for (int i = 0; i < keywords.getLength(); i++) {
if (keywords.item(i).getTextContent() != null && !keywords.item(i).getTextContent().isEmpty()) {
keyword = keywords.item(i).getTextContent();
if (keyword.startsWith("http") || keyword.startsWith("C:")) {
// don't use these for now
LOGGER.debug("Discarding keyword : " + keyword);
} else {
LOGGER.debug("Adding keyword to list: " + keyword);
keywordsToAdd.add(keyword);
}
}
}
}
}
}
if (keywordsToAdd.isEmpty()) {
return null;
} else {
return keywordsToAdd.toArray(new String[0]);
}
}
private void addDcRecord(P4IndexObject object, Map<String, List<String>> dcRec) throws IPException {
for (String field : dcRec.keySet()) {
DCField dcField = DCField.valueOf(field);
switch (dcField) {
case contributor:
object.setDcContrib(dcRec.get(DCField.contributor.toString()).toArray(new String[0]));
break;
case coverage:
object.setDcCoverage(dcRec.get(DCField.coverage.toString()).toArray(new String[0]));
break;
case creator:
object.setDcCreator(dcRec.get(DCField.creator.toString()).toArray(new String[0]));
break;
// case date: //date is set elsewhere
// object.setDcDate(dcRec.get(DCField.date.toString()).toArray(new
// Date[0]));
// break;
case description:
object.setDcDescription(dcRec.get(DCField.description.toString()).toArray(new String[0]));
break;
case format:
object.setDcFormat(dcRec.get(DCField.format.toString()).toArray(new String[0]));
break;
case identifier:
object.setDcIdentifier(dcRec.get(DCField.identifier.toString()).toArray(new String[0]));
break;
case language:
object.setDcLang(dcRec.get(DCField.language.toString()).toArray(new String[0]));
break;
case publisher:
object.setDcPublisher(dcRec.get(DCField.publisher.toString()).toArray(new String[0]));
break;
case relation:
object.setDcRelation(dcRec.get(DCField.relation.toString()).toArray(new String[0]));
break;
case source:
object.setDcSource(dcRec.get(DCField.source.toString()).toArray(new String[0]));
break;
case subject:
object.setDcSubject(dcRec.get(DCField.subject.toString()).toArray(new String[0]));
break;
case title:
object.setDcTitle(dcRec.get(DCField.title.toString()).toArray(new String[0]));
break;
case type:
object.setDcType(dcRec.get(DCField.type.toString()).toArray(new String[0]));
break;
}
}
LOGGER.debug("...done");
for (String s : object.getDcCreator()) {
LOGGER.debug("Creator= " + s);
}
}
public boolean removeIPfromIndex(String ipId) {
return super.removeObjectFromIndex(ipId);
}
public boolean clearIndex() {
return super.clearIndex();
}
public String getTechMdProp(InformationPackage ip, String name) {
String value = null;
String xPath = "//dnx:section[@id='videoMD']/dnx:record/dnx:key[@id='" + name + "']/text()";
try {
value = ip.executeQuery(xPath).get(0);
} catch (Exception e) {
LOGGER.warn("techMD property '" + name + "' could not be found in the DIP! " + e.getMessage());
LOGGER.warn("XPath = " + xPath);
// e.printStackTrace();
}
return value;
}
public Integer getTechMdIntProp(InformationPackage ip, String name) {
String prop = getTechMdProp(ip, name);
Integer intProp = null;
if (prop != null) {
try {
intProp = Integer.parseInt(prop);
} catch (NumberFormatException e) {
LOGGER.warn(prop + " is not an Integer.");
}
}
return intProp;
}
public Float getTechMdFloatProp(InformationPackage ip, String name) {
String prop = getTechMdProp(ip, name);
Float floatProp = null;
if (prop != null) {
try {
floatProp = Float.parseFloat(prop);
} catch (NumberFormatException e) {
LOGGER.warn(prop + " is not a Float.");
}
}
return floatProp;
}
}