package org.bbaw.wsp.cms.mdsystem.metadata.convert2rdf.transformer;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Locale;
import org.bbaw.wsp.cms.dochandler.parser.document.PdfDocument;
import org.bbaw.wsp.cms.dochandler.parser.text.parser.EdocIndexMetadataFetcherTool;
import org.bbaw.wsp.cms.document.MetadataRecord;
import org.bbaw.wsp.cms.mdsystem.metadata.convert2rdf.util.TemplateMapper;
import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
/**
* Instances of this (singleton) class transform eDoc metadata to the
* destination OAI/ORE files.
*
* @author Sascha Feldmann (wsp-shk1)
* @date 08.10.12
*
*/
public class EdocToRdfTransformer extends ToRdfTransformer {
  /**
   * Specify the RDF template here.
   *
   * NOTE(review): this is an absolute, machine-specific path; it probably
   * should come from configuration - confirm before deploying elsewhere.
   */
  private static final String RDF_TEMPLATE_URL = "C:/Dokumente und Einstellungen/wsp-shk1/Eigene Dateien/ParserTest/XSLTTest/templates/eDocToRdfTemplate.xml";
  /**
   * The prefix of the aggregation name as it is stored in the quad.
   */
  public static final String AGGREGATION_NAME_PREFIX = "http://wsp.bbaw.de/edoc/";
  /**
   * Pattern used for full dates written to the template (year-month-day).
   */
  private static final String FULL_DATE_PATTERN = "yyyy-MM-dd";
  /**
   * Pattern used for the creation date (year only).
   */
  private static final String YEAR_PATTERN = "yyyy";
  /**
   * Text appended after each generated dc:subject element.
   */
  private static final String SUBJECT_SUFFIX = "\n\t\t\t\t";

  private static EdocToRdfTransformer instance;

  private EdocToRdfTransformer() throws ApplicationException {
    super(ToRdfTransformer.MODE_DIRECT);
  }

  /**
   * Returns the shared instance, creating it on first use.
   *
   * @return the only existing instance.
   * @throws ApplicationException
   *           if the mode wasn't specified correctly.
   */
  public static EdocToRdfTransformer getInstance() throws ApplicationException {
    if (instance == null) {
      // Remember the created object; otherwise every call would hand out a
      // new transformer and the class would not be a singleton.
      instance = new EdocToRdfTransformer();
    }
    return instance;
  }

  /**
   * Fetches the eDoc metadata from the input URL, fills the RDF template with
   * it, writes the result to the output file and reports on the console
   * whether the written file passed the validation check.
   *
   * @param inputUrl
   *          the location the eDoc metadata is fetched from.
   * @param outputUrl
   *          the path of the file the mapped template is written to.
   * @throws ApplicationException
   *           if the super class, the metadata fetcher or the mapping fails.
   * @see ToRdfTransformer#doTransformation(java.lang.String, java.lang.String)
   */
  public void doTransformation(final String inputUrl, final String outputUrl) throws ApplicationException {
    super.doTransformation(inputUrl, outputUrl);
    MetadataRecord mdRecord = new MetadataRecord();
    EdocIndexMetadataFetcherTool.fetchHtmlDirectly(inputUrl, mdRecord);

    // map here
    System.out.println("Processing transformation from eDoc to RDF...");
    TemplateMapper mapper = new TemplateMapper(RDF_TEMPLATE_URL);
    HashMap<String, String> eDocMap = createMap(mdRecord);
    System.out.println("Mapping template...");
    mapper.mapPlaceholder(eDocMap);

    // NOTE(review): FileWriter uses the platform default charset; verify that
    // this matches the encoding declared by the template.
    BufferedWriter buffer = null;
    try {
      buffer = new BufferedWriter(new FileWriter(new File(outputUrl)));
      buffer.write(mapper.getMappedTemplate());
      buffer.flush();
    } catch (IOException e) {
      // Best effort as before: report the problem and continue with the
      // validation step, which will then flag the output file.
      e.printStackTrace();
    } finally {
      // Always release the file handle, even if writing failed half-way.
      if (buffer != null) {
        try {
          buffer.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }

    // check validation
    File f = new File(outputUrl);
    if (!this.checkValidation(f.getAbsolutePath())) {
      System.out.println("the generated output file - " + f + " isn't XML valid. Please check the file!");
    } else {
      System.out.println("The generated output file - " + f + " is xml valid!");
    }
  }

  /**
   * Builds the placeholder map for the RDF template from a metadata record.
   * Missing (null) string values are mapped to the empty string.
   *
   * @param mdRecord
   *          the record holding the fetched eDoc metadata.
   * @return a map from template placeholder to replacement text.
   * @throws ApplicationException
   *           declared for failures while resolving the document id.
   */
  private HashMap<String, String> createMap(final MetadataRecord mdRecord) throws ApplicationException {
    HashMap<String, String> eDocPlaceholderMap = new HashMap<String, String>();
    eDocPlaceholderMap.put("%%aggregation_uri%%",
        AGGREGATION_NAME_PREFIX + EdocIndexMetadataFetcherTool.getDocId(mdRecord.getRealDocUrl()) + "/aggregation");
    eDocPlaceholderMap.put("%%creator_name%%", ToRdfTransformer.TRANSFORMER_CREATOR_NAME);
    eDocPlaceholderMap.put("%%creator_url%%", ToRdfTransformer.TRANSFORMER_CREATOR_URL);

    eDocPlaceholderMap.put("%%resource_identifier%%", nullToEmpty(mdRecord.getRealDocUrl()));
    eDocPlaceholderMap.put("%%resource_urn_identifier%%", nullToEmpty(mdRecord.getUrn()));
    // A missing title becomes an empty string like every other field.
    eDocPlaceholderMap.put("%%dc_title%%", nullToEmpty(mdRecord.getTitle()));

    // Use an explicit pattern: the locale-dependent MEDIUM style is not
    // guaranteed to produce year-month-day with every JDK locale data set.
    DateFormat fullDateFormat = new SimpleDateFormat(FULL_DATE_PATTERN, Locale.ROOT);
    eDocPlaceholderMap.put("%%actual_date%%", fullDateFormat.format(new Date()));

    Date dateCreated = mdRecord.getCreationDate();
    String dateCreatedString = "";
    if (dateCreated != null) {
      // KOBV format yyyy -> format to W3CDTF
      dateCreatedString = new SimpleDateFormat(YEAR_PATTERN, Locale.ROOT).format(dateCreated);
    }
    eDocPlaceholderMap.put("%%date_created%%", dateCreatedString);

    Date dateIssued = mdRecord.getPublishingDate();
    System.out.println("date published: " + dateIssued);
    String dateIssuedString = "";
    if (dateIssued != null) {
      // KOBV format dd.mm.yyyy -> format to yyyy-mm-dd
      dateIssuedString = fullDateFormat.format(dateIssued);
    }
    eDocPlaceholderMap.put("%%date_issued%%", dateIssuedString);

    eDocPlaceholderMap.put("%%publisher%%", nullToEmpty(mdRecord.getPublisher()));

    // Guard against a missing language before comparing it.
    String language = nullToEmpty(mdRecord.getLanguage());
    if ("Deutsch".equals(language)) {
      language = "deu";
    }
    eDocPlaceholderMap.put("%%language%%", language);

    eDocPlaceholderMap.put("%%mime_type%%", PdfDocument.MIME_TYPE);
    eDocPlaceholderMap.put("%%dc_description%%", nullToEmpty(mdRecord.getDescription()));

    // Map SWD subjects and other subjects
    StringBuilder subjects = new StringBuilder();
    appendSubjects(subjects, mdRecord.getSwd(), ""); // swd
    appendSubjects(subjects, mdRecord.getSubject(), "\t\t\t"); // free keywords
    eDocPlaceholderMap.put("%%subjects%%", subjects.toString());

    eDocPlaceholderMap.put("%%document_type%%", nullToEmpty(mdRecord.getDocumentType()));

    // split to given and family name ("family, given")
    String creator = nullToEmpty(mdRecord.getCreator());
    int comma = creator.indexOf(',');
    String givenName;
    String familyName;
    if (comma >= 0) {
      familyName = creator.substring(0, comma).trim();
      givenName = creator.substring(comma + 1).trim();
    } else {
      // Without a comma the whole value is used as the given name and the
      // family name stays empty (same result as before, without exceptions).
      givenName = creator.trim();
      familyName = "";
    }
    eDocPlaceholderMap.put("%%given_name%%", givenName);
    eDocPlaceholderMap.put("%%family_name%%", familyName);

    eDocPlaceholderMap.put("%%ddc%%", nullToEmpty(mdRecord.getDdc()));
    eDocPlaceholderMap.put("%%number_pages%%", mdRecord.getPageCount() + "");
    eDocPlaceholderMap.put("%%source%%", nullToEmpty(mdRecord.getInPublication()));
    return eDocPlaceholderMap;
  }

  /**
   * Returns the given value, or the empty string if it is null.
   */
  private static String nullToEmpty(final String value) {
    return value == null ? "" : value;
  }

  /**
   * Appends one dc:subject element per non-blank, comma-separated entry of
   * the given list to the target. Nothing is appended for a null list.
   *
   * @param target
   *          the builder collecting the generated elements.
   * @param commaSeparated
   *          the comma-separated subjects, may be null.
   * @param prefix
   *          text written in front of each generated element.
   */
  private static void appendSubjects(final StringBuilder target, final String commaSeparated, final String prefix) {
    if (commaSeparated == null) {
      return;
    }
    for (String entry : commaSeparated.split(",")) {
      if (!entry.trim().isEmpty()) {
        target.append(prefix).append("<dc:subject>").append(entry).append("</dc:subject>").append(SUBJECT_SUFFIX);
      }
    }
  }
}