/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.app.bulkedit;
import org.dspace.content.*;
import org.dspace.content.Collection;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Context;
import java.util.*;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.io.*;
/**
* Utility class to read and write CSV files
*
* **************
* Important Note
* **************
*
* This class has been made serializable, as it is stored in a Session.
* Is it wise to:
* a) be putting this into a user's session?
* b) holding an entire CSV upload in memory?
*
* @author Stuart Lewis
*/
public class DSpaceCSV implements Serializable
{
/** The headings of the CSV file */
private List<String> headings;
/** An array list of CSV lines */
private List<DSpaceCSVLine> lines;
/** A counter of how many CSV lines this object holds */
private int counter;
/** The value separator (defaults to double pipe '||') */
protected static String valueSeparator;
/** The value separator in an escaped form for using in regexs */
protected static String escapedValueSeparator;
/** The field separator (defaults to comma) */
protected static String fieldSeparator;
/** The field separator in an escaped form for using in regexs */
protected static String escapedFieldSeparator;
/** Whether to export all metadata such as handles and provenance information */
private boolean exportAll;
/** A list of metadata elements to ignore */
private Map<String, String> ignore;
/**
* Create a new instance of a CSV line holder
*
* @param exportAll Whether to export all metadata such as handles and provenance information
*/
public DSpaceCSV(boolean exportAll)
{
// Initialise the class
init();
// Store the exportAll setting
this.exportAll = exportAll;
}
/**
* Create a new instance, reading the lines in from file
*
* @param f The file to read from
* @param c The DSpace Context
*
* @throws Exception thrown if there is an error reading or processing the file
*/
public DSpaceCSV(File f, Context c) throws Exception
{
// Initialise the class
init();
// Open the CSV file
BufferedReader input = null;
try
{
input = new BufferedReader(new InputStreamReader(new FileInputStream(f),"UTF-8"));
// Read the heading line
String head = input.readLine();
String[] headingElements = head.split(escapedFieldSeparator);
int columnCounter = 0;
for (String element : headingElements)
{
columnCounter++;
// Remove surrounding quotes if there are any
if ((element.startsWith("\"")) && (element.endsWith("\"")))
{
element = element.substring(1, element.length() - 1);
}
// Store the heading
if ("collection".equals(element))
{
// Store the heading
headings.add(element);
}
// Store the action
else if ("action".equals(element))
{
// Store the heading
headings.add(element);
}
else if (!"id".equals(element))
{
// Verify that the heading is valid in the metadata registry
String[] clean = element.split("\\[");
String[] parts = clean[0].split("\\.");
if (parts.length < 2) {
throw new MetadataImportInvalidHeadingException(element,
MetadataImportInvalidHeadingException.ENTRY,
columnCounter);
}
String metadataSchema = parts[0];
String metadataElement = parts[1];
String metadataQualifier = null;
if (parts.length > 2) {
metadataQualifier = parts[2];
}
// Check that the scheme exists
MetadataSchema foundSchema = MetadataSchema.find(c, metadataSchema);
if (foundSchema == null) {
throw new MetadataImportInvalidHeadingException(clean[0],
MetadataImportInvalidHeadingException.SCHEMA,
columnCounter);
}
// Check that the metadata element exists in the schema
int schemaID = foundSchema.getSchemaID();
MetadataField foundField = MetadataField.findByElement(c, schemaID, metadataElement, metadataQualifier);
if (foundField == null) {
throw new MetadataImportInvalidHeadingException(clean[0],
MetadataImportInvalidHeadingException.ELEMENT,
columnCounter);
}
// Store the heading
headings.add(element);
}
}
// Read each subsequent line
StringBuilder lineBuilder = new StringBuilder();
String lineRead;
while ((lineRead = input.readLine()) != null)
{
if (lineBuilder.length() > 0) {
// Already have a previously read value - add this line
lineBuilder.append("\n").append(lineRead);
// Count the number of quotes in the buffer
int quoteCount = 0;
for (int pos = 0; pos < lineBuilder.length(); pos++) {
if (lineBuilder.charAt(pos) == '"') {
quoteCount++;
}
}
if (quoteCount % 2 == 0) {
// Number of quotes is a multiple of 2, add the item
addItem(lineBuilder.toString());
lineBuilder = new StringBuilder();
}
} else if (lineRead.indexOf('"') > -1) {
// Get the number of quotes in the line
int quoteCount = 0;
for (int pos = 0; pos < lineRead.length(); pos++) {
if (lineRead.charAt(pos) == '"') {
quoteCount++;
}
}
if (quoteCount % 2 == 0) {
// Number of quotes is a multiple of 2, add the item
addItem(lineRead);
} else {
// Uneven quotes - add to the buffer and leave for later
lineBuilder.append(lineRead);
}
} else {
// No previously read line, and no quotes in the line - add item
addItem(lineRead);
}
}
}
finally
{
if (input != null)
{
input.close();
}
}
}
/**
* Initialise this class with values from dspace.cfg
*/
private void init()
{
// Set the value separator
setValueSeparator();
// Set the field separator
setFieldSeparator();
// Create the headings
headings = new ArrayList<String>();
// Create the blank list of items
lines = new ArrayList<DSpaceCSVLine>();
// Initialise the counter
counter = 0;
// Set the metadata fields to ignore
ignore = new HashMap<String, String>();
String toIgnore = ConfigurationManager.getProperty("bulkedit", "ignore-on-export");
if ((toIgnore == null) || ("".equals(toIgnore.trim())))
{
// Set a default value
toIgnore = "dc.date.accessioned, dc.date.available, " +
"dc.date.updated, dc.description.provenance";
}
String[] toIgnoreArray = toIgnore.split(",");
for (String toIgnoreString : toIgnoreArray)
{
if (!"".equals(toIgnoreString.trim()))
{
ignore.put(toIgnoreString.trim(), toIgnoreString.trim());
}
}
}
/**
* Decide if this CSV file has an 'action' (case-dependent!) header.
*
* @return Whether or not there is an 'action' header
*/
public boolean hasActions() {
// Look for a heading called 'action'
for (String header : headings) {
if (header.equals("action")) {
return true;
}
}
return false;
}
/**
* Set the value separator for multiple values stored in one csv value.
*
* Is set in bulkedit.cfg as valueseparator
*
* If not set, defaults to double pipe '||'
*/
private void setValueSeparator()
{
// Get the value separator
valueSeparator = ConfigurationManager.getProperty("bulkedit", "valueseparator");
if ((valueSeparator != null) && (!"".equals(valueSeparator.trim())))
{
valueSeparator = valueSeparator.trim();
}
else
{
valueSeparator = "||";
}
// Now store the escaped version
Pattern spchars = Pattern.compile("([\\\\*+\\[\\](){}\\$.?\\^|])");
Matcher match = spchars.matcher(valueSeparator);
escapedValueSeparator = match.replaceAll("\\\\$1");
}
/**
* Set the field separator use to separate fields in the csv.
*
* Is set in bulkedit.cfg as fieldseparator
*
* If not set, defaults to comma ','.
*
* Special values are 'tab', 'hash' and 'semicolon' which will
* get substituted from the text to the value.
*/
private void setFieldSeparator()
{
// Get the value separator
fieldSeparator = ConfigurationManager.getProperty("bulkedit", "fieldseparator");
if ((fieldSeparator != null) && (!"".equals(fieldSeparator.trim())))
{
fieldSeparator = fieldSeparator.trim();
if ("tab".equals(fieldSeparator))
{
fieldSeparator = "\t";
}
else if ("semicolon".equals(fieldSeparator))
{
fieldSeparator = ";";
}
else if ("hash".equals(fieldSeparator))
{
fieldSeparator = "#";
}
else
{
fieldSeparator = fieldSeparator.trim();
}
}
else
{
fieldSeparator = ",";
}
// Now store the escaped version
Pattern spchars = Pattern.compile("([\\\\*+\\[\\](){}\\$.?\\^|])");
Matcher match = spchars.matcher(fieldSeparator);
escapedFieldSeparator = match.replaceAll("\\\\$1");
}
/**
* Add a DSpace item to the CSV file
*
* @param i The DSpace item
*
* @throws Exception if something goes wrong with adding the Item
*/
public final void addItem(Item i) throws Exception
{
// Create the CSV line
DSpaceCSVLine line = new DSpaceCSVLine(i.getID());
// Add in owning collection
String owningCollectionHandle = i.getOwningCollection().getHandle();
line.add("collection", owningCollectionHandle);
// Add in any mapped collections
Collection[] collections = i.getCollections();
for (Collection c : collections)
{
// Only add if it is not the owning collection
if (!c.getHandle().equals(owningCollectionHandle))
{
line.add("collection", c.getHandle());
}
}
// Populate it
DCValue md[] = i.getMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY);
for (DCValue value : md)
{
// Get the key (schema.element)
String key = value.schema + "." + value.element;
// Add the qualifier if there is one (schema.element.qualifier)
if (value.qualifier != null)
{
key = key + "." + value.qualifier;
}
// Add the language if there is one (schema.element.qualifier[langauge])
//if ((value.language != null) && (!"".equals(value.language)))
if (value.language != null)
{
key = key + "[" + value.language + "]";
}
// Store the item
if (exportAll || okToExport(value))
{
line.add(key, value.value);
if (!headings.contains(key))
{
headings.add(key);
}
}
}
lines.add(line);
counter++;
}
/**
* Add an item to the CSV file, from a CSV line of elements
*
* @param line The line of elements
* @throws Exception Thrown if an error occurs when adding the item
*/
public final void addItem(String line) throws Exception
{
// Check to see if the last character is a field separator, which hides the last empy column
boolean last = false;
if (line.endsWith(fieldSeparator))
{
// Add a space to the end, then remove it later
last = true;
line += " ";
}
// Split up on field separator
String[] parts = line.split(escapedFieldSeparator);
ArrayList<String> bits = new ArrayList<String>();
bits.addAll(Arrays.asList(parts));
// Merge parts with embedded separators
boolean alldone = false;
while (!alldone)
{
boolean found = false;
int i = 0;
for (String part : bits)
{
int bitcounter = part.length() - part.replaceAll("\"", "").length();
if ((part.startsWith("\"")) && ((!part.endsWith("\"")) || ((bitcounter & 1) == 1)))
{
found = true;
String add = bits.get(i) + fieldSeparator + bits.get(i + 1);
bits.remove(i);
bits.add(i, add);
bits.remove(i + 1);
break;
}
i++;
}
alldone = !found;
}
// Deal with quotes around the elements
int i = 0;
for (String part : bits)
{
if ((part.startsWith("\"")) && (part.endsWith("\"")))
{
part = part.substring(1, part.length() - 1);
bits.set(i, part);
}
i++;
}
// Remove embedded quotes
i = 0;
for (String part : bits)
{
if (part.contains("\"\""))
{
part = part.replaceAll("\"\"", "\"");
bits.set(i, part);
}
i++;
}
// Add elements to a DSpaceCSVLine
String id = parts[0].replaceAll("\"", "");
DSpaceCSVLine csvLine;
// Is this an existing item, or a new item (where id = '+')
if ("+".equals(id))
{
csvLine = new DSpaceCSVLine();
}
else
{
try
{
csvLine = new DSpaceCSVLine(Integer.parseInt(id));
}
catch (NumberFormatException nfe)
{
System.err.println("Invalid item identifier: " + id);
System.err.println("Please check your CSV file for information. " +
"Item id must be numeric, or a '+' to add a new item");
throw(nfe);
}
}
// Add the rest of the parts
i = 0;
for (String part : bits)
{
if (i > 0)
{
// Is this a last empty item?
if ((last) && (i == headings.size()))
{
part = "";
}
// Make sure we register that this column was there
if (headings.size() < i) {
throw new MetadataImportInvalidHeadingException("",
MetadataImportInvalidHeadingException.MISSING,
i + 1);
}
csvLine.add(headings.get(i - 1), null);
String[] elements = part.split(escapedValueSeparator);
for (String element : elements)
{
if ((element != null) && (!"".equals(element)))
{
csvLine.add(headings.get(i - 1), element);
}
}
}
i++;
}
lines.add(csvLine);
counter++;
}
/**
* Get the lines in CSV holders
*
* @return The lines
*/
public final List<DSpaceCSVLine> getCSVLines()
{
// Return the lines
return lines;
}
/**
* Get the CSV lines as an array of CSV formatted strings
*
* @return the array of CSV formatted Strings
*/
public final String[] getCSVLinesAsStringArray()
{
// Create the headings line
String[] csvLines = new String[counter + 1];
csvLines[0] = "id" + fieldSeparator + "collection";
Collections.sort(headings);
for (String value : headings)
{
csvLines[0] = csvLines[0] + fieldSeparator + value;
}
Iterator<DSpaceCSVLine> i = lines.iterator();
int c = 1;
while (i.hasNext())
{
csvLines[c++] = i.next().toCSV(headings);
}
return csvLines;
}
/**
* Save the CSV file to the given filename
*
* @param filename The filename to save the CSV file to
*
* @throws IOException Thrown if an error occurs when writing the file
*/
public final void save(String filename) throws IOException
{
// Save the file
BufferedWriter out = new BufferedWriter(
new OutputStreamWriter(
new FileOutputStream(filename), "UTF-8"));
for (String csvLine : getCSVLinesAsStringArray()) {
out.write(csvLine + "\n");
}
out.flush();
out.close();
}
/**
* Is it Ok to export this value? When exportAll is set to false, we don't export
* some of the metadata elements.
*
* The list can be configured via the key ignore-on-export in bulkedit.cfg
*
* @param md The DCValue to examine
* @return Whether or not it is OK to export this element
*/
private final boolean okToExport(DCValue md)
{
// Now compare with the list to ignore
String key = md.schema + "." + md.element;
if (md.qualifier != null)
{
key += "." + md.qualifier;
}
if (ignore.get(key) != null) {
return false;
}
// Must be OK, so don't ignore
return true;
}
/**
* Get the headings used in this CSV file
*
* @return The headings
*/
public List<String> getHeadings()
{
return headings;
}
/**
* Return the csv file as one long formatted string
*
* @return The formatted String as a csv
*/
public final String toString()
{
// Return the csv as one long string
StringBuffer csvLines = new StringBuffer();
String[] lines = this.getCSVLinesAsStringArray();
for (String line : lines)
{
csvLines.append(line).append("\n");
}
return csvLines.toString();
}
}