/*
 * Utils.java
 *
 * Version: $Revision: 3762 $
 *
 * Date: $Date: 2009-05-07 04:36:47 +0000 (Thu, 07 May 2009) $
 *
 * Copyright (c) 2002-2009, The DSpace Foundation.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * - Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * - Neither the name of the DSpace Foundation nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */
package org.dspace.core;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.math.BigInteger;
import java.rmi.dgc.VMID;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;

/**
 * Utility functions for DSpace: MD5 checksums, hex encoding, unique key
 * generation, stream copying, HTML entity escaping, duration parsing and
 * ISO 8601 date parsing/formatting.
 * <p>
 * The class holds only static members and cannot be instantiated.
 *
 * @author Peter Breton
 * @version $Revision: 3762 $
 */
public class Utils
{
    /** log4j logger */
    private static final Logger log = Logger.getLogger(Utils.class);

    /** A duration is a run of digits followed by exactly one unit letter. */
    private static final Pattern DURATION_PATTERN = Pattern
            .compile("(\\d+)([smhdwy])");

    private static final long MS_IN_SECOND = 1000L;

    private static final long MS_IN_MINUTE = 60000L;

    private static final long MS_IN_HOUR = 3600000L;

    private static final long MS_IN_DAY = 86400000L;

    private static final long MS_IN_WEEK = 604800000L;

    /** A year is counted as 365 days. */
    private static final long MS_IN_YEAR = 31536000000L;

    /** Lower-case hex digits, indexed by nibble value. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /** Size in bytes of the transfer buffer used by {@link #copy}. */
    private static final int COPY_BUFFER_SIZE = 1024 * 4;

    /** Incremented for every generated key; guarded by the class lock. */
    private static int counter = 0;

    private static final Random random = new Random();

    private static final VMID vmid = new VMID();

    // for parseISO8601Date; tried in order until one of them succeeds
    private static final SimpleDateFormat[] parseFmt =
    {
        // first try at parsing, has milliseconds (note General time zone)
        new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss.SSSz"),

        // second try at parsing, no milliseconds (note General time zone)
        new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ssz"),

        // finally, try without any timezone (defaults to current TZ)
        new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss.SSS"),

        new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss")
    };

    // for formatISO8601Date
    // output canonical format (note RFC 822 time zone, easier to hack)
    private static final SimpleDateFormat outFmtSecond =
        new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ssZ");

    // output format with millisecond precision
    private static final SimpleDateFormat outFmtMillisec =
        new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss.SSSZ");

    // scratch calendar used to inspect the millisecond field of a Date
    private static final Calendar outCal = GregorianCalendar.getInstance();

    /** Private Constructor */
    private Utils()
    {
    }

    /**
     * Return an MD5 checksum for data in hex format.
     * <p>
     * The string is converted to bytes with the platform default charset, so
     * the checksum of non-ASCII text depends on the JVM's configuration.
     *
     * @param data
     *            The data to checksum.
     * @return MD5 checksum for the data in hex format.
     */
    public static String getMD5(String data)
    {
        return getMD5(data.getBytes());
    }

    /**
     * Return an MD5 checksum for data in hex format.
     *
     * @param data
     *            The data to checksum.
     * @return MD5 checksum for the data in hex format.
     */
    public static String getMD5(byte[] data)
    {
        return toHex(getMD5Bytes(data));
    }

    /**
     * Return an MD5 checksum for data as a byte array.
     *
     * @param data
     *            The data to checksum.
     * @return MD5 checksum for the data as a byte array, or
     *         <code>null</code> if no MD5 implementation is available.
     */
    public static byte[] getMD5Bytes(byte[] data)
    {
        try
        {
            return MessageDigest.getInstance("MD5").digest(data);
        }
        catch (NoSuchAlgorithmException nsae)
        {
            // Should never happen, but do not hide it if it does.
            log.error("MD5 message digest is not available", nsae);
            return null;
        }
    }

    /**
     * Return a hex representation of the byte array
     *
     * @param data
     *            The data to transform.
     * @return A lower-case hex representation of the data (two characters
     *         per byte), or <code>null</code> if the array is
     *         <code>null</code> or empty.
     */
    public static String toHex(byte[] data)
    {
        if ((data == null) || (data.length == 0))
        {
            return null;
        }

        char[] hex = new char[data.length * 2];

        for (int i = 0; i < data.length; i++)
        {
            // mask after shifting so that negative bytes index correctly
            hex[2 * i] = HEX_DIGITS[(data[i] >> 4) & 0x0F];
            hex[(2 * i) + 1] = HEX_DIGITS[data[i] & 0x0F];
        }

        return new String(hex);
    }

    /**
     * Generate a unique key. The key is a sequence of base-10 digits: the
     * absolute value of the 16-byte key interpreted as a signed integer,
     * hence at most 39 digits long.
     *
     * @return A unique key as a long sequence of base-10 digits.
     */
    public static String generateKey()
    {
        return new BigInteger(generateBytesKey()).abs().toString();
    }

    /**
     * Generate a unique key. The key is a 32-character long sequence of hex
     * digits.
     *
     * @return A unique key as a long sequence of hex digits.
     */
    public static String generateHexKey()
    {
        return toHex(generateBytesKey());
    }

    /**
     * Generate a unique key as a byte array. The key is the MD5 digest of
     * the VM identifier, the current time, 16 random bytes and a counter.
     * Synchronized because it updates the shared counter.
     *
     * @return A unique key as a byte array.
     */
    public static synchronized byte[] generateBytesKey()
    {
        byte[] junk = new byte[16];

        random.nextBytes(junk);

        // Appending the array itself would only contribute its identity
        // string ("[B@..."), so hex-encode the random bytes to make them
        // part of the digest input.
        String input = String.valueOf(vmid) + new Date() + toHex(junk)
                + (counter++);

        return getMD5Bytes(input.getBytes());
    }

    // The following two methods are taken from the Jakarta IOUtil class.

    /**
     * Copy stream-data from source to destination. This method does not buffer,
     * flush or close the streams, as to do so would require making non-portable
     * assumptions about the streams' origin and further use. If you wish to
     * perform a buffered copy, use {@link #bufferedCopy}.
     *
     * @param input
     *            The InputStream to obtain data from.
     * @param output
     *            The OutputStream to copy data to.
     * @throws IOException
     *             if reading from the input or writing to the output fails
     */
    public static void copy(final InputStream input, final OutputStream output)
            throws IOException
    {
        final byte[] buffer = new byte[COPY_BUFFER_SIZE];
        int count;

        while ((count = input.read(buffer, 0, buffer.length)) != -1)
        {
            // write out those same bytes
            output.write(buffer, 0, count);
        }
    }

    /**
     * Copy stream-data from source to destination, with buffering. This is
     * equivalent to passing a <code>java.io.BufferedInputStream</code> and a
     * <code>java.io.BufferedOutputStream</code> to {@link #copy}, and
     * flushing the output stream afterwards. The streams are not closed after
     * the copy.
     *
     * @param source
     *            The InputStream to obtain data from.
     * @param destination
     *            The OutputStream to copy data to.
     * @throws IOException
     *             if reading from the source or writing to the destination
     *             fails
     */
    public static void bufferedCopy(final InputStream source,
            final OutputStream destination) throws IOException
    {
        final BufferedInputStream input = new BufferedInputStream(source);
        final BufferedOutputStream output = new BufferedOutputStream(
                destination);
        copy(input, output);
        output.flush();
    }

    /**
     * Replace characters that could be interpreted as HTML codes with symbolic
     * references (entities). This function should be called before displaying
     * any metadata fields that could contain the characters {@code <},
     * {@code >}, {@code &} and double quotation marks. This will effectively
     * disable HTML links in metadata.
     *
     * @param value
     *            the metadata value to be scrubbed for display
     *
     * @return the passed-in string, with html special characters replaced with
     *         entities; <code>null</code> and empty strings are returned
     *         unchanged.
     */
    public static String addEntities(String value)
    {
        if (value == null || value.length() == 0)
        {
            return value;
        }

        // The ampersand must be handled first, otherwise the ampersands of
        // the entities inserted below would themselves be escaped.
        value = value.replaceAll("&", "&amp;");
        value = value.replaceAll("\"", "&quot;");

        // Single quotes are deliberately left alone: the apostrophe entity
        // is defined by XML, not by HTML.

        value = value.replaceAll("<", "&lt;");
        value = value.replaceAll(">", "&gt;");

        return value;
    }

    /**
     * Utility method to parse durations defined as \d+[smhdwy] (seconds,
     * minutes, hours, days, weeks, years)
     *
     * @param duration
     *            specified duration
     *
     * @return number of milliseconds equivalent to duration.
     *
     * @throws ParseException
     *             if the duration is of incorrect format, or is too large to
     *             be expressed as a number of milliseconds
     */
    public static long parseDuration(String duration) throws ParseException
    {
        Matcher m = DURATION_PATTERN.matcher(duration.trim());
        if (!m.matches())
        {
            throw new ParseException("'" + duration
                    + "' is not a valid duration definition", 0);
        }

        String units = m.group(2);
        long multiplier;

        switch (units.charAt(0))
        {
        case 's':
            multiplier = MS_IN_SECOND;
            break;
        case 'm':
            multiplier = MS_IN_MINUTE;
            break;
        case 'h':
            multiplier = MS_IN_HOUR;
            break;
        case 'd':
            multiplier = MS_IN_DAY;
            break;
        case 'w':
            multiplier = MS_IN_WEEK;
            break;
        case 'y':
            multiplier = MS_IN_YEAR;
            break;
        default:
            // not reachable while the pattern only admits the units above
            throw new ParseException(units
                    + " is not a valid time unit (must be 'y', "
                    + "'w', 'd', 'h', 'm' or 's')", duration.indexOf(units));
        }

        long quantity;
        try
        {
            quantity = Long.parseLong(m.group(1));
        }
        catch (NumberFormatException nfe)
        {
            // the digits matched the pattern but do not fit into a long
            throw new ParseException("'" + duration
                    + "' is too large to be a valid duration", 0);
        }

        // quantity is never negative, so this is a complete overflow check
        if (quantity > Long.MAX_VALUE / multiplier)
        {
            throw new ParseException("'" + duration
                    + "' is too large to be a valid duration", 0);
        }

        return quantity * multiplier;
    }

    /**
     * Translates timestamp from an ISO 8601-standard format, which
     * is commonly used in XML and RDF documents.
     * This method is synchronized because it depends on a non-reentrant
     * static DateFormat (more efficient than creating a new one each call).
     *
     * @param s the input string
     * @return Date object, or null if there is a problem translating
     *         (including a <code>null</code> input).
     */
    public static synchronized Date parseISO8601Date(String s)
    {
        if (s == null)
        {
            return null;
        }

        // attempt to normalize the timezone to something we can parse;
        // SimpleDateFormat can't handle "Z"
        final int length = s.length();
        String normalized = s;

        if (s.endsWith("Z"))
        {
            normalized = s.substring(0, length - 1) + "GMT+00:00";
        }
        else if (length >= 6)
        {
            // check for trailing timezone of the form +hh:mm or -hh:mm
            char tzSign = s.charAt(length - 6);
            if (tzSign == '-' || tzSign == '+')
            {
                normalized = s.substring(0, length - 6) + "GMT"
                        + s.substring(length - 6);
            }
        }

        // try each known format in turn, remembering the last failure
        ParseException lastError = null;
        for (int i = 0; i < parseFmt.length; ++i)
        {
            try
            {
                return parseFmt[i].parse(normalized);
            }
            catch (ParseException e)
            {
                lastError = e;
            }
        }

        if (lastError != null)
        {
            log.error("Error parsing date:", lastError);
        }
        return null;
    }

    /**
     * Convert a Date to String in the ISO 8601 standard format.
     * The RFC822 timezone is almost right, still need to insert ":".
     * This method is synchronized because it depends on a non-reentrant
     * static DateFormat (more efficient than creating a new one each call).
     *
     * @param d the input Date
     * @return String containing formatted date.
     */
    public static synchronized String formatISO8601Date(Date d)
    {
        outCal.setTime(d);

        // only emit the fractional part when there is one
        SimpleDateFormat format =
            (outCal.get(Calendar.MILLISECOND) == 0) ? outFmtSecond
                                                    : outFmtMillisec;
        String result = format.format(d);

        // turn the trailing "+hhmm" zone into "+hh:mm"
        int split = result.length() - 2;
        return result.substring(0, split) + ":" + result.substring(split);
    }
}