/*
* Copyright (C) 2014 Jörg Prante
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.xbib.io.archive;
import org.xbib.io.Packet;
import org.xbib.io.StringPacket;
import org.xbib.io.archive.entry.ArchiveEntryEncoding;
import org.xbib.io.archive.entry.ArchiveEntryEncodingHelper;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.Locale;
import java.util.StringTokenizer;
/**
* Archive utilities
*/
public class ArchiveUtils {
/**
* Private constructor to prevent instantiation of this utility class;
* all of its members are static.
*/
private ArchiveUtils() {
}
/**
 * Normalizes a file name for use as an archive entry name.
 *
 * On Windows a leading drive specifier such as {@code C:} is removed; on NetWare everything
 * up to and including the first colon is removed. The platform path separator is then turned
 * into a forward slash and, unless {@code preserveLeadingSlashes} is set, all leading slashes
 * are dropped.
 *
 * @param fileName               the file name to normalize
 * @param preserveLeadingSlashes {@code true} to keep leading slashes in the result
 * @return the normalized file name
 */
public static String normalizeFileName(String fileName, boolean preserveLeadingSlashes) {
    final String os = System.getProperty("os.name").toLowerCase(Locale.ENGLISH);
    String result = fileName;
    if (os.startsWith("windows")) {
        // Only names longer than two characters are inspected for a drive prefix.
        if (result.length() > 2 && result.charAt(1) == ':') {
            final char drive = result.charAt(0);
            final boolean lowerCaseLetter = drive >= 'a' && drive <= 'z';
            final boolean upperCaseLetter = drive >= 'A' && drive <= 'Z';
            if (lowerCaseLetter || upperCaseLetter) {
                result = result.substring(2);
            }
        }
    } else if (os.contains("netware")) {
        final int colonAt = result.indexOf(':');
        if (colonAt != -1) {
            result = result.substring(colonAt + 1);
        }
    }
    result = result.replace(File.separatorChar, '/');
    if (preserveLeadingSlashes) {
        return result;
    }
    // No absolute path names. Windows paths can start with "\\NetworkDrive\",
    // which leaves several leading slashes, so all of them are skipped.
    int firstNonSlash = 0;
    while (firstNonSlash < result.length() && result.charAt(firstNonSlash) == '/') {
        firstNonSlash++;
    }
    return result.substring(firstNonSlash);
}
/**
* Encoding tried first by {@link #formatNameBytes(String, byte[], int, int)}.
* Obtained from {@code ArchiveEntryEncodingHelper.getEncoding(null)}; presumably the
* {@code null} name selects the platform default encoding - TODO confirm against the helper.
*/
public static final ArchiveEntryEncoding DEFAULT_ENCODING = ArchiveEntryEncodingHelper.getEncoding(null);
/**
 * Encoding of last resort that maps one character to exactly one byte.
 * Encoding keeps only the low eight bits of every character; decoding turns every byte into
 * the character with the same unsigned value and stops at the first NUL byte.
 */
public static final ArchiveEntryEncoding FALLBACK_ENCODING = new ArchiveEntryEncoding() {
    public boolean canEncode(String name) {
        // Every name is accepted; characters above 0xFF are simply truncated by encode().
        return true;
    }

    public ByteBuffer encode(String name) {
        final char[] chars = name.toCharArray();
        final byte[] bytes = new byte[chars.length];
        int next = 0;
        for (char c : chars) {
            bytes[next++] = (byte) c;
        }
        return ByteBuffer.wrap(bytes);
    }

    public String decode(byte[] buffer) {
        // The text ends at the first NUL byte or at the end of the buffer.
        int end = 0;
        while (end < buffer.length && buffer[end] != 0) {
            end++;
        }
        final StringBuilder text = new StringBuilder(buffer.length);
        for (int i = 0; i < end; i++) {
            text.append((char) (buffer[i] & 0xFF));
        }
        return text.toString();
    }
};
/**
 * Writes a name into a fixed-width header field.
 * The name is encoded with {@link #DEFAULT_ENCODING}; if that attempt fails with an
 * {@link IOException}, {@link #FALLBACK_ENCODING} is used instead. A name that is too long
 * for the field is truncated, a shorter one is padded with trailing NULs.
 *
 * @param name   The header name from which to copy the characters.
 * @param buf    The buffer where the name is to be stored.
 * @param offset The starting offset into the buffer
 * @param length The maximum number of header bytes to copy.
 * @return The updated offset, i.e. offset + length
 */
public static int formatNameBytes(String name, byte[] buf, final int offset, final int length) {
    try {
        return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
    } catch (IOException defaultEncodingFailed) {
        // fall through and retry with the fallback encoding
    }
    try {
        return formatNameBytes(name, buf, offset, length, ArchiveUtils.FALLBACK_ENCODING);
    } catch (IOException impossible) {
        // the fallback encoding accepts every name, so this is not expected to happen
        throw new RuntimeException(impossible);
    }
}
/**
 * Copy a name into a buffer.
 * The encoded name is written into the buffer starting at the specified offset.
 * If the field is longer than the encoded name, the rest of the field is filled with
 * trailing NULs. If the encoded name is longer than the field, characters are removed from
 * the end of the name until its encoded form fits.
 *
 * @param name     The header name from which to copy the characters.
 * @param buf      The buffer where the name is to be stored.
 * @param offset   The starting offset into the buffer
 * @param length   The maximum number of header bytes to copy.
 * @param encoding the encoding to use for file names
 * @return The updated offset, i.e. offset + length
 * @throws IOException if the encoding fails to encode the name
 */
public static int formatNameBytes(String name, byte[] buf, final int offset,
                                  final int length,
                                  final ArchiveEntryEncoding encoding)
        throws IOException {
    int len = name.length();
    ByteBuffer b = encoding.encode(name);
    // Re-encode ever shorter prefixes until the encoded form fits into the field.
    // remaining() (limit - position) is the number of encoded bytes; limit() alone is only
    // correct for buffers whose position happens to be zero.
    while (b.remaining() > length && len > 0) {
        b = encoding.encode(name.substring(0, --len));
    }
    final int count = b.remaining();
    // A relative bulk read honours the buffer's position and also works for read-only or
    // non-array-backed buffers, where array() would throw. The duplicate keeps the
    // encoder's buffer state untouched.
    b.duplicate().get(buf, offset, count);
    // Pad any remaining output bytes with NUL
    for (int i = count; i < length; ++i) {
        buf[offset + i] = 0;
    }
    return offset + length;
}
/**
 * Builds a one-line, "ls -l"-like description of an entry: a type flag ({@code d} for a
 * directory, {@code -} otherwise), the entry size right-aligned to at least seven places,
 * and the entry name, separated by single spaces.
 *
 * For example:
 * {@code -    2000 main.c}
 * {@code d     100 testfiles}
 *
 * @param entry the archive entry to describe
 * @return the representation of the entry
 */
public static String toString(ArchiveEntry entry) {
    final StringBuilder line = new StringBuilder();
    if (entry.isDirectory()) {
        line.append('d');
    } else {
        line.append('-');
    }
    line.append(' ');
    final String sizeText = Long.toString(entry.getEntrySize());
    // Left-pad the size with spaces up to a width of seven characters.
    int padding = 7 - sizeText.length();
    while (padding > 0) {
        line.append(' ');
        padding--;
    }
    line.append(sizeText);
    line.append(' ').append(entry.getName());
    return line.toString();
}
/**
 * Check if buffer contents matches ascii String.
 * The expected string is converted to US-ASCII bytes and compared byte by byte with the
 * given region of the buffer; the region must have exactly the same length as the string.
 *
 * @param expected the expected string
 * @param buffer   the buffer
 * @param offset   the offset of the region inside the buffer
 * @param length   the length of the region
 * @return {@code true} if buffer is the same as the expected string
 */
public static boolean matchAsciiBuffer(String expected, byte[] buffer, int offset, int length) {
    // US-ASCII is available on every Java platform, so the Charset overload is used:
    // it needs no handler for an impossible UnsupportedEncodingException.
    final byte[] expectedBytes = expected.getBytes(Charset.forName("US-ASCII"));
    return isEqual(expectedBytes, 0, expectedBytes.length, buffer, offset, length, false);
}
/**
 * Convert a string to Ascii bytes.
 * Used for comparing "magic" strings which need to be independent of the default Locale.
 * Characters that cannot be represented in US-ASCII are replaced by the charset's
 * replacement byte ({@code '?'}).
 *
 * @param inputString input string
 * @return the bytes
 */
public static byte[] toAsciiBytes(String inputString) {
    // US-ASCII is available on every Java platform; the Charset overload has well-defined
    // replacement behaviour and throws no checked exception.
    return inputString.getBytes(Charset.forName("US-ASCII"));
}
/**
 * Convert an input byte array to a String using the ASCII character set.
 * Bytes that are not valid US-ASCII are replaced by the charset's replacement
 * character (U+FFFD).
 *
 * @param inputBytes input byte array
 * @return the bytes, interpreted as an Ascii string
 */
public static String toAsciiString(final byte[] inputBytes) {
    // US-ASCII is available on every Java platform; the Charset overload has well-defined
    // replacement behaviour and throws no checked exception.
    return new String(inputBytes, Charset.forName("US-ASCII"));
}
/**
 * Convert a region of an input byte array to a String using the ASCII character set.
 * Bytes that are not valid US-ASCII are replaced by the charset's replacement
 * character (U+FFFD).
 *
 * @param inputBytes input byte array
 * @param offset     index of the first byte to convert
 * @param length     number of bytes to convert
 * @return the bytes, interpreted as an Ascii string
 */
public static String toAsciiString(final byte[] inputBytes, int offset, int length) {
    // US-ASCII is available on every Java platform; the Charset overload has well-defined
    // replacement behaviour and throws no checked exception.
    return new String(inputBytes, offset, length, Charset.forName("US-ASCII"));
}
/**
 * Compares two byte regions for equality.
 * The regions are equal when they have the same length and the same contents. When
 * {@code ignoreTrailingNulls} is set, regions of different length are also considered equal
 * if the shorter one is a prefix of the longer one and the surplus bytes are all NUL.
 *
 * @param buffer1             first buffer
 * @param offset1             start of the region in the first buffer
 * @param length1             length of the region in the first buffer
 * @param buffer2             second buffer
 * @param offset2             start of the region in the second buffer
 * @param length2             length of the region in the second buffer
 * @param ignoreTrailingNulls ignore trailing nulls if true
 * @return {@code true} if buffer1 and buffer2 have same contents, having regard to trailing nulls
 */
public static boolean isEqual(final byte[] buffer1, final int offset1, final int length1,
                              final byte[] buffer2, final int offset2, final int length2,
                              boolean ignoreTrailingNulls) {
    final int common = Math.min(length1, length2);
    for (int k = 0; k < common; k++) {
        if (buffer1[offset1 + k] != buffer2[offset2 + k]) {
            return false;
        }
    }
    if (length1 == length2) {
        return true;
    }
    if (!ignoreTrailingNulls) {
        return false;
    }
    // Lengths differ: the surplus of the longer region must consist of NUL bytes only.
    final boolean firstIsLonger = length1 > length2;
    final byte[] longer = firstIsLonger ? buffer1 : buffer2;
    final int base = firstIsLonger ? offset1 : offset2;
    final int end = Math.max(length1, length2);
    for (int k = common; k < end; k++) {
        if (longer[base + k] != 0) {
            return false;
        }
    }
    return true;
}
/**
* Metadata keys, in the order in which their values appear as the components
* of an archive entry name.
* NOTE(review): the array is public and its elements can be reassigned by any caller.
*/
public final static String[] keys = new String[]{
"index", "type", "id", "field"
};
/**
* Placeholder written as entry name component when a metadata value is missing.
*/
private final static String EMPTY = "null";
/**
 * Encode archive entry name.
 * The meta values stored under {@link #keys} are escaped and joined with
 * {@link File#separator}. A missing value is written as the literal text "null", so the
 * name never ends with a separator; otherwise 'tar' would recognize it as a directory entry.
 *
 * @param packet the packet
 * @return the entry name
 */
public static String encodeArchiveEntryName(StringPacket packet) {
    final StringBuilder entryName = new StringBuilder();
    String separator = "";
    for (String key : keys) {
        entryName.append(separator);
        separator = File.separator;
        final Object value = packet.meta().get(key);
        // writing "null" here avoids File.separator at the end of the name
        final String text = value == null ? EMPTY : value.toString();
        // encode() yields null for an empty string, which StringBuilder renders as "null" too
        entryName.append(encode(text, UTF8));
    }
    return entryName.toString();
}
/**
 * Decode archive entry name.
 * The name is split at {@link File#separator}; the components are unescaped and stored, in
 * order, as the meta values of {@link #keys}. Components beyond the number of known keys
 * are ignored, and keys without a matching component are left untouched.
 *
 * @param packet           the packet that receives the meta values
 * @param archiveEntryName the entry name
 */
public static void decodeArchiveEntryName(Packet packet, String archiveEntryName) {
    String[] components = split(archiveEntryName, File.separator);
    // An entry name may have more path components than there are keys; bounding the loop
    // avoids an ArrayIndexOutOfBoundsException on keys[i] for such names.
    final int count = Math.min(components.length, keys.length);
    for (int i = 0; i < count; i++) {
        packet.meta(keys[i], decode(components[i], UTF8));
    }
}
/**
 * Splits a string into tokens at any of the given delimiter characters.
 * Consecutive delimiters are treated as one, so no empty tokens are returned.
 *
 * @param str    the string to split
 * @param delims the delimiter characters
 * @return the tokens in order of appearance
 */
private static String[] split(String str, String delims) {
    final StringTokenizer tokens = new StringTokenizer(str, delims);
    final String[] parts = new String[tokens.countTokens()];
    int filled = 0;
    while (tokens.hasMoreTokens()) {
        parts[filled++] = tokens.nextToken();
    }
    return parts;
}
/**
 * Reverses the escaping applied by {@link #encode(String, Charset)}.
 * A {@code '+'} becomes a space and every {@code %XY} escape becomes the octet with the
 * hexadecimal value XY; runs of consecutive octets are converted to characters with the
 * given charset. Once a {@code '#'} has been seen, the rest of the string is treated as a
 * fragment identifier in which {@code '%'} is kept literally. A {@code '%'} that is not
 * followed by two hexadecimal digits is also kept literally, and all other characters are
 * copied unchanged.
 *
 * @param s        the string to decode
 * @param encoding the charset used to interpret the escaped octets
 * @return the decoded string, or {@code null} if {@code s} is {@code null} or empty
 */
public static String decode(String s, Charset encoding) {
    if (s == null || s.isEmpty()) {
        return null;
    }
    final int n = s.length();
    final StringBuilder out = new StringBuilder(n);
    // Escaped octets are collected and decoded as one run, so that a multi-byte sequence
    // such as %C3%A9 reaches the charset as a whole. Each escape consumes three
    // characters, hence n / 3 is an upper bound for the run length.
    final byte[] octets = new byte[n / 3];
    int pending = 0;
    boolean fragment = false;
    int i = 0;
    while (i < n) {
        final char ch = s.charAt(i);
        if (ch == '%' && !fragment && i + 2 < n) {
            final int hi = Character.digit(s.charAt(i + 1), 16);
            final int lo = Character.digit(s.charAt(i + 2), 16);
            if (hi >= 0 && lo >= 0) {
                octets[pending++] = (byte) ((hi << 4) | lo);
                i += 3;
                continue;
            }
            // not a valid escape: fall through and keep the '%' as it is
        }
        if (pending > 0) {
            // a literal character ends the current run of octets
            out.append(new String(octets, 0, pending, encoding));
            pending = 0;
        }
        if (ch == '+') {
            out.append(' ');
        } else {
            if (ch == '#') {
                fragment = true;
            }
            out.append(ch);
        }
        i++;
    }
    if (pending > 0) {
        out.append(new String(octets, 0, pending, encoding));
    }
    return out.toString();
}
/**
 * Escapes a string so that it only consists of "safe" characters.
 * Letters {@code a-z} and {@code A-Z}, digits {@code 0-9} and the characters
 * {@code '-'}, {@code '_'}, {@code '.'} and {@code '*'} are copied unchanged. A space is
 * replaced by {@code '+'}. Every other character is converted to bytes with the given
 * charset, and each byte is written as {@code %XY}, where XY is its hexadecimal value in
 * upper-case digits, so that escaped strings can be compared with plain string comparison.
 *
 * @param s        the String to convert
 * @param encoding The encoding to use for unsafe characters
 * @return The converted String, or {@code null} if {@code s} is {@code null} or empty
 */
public static String encode(String s, Charset encoding) {
    if (s == null || s.isEmpty()) {
        return null;
    }
    final int length = s.length();
    final StringBuilder out = new StringBuilder(length);
    int pos = 0;
    while (pos < length) {
        final char current = s.charAt(pos);
        if (isSafe(current)) {
            // Safe character can just be added
            out.append(current);
            pos++;
        } else if (current == ' ') {
            // Replace space char with plus symbol.
            out.append('+');
            pos++;
        } else {
            // Collect the whole run of unsafe characters, so that characters made of
            // several chars are converted to bytes together.
            int runEnd = pos + 1;
            while (runEnd < length && s.charAt(runEnd) != ' ' && !isSafe(s.charAt(runEnd))) {
                runEnd++;
            }
            // Convert them to %XY encoded strings
            for (byte octet : s.substring(pos, runEnd).getBytes(encoding)) {
                out.append('%');
                out.append(hex.charAt((octet & 0xf0) >> 4));
                out.append(hex.charAt(octet & 0x0f));
            }
            pos = runEnd;
        }
    }
    return out.toString();
}
/**
 * Tells whether a character can be written to an escaped string unchanged.
 * This is the case for the letters 'a' to 'z' and 'A' to 'Z', the digits '0' to '9' and
 * the characters '-', '_', '.' and '*'.
 *
 * @param c the character
 * @return {@code true} if the character does not have to be escaped
 */
private static boolean isSafe(char c) {
    if (c >= 'a' && c <= 'z') {
        return true;
    }
    if (c >= 'A' && c <= 'Z') {
        return true;
    }
    if (c >= '0' && c <= '9') {
        return true;
    }
    return "-_.*".indexOf(c) >= 0;
}
// Upper-case hexadecimal digits, indexed by nibble value when writing %XY escapes.
private static final String hex = "0123456789ABCDEF";
// The ISO-8859-1 charset.
private static final Charset LATIN1 = Charset.forName("ISO-8859-1");
// The UTF-8 charset, used for escaping and unescaping archive entry name components.
private static final Charset UTF8 = Charset.forName("UTF-8");
}