/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package org.apache.commons.compress.archivers.zip;

import static org.junit.Assert.*;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.util.Enumeration;
import java.util.zip.CRC32;

import org.apache.commons.compress.AbstractTestCase;
import org.apache.commons.compress.utils.CharsetNames;
import org.junit.Test;

/**
 * Tests reading and writing ZIP archives whose entry names contain
 * non-ASCII characters, both via {@link ZipFile} and via
 * {@link ZipArchiveInputStream}.
 *
 * <p>The round-trip tests write three entries ({@link #OIL_BARREL_TXT},
 * {@link #EURO_FOR_DOLLAR_TXT} and {@link #ASCII_TXT}) with a given
 * encoding and then verify the names read back from the archive.</p>
 */
public class UTF8ZipFilesTest extends AbstractTestCase {

    private static final String CP437 = "cp437";
    private static final String ASCII_TXT = "ascii.txt";
    private static final String EURO_FOR_DOLLAR_TXT = "\u20AC_for_Dollar.txt";
    private static final String OIL_BARREL_TXT = "\u00D6lf\u00E4sser.txt";

    @Test
    public void testUtf8FileRoundtripExplicitUnicodeExtra()
        throws IOException {
        testFileRoundtrip(CharsetNames.UTF_8, true, true);
    }

    @Test
    public void testUtf8FileRoundtripNoEFSExplicitUnicodeExtra()
        throws IOException {
        testFileRoundtrip(CharsetNames.UTF_8, false, true);
    }

    @Test
    public void testCP437FileRoundtripExplicitUnicodeExtra()
        throws IOException {
        testFileRoundtrip(CP437, false, true);
    }

    @Test
    public void testASCIIFileRoundtripExplicitUnicodeExtra()
        throws IOException {
        testFileRoundtrip(CharsetNames.US_ASCII, false, true);
    }

    @Test
    public void testUtf8FileRoundtripImplicitUnicodeExtra()
        throws IOException {
        testFileRoundtrip(CharsetNames.UTF_8, true, false);
    }

    @Test
    public void testUtf8FileRoundtripNoEFSImplicitUnicodeExtra()
        throws IOException {
        testFileRoundtrip(CharsetNames.UTF_8, false, false);
    }

    @Test
    public void testCP437FileRoundtripImplicitUnicodeExtra()
        throws IOException {
        testFileRoundtrip(CP437, false, false);
    }

    @Test
    public void testASCIIFileRoundtripImplicitUnicodeExtra()
        throws IOException {
        testFileRoundtrip(CharsetNames.US_ASCII, false, false);
    }

    /*
     * 7-ZIP created archive, uses EFS to signal UTF-8 filenames.
     *
     * 7-ZIP doesn't use EFS for strings that can be encoded in CP437
     * - which is true for OIL_BARREL_TXT.
     */
    @Test
    public void testRead7ZipArchive() throws IOException {
        final File archive = getFile("utf8-7zip-test.zip");
        ZipFile zf = null;
        try {
            zf = new ZipFile(archive, CP437, false);
            assertNotNull(zf.getEntry(ASCII_TXT));
            assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
            assertNotNull(zf.getEntry(OIL_BARREL_TXT));
        } finally {
            ZipFile.closeQuietly(zf);
        }
    }

    /**
     * Streams through the 7-ZIP created archive and checks the entry
     * names in the order the stream returns them.
     */
    @Test
    public void testRead7ZipArchiveForStream() throws IOException {
        final FileInputStream archive =
            new FileInputStream(getFile("utf8-7zip-test.zip"));
        ZipArchiveInputStream zi = null;
        try {
            zi = new ZipArchiveInputStream(archive, CP437, false);
            assertEquals(ASCII_TXT, zi.getNextEntry().getName());
            assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
            assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
        } finally {
            if (zi != null) {
                zi.close();
            }
        }
    }

    /*
     * WinZIP created archive, uses Unicode Extra Fields but only in
     * the central directory.
     */
    @Test
    public void testReadWinZipArchive() throws IOException {
        final File archive = getFile("utf8-winzip-test.zip");
        ZipFile zf = null;
        try {
            zf = new ZipFile(archive, null, true);
            assertCanRead(zf, ASCII_TXT);
            assertCanRead(zf, EURO_FOR_DOLLAR_TXT);
            assertCanRead(zf, OIL_BARREL_TXT);
        } finally {
            ZipFile.closeQuietly(zf);
        }
    }

    /**
     * Asserts that the named entry exists in the archive and that a
     * stream for it can be opened and read from.
     *
     * @param zf the archive to look the entry up in
     * @param fileName the name of the entry expected to exist
     * @throws IOException if opening, reading or closing the entry fails
     */
    private void assertCanRead(final ZipFile zf, final String fileName)
        throws IOException {
        final ZipArchiveEntry entry = zf.getEntry(fileName);
        assertNotNull("Entry doesn't exist", entry);
        final InputStream is = zf.getInputStream(entry);
        assertNotNull("InputStream is null", is);
        try {
            // Only readability matters here; the byte read is not inspected.
            is.read();
        } finally {
            is.close();
        }
    }

    /**
     * Streams through the WinZIP created archive and checks the entry
     * names in the order the stream returns them.
     */
    @Test
    public void testReadWinZipArchiveForStream() throws IOException {
        final FileInputStream archive =
            new FileInputStream(getFile("utf8-winzip-test.zip"));
        ZipArchiveInputStream zi = null;
        try {
            zi = new ZipArchiveInputStream(archive, null, true);
            assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
            assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
            assertEquals(ASCII_TXT, zi.getNextEntry().getName());
        } finally {
            if (zi != null) {
                zi.close();
            }
        }
    }

    /**
     * Writes an US-ASCII encoded archive with explicit Unicode extra
     * fields and checks the names seen when streaming through it.
     *
     * <p>NOTE(review): despite its name this test reads the archive with
     * {@link ZipArchiveInputStream}, not {@link ZipFile}; the name looks
     * swapped with {@link #testZipArchiveInputStreamReadsUnicodeFields()}.
     * The public test name is left unchanged.</p>
     */
    @Test
    public void testZipFileReadsUnicodeFields() throws IOException {
        final File file = File.createTempFile("unicode-test", ".zip");
        file.deleteOnExit();
        ZipArchiveInputStream zi = null;
        try {
            createTestFile(file, CharsetNames.US_ASCII, false, true);
            final FileInputStream archive = new FileInputStream(file);
            zi = new ZipArchiveInputStream(archive, CharsetNames.US_ASCII,
                                           true);
            // Entries are expected in the order createTestFile wrote them.
            assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
            assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
            assertEquals(ASCII_TXT, zi.getNextEntry().getName());
        } finally {
            if (zi != null) {
                zi.close();
            }
            tryHardToDelete(file);
        }
    }

    /**
     * Writes an US-ASCII encoded archive with explicit Unicode extra
     * fields and checks that all three entries can be looked up by their
     * Unicode names.
     *
     * <p>NOTE(review): despite its name this test reads the archive with
     * {@link ZipFile}, not {@link ZipArchiveInputStream}; the name looks
     * swapped with {@link #testZipFileReadsUnicodeFields()}. The public
     * test name is left unchanged.</p>
     */
    @Test
    public void testZipArchiveInputStreamReadsUnicodeFields()
        throws IOException {
        final File file = File.createTempFile("unicode-test", ".zip");
        file.deleteOnExit();
        ZipFile zf = null;
        try {
            createTestFile(file, CharsetNames.US_ASCII, false, true);
            zf = new ZipFile(file, CharsetNames.US_ASCII, true);
            assertNotNull(zf.getEntry(ASCII_TXT));
            assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
            assertNotNull(zf.getEntry(OIL_BARREL_TXT));
        } finally {
            ZipFile.closeQuietly(zf);
            tryHardToDelete(file);
        }
    }

    @Test
    public void testRawNameReadFromZipFile() throws IOException {
        final File archive = getFile("utf8-7zip-test.zip");
        ZipFile zf = null;
        try {
            zf = new ZipFile(archive, CP437, false);
            assertRawNameOfAsciiTxt(zf.getEntry(ASCII_TXT));
        } finally {
            ZipFile.closeQuietly(zf);
        }
    }

    @Test
    public void testRawNameReadFromStream() throws IOException {
        final FileInputStream archive =
            new FileInputStream(getFile("utf8-7zip-test.zip"));
        ZipArchiveInputStream zi = null;
        try {
            zi = new ZipArchiveInputStream(archive, CP437, false);
            assertRawNameOfAsciiTxt((ZipArchiveEntry) zi.getNextEntry());
        } finally {
            if (zi != null) {
                zi.close();
            }
        }
    }

    /**
     * Creates a temporary archive with the given settings, verifies it
     * via {@link #testFile(File, String)} and deletes it again.
     *
     * @param encoding the encoding used for writing and reading names
     * @param withEFS whether the language encoding flag is set on write
     * @param withExplicitUnicodeExtra whether the test adds Unicode path
     *        extra fields itself instead of letting the stream do it
     * @throws IOException if creating, writing or reading the file fails
     */
    private static void testFileRoundtrip(final String encoding,
                                          final boolean withEFS,
                                          final boolean withExplicitUnicodeExtra)
        throws IOException {
        final File file = File.createTempFile(encoding + "-test", ".zip");
        file.deleteOnExit();
        try {
            createTestFile(file, encoding, withEFS, withExplicitUnicodeExtra);
            testFile(file, encoding);
        } finally {
            tryHardToDelete(file);
        }
    }

    /**
     * Writes an archive holding three entries, in this order:
     * {@link #OIL_BARREL_TXT}, {@link #EURO_FOR_DOLLAR_TXT} and
     * {@link #ASCII_TXT}.
     *
     * @param file the file to write the archive to
     * @param encoding the encoding set on the output stream
     * @param withEFS whether the language encoding flag is set
     * @param withExplicitUnicodeExtra if {@code true} the stream's policy
     *        is set to never create Unicode extra fields and this method
     *        adds them itself for names the encoding cannot represent;
     *        if {@code false} the stream's policy is set to always
     *        create them
     * @throws UnsupportedEncodingException declared by the original
     *         signature; kept for compatibility
     * @throws IOException if writing the archive fails
     */
    private static void createTestFile(final File file, final String encoding,
                                       final boolean withEFS,
                                       final boolean withExplicitUnicodeExtra)
        throws UnsupportedEncodingException, IOException {

        final ZipEncoding zipEncoding =
            ZipEncodingHelper.getZipEncoding(encoding);

        ZipArchiveOutputStream zos = null;
        try {
            zos = new ZipArchiveOutputStream(file);
            zos.setEncoding(encoding);
            zos.setUseLanguageEncodingFlag(withEFS);
            // When the test supplies the extra fields itself the stream
            // must not add its own, hence the seemingly inverted mapping.
            zos.setCreateUnicodeExtraFields(withExplicitUnicodeExtra
                ? ZipArchiveOutputStream.UnicodeExtraFieldPolicy.NEVER
                : ZipArchiveOutputStream.UnicodeExtraFieldPolicy.ALWAYS);

            writeEntry(zos, zipEncoding, OIL_BARREL_TXT, "Hello, world!",
                       withExplicitUnicodeExtra);
            writeEntry(zos, zipEncoding, EURO_FOR_DOLLAR_TXT,
                       "Give me your money!", withExplicitUnicodeExtra);
            writeEntry(zos, zipEncoding, ASCII_TXT, "ascii",
                       withExplicitUnicodeExtra);

            zos.finish();
        } finally {
            if (zos != null) {
                try {
                    zos.close();
                } catch (final IOException ignored) {
                    // Best effort: a failure on close must not mask an
                    // exception thrown while writing the archive.
                }
            }
        }
    }

    /**
     * Writes a single entry with US-ASCII encoded content, optionally
     * attaching a Unicode path extra field first.
     *
     * @param zos the stream to write the entry to
     * @param zipEncoding the encoding used to decide whether the name
     *        needs an extra field and to encode the name for it
     * @param name the entry name
     * @param content the entry content
     * @param withExplicitUnicodeExtra whether to add a Unicode path extra
     *        field when {@code zipEncoding} cannot encode {@code name}
     * @throws IOException if writing the entry fails
     */
    private static void writeEntry(final ZipArchiveOutputStream zos,
                                   final ZipEncoding zipEncoding,
                                   final String name,
                                   final String content,
                                   final boolean withExplicitUnicodeExtra)
        throws IOException {
        final ZipArchiveEntry ze = new ZipArchiveEntry(name);
        if (withExplicitUnicodeExtra
            && !zipEncoding.canEncode(ze.getName())) {
            final ByteBuffer en = zipEncoding.encode(ze.getName());
            // Start at the buffer's current position rather than assuming
            // it is 0, so offset and length describe the same slice.
            ze.addExtraField(new UnicodePathExtraField(ze.getName(),
                                                       en.array(),
                                                       en.arrayOffset()
                                                       + en.position(),
                                                       en.remaining()));
        }
        zos.putArchiveEntry(ze);
        zos.write(content.getBytes(CharsetNames.US_ASCII));
        zos.closeArchiveEntry();
    }

    /**
     * Opens the archive with the given encoding and checks every entry:
     * the two non-ASCII names are verified with
     * {@link #assertUnicodeName}, {@link #ASCII_TXT} is accepted as is
     * and any other name fails the test.
     *
     * @param file the archive to check
     * @param encoding the encoding used to read entry names
     * @throws IOException if reading the archive fails
     */
    private static void testFile(final File file, final String encoding)
        throws IOException {
        ZipFile zf = null;
        try {
            zf = new ZipFile(file, encoding, false);

            final Enumeration<ZipArchiveEntry> e = zf.getEntries();
            while (e.hasMoreElements()) {
                final ZipArchiveEntry ze = e.nextElement();

                // Match on the ASCII-only tail of the name because the
                // non-ASCII prefix may not have been decoded as expected.
                if (ze.getName().endsWith("sser.txt")) {
                    assertUnicodeName(ze, OIL_BARREL_TXT, encoding);
                } else if (ze.getName().endsWith("_for_Dollar.txt")) {
                    assertUnicodeName(ze, EURO_FOR_DOLLAR_TXT, encoding);
                } else if (!ze.getName().equals(ASCII_TXT)) {
                    throw new AssertionError("Unrecognized ZIP entry with name ["
                                             + ze.getName() + "] found.");
                }
            }
        } finally {
            ZipFile.closeQuietly(zf);
        }
    }

    /**
     * Returns the entry's extra field registered under
     * {@link UnicodePathExtraField#UPATH_ID}, cast to
     * {@link UnicodePathExtraField}.
     */
    private static UnicodePathExtraField findUniCodePath(final ZipArchiveEntry ze) {
        return (UnicodePathExtraField)
            ze.getExtraField(UnicodePathExtraField.UPATH_ID);
    }

    /**
     * Verifies the Unicode name of an entry. Nothing more is checked if
     * the entry's name already equals the expected name; otherwise the
     * entry must carry a Unicode path extra field whose CRC matches the
     * entry's name as encoded with {@code encoding} and whose Unicode
     * name, decoded as UTF-8, equals the expected name.
     *
     * @param ze the entry to check
     * @param expectedName the expected Unicode name
     * @param encoding the encoding used to re-encode the entry's name
     * @throws IOException if encoding the name fails
     */
    private static void assertUnicodeName(final ZipArchiveEntry ze,
                                          final String expectedName,
                                          final String encoding)
        throws IOException {
        if (!expectedName.equals(ze.getName())) {
            final UnicodePathExtraField ucpf = findUniCodePath(ze);
            assertNotNull(ucpf);

            final ZipEncoding enc = ZipEncodingHelper.getZipEncoding(encoding);
            final ByteBuffer ne = enc.encode(ze.getName());

            final CRC32 crc = new CRC32();
            // Start at the buffer's current position rather than assuming
            // it is 0, so offset and length describe the same slice.
            crc.update(ne.array(), ne.arrayOffset() + ne.position(),
                       ne.remaining());

            assertEquals(crc.getValue(), ucpf.getNameCRC32());
            assertEquals(expectedName, new String(ucpf.getUnicodeName(),
                                                  CharsetNames.UTF_8));
        }
    }

    @Test
    public void testUtf8Interoperability() throws IOException {
        final File file1 = getFile("utf8-7zip-test.zip");
        final File file2 = getFile("utf8-winzip-test.zip");

        testFile(file1, CP437);
        testFile(file2, CP437);
    }

    /**
     * Asserts that the entry's raw name holds exactly the characters of
     * {@link #ASCII_TXT}, one byte per character, and that
     * {@code getRawName()} returns a different array instance on each
     * call.
     *
     * @param ze the entry expected to be named {@link #ASCII_TXT}
     */
    private static void assertRawNameOfAsciiTxt(final ZipArchiveEntry ze) {
        final byte[] b = ze.getRawName();
        assertNotNull(b);
        final int len = ASCII_TXT.length();
        assertEquals(len, b.length);
        for (int i = 0; i < len; i++) {
            assertEquals("Byte " + i, (byte) ASCII_TXT.charAt(i), b[i]);
        }
        assertNotSame(b, ze.getRawName());
    }
}