/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro.tool;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.Arrays;
import org.apache.avro.AvroTestUtil;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileConstants;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.io.BinaryData;
import org.apache.avro.util.Utf8;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
public class TestDataFileRepairTool {
private static final Schema SCHEMA = Schema.create(Schema.Type.STRING);
private static File corruptBlockFile;
private static File corruptRecordFile;
private File repairedFile;
@BeforeClass
public static void writeCorruptFile() throws IOException {
// Write a data file
DataFileWriter<Utf8> w = new DataFileWriter<Utf8>(new GenericDatumWriter<Utf8>(SCHEMA));
ByteArrayOutputStream baos = new ByteArrayOutputStream();
w.create(SCHEMA, baos);
w.append(new Utf8("apple"));
w.append(new Utf8("banana"));
w.append(new Utf8("celery"));
w.sync();
w.append(new Utf8("date"));
w.append(new Utf8("endive"));
w.append(new Utf8("fig"));
long pos = w.sync();
w.append(new Utf8("guava"));
w.append(new Utf8("hazelnut"));
w.close();
byte[] original = baos.toByteArray();
// Corrupt the second block by inserting some zero bytes before the sync marker
int corruptPosition = (int) pos - DataFileConstants.SYNC_SIZE;
int corruptedBytes = 3;
byte[] corrupted = new byte[original.length + corruptedBytes];
System.arraycopy(original, 0, corrupted, 0, corruptPosition);
System.arraycopy(original, corruptPosition,
corrupted, corruptPosition + corruptedBytes, original.length - corruptPosition);
corruptBlockFile = AvroTestUtil.tempFile(TestDataFileRepairTool.class,
"corruptBlock.avro");
corruptBlockFile.deleteOnExit();
FileOutputStream out = new FileOutputStream(corruptBlockFile);
out.write(corrupted);
out.close();
// Corrupt the "endive" record by changing the length of the string to be negative
corruptPosition = (int) pos - DataFileConstants.SYNC_SIZE -
(1 + "fig".length() + 1 + "endive".length());
corrupted = new byte[original.length];
System.arraycopy(original, 0, corrupted, 0, original.length);
BinaryData.encodeLong(-1, corrupted, corruptPosition);
corruptRecordFile = AvroTestUtil.tempFile(TestDataFileRepairTool.class,
"corruptRecord.avro");
corruptRecordFile.deleteOnExit();
out = new FileOutputStream(corruptRecordFile);
out.write(corrupted);
out.close();
}
@Before
public void setUp() {
repairedFile = AvroTestUtil.tempFile(TestDataFileRepairTool.class, "repaired.avro");
}
@After
public void tearDown() {
repairedFile.delete();
}
private String run(Tool tool, String... args) throws Exception {
return run(tool, null, args);
}
private String run(Tool tool, InputStream stdin, String... args) throws Exception {
ByteArrayOutputStream out = new ByteArrayOutputStream();
PrintStream stdout = new PrintStream(out);
tool.run(
stdin,
stdout,
System.err,
Arrays.asList(args));
return out.toString("UTF-8").replace("\r", "");
}
@Test
public void testReportCorruptBlock() throws Exception {
String output = run(new DataFileRepairTool(), "-o", "report", corruptBlockFile.getPath());
assertTrue(output, output.contains("Number of blocks: 2 Number of corrupt blocks: 1"));
assertTrue(output, output.contains("Number of records: 5 Number of corrupt records: 0"));
}
@Test
public void testReportCorruptRecord() throws Exception {
String output = run(new DataFileRepairTool(), "-o", "report", corruptRecordFile.getPath());
assertTrue(output, output.contains("Number of blocks: 3 Number of corrupt blocks: 1"));
assertTrue(output, output.contains("Number of records: 8 Number of corrupt records: 2"));
}
@Test
public void testRepairAllCorruptBlock() throws Exception {
String output = run(new DataFileRepairTool(), "-o", "all",
corruptBlockFile.getPath(), repairedFile.getPath());
assertTrue(output, output.contains("Number of blocks: 2 Number of corrupt blocks: 1"));
assertTrue(output, output.contains("Number of records: 5 Number of corrupt records: 0"));
checkFileContains(repairedFile, "apple", "banana", "celery", "guava", "hazelnut");
}
@Test
public void testRepairAllCorruptRecord() throws Exception {
String output = run(new DataFileRepairTool(), "-o", "all",
corruptRecordFile.getPath(), repairedFile.getPath());
assertTrue(output, output.contains("Number of blocks: 3 Number of corrupt blocks: 1"));
assertTrue(output, output.contains("Number of records: 8 Number of corrupt records: 2"));
checkFileContains(repairedFile, "apple", "banana", "celery", "date", "guava",
"hazelnut");
}
@Test
public void testRepairPriorCorruptBlock() throws Exception {
String output = run(new DataFileRepairTool(), "-o", "prior",
corruptBlockFile.getPath(), repairedFile.getPath());
assertTrue(output, output.contains("Number of blocks: 2 Number of corrupt blocks: 1"));
assertTrue(output, output.contains("Number of records: 5 Number of corrupt records: 0"));
checkFileContains(repairedFile, "apple", "banana", "celery");
}
@Test
public void testRepairPriorCorruptRecord() throws Exception {
String output = run(new DataFileRepairTool(), "-o", "prior",
corruptRecordFile.getPath(), repairedFile.getPath());
assertTrue(output, output.contains("Number of blocks: 3 Number of corrupt blocks: 1"));
assertTrue(output, output.contains("Number of records: 8 Number of corrupt records: 2"));
checkFileContains(repairedFile, "apple", "banana", "celery", "date");
}
@Test
public void testRepairAfterCorruptBlock() throws Exception {
String output = run(new DataFileRepairTool(), "-o", "after",
corruptBlockFile.getPath(), repairedFile.getPath());
assertTrue(output, output.contains("Number of blocks: 2 Number of corrupt blocks: 1"));
assertTrue(output, output.contains("Number of records: 5 Number of corrupt records: 0"));
checkFileContains(repairedFile, "guava", "hazelnut");
}
@Test
public void testRepairAfterCorruptRecord() throws Exception {
String output = run(new DataFileRepairTool(), "-o", "after",
corruptRecordFile.getPath(), repairedFile.getPath());
assertTrue(output, output.contains("Number of blocks: 3 Number of corrupt blocks: 1"));
assertTrue(output, output.contains("Number of records: 8 Number of corrupt records: 2"));
checkFileContains(repairedFile, "guava", "hazelnut");
}
private void checkFileContains(File repairedFile, String... lines) throws IOException {
DataFileReader r = new DataFileReader<Utf8>(repairedFile,
new GenericDatumReader<Utf8>(SCHEMA));
for (String line : lines) {
assertEquals(line, r.next().toString());
}
assertFalse(r.hasNext());
}
}