/**
* diqube: Distributed Query Base.
*
* Copyright (C) 2015 Bastian Gloeckle
*
* This file is part of diqube.
*
* diqube is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package org.diqube.tool.merge;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.diqube.context.Profiles;
import org.diqube.file.DiqubeFileFactory;
import org.diqube.file.DiqubeFileReader;
import org.diqube.file.DiqubeFileWriter;
import org.diqube.util.BigByteBuffer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.annotation.AnnotationConfigApplicationContext;

/**
* Implementation of merging multiple .diqube files into a single one.
*
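* <p>
* Example usage (a minimal sketch with hypothetical file names):
*
* <pre>
* new MergeImplementation(Arrays.asList(new File("a.diqube"), new File("b.diqube")), new File("merged.diqube"),
*     "some comment").merge();
* </pre>
*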
* @author Bastian Gloeckle
*/
public class MergeImplementation {
private static final Logger logger = LoggerFactory.getLogger(MergeImplementation.class);
private File outputFile;
private String comment;
private List<File> inputFiles;
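
/**
* @param inputFiles
*          The input .diqube files whose table shards should be merged, in the given order.
* @param outputFile
*          The output .diqube file; it will be overwritten if it already exists.
* @param comment
*          A comment to store in the output file, may be <code>null</code>.
*/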
public MergeImplementation(List<File> inputFiles, File outputFile, String comment) {
this.inputFiles = inputFiles;
this.outputFile = outputFile;
this.comment = (comment != null) ? comment : "";
}
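
/**
* Executes the merge: first reads the metadata of all input files, then streams the serialized table shard data of
* each input file into the output file. Errors are logged and abort the merge.
*/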
public void merge() {
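// Start a Spring context with the CONFIG and TOOL profiles active so the diqube file beans are available.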
try (AnnotationConfigApplicationContext ctx = new AnnotationConfigApplicationContext()) {
ctx.getEnvironment().setActiveProfiles(Profiles.CONFIG, Profiles.TOOL);
ctx.scan("org.diqube");
ctx.refresh();
DiqubeFileFactory fileFactory = ctx.getBean(DiqubeFileFactory.class);
Map<File, FileInfo> fileInfos = new HashMap<>();
logger.info("Reading metadata of input files...");
// Instantiate a DiqubeFileReader for each input file and read its metadata; an IOException indicates an invalid
// input file, in which case we abort.
for (File inputFile : inputFiles) {
try (FileChannel inputFileChannel = new RandomAccessFile(inputFile, "r").getChannel()) {
DiqubeFileReader reader =
fileFactory.createDiqubeFileReader(new BigByteBuffer(inputFileChannel, MapMode.READ_ONLY, null));
FileInfo info = new FileInfo();
info.totalNumberOfRows = reader.getNumberOfRows();
info.numberOfTableShards = reader.getNumberOfTableShards();
info.firstTableShardByte = reader.getTableShardDataFirstByteIndex();
info.lastTableShardByte = reader.getTableShardDataLastByteIndex();
fileInfos.put(inputFile, info);
} catch (IOException e) {
logger.error("Cannot read {}.", inputFile.getAbsolutePath(), e);
return;
}
}
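
// Copy the serialized table shard data of each input file into the single output file.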
try (FileOutputStream fos = new FileOutputStream(outputFile)) {
try (DiqubeFileWriter fileWriter = fileFactory.createDiqubeFileWriter(fos)) {
fileWriter.setComment(comment);
for (File inputFile : inputFiles) {
FileInfo fileInfo = fileInfos.get(inputFile);
logger.info("Copying data of {}", inputFile.getAbsolutePath());
try (RandomAccessFile inputRandomFile = new RandomAccessFile(inputFile, "r")) {
BigByteBuffer buf = new BigByteBuffer(inputRandomFile.getChannel(), MapMode.READ_ONLY, null);
try {
fileWriter.writeSerializedTableShards(
buf.createPartialInputStream(fileInfo.firstTableShardByte, fileInfo.lastTableShardByte + 1),
fileInfo.totalNumberOfRows, fileInfo.numberOfTableShards);
} finally {
// Close the buffer even if writing fails so the mapping of the input file is not leaked.
buf.close();
}
}
}
}
} catch (IOException e) {
logger.error("Could not write output file", e);
return;
}
logger.info("Done.");
}
}
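
/** Simple holder for the metadata of a single input file. */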
private static class FileInfo {
private long totalNumberOfRows;
private int numberOfTableShards;
private long firstTableShardByte;
private long lastTableShardByte;
}
}