/*
 * Copyright 2015-2016 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.opencga.storage.core.alignment.json;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.opencb.biodata.models.alignment.Alignment;
import org.opencb.biodata.models.alignment.AlignmentRegion;
import org.opencb.biodata.models.alignment.stats.MeanCoverage;
import org.opencb.biodata.models.alignment.stats.RegionCoverage;
import org.opencb.commons.io.DataWriter;

import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPOutputStream;

/**
 * Writes the coverage information carried by {@link AlignmentRegion}s as newline-delimited JSON.
 * <p>
 * Region coverage is accumulated in a fixed-size buffer of {@code chunkSize} positions and emitted one
 * chunk per line; chunks whose total coverage is zero everywhere are not written. Mean coverage
 * entries are written one per line, sorted by chromosome and start within each written region.
 * <p>
 * File names produced by the four-argument constructor:
 * <ul>
 *   <li>coverage: {@code <name>.coverage.json[.gz]}</li>
 *   <li>mean coverage: {@code <name>.mean-coverage.json[.gz]}</li>
 * </ul>
 * The expected call sequence is {@link #open()}, {@link #pre()}, any number of {@code write} calls,
 * {@link #post()} and {@link #close()}.
 *
 * @author Jacobo Coll Moragon &lt;jcoll@ebi.ac.uk&gt;
 */
public class AlignmentCoverageJsonDataWriter implements DataWriter<AlignmentRegion> {

    public static final int DEFAULT_CHUNK_SIZE = 1000;

    private static final Logger LOGGER = Logger.getLogger(AlignmentCoverageJsonDataWriter.class.getName());

    private final String coverageFilename;
    private final String meanCoverageFilename;
    private final JsonFactory factory;
    private final ObjectMapper jsonObjectMapper;
    private final boolean gzip;
    private final boolean writeCoverage;
    private final boolean writeMeanCoverage;

    private int chunkSize = DEFAULT_CHUNK_SIZE;

    private OutputStream coverageOutputStream;
    private OutputStream meanCoverageOutputStream;
    private JsonGenerator coverageGenerator;
    private JsonGenerator meanCoverageGenerator;

    /** Chunk currently being filled; created by {@link #open()}. */
    private RegionCoverage bufferedCoverage;

    /**
     * Creates a writer that stores the region coverage in the given file.
     * <p>
     * Output is gzip-compressed when the file name ends with {@code ".gz"}. Mean coverage is not
     * written because this constructor has no file name for it.
     *
     * @param coverageFilename path of the region coverage output file
     */
    public AlignmentCoverageJsonDataWriter(String coverageFilename) {
        this.coverageFilename = coverageFilename;
        this.meanCoverageFilename = null;
        this.gzip = this.coverageFilename.endsWith(".gz");
        this.factory = new JsonFactory();
        // Registering the factory with the mapper makes the mapper the codec used by writeObject().
        this.jsonObjectMapper = new ObjectMapper(this.factory);
        // Previously both flags were left false here, so the writer silently produced no output.
        this.writeCoverage = true;
        this.writeMeanCoverage = false;
    }

    /**
     * Creates a writer whose output files are derived from a common base name.
     *
     * @param baseFilename      prefix of both output files
     * @param writeCoverage     whether to write {@code <baseFilename>.coverage.json[.gz]}
     * @param writeMeanCoverage whether to write {@code <baseFilename>.mean-coverage.json[.gz]}
     * @param gzip              whether the outputs are gzip-compressed (adds the {@code .gz} suffix)
     * @throws IllegalStateException if both {@code writeCoverage} and {@code writeMeanCoverage} are false
     */
    public AlignmentCoverageJsonDataWriter(String baseFilename, boolean writeCoverage, boolean writeMeanCoverage,
                                           boolean gzip) {
        final String extension = gzip ? ".json.gz" : ".json";
        this.coverageFilename = baseFilename + ".coverage" + extension;
        this.meanCoverageFilename = baseFilename + ".mean-coverage" + extension;
        this.gzip = gzip;
        this.factory = new JsonFactory();
        this.jsonObjectMapper = new ObjectMapper(this.factory);
        this.writeMeanCoverage = writeMeanCoverage;
        this.writeCoverage = writeCoverage;
        if (!this.writeCoverage && !this.writeMeanCoverage) {
            throw new IllegalStateException("Writer needs to write region coverage or mean coverage.");
        }
    }

    /**
     * Allocates the chunk buffer and opens the enabled output files.
     *
     * @return {@code true} on success; {@code false} if any file could not be opened, in which case
     *         every stream opened so far has been closed again
     */
    @Override
    public boolean open() {
        bufferedCoverage = new RegionCoverage(chunkSize);
        bufferedCoverage.setChromosome("");
        try {
            if (writeCoverage) {
                coverageOutputStream = openStream(coverageFilename);
            }
            // writeMeanCoverage is only ever enabled together with a non-null meanCoverageFilename.
            if (writeMeanCoverage) {
                meanCoverageOutputStream = openStream(meanCoverageFilename);
            }
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "Unable to open the coverage output files", e);
            close(); // do not leak a stream that was opened before the failure
            return false;
        }
        return true;
    }

    /**
     * Registers the JSON mix-in and creates one generator per enabled output.
     *
     * @return {@code true} on success; {@code false} if {@link #open()} has not succeeded or a
     *         generator could not be created (the writer is closed in the latter case)
     */
    @Override
    public boolean pre() {
        jsonObjectMapper.addMixIn(Alignment.AlignmentDifference.class, AlignmentDifferenceJsonMixin.class);
        if ((writeCoverage && coverageOutputStream == null)
                || (writeMeanCoverage && meanCoverageOutputStream == null)) {
            LOGGER.log(Level.SEVERE, "pre() called without a successful open()");
            return false;
        }
        try {
            if (writeCoverage) {
                coverageGenerator = factory.createGenerator(coverageOutputStream);
            }
            if (writeMeanCoverage) {
                meanCoverageGenerator = factory.createGenerator(meanCoverageOutputStream);
            }
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "Unable to create the JSON generators", e);
            close();
            return false;
        }
        return true;
    }

    /**
     * Writes the chunk still held in the buffer and flushes the enabled generators.
     *
     * @return {@code true} on success, {@code false} if an I/O error occurred
     */
    @Override
    public boolean post() {
        try {
            if (writeCoverage) {
                writeRegionCoverageJson(bufferedCoverage);
                coverageGenerator.flush();
            }
            if (writeMeanCoverage) {
                meanCoverageGenerator.flush();
            }
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, "Unable to flush the coverage output", ex);
            return false;
        }
        return true;
    }

    /**
     * Closes the generators and their underlying streams.
     * <p>
     * Every resource is closed even if closing an earlier one fails, and resources that were never
     * created are skipped, so this method is safe to call after a failed {@link #open()} or
     * {@link #pre()}.
     *
     * @return {@code true} if everything closed cleanly, {@code false} if any close failed
     */
    @Override
    public boolean close() {
        boolean ok = closeResource(coverageGenerator, "region coverage generator");
        ok &= closeResource(meanCoverageGenerator, "mean coverage generator");
        // The generators normally close their target stream; closing again is a no-op for these
        // stream types and covers streams that open() created but pre() never wrapped.
        ok &= closeResource(coverageOutputStream, "region coverage stream");
        ok &= closeResource(meanCoverageOutputStream, "mean coverage stream");
        return ok;
    }

    /**
     * Writes coverage in batches.
     * <p>
     * Mean coverage (if enabled) is written first, then the region coverage is merged into the chunk
     * buffer, which is flushed to disk every time it fills up. Regions are expected to arrive sorted
     * by chromosome and position. A region without the corresponding data is skipped.
     *
     * @param elem AlignmentRegion which contains the RegionCoverage
     * @return {@code true} on success, {@code false} if an I/O error occurred
     */
    @Override
    public boolean write(AlignmentRegion elem) {
        try {
            if (writeMeanCoverage) {
                writeMeanCoverageJson(elem.getMeanCoverage());
            }
            if (writeCoverage) {
                bufferRegionCoverage(elem.getCoverage());
            }
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, "Unable to write the coverage", ex);
            return false;
        }
        return true;
    }

    /**
     * Writes every region of the batch, stopping at the first failure.
     *
     * @param batch regions to write
     * @return {@code true} if all regions were written, {@code false} as soon as one fails
     */
    @Override
    public boolean write(List<AlignmentRegion> batch) {
        for (AlignmentRegion ar : batch) {
            if (!write(ar)) {
                return false;
            }
        }
        return true;
    }

    public int getChunkSize() {
        return chunkSize;
    }

    /**
     * Sets the number of positions per written chunk.
     * <p>
     * The chunk buffer is allocated with this size in {@link #open()}, so the value has to be set
     * before the writer is opened.
     *
     * @param chunkSize number of positions per chunk
     */
    public void setChunkSize(int chunkSize) {
        this.chunkSize = chunkSize;
    }

    public String getMeanCoverageFilename() {
        return meanCoverageFilename;
    }

    public String getCoverageFilename() {
        return coverageFilename;
    }

    /** Opens {@code filename} for writing, gzip-wrapped when enabled, without leaking on failure. */
    private OutputStream openStream(String filename) throws IOException {
        OutputStream fileStream = new FileOutputStream(filename);
        if (!gzip) {
            return fileStream;
        }
        try {
            return new GZIPOutputStream(fileStream);
        } catch (IOException e) {
            // The gzip header could not be written: release the file handle before propagating.
            try {
                fileStream.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /** Closes {@code resource} if present, logging (not throwing) any failure. */
    private static boolean closeResource(Closeable resource, String description) {
        if (resource == null) {
            return true;
        }
        try {
            resource.close();
            return true;
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, "Unable to close the " + description, ex);
            return false;
        }
    }

    /** Writes {@code coverage} as one JSON line unless its total coverage is zero everywhere. */
    private void writeRegionCoverageJson(RegionCoverage coverage) throws IOException {
        boolean empty = true;
        for (int i = 0; i < chunkSize; i++) {
            if (coverage.getAll()[i] != 0) {
                empty = false;
                break;
            }
        }
        if (!empty) {
            coverageGenerator.writeObject(coverage);
            coverageGenerator.writeRaw("\n");
        }
    }

    /**
     * Sorts the entries in place by chromosome and start, then writes one JSON line per entry.
     * A {@code null} list is treated as nothing to write.
     */
    private void writeMeanCoverageJson(List<MeanCoverage> meanCoverage) throws IOException {
        if (meanCoverage == null) {
            return;
        }
        Collections.sort(meanCoverage, (left, right) -> {
            int byChromosome = left.getRegion().getChromosome().compareTo(right.getRegion().getChromosome());
            if (byChromosome != 0) {
                return byChromosome;
            }
            // Integer.compare instead of subtraction: the difference of two ints can overflow.
            return Integer.compare(left.getRegion().getStart(), right.getRegion().getStart());
        });
        for (MeanCoverage mc : meanCoverage) {
            meanCoverageGenerator.writeObject(mc);
            meanCoverageGenerator.writeRaw("\n");
        }
    }

    /**
     * Merges {@code coverage} into the chunk buffer, writing the buffer each time it fills up or when
     * the incoming coverage lies outside of it. A {@code null} coverage is ignored.
     */
    private void bufferRegionCoverage(RegionCoverage coverage) throws IOException {
        if (coverage == null) {
            return;
        }

        if (coverage.getStart() - bufferedCoverage.getStart() > chunkSize
                || !bufferedCoverage.getChromosome().equals(coverage.getChromosome())) {
            // Current coverage is out of the bufferedCoverage region: write what is buffered and
            // re-anchor the buffer on the chunk that contains the new start.
            if (bufferedCoverage.getChromosome() != null) {
                writeRegionCoverageJson(bufferedCoverage);
            }
            bufferedCoverage.setChromosome(coverage.getChromosome());
            bufferedCoverage.setStart((coverage.getStart() - 1) / chunkSize * chunkSize + 1); // 1-based position
            bufferedCoverage.setEnd(bufferedCoverage.getStart() + chunkSize - 1);
            clearBuffer();
        }

        // Position of the incoming coverage relative to the buffer start.
        int offset = (int) (coverage.getStart() - bufferedCoverage.getStart());
        int lim = coverage.getAll().length;
        // A negative offset means the coverage starts before the buffer: skip that leading part.
        int coverageIndex = Math.max(0, -offset);

        for (; coverageIndex < lim; coverageIndex++) {
            if (coverageIndex + offset == chunkSize) {
                // Buffer filled. Write and move start and end of the region.
                writeRegionCoverageJson(bufferedCoverage);
                bufferedCoverage.setStart(bufferedCoverage.getStart() + chunkSize);
                bufferedCoverage.setEnd(bufferedCoverage.getEnd() + chunkSize);
                offset = (int) (coverage.getStart() - bufferedCoverage.getStart());
            }
            // Copy coverage to the buffer.
            final int bufferIndex = coverageIndex + offset;
            bufferedCoverage.getAll()[bufferIndex] = coverage.getAll()[coverageIndex];
            bufferedCoverage.getA()[bufferIndex] = coverage.getA()[coverageIndex];
            bufferedCoverage.getC()[bufferIndex] = coverage.getC()[coverageIndex];
            bufferedCoverage.getG()[bufferIndex] = coverage.getG()[coverageIndex];
            bufferedCoverage.getT()[bufferIndex] = coverage.getT()[coverageIndex];
        }

        // Everything after the copied range belongs to an older chunk and must not be written again.
        clearBufferFrom(coverageIndex + offset);
    }

    /** Zeroes every position of the chunk buffer. */
    private void clearBuffer() {
        Arrays.fill(bufferedCoverage.getAll(), (short) 0);
        Arrays.fill(bufferedCoverage.getA(), (short) 0);
        Arrays.fill(bufferedCoverage.getC(), (short) 0);
        Arrays.fill(bufferedCoverage.getG(), (short) 0);
        Arrays.fill(bufferedCoverage.getT(), (short) 0);
    }

    /** Zeroes the chunk buffer from index {@code from} (inclusive) up to {@code chunkSize}. */
    private void clearBufferFrom(int from) {
        Arrays.fill(bufferedCoverage.getAll(), from, chunkSize, (short) 0);
        Arrays.fill(bufferedCoverage.getA(), from, chunkSize, (short) 0);
        Arrays.fill(bufferedCoverage.getC(), from, chunkSize, (short) 0);
        Arrays.fill(bufferedCoverage.getG(), from, chunkSize, (short) 0);
        Arrays.fill(bufferedCoverage.getT(), from, chunkSize, (short) 0);
    }
}