/* * Copyright 2015-2016 OpenCB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.opencb.opencga.storage.mongodb.variant.converters; import org.apache.commons.lang.StringUtils; import org.bson.Document; import org.opencb.biodata.models.variant.Variant; import org.opencb.commons.datastore.core.ComplexTypeConverter; import org.opencb.commons.utils.CryptoUtils; /** * Creates a sorted key for MongoDB. * * Format: * CHR:POS:REF:ALT * * Where CHR starts with " " if it's a single number chromosome, to sort 2 digits chromosomes. * Where POS has a left padding of 10 positions * Where REF and ALT are a SHA1 of the original allele if is bigger than {@link Variant#SV_THRESHOLD} * * Created on 12/05/16 * * @author Jacobo Coll <jacobo167@gmail.com> */ public class VariantStringIdConverter implements ComplexTypeConverter<Variant, Document> { public static final String SEPARATOR = ":"; public static final char SEPARATOR_CHAR = ':'; public static final String ID_FIELD = "_id"; public static final String END_FIELD = "end"; public static final String REF_FIELD = "ref"; public static final String ALT_FIELD = "alt"; public Variant convertToDataModelType(String object) { String[] split = object.split(SEPARATOR, -1); return new Variant(split[0].trim(), Integer.parseInt(split[1].trim()), split[2], split[3]); } @Override public Variant convertToDataModelType(Document object) { String[] split = object.getString(ID_FIELD).split(SEPARATOR, -1); return new Variant(split[0].trim(), Integer.parseInt(split[1].trim()), object.getInteger(END_FIELD), object.getString(REF_FIELD), object.getString(ALT_FIELD)); } @Override public Document convertToStorageType(Variant variant) { return new Document(ID_FIELD, buildId(variant)) .append(REF_FIELD, variant.getReference()) .append(ALT_FIELD, variant.getAlternate()) .append(END_FIELD, variant.getEnd()); } public String buildId(Variant variant) { return buildId(variant.getChromosome(), variant.getStart(), variant.getReference(), variant.getAlternate()); } public String buildId(String chromosome, int start, String reference, String alternate) { StringBuilder stringBuilder = buildId(chromosome, start, new StringBuilder()); stringBuilder.append(SEPARATOR_CHAR); if (reference.length() > Variant.SV_THRESHOLD) { stringBuilder.append(new String(CryptoUtils.encryptSha1(reference))); } else if (!reference.equals("-")) { stringBuilder.append(reference); } stringBuilder.append(SEPARATOR_CHAR); if (alternate.length() > Variant.SV_THRESHOLD) { stringBuilder.append(new String(CryptoUtils.encryptSha1(alternate))); } else if (!alternate.equals("-")) { stringBuilder.append(alternate); } return stringBuilder.toString(); } public static String buildId(String chromosome, int start) { return buildId(chromosome, start, new StringBuilder()).toString(); } private static StringBuilder buildId(String chromosome, int start, StringBuilder stringBuilder) { appendChromosome(chromosome, stringBuilder) .append(SEPARATOR_CHAR) .append(StringUtils.leftPad(Integer.toString(start), 10, " ")); return stringBuilder; } public static String convertChromosome(String chromosome) { return appendChromosome(chromosome, new StringBuilder()).toString(); } protected static StringBuilder appendChromosome(String chromosome, StringBuilder stringBuilder) { if (chromosome.length() == 1 && Character.isDigit(chromosome.charAt(0))) { stringBuilder.append(' '); } return stringBuilder.append(chromosome); } }