/* * Copyright 2015-2016 OpenCB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.opencb.opencga.storage.core.variant.annotation.annotators; import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.VariantAnnotation; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.datastore.core.QueryResult; import org.opencb.opencga.storage.core.config.StorageConfiguration; import org.opencb.opencga.storage.core.variant.annotation.VariantAnnotationManager; import org.opencb.opencga.storage.core.variant.annotation.VariantAnnotatorException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.List; /** * Created by jacobo on 9/01/15. */ public abstract class AbstractCellBaseVariantAnnotator extends VariantAnnotator { public static final String ANNOTATOR_CELLBASE_USE_CACHE = "annotator.cellbase.use_cache"; public static final String ANNOTATOR_CELLBASE_INCLUDE = "annotator.cellbase.include"; public static final String ANNOTATOR_CELLBASE_EXCLUDE = "annotator.cellbase.exclude"; protected static Logger logger = LoggerFactory.getLogger(AbstractCellBaseVariantAnnotator.class); protected final String species; protected final String assembly; protected final String cellbaseVersion; protected final QueryOptions queryOptions; public AbstractCellBaseVariantAnnotator(StorageConfiguration storageConfiguration, ObjectMap params) throws VariantAnnotatorException { super(storageConfiguration, params); species = toCellBaseSpeciesName(params.getString(VariantAnnotationManager.SPECIES)); assembly = params.getString(VariantAnnotationManager.ASSEMBLY); cellbaseVersion = storageConfiguration.getCellbase().getVersion(); queryOptions = new QueryOptions(); if (StringUtils.isNotEmpty(params.getString(ANNOTATOR_CELLBASE_INCLUDE))) { queryOptions.put(QueryOptions.INCLUDE, params.getString(ANNOTATOR_CELLBASE_INCLUDE)); } else if (StringUtils.isNotEmpty(params.getString(ANNOTATOR_CELLBASE_EXCLUDE))) { queryOptions.put(QueryOptions.EXCLUDE, params.getString(ANNOTATOR_CELLBASE_EXCLUDE)); } if (!params.getBoolean(ANNOTATOR_CELLBASE_USE_CACHE)) { queryOptions.append("useCache", false); } checkNotNull(cellbaseVersion, "cellbase version"); checkNotNull(species, "species"); checkNotNull(assembly, "assembly"); } protected static void checkNotNull(String value, String name) throws VariantAnnotatorException { if (value == null || value.isEmpty()) { throw new VariantAnnotatorException("Missing defaultValue: " + name); } } public static String toCellBaseSpeciesName(String scientificName) { if (scientificName != null && scientificName.contains(" ")) { String[] split = scientificName.split(" ", 2); scientificName = (split[0].charAt(0) + split[1]).toLowerCase(); } return scientificName; } @Override public final List<VariantAnnotation> annotate(List<Variant> variants) throws VariantAnnotatorException { List<Variant> nonStructuralVariations = filterStructuralVariants(variants); return annotateFiltered(nonStructuralVariations); } protected abstract List<VariantAnnotation> annotateFiltered(List<Variant> variants) throws VariantAnnotatorException; private List<Variant> filterStructuralVariants(List<Variant> variants) { List<Variant> nonStructuralVariants = new ArrayList<>(variants.size()); for (Variant variant : variants) { // If Variant is SV some work is needed if (variant.getAlternate().length() + variant.getReference().length() > Variant.SV_THRESHOLD * 2) { // TODO: Manage SV variants // logger.info("Skip variant! {}", genomicVariant); logger.info("Skip variant! {}", variant.getChromosome() + ":" + variant.getStart() + ":" + (variant.getReference().length() > 10 ? variant.getReference().substring(0, 10) + "...[" + variant.getReference().length() + "]" : variant.getReference()) + ":" + (variant.getAlternate().length() > 10 ? variant.getAlternate().substring(0, 10) + "...[" + variant.getAlternate().length() + "]" : variant.getAlternate()) ); logger.debug("Skip variant! {}", variant); } else { nonStructuralVariants.add(variant); } } return nonStructuralVariants; } protected List<VariantAnnotation> getVariantAnnotationList(List<Variant> variants, List<QueryResult<VariantAnnotation>> queryResults) { List<VariantAnnotation> variantAnnotationList = new ArrayList<>(variants.size()); if (queryResults != null) { for (QueryResult<VariantAnnotation> queryResult : queryResults) { variantAnnotationList.addAll(queryResult.getResult()); } } return variantAnnotationList; } }