package org.opencb.opencga.storage.core.variant.io;

import com.fasterxml.jackson.databind.ObjectMapper;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.utils.FileUtils;
import org.opencb.opencga.storage.core.metadata.ExportMetadata;
import org.opencb.opencga.storage.core.metadata.StudyConfiguration;
import org.opencb.opencga.storage.core.metadata.StudyConfigurationManager;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptorUtils;

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.nio.file.Paths;
import java.util.*;

/**
 * Reads an {@link ExportMetadata} file and loads the study configurations it contains
 * into a {@link StudyConfigurationManager}, after pruning files and samples that should
 * not be part of the imported data.
 *
 * Created on 12/12/16.
 *
 * @author Jacobo Coll &lt;jacobo167@gmail.com&gt;
 */
public class VariantMetadataImporter {

    /**
     * Imports the metadata stored next to {@code inputUri} into the given manager.
     *
     * The import is refused when the manager already reports any study. Otherwise every
     * study configuration found in the metadata is pruned with
     * {@link #processStudyConfiguration(Map, StudyConfiguration)} and then written through
     * {@link StudyConfigurationManager#updateStudyConfiguration}.
     *
     * @param inputUri URI of the exported data; the metadata file extension is appended to its path
     * @param scm      manager that receives the imported study configurations
     * @return the metadata that was read, with its study configurations already pruned
     * @throws IOException           if the metadata file can not be read or parsed
     * @throws IllegalStateException if the manager already contains studies
     */
    public ExportMetadata importMetaData(URI inputUri, StudyConfigurationManager scm) throws IOException {
        // Check if can be loaded
        Map<String, Integer> studies = scm.getStudies(QueryOptions.empty());
        if (!studies.isEmpty()) {
            throw new IllegalStateException("Unable to import data if there are other loaded studies: " + studies);
        }

        // Load metadata
        ExportMetadata exportMetadata = readMetadata(inputUri);

        // Get list of returned samples
        Map<Integer, List<Integer>> returnedSamples = getReturnedSamplesMap(exportMetadata);

        for (StudyConfiguration studyConfiguration : exportMetadata.getStudies()) {
            processStudyConfiguration(returnedSamples, studyConfiguration);
            scm.updateStudyConfiguration(studyConfiguration, QueryOptions.empty());
        }

        return exportMetadata;
    }

    /**
     * Prunes a study configuration in place.
     *
     * First, every file id that is not listed in the indexed files is removed from the
     * file ids and from the samples-in-files map. Then, if {@code returnedSamples} holds a
     * sample list for this study, every sample whose id is not in that list is removed from
     * the sample ids, from each samples-in-file set and from each cohort.
     *
     * @param returnedSamples    sample ids to keep, indexed by study id; may be {@code null},
     *                           in which case no sample is removed
     * @param studyConfiguration study configuration to modify
     */
    protected void processStudyConfiguration(Map<Integer, List<Integer>> returnedSamples, StudyConfiguration studyConfiguration) {
        // Remove non indexed files
        LinkedHashSet<Integer> indexedFiles = studyConfiguration.getIndexedFiles();
        studyConfiguration.getFileIds().values().removeIf(fileId -> !indexedFiles.contains(fileId));
        studyConfiguration.getSamplesInFiles().keySet().removeIf(fileId -> !indexedFiles.contains(fileId));

        if (returnedSamples == null) {
            return;
        }
        List<Integer> samples = returnedSamples.get(studyConfiguration.getStudyId());
        if (samples == null) {
            return;
        }

        // Remove missing samples from StudyConfiguration.
        // A hash set gives constant-time membership checks instead of scanning the list per sample.
        Set<Integer> samplesToKeep = new HashSet<>(samples);
        Set<Integer> removedSamples = new HashSet<>();
        Iterator<Map.Entry<String, Integer>> iterator = studyConfiguration.getSampleIds().entrySet().iterator();
        while (iterator.hasNext()) {
            // Read the id before removing: an entry must not be queried once its backing map changed.
            Integer sampleId = iterator.next().getValue();
            if (!samplesToKeep.contains(sampleId)) {
                iterator.remove();
                removedSamples.add(sampleId);
            }
        }

        if (removedSamples.isEmpty()) {
            return;
        }
        // Drop the removed samples from every file and cohort in a single pass per set.
        for (LinkedHashSet<Integer> samplesInFile : studyConfiguration.getSamplesInFiles().values()) {
            samplesInFile.removeAll(removedSamples);
        }
        for (Set<Integer> samplesInCohort : studyConfiguration.getCohorts().values()) {
            samplesInCohort.removeAll(removedSamples);
        }
    }

    /**
     * Obtains the samples selected by the query stored in the metadata, per study.
     *
     * Delegates to {@link VariantDBAdaptorUtils#getReturnedSamples}, resolving study ids
     * against the study configurations contained in the metadata itself.
     *
     * @param exportMetadata metadata holding the query, query options and study configurations
     * @return map from study id to sample ids, or {@code null} if the metadata has no query
     */
    protected Map<Integer, List<Integer>> getReturnedSamplesMap(ExportMetadata exportMetadata) {
        if (exportMetadata.getQuery() == null) {
            // No query recorded: callers treat null as "do not filter samples".
            return null;
        }

        Map<Integer, StudyConfiguration> studyConfigurationMap = new HashMap<>();
        for (StudyConfiguration studyConfiguration : exportMetadata.getStudies()) {
            studyConfigurationMap.put(studyConfiguration.getStudyId(), studyConfiguration);
        }
        return VariantDBAdaptorUtils.getReturnedSamples(exportMetadata.getQuery(), exportMetadata.getQueryOptions(),
                studyConfigurationMap.keySet(), studyConfigurationMap::get);
    }

    /**
     * Reads the export metadata associated with an exported file.
     *
     * The file read is the path of {@code inputUri} with
     * {@link VariantExporter#METADATA_FILE_EXTENSION} appended; its content is parsed
     * as JSON into an {@link ExportMetadata}.
     *
     * @param inputUri URI of the exported data (not of the metadata file itself)
     * @return the parsed metadata
     * @throws IOException if the file can not be opened or parsed
     */
    public static ExportMetadata readMetadata(URI inputUri) throws IOException {
        ExportMetadata exportMetadata;
        try (InputStream is = FileUtils.newInputStream(Paths.get(inputUri.getPath() + VariantExporter.METADATA_FILE_EXTENSION))) {
            ObjectMapper objectMapper = new ObjectMapper();
            exportMetadata = objectMapper.readValue(is, ExportMetadata.class);
        }
        return exportMetadata;
    }
}