package nl.tudelft.lifetiles.annotation.model;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * Static class which parses gene annotations.
 *
 * Each annotation is read from one line of tab-separated columns. The start
 * and end coordinates are taken from fixed columns and the name is looked up
 * in a column of {@code key=value} attributes separated by semicolons.
 * NOTE(review): this column layout looks like GFF3; confirm against the files
 * that are actually fed to this parser.
 *
 * @author Jos
 *
 */
public final class GeneAnnotationParser {

    /**
     * The index of the start field in an annotation line.
     */
    private static final int START_FIELD = 3;

    /**
     * The index of the end field in an annotation line.
     */
    private static final int END_FIELD = 4;

    /**
     * The index of the extra (attributes) field in an annotation line.
     */
    private static final int EXTRA_FIELD = 8;

    /**
     * The key of the name attribute inside the extra field of an annotation
     * line.
     */
    private static final String NAME_FIELD = "Name";

    /**
     * The standard null name for annotations without a name.
     */
    private static final String NULL_NAME = "";

    /**
     * The separator between the columns of an annotation line.
     */
    private static final String COLUMN_SEPARATOR = "\t";

    /**
     * The separator between the attributes inside the extra field.
     */
    private static final String ATTRIBUTE_SEPARATOR = ";";

    /**
     * The separator between the key and the value of a single attribute.
     */
    private static final String KEY_VALUE_SEPARATOR = "=";

    /**
     * The prefix of lines which carry no annotation (comments, directives).
     */
    private static final String COMMENT_PREFIX = "#";

    /**
     * Static class can not have a public or default constructor.
     */
    private GeneAnnotationParser() {
        // noop
    }

    /**
     * Parses a file of gene annotations into a list of gene annotations, in
     * the order in which they appear in the file. Blank lines and lines
     * starting with {@code #} are skipped.
     *
     * @param file
     *            the file with gene annotations.
     * @throws IOException
     *             When there is an error opening the specified file. Errors
     *             which occur while the lines are being read are reported by
     *             {@link Files#lines(java.nio.file.Path)} as
     *             {@code java.io.UncheckedIOException}.
     * @throws NumberFormatException
     *             When a start or end column is not a valid number.
     * @throws ArrayIndexOutOfBoundsException
     *             When an annotation line has too few columns.
     * @return mutable list with the parsed gene annotations.
     */
    public static List<GeneAnnotation> parseGeneAnnotations(final File file)
            throws IOException {
        // try-with-resources releases the file handle even when parsing a
        // line throws half-way through the file.
        try (Stream<String> annotationLines = Files.lines(file.toPath())) {
            return annotationLines
                    .map(GeneAnnotationParser::parseGeneAnnotation)
                    .filter(annotation -> annotation != null)
                    .collect(Collectors.toCollection(ArrayList::new));
        }
    }

    /**
     * Parses a single line of the gene file into a gene annotation.
     *
     * @param line
     *            Single line of the gene file.
     * @return parsed gene annotation, or <code>null</code> when the line is
     *         blank or a comment and therefore holds no annotation.
     */
    private static GeneAnnotation parseGeneAnnotation(final String line) {
        // Lines without annotation data would otherwise fail on the column
        // lookups below; the caller filters out the null result.
        if (line.trim().isEmpty() || line.startsWith(COMMENT_PREFIX)) {
            return null;
        }

        String[] columns = line.split(COLUMN_SEPARATOR);
        long start = Long.parseLong(columns[START_FIELD]);
        long end = Long.parseLong(columns[END_FIELD]);
        Map<String, String> fields = extractGeneFields(
                columns[EXTRA_FIELD].split(ATTRIBUTE_SEPARATOR));

        return new GeneAnnotation(start, end,
                fields.getOrDefault(NAME_FIELD, NULL_NAME));
    }

    /**
     * Method which extracts the gene fields into a map. Fields which are not
     * of the form {@code key=value} are ignored.
     *
     * @param fields
     *            Fields of the gene.
     * @return Map of attributes in the gene.
     */
    private static Map<String, String> extractGeneFields(
            final String... fields) {
        final Map<String, String> genomeFields = new HashMap<>();
        for (String field : fields) {
            // Limit of 2: split on the first separator only, so a value may
            // itself contain the separator and an empty value ("key=") is
            // kept as an empty string instead of being dropped.
            String[] attribute = field.split(KEY_VALUE_SEPARATOR, 2);
            if (attribute.length == 2) {
                genomeFields.put(attribute[0], attribute[1]);
            }
        }
        return genomeFields;
    }
}