/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.biointerpretation.Utils;
import act.shared.Organism;
import org.apache.commons.collections4.trie.PatriciaTrie;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
/**
 * Computes, for every organism name supplied at construction time, its "minimal prefix": the shortest organism name
 * in the same collection that is a prefix of it. A name that has no other organism name as a prefix is its own
 * minimal prefix.
 */
public class OrgMinimalPrefixGenerator {

  // Maps each organism name to its minimal prefix. Populated once by the constructor.
  Map<String, String> orgNameToMinimalPrefix;

  /**
   * Builds the organism-name to minimal-prefix mapping.
   *
   * We initialize a prefix tree using the PatriciaTrie API with all the organism names produced by the iterator. The
   * API orders the strings lexicographically. The API also provides a prefixMap function that creates a SortedMap of
   * all strings that have the passed-in string as a prefix. Hence, every string in the prefix map of a given string
   * can be assigned that string as its minimal prefix.
   *
   * In a lexicographic ordering, a prefix string must come before any longer string that has that prefix. Because we
   * always process the first remaining key and then remove everything it prefixes, each name is assigned the
   * shortest organism name that is a prefix of it.
   *
   * @param orgIterator iterator over the organisms whose names should be processed; must not be null. Null
   *                    organisms and organisms with a null name are skipped, since PatriciaTrie does not accept
   *                    null keys.
   */
  public OrgMinimalPrefixGenerator(Iterator<Organism> orgIterator) {
    // Only the keys of the trie matter; the value is a placeholder.
    PatriciaTrie<Boolean> orgPrefixTrie = new PatriciaTrie<>();
    while (orgIterator.hasNext()) {
      Organism org = orgIterator.next();
      if (org == null) {
        continue;
      }
      String orgName = org.getName();
      if (orgName == null) {
        continue;
      }
      orgPrefixTrie.put(orgName, Boolean.TRUE);
    }

    orgNameToMinimalPrefix = new HashMap<>();

    while (!orgPrefixTrie.isEmpty()) {
      // The lexicographically first remaining name cannot have a shorter remaining name as a prefix,
      // so it is its own minimal prefix.
      String firstKey = orgPrefixTrie.firstKey();
      orgNameToMinimalPrefix.put(firstKey, firstKey);
      orgPrefixTrie.remove(firstKey);

      // Every remaining name that starts with firstKey gets firstKey as its minimal prefix.
      SortedMap<String, Boolean> keyPrefixMap = orgPrefixTrie.prefixMap(firstKey);

      // Collect the names first and remove them afterwards, so the trie is not modified while the
      // prefix map's key set is being iterated.
      List<String> namesToRemove = new ArrayList<>();
      for (String orgWithPrefix : keyPrefixMap.keySet()) {
        orgNameToMinimalPrefix.put(orgWithPrefix, firstKey);
        namesToRemove.add(orgWithPrefix);
      }

      for (String nameToRemove : namesToRemove) {
        orgPrefixTrie.remove(nameToRemove);
      }
    }
  }

  /**
   * Returns the mapping computed by the constructor.
   *
   * @return the map from each organism name to its minimal prefix. This is the generator's own map instance, not a
   *         copy, so changes made by the caller are visible through this object.
   */
  public Map<String, String> getMinimalPrefixMapping() {
    return orgNameToMinimalPrefix;
  }
}