package org.nextprot.api.user.utils;
import com.google.common.collect.Sets;
import org.nextprot.api.commons.exception.EntryNotFoundException;
import org.nextprot.api.commons.exception.EntrySetNotFoundException;
import org.nextprot.api.commons.exception.NPreconditions;
import org.nextprot.api.commons.exception.NextProtException;
import org.nextprot.api.user.domain.UserProteinList;
import org.nextprot.api.user.service.UserProteinListService.Operator;
import org.springframework.web.multipart.MultipartFile;
import java.io.*;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
/**
* This utility class provides methods operating on <tt>UserProteinList</tt> instances
*
* @author fnikitin
* @author dteixeira
*/
public class UserProteinListUtils {
/**
* Apply the given operator to two user protein lists in a new instance of
* {@code UserProteinList}
*
* @param l1 first user protein list
* @param l2 second user protein list
* @param operator operator applied to operands
* @param username combined list user name
* @param name combined list name
* @param description combined list description
* @return a new user protein list combining l1 and l2
*/
public static UserProteinList combine(UserProteinList l1, UserProteinList l2, Operator operator,
String username, String name, String description) {
NPreconditions.checkNotNull(l1, "The first user protein list should not be null");
NPreconditions.checkNotNull(l2, "The second user protein list should not be null");
NPreconditions.checkNotNull(operator, "The combine operator should not be null");
NPreconditions.checkNotNull(name, "The user protein list name should not be null");
NPreconditions.checkNotNull(username, "The user protein list user name should not be null");
NPreconditions.checkTrue(!l1.equals(l2), "Can't make combination with the same lists");
Set<String> combined = new HashSet<>();
if (operator.equals(Operator.AND)) {
combined.addAll(Sets.intersection(l1.getAccessionNumbers(), l2.getAccessionNumbers()));
} else if (operator.equals(Operator.OR)) {
combined = Sets.union(l1.getAccessionNumbers(), l2.getAccessionNumbers());
} else if (operator.equals(Operator.NOT_IN)) {
combined.addAll(Sets.difference(l1.getAccessionNumbers(), l2.getAccessionNumbers()));
}
if (combined.isEmpty())
throw new NextProtException("The combined list is empty. Only combinations resulting on non-empty lists are saved.");
UserProteinList combinedProteinList = new UserProteinList();
combinedProteinList.setName(name);
combinedProteinList.setOwner(username);
combinedProteinList.setDescription(description);
combinedProteinList.setAccessions(combined);
return combinedProteinList;
}
/**
* Extract the set of accession numbers from uploaded file. Only nextprot
* and uniprot accession numbers found in {@code validAccessionNumbers} are
* allowed.
*
* <p>
* uniprot accession numbers should be converted in nextprot (prefixed with
* "NX_")
* </p>
*
* @param reader the reader
* @param validAccessionNumbers a set of possible nextprot accession numbers
* @param ignoreEntryNotFoundException if true this method ignores EntryNotFoundException else throw it
*
* @return a set of valid accession numbers
* @throws IOException
* if input exception occurred
* @throws EntrySetNotFoundException
* if entries was not found in validAccessionNumbers
*/
public static Set<String> parseAccessionNumbers(Reader reader, Set<String> validAccessionNumbers, boolean ignoreEntryNotFoundException) throws IOException {
NPreconditions.checkNotNull(reader, "The reader should not be null");
NPreconditions.checkNotNull(validAccessionNumbers, "The valid accession numbers should not be null");
NPreconditions.checkTrue(!validAccessionNumbers.isEmpty(), "The valid accession numbers should not be null");
Set<String> foundEntries = new HashSet<>();
BufferedReader br = new BufferedReader(reader);
String line;
Set<String> unknownEntries = new HashSet<>();
while ((line = br.readLine()) != null) {
try {
checkFormatAndCollectValidAccessionNumber(line, foundEntries, validAccessionNumbers);
} catch (EntryNotFoundException e) {
unknownEntries.add(e.getEntry());
}
}
if (!ignoreEntryNotFoundException && !unknownEntries.isEmpty())
throw new EntrySetNotFoundException(unknownEntries);
return foundEntries;
}
public static Set<String> parseAccessionNumbers(Reader reader, Set<String> validAccessionNumbers) throws IOException {
return parseAccessionNumbers(reader, validAccessionNumbers, false);
}
/**
* Extract set of accession numbers from uploaded file. Only nextprot or
* uniprot accession numbers allowed.
*
* <p>
* uniprot accession numbers should be converted in nextprot (prefixed with "NX_")
* </p>
*
* @param file the uploaded file
* @param ignoreEntryNotFoundException if true this method ignores EntryNotFoundException else throw it
*
* @return a set of accession numbers
* @throws IOException
* input exception occurred
*/
public static Set<String> parseAccessionNumbers(MultipartFile file, Set<String> validAccessionNumbers, boolean ignoreEntryNotFoundException) throws NextProtException {
NPreconditions.checkNotNull(file, "The uploaded file should not be null");
InputStream inputStream;
try {
inputStream = file.getInputStream();
if (file.getInputStream() != null)
return parseAccessionNumbers(new InputStreamReader(inputStream), validAccessionNumbers, ignoreEntryNotFoundException);
} catch (IOException e) {
throw new NextProtException(e);
}
return new HashSet<>();
}
/**
* Apply nextprot format on if needed and check for validity
*
* @param uncheckedAccessionNumbers set of accession numbers to check
* @param validAccessionNumbers set of all valid entries
* @return a well formatted set of accession numbers
*/
public static Set<String> checkAndFormatAccessionNumbers(Collection<String> uncheckedAccessionNumbers, Set<String> validAccessionNumbers) {
NPreconditions.checkNotNull(uncheckedAccessionNumbers, "The collection of accessions should not be null");
NPreconditions.checkNotNull(validAccessionNumbers, "The valid accession numbers should not be null");
NPreconditions.checkTrue(!validAccessionNumbers.isEmpty(), "The valid accession numbers should not be null");
Set<String> collector = new HashSet<>(uncheckedAccessionNumbers.size());
Set<String> unknownEntries = new HashSet<>();
for (String uncheckedAccessionNumber : uncheckedAccessionNumbers) {
try {
checkFormatAndCollectValidAccessionNumber(uncheckedAccessionNumber, collector, validAccessionNumbers);
} catch (EntryNotFoundException e) {
unknownEntries.add(e.getEntry());
}
}
if (!unknownEntries.isEmpty())
throw new EntrySetNotFoundException(unknownEntries);
return collector;
}
/**
* Apply nextprot format on uncheckedAccessionNumber if needed, check for validity and give it to collector
*
* @param uncheckedAccessionNumber accession number to check for validity
* @param allNPAccessionNumbers set of all valid entries
* @param validAccessionNumberCollector a collector of all valid accession numbers
* @throws EntryNotFoundException if invalid accession number
*/
public static void checkFormatAndCollectValidAccessionNumber(String uncheckedAccessionNumber, Set<String> validAccessionNumberCollector, Set<String> allNPAccessionNumbers) {
NPreconditions.checkNotNull(allNPAccessionNumbers, "The collector should not be null");
String trimmed = uncheckedAccessionNumber.trim().toUpperCase();
if (uncheckedAccessionNumber.charAt(0) != '#') {
if (!trimmed.startsWith("NX_"))
trimmed = "NX_" + trimmed;
if (!allNPAccessionNumbers.contains(trimmed)) throw new EntryNotFoundException(uncheckedAccessionNumber);
validAccessionNumberCollector.add(trimmed);
}
}
}