/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.lcms.db.model;
import com.act.lcms.db.io.DB;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.joda.time.LocalDateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class ScanFile {
private static final String DATE_FORMAT = "MMddyyyy";
private static final Integer TWENTYN_INCEPTION = 2014;
public static final String TABLE_NAME = "scan_files";
public enum SCAN_MODE {
POS,
NEG,
}
public enum SCAN_FILE_TYPE {
NC,
MZML,
RAW,
}
private enum DB_FIELD implements DBFieldEnumeration {
ID(1, -1, "id"),
FILENAME(2, 1, "filename"),
MODE(3, 2, "mode"),
FILE_TYPE(4, 3, "file_type"),
PLATE_ID(5, 4, "plate_id"),
PLATE_ROW(6, 5, "plate_row"),
PLATE_COLUMN(7, 6, "plate_column"),
;
private final int offset;
private final int insertUpdateOffset;
private final String fieldName;
DB_FIELD(int offset, int insertUpdateOffset, String fieldName) {
this.offset = offset;
this.insertUpdateOffset = insertUpdateOffset;
this.fieldName = fieldName;
}
@Override
public int getOffset() {
return offset;
}
@Override
public int getInsertUpdateOffset() {
return insertUpdateOffset;
}
@Override
public String getFieldName() {
return fieldName;
}
@Override
public String toString() {
return this.fieldName;
}
public static String[] names() {
DB_FIELD[] values = DB_FIELD.values();
String[] names = new String[values.length];
for (int i = 0; i < values.length; i++) {
names[i] = values[i].getFieldName();
}
return names;
}
}
protected static final List<String> ALL_FIELDS = Collections.unmodifiableList(Arrays.asList(DB_FIELD.names()));
// id is auto-generated on insertion.
protected static final List<String> INSERT_UPDATE_FIELDS =
Collections.unmodifiableList(ALL_FIELDS.subList(1, ALL_FIELDS.size()));
protected static List<ScanFile> fromResultSet(ResultSet resultSet) throws SQLException {
List<ScanFile> results = new ArrayList<>();
while (resultSet.next()) {
Integer id = resultSet.getInt(DB_FIELD.ID.getOffset());
String filename = resultSet.getString(DB_FIELD.FILENAME.getOffset());
SCAN_MODE scanMode = SCAN_MODE.valueOf(resultSet.getString(DB_FIELD.MODE.getOffset()));
SCAN_FILE_TYPE fileType = SCAN_FILE_TYPE.valueOf(resultSet.getString(DB_FIELD.FILE_TYPE.getOffset()));
Integer plateId = resultSet.getInt(DB_FIELD.PLATE_ID.getOffset());
if (resultSet.wasNull()) {
plateId = null;
}
Integer plateRow = resultSet.getInt(DB_FIELD.PLATE_ROW.getOffset());
if (resultSet.wasNull()) {
plateRow = null;
}
Integer plateColumn = resultSet.getInt(DB_FIELD.PLATE_COLUMN.getOffset());
if (resultSet.wasNull()) {
plateColumn = null;
}
results.add(new ScanFile(id, filename, scanMode, fileType, plateId, plateRow, plateColumn));
}
return results;
}
// TODO: hoist this into some DBModel base class, or at least define it via an interface.
protected static ScanFile expectOneResult(ResultSet resultSet, String queryErrStr) throws SQLException{
List<ScanFile> results = fromResultSet(resultSet);
if (results.size() > 1) {
throw new SQLException("Found multiple results where one or zero expected: %s", queryErrStr);
}
if (results.size() == 0) {
return null;
}
return results.get(0);
}
// Select
public static final String QUERY_GET_SCAN_FILE_BY_ID = StringUtils.join(new String[]{
"SELECT", StringUtils.join(ALL_FIELDS, ", "),
"from", TABLE_NAME,
"where id = ?",
}, " ");
public static ScanFile getScanFileByID(DB db, Integer id) throws SQLException {
try (PreparedStatement stmt = db.getConn().prepareStatement(QUERY_GET_SCAN_FILE_BY_ID)) {
stmt.setInt(1, id);
try (ResultSet resultSet = stmt.executeQuery()) {
return expectOneResult(resultSet, String.format("id = %d", id));
}
}
}
public static final String QUERY_GET_SCAN_FILE_BY_FILENAME = StringUtils.join(new String[]{
"SELECT", StringUtils.join(ALL_FIELDS, ", "),
"from", TABLE_NAME,
"where filename = ?",
}, " ");
public static ScanFile getScanFileByFilename(DB db, String filename) throws SQLException {
try (PreparedStatement stmt = db.getConn().prepareStatement(QUERY_GET_SCAN_FILE_BY_FILENAME)) {
stmt.setString(1, filename);
try (ResultSet resultSet = stmt.executeQuery()) {
return expectOneResult(resultSet, String.format("filename = %s", filename));
}
}
}
public static final String QUERY_GET_SCAN_FILE_BY_PLATE_ID = StringUtils.join(new String[]{
"SELECT", StringUtils.join(ALL_FIELDS, ", "),
"from", TABLE_NAME,
"where plate_id = ?",
}, " ");
public static List<ScanFile> getScanFileByPlateID(DB db, Integer plateId) throws SQLException {
try (PreparedStatement stmt = db.getConn().prepareStatement(QUERY_GET_SCAN_FILE_BY_PLATE_ID)) {
stmt.setInt(1, plateId);
try (ResultSet resultSet = stmt.executeQuery()) {
return fromResultSet(resultSet);
}
}
}
public static final String QUERY_GET_SCAN_FILE_BY_PLATE_ID_ROW_AND_COLUMN = StringUtils.join(new String[]{
"SELECT", StringUtils.join(ALL_FIELDS, ", "),
"from", TABLE_NAME,
"where plate_id = ?",
"and plate_row = ?",
"and plate_column = ?",
"order by filename"
}, " ");
public static List<ScanFile> getScanFileByPlateIDRowAndColumn(
DB db, Integer plateId, Integer plateRow, Integer plateColumn) throws SQLException {
try (PreparedStatement stmt = db.getConn().prepareStatement(QUERY_GET_SCAN_FILE_BY_PLATE_ID_ROW_AND_COLUMN)) {
stmt.setInt(1, plateId);
stmt.setInt(2, plateRow);
stmt.setInt(3, plateColumn);
try (ResultSet resultSet = stmt.executeQuery()) {
return fromResultSet(resultSet);
}
}
}
// Insert/Update
public static final String QUERY_INSERT_SCAN_FILE = StringUtils.join(new String[] {
"INSERT INTO", TABLE_NAME, "(", StringUtils.join(INSERT_UPDATE_FIELDS, ", "), ") VALUES (",
"?,", // 1 = filename
"?,", // 2 = mode
"?,", // 3 = file_type
"?,", // 4 = plate_id
"?,", // 5 = plate_row
"?", // 6 = plate_column
")"
}, " ");
protected static void bindInsertOrUpdateParameters(
PreparedStatement stmt, String filename, SCAN_MODE mode, SCAN_FILE_TYPE fileType,
Integer plateId, Integer plateRow, Integer plateColumn) throws SQLException {
stmt.setString(DB_FIELD.FILENAME.getInsertUpdateOffset(), filename);
stmt.setString(DB_FIELD.MODE.getInsertUpdateOffset(), mode.name());
stmt.setString(DB_FIELD.FILE_TYPE.getInsertUpdateOffset(), fileType.name());
if (plateId != null) {
stmt.setInt(DB_FIELD.PLATE_ID.getInsertUpdateOffset(), plateId);
} else {
stmt.setNull(DB_FIELD.PLATE_ID.getInsertUpdateOffset(), Types.INTEGER);
}
if (plateRow != null) {
stmt.setInt(DB_FIELD.PLATE_ROW.getInsertUpdateOffset(), plateRow);
} else {
stmt.setNull(DB_FIELD.PLATE_ROW.getInsertUpdateOffset(), Types.INTEGER);
}
if (plateColumn != null) {
stmt.setInt(DB_FIELD.PLATE_COLUMN.getInsertUpdateOffset(), plateColumn);
} else {
stmt.setNull(DB_FIELD.PLATE_COLUMN.getInsertUpdateOffset(), Types.INTEGER);
}
}
protected static void bindInsertOrUpdateParameters(PreparedStatement stmt, ScanFile sf) throws SQLException {
bindInsertOrUpdateParameters(stmt, sf.getFilename(), sf.getMode(), sf.getFileType(),
sf.getPlateId(), sf.getPlateRow(), sf.getPlateColumn());
}
public static ScanFile insertScanFile(
DB db,
String filename, SCAN_MODE mode, SCAN_FILE_TYPE fileType,
Integer plateId, Integer plateRow, Integer plateColumn) throws SQLException {
Connection conn = db.getConn();
try (PreparedStatement stmt =
conn.prepareStatement(QUERY_INSERT_SCAN_FILE, Statement.RETURN_GENERATED_KEYS)) {
bindInsertOrUpdateParameters(stmt, filename, mode, fileType, plateId, plateRow, plateColumn);
stmt.executeUpdate();
try (ResultSet resultSet = stmt.getGeneratedKeys()) {
if (resultSet.next()) {
// Get auto-generated id.
int id = resultSet.getInt(1);
return new ScanFile(id, filename, mode, fileType, plateId, plateRow, plateColumn);
} else {
System.err.format("ERROR: could not retrieve autogenerated key for scan file %s\n", filename);
return null;
}
}
}
}
protected static final List<String> UPDATE_STATEMENT_FIELDS_AND_BINDINGS;
static {
List<String> fields = new ArrayList<>(INSERT_UPDATE_FIELDS.size());
for (String field : INSERT_UPDATE_FIELDS) {
fields.add(String.format("%s = ?", field));
}
UPDATE_STATEMENT_FIELDS_AND_BINDINGS = Collections.unmodifiableList(fields);
}
public static final String QUERY_UPDATE_SCAN_FILE_BY_ID = StringUtils.join(new String[] {
"UPDATE ", TABLE_NAME, "SET",
StringUtils.join(UPDATE_STATEMENT_FIELDS_AND_BINDINGS.iterator(), ", "),
"WHERE",
"id = ?", // 7
}, " ");
public static boolean updateScanFile(DB db, ScanFile sf) throws SQLException {
Connection conn = db.getConn();
try (PreparedStatement stmt = conn.prepareStatement(QUERY_UPDATE_SCAN_FILE_BY_ID)) {
bindInsertOrUpdateParameters(stmt, sf);
stmt.setInt(INSERT_UPDATE_FIELDS.size() + 1, sf.getId());
return stmt.executeUpdate() > 0;
}
}
// Parsing/loading
protected enum SCAN_NAME_COMPONENT {
PLATE_NAME, // Optional, must use this or barcode
PLATE_BARCODE, // Optional, must use this or name
ROW, // Required
COLUMN, // Required
DATE, // Required
MODE, // Optional, default is POSITIVE
SCAN_PART, // Optional, default is 1 (primary component); mzML files are assumed to contain all components
FILE_TYPE, // Required
}
protected static final List<Pair<Pattern, Map<SCAN_NAME_COMPONENT, Integer>>> NAME_EXTRACTION_PATTERNS =
new ArrayList<Pair<Pattern, Map<SCAN_NAME_COMPONENT, Integer>>> () {{
add(Pair.of(
Pattern.compile("^Plate(\\d+)_([A-Z]+)(\\d+)_(\\d{8})(?:_(Pos|Neg))?(\\d{2})?.(nc|mzML|raw)$"),
new HashMap<SCAN_NAME_COMPONENT, Integer>() {{
put(SCAN_NAME_COMPONENT.PLATE_BARCODE, 1);
put(SCAN_NAME_COMPONENT.ROW, 2);
put(SCAN_NAME_COMPONENT.COLUMN, 3);
put(SCAN_NAME_COMPONENT.DATE, 4);
put(SCAN_NAME_COMPONENT.MODE, 5);
put(SCAN_NAME_COMPONENT.SCAN_PART, 6);
put(SCAN_NAME_COMPONENT.FILE_TYPE, 7);
}}
));
add(Pair.of(
Pattern.compile("^Plate(\\d+)_([A-Z]+)(\\d+)_(\\d+)_Full(\\d{2})?.(nc|mzML|raw)$"),
new HashMap<SCAN_NAME_COMPONENT, Integer>() {{
put(SCAN_NAME_COMPONENT.PLATE_BARCODE, 1);
put(SCAN_NAME_COMPONENT.ROW, 2);
put(SCAN_NAME_COMPONENT.COLUMN, 3);
put(SCAN_NAME_COMPONENT.DATE, 4);
put(SCAN_NAME_COMPONENT.SCAN_PART, 5);
put(SCAN_NAME_COMPONENT.FILE_TYPE, 6);
}}
));
add(Pair.of( // Matches munged Jaffna vial plate format.
Pattern.compile("^Plate_(\\w+)_([A-Z]+)(\\d+)_(\\d{8})(?:_(Pos|Neg))?(\\d{2})?.(nc|mzML|raw)"),
new HashMap<SCAN_NAME_COMPONENT, Integer>() {{
put(SCAN_NAME_COMPONENT.PLATE_BARCODE, 1);
put(SCAN_NAME_COMPONENT.ROW, 2);
put(SCAN_NAME_COMPONENT.COLUMN, 3);
put(SCAN_NAME_COMPONENT.DATE, 4);
put(SCAN_NAME_COMPONENT.MODE, 5);
put(SCAN_NAME_COMPONENT.SCAN_PART, 6);
put(SCAN_NAME_COMPONENT.FILE_TYPE, 7);
}}
));
add(Pair.of(
Pattern.compile("^(\\w+)_([A-Z]+)(\\d+)_(\\d{8})(?:_(Pos|Neg))?(\\d{2})?.(nc|mzML|raw)"),
new HashMap<SCAN_NAME_COMPONENT, Integer>() {{
put(SCAN_NAME_COMPONENT.PLATE_NAME, 1);
put(SCAN_NAME_COMPONENT.ROW, 2);
put(SCAN_NAME_COMPONENT.COLUMN, 3);
put(SCAN_NAME_COMPONENT.DATE, 4);
put(SCAN_NAME_COMPONENT.MODE, 5);
put(SCAN_NAME_COMPONENT.SCAN_PART, 6);
put(SCAN_NAME_COMPONENT.FILE_TYPE, 7);
}}
));
}};
public static final Integer LCMS_MAIN_SCAN_PART = 1;
public static List<Pair<Integer, DB.OPERATION_PERFORMED>> insertOrUpdateScanFilesInDirectory(DB db, File directory)
throws SQLException, IOException {
if (directory == null || !directory.isDirectory()) {
throw new RuntimeException(
String.format("Scan files directory at %s is not a directory",
directory == null ? null : directory.getAbsolutePath()));
}
List<Pair<Integer, DB.OPERATION_PERFORMED>> results = new ArrayList<>();
File[] contentsArr = directory.listFiles();
if (contentsArr == null || contentsArr.length == 0) {
System.err.format("WARNING: no files found in directory %s", directory.getAbsolutePath());
return null;
}
List<File> contents = Arrays.asList(contentsArr);
Collections.sort(contents, new Comparator<File>() {
@Override
public int compare(File o1, File o2) {
return o1.getName().compareTo(o2.getName());
}
});
for (File f : contents) {
for (Pair<Pattern, Map<SCAN_NAME_COMPONENT, Integer>> scan : NAME_EXTRACTION_PATTERNS) {
Pattern p = scan.getLeft();
Map<SCAN_NAME_COMPONENT, Integer> groupMap = scan.getRight();
Matcher m = p.matcher(f.getName());
if (m.matches()) {
if (groupMap.containsKey(SCAN_NAME_COMPONENT.SCAN_PART)) {
String scanPartStr = m.group(groupMap.get(SCAN_NAME_COMPONENT.SCAN_PART));
if (scanPartStr != null && !scanPartStr.isEmpty()) {
Integer scanPart = Integer.parseInt(scanPartStr);
if (!LCMS_MAIN_SCAN_PART.equals(scanPart)) {
break;
}
}
}
Plate plate;
Integer plateId = null;
if (f.getName().startsWith("STD_MEOH")) {
// The toffeStandard plate doesn't follow the usual naming convention, so we fake it here.
plate = Plate.getPlateByBarcode(db, "toffeeStandards");
} else if (groupMap.containsKey(SCAN_NAME_COMPONENT.PLATE_BARCODE)) {
plate = Plate.getPlateByBarcode(db, m.group((groupMap.get(SCAN_NAME_COMPONENT.PLATE_BARCODE))));
} else if (groupMap.containsKey(SCAN_NAME_COMPONENT.PLATE_NAME)) {
plate = Plate.getPlateByName(db, m.group((groupMap.get(SCAN_NAME_COMPONENT.PLATE_NAME))));
} else {
// The occurrence of this exception represents a developer oversight.
throw new RuntimeException(String.format("No plate identifier available for pattern %s", p));
}
if (plate == null) {
System.err.format("WARNING: unable to find plate for scan file %s\n", f.getName());
} else {
plateId = plate.getId();
}
Integer plateRow = null, plateColumn = null;
if (groupMap.containsKey(SCAN_NAME_COMPONENT.ROW)) {
String plateRowStr = m.group(groupMap.get(SCAN_NAME_COMPONENT.ROW));
if (plateRowStr != null && !plateRowStr.isEmpty()) {
if (plateRowStr.length() > 1) {
// TODO: handle larger plates?
throw new RuntimeException(String.format("Unable to handle multi-character plate row %s for scan %s",
plateRowStr, f.getName()));
}
plateRow = plateRowStr.charAt(0) - 'A';
}
}
if (groupMap.containsKey(SCAN_NAME_COMPONENT.COLUMN)) {
String plateColumnStr = m.group(groupMap.get(SCAN_NAME_COMPONENT.COLUMN));
if (plateColumnStr != null && !plateColumnStr.isEmpty()) {
plateColumn = Integer.parseInt(plateColumnStr) - 1; // Wells are one-indexed.
}
}
SCAN_MODE scanMode = SCAN_MODE.POS; // Assume positive scans by default.
if (groupMap.containsKey(SCAN_NAME_COMPONENT.MODE)) {
String scanModeStr = m.group(groupMap.get(SCAN_NAME_COMPONENT.MODE));
if (scanModeStr != null && !scanModeStr.isEmpty()) {
scanMode = SCAN_MODE.valueOf(scanModeStr.toUpperCase());
}
}
SCAN_FILE_TYPE fileType = null;
if (groupMap.containsKey(SCAN_NAME_COMPONENT.FILE_TYPE)) {
String fileTypeStr = m.group(groupMap.get(SCAN_NAME_COMPONENT.FILE_TYPE));
if (fileTypeStr != null && !fileTypeStr.isEmpty()) {
fileType = SCAN_FILE_TYPE.valueOf(fileTypeStr.toUpperCase());
}
}
ScanFile scanFile = getScanFileByFilename(db, f.getName());
DB.OPERATION_PERFORMED op;
if (scanFile == null) {
scanFile = insertScanFile(db, f.getName(), scanMode, fileType, plateId, plateRow, plateColumn);
op = DB.OPERATION_PERFORMED.CREATE;
} else {
scanFile.setFilename(f.getName());
scanFile.setMode(scanMode);
scanFile.setFileType(fileType);
scanFile.setPlateId(plateId);
scanFile.setPlateRow(plateRow);
scanFile.setPlateColumn(plateColumn);
updateScanFile(db, scanFile);
op = DB.OPERATION_PERFORMED.UPDATE;
}
// Should only be null if we can't insert the scanFile into the DB for some reason.
if (scanFile == null) {
results.add(Pair.of((Integer)null, op));
} else {
results.add(Pair.of(scanFile.getId(), op));
}
break;
}
}
}
return results;
}
private Integer id;
private String fileName;
private SCAN_MODE mode;
private SCAN_FILE_TYPE fileType;
private Integer plateId;
private Integer plateRow;
private Integer plateColumn;
private ScanFile() { }
protected ScanFile(Integer id, String filename, SCAN_MODE mode, SCAN_FILE_TYPE fileType,
Integer plateId, Integer plateRow, Integer plateColumn) {
this.id = id;
this.fileName = filename;
this.mode = mode;
this.fileType = fileType;
this.plateId = plateId;
this.plateRow = plateRow;
this.plateColumn = plateColumn;
}
public Integer getId() {
return id;
}
public String getFilename() {
return fileName;
}
public void setFilename(String filename) {
this.fileName = filename;
}
public SCAN_MODE getMode() {
return mode;
}
public void setMode(SCAN_MODE mode) {
this.mode = mode;
}
public SCAN_FILE_TYPE getFileType() {
return fileType;
}
public void setFileType(SCAN_FILE_TYPE fileType) {
this.fileType = fileType;
}
public Integer getPlateId() {
return plateId;
}
public void setPlateId(Integer plateId) {
this.plateId = plateId;
}
public Integer getPlateRow() {
return plateRow;
}
public void setPlateRow(Integer plateRow) {
this.plateRow = plateRow;
}
public Integer getPlateColumn() {
return plateColumn;
}
public void setPlateColumn(Integer plateColumn) {
this.plateColumn = plateColumn;
}
/**
* This function check if the scan file is of negative scan mode.
* @return Returns true if the scan is of negative mode
*/
public Boolean isNegativeScanFile() throws Exception {
Integer endIndex = this.fileName.indexOf(".");
Integer lengthOfSearchDistance = "Neg01".length();
if (endIndex < lengthOfSearchDistance) {
throw new RuntimeException("File name is not long enough to check for scan file type");
}
return this.fileName.toLowerCase().substring(endIndex - lengthOfSearchDistance, endIndex).contains("neg");
}
/**
* This function parses the date from a given scan file's name.
* @return a local date time
*/
public LocalDateTime getDateFromScanFileTitle() throws Exception {
for (Pair<Pattern, Map<SCAN_NAME_COMPONENT, Integer>> scan : NAME_EXTRACTION_PATTERNS) {
Pattern p = scan.getLeft();
Map<SCAN_NAME_COMPONENT, Integer> groupMap = scan.getRight();
Matcher m = p.matcher(this.fileName);
if (m.matches() && groupMap.containsKey(SCAN_NAME_COMPONENT.DATE)) {
DateTimeFormatter formatter = DateTimeFormat.forPattern(DATE_FORMAT);
LocalDateTime dateTime = LocalDateTime.parse(m.group(groupMap.get(SCAN_NAME_COMPONENT.DATE)), formatter);
if (dateTime.getYear() < TWENTYN_INCEPTION) {
throw new RuntimeException("The date parsed from the file name is malformed.");
}
return dateTime;
}
}
// We assume a date will appear in every file name format.
throw new RuntimeException(String.format("Unable to extract date from scan file name: %s", this.fileName));
}
}