/******************************************************************************* * Copyright 2014 Virginia Polytechnic Institute and State University * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package edu.vt.vbi.patric.common; import java.io.BufferedReader; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.WorkbookFactory; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.json.simple.JSONArray; import org.json.simple.JSONObject; import edu.vt.vbi.patric.dao.DBSearch; import edu.vt.vbi.patric.dao.ResultType; //NOTES: //1. In allowing getter/setter methods to add new headers/fields, it opens up the // possibility of mismatching header and fields arrays (ie, one longer than the other) //2. This class knows whether it's using HSSF or XSSF by which of wb or xwb are null //OPTIONS INFORMATION //-Boarder Options: // see createBorderStyle for options //-Alternating row color // 0 = not alternating // 1 = alternating //-Empty cells // 0 = don't highlight // 1 = highlight //TABLE OF CONTENTS: //1. Global variables //2. Constructors //3. Primary Methods //4. Helper Methods //5. Getters/setters //6. Methods for testing @SuppressWarnings("unchecked") public class ExpressionDataFileReader { // 1. Global variables // ------------------------------------------------------------------- // Excel Workbooks private Workbook wb; private XSSFWorkbook xwb; private XSSFSheet xsheet; private Sheet sheet; private String finalDataUrlString; private String finalSampleUrlString; private boolean samplefileThere; private String dataFormat; private String datafileType; private String separator; private String orientation; private String idType; private String samplefileType; private int countGeneIDs = 0; private int countSamples = 0; private JSONArray expression, gene, sample, snapshot_array; private ArrayList<String> sampleIDs = new ArrayList<String>(); private JSONObject mapping; private JSONObject sample_order_list; private final String samplePreTag = "S"; private final int snapshot_size = 30; public final static String CONTENT_EXPRESSION = "expression"; public final static String CONTENT_SAMPLE = "sample"; public final static String CONTENT_MAPPING = "mapping"; private String collectionID; private final HashMap<String, String> idTypes = new HashMap<String, String>(); // Constructor // ----------------------------------------------------------------------- /** * This constructor is used when you're reading in a file instead of generating one */ public ExpressionDataFileReader(JSONObject config) { samplefileThere = Boolean.parseBoolean(config.get("sampleFilePresent").toString()); finalDataUrlString = (String) config.get("dataURL"); finalSampleUrlString = (String) config.get("sampleURL"); datafileType = (String) config.get("dataFileType"); // xls, xlsx or txt samplefileType = (String) config.get("sampleFileType"); // xls, xlsx or // txt dataFormat = (String) config.get("dataFileFormat"); // matrix or list orientation = (String) config.get("dataFileOrientation"); // gvs or svg (optional) idType = (String) config.get("idMappingType"); // refseq etc (optional) collectionID = config.get("collectionID").toString(); // mandatory sample = new JSONArray(); gene = new JSONArray(); expression = new JSONArray(); snapshot_array = new JSONArray(); mapping = new JSONObject(); idTypes.put("refseq_source_id", "Refseq Locus Tag"); idTypes.put("source_id", "PATRIC Locus Tag"); } public boolean doRead() throws IOException { boolean sample_success = false; InputStream inp; InputStreamReader stream = null; BufferedReader reader = null; /* * If sample file is provided */ if (samplefileThere) { inp = getInputStreamReader(finalSampleUrlString); if (samplefileType.equals("xls") || samplefileType.equals("xlsx")) { try { sample_success = readExcelFormat(inp, "sample"); } catch (InvalidFormatException e) { e.printStackTrace(); } } else if (samplefileType.equals("txt") || samplefileType.equals("csv")) { separator = (samplefileType.equals("txt")) ? "\t" : ","; stream = new InputStreamReader(inp); reader = new BufferedReader(stream); sample_success = readTXTSampleFile(reader); } if (this.sample.size() > 0) { countSamples = sample.size(); } } else { sample_success = true; } boolean data_success = false; if (sample_success) { try { inp = getInputStreamReader(finalDataUrlString); } catch (MalformedURLException ex) { return false; } if (datafileType.equals("xls") || datafileType.equals("xlsx")) { try { data_success = readExcelFormat(inp, "data"); } catch (InvalidFormatException e) { e.printStackTrace(); } } else if (datafileType.equals("txt") || datafileType.equals("csv")) { separator = (datafileType.equals("txt")) ? "\t" : ","; stream = new InputStreamReader(inp); reader = new BufferedReader(stream); data_success = readTXTDataFile(reader); } if (this.expression.size() > 0) { countGeneIDs = gene.size(); } if (this.sample.size() > 0) { countSamples = sample.size(); } } return sample_success && data_success; } public InputStream getInputStreamReader(String path) throws MalformedURLException { InputStream inp = null; System.out.println(path); try { URL url = new URL(path); URLConnection connection = url.openConnection(); inp = connection.getInputStream(); } catch (MalformedURLException mfuex) { throw mfuex; } catch (IOException e) { e.printStackTrace(); } return inp; } /** * Reads in an Excel file via inputstream * * @param inp InputStream * @param input - reading whether sample or gene file * @throws IOException * @throws InvalidFormatException */ public boolean readExcelFormat(InputStream inp, String input) throws InvalidFormatException, IOException { Iterator<Row> rowIter = null; if (samplefileThere && input.equals("sample")) { // sample file if (samplefileType.equals("xlsx")) { xwb = (XSSFWorkbook) WorkbookFactory.create(inp); xsheet = xwb.getSheetAt(0); rowIter = xsheet.rowIterator(); } else if (samplefileType.equals("xls")) { wb = WorkbookFactory.create(inp); sheet = wb.getSheetAt(0); rowIter = sheet.rowIterator(); } return readXLSSampleFile(rowIter); } else { // data file if (datafileType.equals("xlsx")) { xwb = (XSSFWorkbook) WorkbookFactory.create(inp); xsheet = xwb.getSheetAt(0); rowIter = xsheet.rowIterator(); } else if (datafileType.equals("xls")) { wb = WorkbookFactory.create(inp); sheet = wb.getSheetAt(0); rowIter = sheet.rowIterator(); } return readXLSDataFile(rowIter); } } /** * Reads in a file via BufferedReader Reads only Data Text File * * @param in BufferedReader input * @return true/false for success/failure */ public boolean readTXTDataFile(BufferedReader in) { try { String strLine = null; int rowCount = 0; while ((strLine = in.readLine()) != null && strLine != "") { String snapshot = ""; String[] separated = strLine.split(separator); if (separated != null && separated[0] != null && separated[0].length() > 0 && (separated[0].charAt(0) == '!' || separated[0].charAt(0) == '#')) { continue; } if (dataFormat.equals("list")) { JSONObject a = new JSONObject(); a.put("exp_locus_tag", separated[0].trim()); if (getGene(separated[0]) == null) { gene.add(a); } /* * If the data file is in list format correctly, First column is gene separated[0], Second column is sample user given id * separated[1], and Third column is expression value separated[2] */ String pid = AddToSampleJSONArray(separated[1].trim()); a.put("pid", pid); a.put("log_ratio", getFloatValue(separated[2].trim())); expression.add(a); snapshot = strLine; } else { if (orientation.equals("svg")) { if (separated[0] == null || separated[0].trim().equals("")) { continue; } for (int i = 1; i < separated.length; i++) { JSONObject a; if (rowCount == 0) { // process header a = new JSONObject(); a.put("sampleUserGivenId", separated[i].trim()); // If the sample file is not provided if (!samplefileThere) { a.put("pid", collectionID + samplePreTag + (i - 1)); a.put("expname", separated[i].trim()); sample.add(a); } sampleIDs.add(a.get("sampleUserGivenId").toString()); snapshot += "\t" + separated[i].trim(); } else { // process data cells if (separated[i] != null && !separated[i].trim().equals("")) { if (i == 1) { a = new JSONObject(); a.put("exp_locus_tag", separated[0].trim()); gene.add(a); snapshot = separated[0].trim(); } JSONObject s = getSample(sampleIDs.get(i-1)); String myCell = getFloatValue(separated[i].trim()); a = new JSONObject(); a.put("exp_locus_tag", separated[0].trim()); a.put("pid", s.get("pid").toString()); a.put("log_ratio", myCell); expression.add(a); snapshot += "\t" + myCell; } else { if (i == 1) { a = new JSONObject(); a.put("exp_locus_tag", separated[0].trim()); gene.add(a); snapshot = separated[0].trim() + "\t"; } else { snapshot += "\t"; } } } } } else if (orientation.equals("gvs")) { // For gvs matrix format, the first line is always genes count is used to track line numbers if (rowCount == 0) { for (int i = 1; i < separated.length; i++) { JSONObject a = new JSONObject(); a.put("exp_locus_tag", separated[i].trim()); gene.add(a); } } else { for (int i = 1; i < separated.length; i++) { JSONObject a = new JSONObject(); a.put("log_ratio", getFloatValue(separated[i].trim())); // If sample file is not provided The first column is always sample names JSONObject b; String pid = ""; if (i - 1 == 0) { b = new JSONObject(); pid = collectionID + samplePreTag + rowCount; b.put("pid", pid); a.put("expname", separated[i]); b.put("sampleUserGivenId", separated[i].trim()); if (!samplefileThere) { sample.add(b); } } a.put("pid", pid); b = (JSONObject) gene.get(i - 1); a.put("exp_locus_tag", b.get("exp_locus_tag")); expression.add(a); snapshot += "\t" + separated[i].trim(); } } } } // System.out.println(snapshot); // To provide user a snapshot of uploaded file. if (snapshot_array.size() < snapshot_size && snapshot.length() >= 1) { JSONObject snapshot_obj = new JSONObject(); snapshot_obj.put("line", snapshot); snapshot_array.add(snapshot_obj); } rowCount++; } } catch (IOException e) { System.out.println("File read Exception thrown. Uncomment in readFile(String fileName) to see stack trace."); return false; } return true; } /* * processData * * @param Iterator<Row> */ public boolean readXLSDataFile(Iterator<Row> rowIter) { Row myRow; Cell myCell; Iterator<Cell> cellIter; int rowCount = 0; int cellCount = 0; while (rowIter.hasNext()) { myRow = rowIter.next(); cellIter = myRow.cellIterator(); String snapshot = ""; if (dataFormat.equals("list")) { cellCount = 0; JSONObject geneA = new JSONObject(); while (cellIter.hasNext()) { myCell = cellIter.next(); if (IsCellNotNull(myCell)) { String strMyCell = myCell.toString().trim(); if (cellCount == 0) { geneA.put("exp_locus_tag", strMyCell); if (getGene(strMyCell) == null) gene.add(geneA); } else if (cellCount == 1) { String pid = AddToSampleJSONArray(strMyCell); geneA.put("pid", pid); } else if (cellCount == 2) { geneA.put("log_ratio", getFloatValue(strMyCell)); } // add to a snapshot if (cellCount == 0) { snapshot += strMyCell; } else if (cellCount == 2) { snapshot += "\t" + getFloatValue(strMyCell); } else { snapshot += "\t" + strMyCell; } } cellCount++; } expression.add(geneA); } else if (orientation.equals("svg")) { if (rowCount == 0) { // process header while (cellIter.hasNext()) { myCell = cellIter.next(); JSONObject th = new JSONObject(); if (cellCount > 0) { if (myCell.getCellType() == Cell.CELL_TYPE_NUMERIC) { th.put("sampleUserGivenId", String.format("%.0f", myCell.getNumericCellValue())); } else { th.put("sampleUserGivenId", myCell.toString().trim()); } if (!samplefileThere) { th.put("pid", collectionID + samplePreTag + cellCount); if (myCell.getCellType() == Cell.CELL_TYPE_NUMERIC) { th.put("expname", String.format("%.0f", myCell.getNumericCellValue())); } else { th.put("expname", myCell.toString().trim()); } sample.add(th); } sampleIDs.add(th.get("sampleUserGivenId").toString()); } // snapshot if (IsCellNotNull(myCell)) { if (cellCount == 0) { snapshot += myCell.toString(); } else { snapshot += "\t" + myCell.toString(); } } else { snapshot += "\t"; } cellCount++; } //System.out.println(sample.toJSONString()); } else { // process data cells JSONObject geneID = new JSONObject(); for (int i = 0; i < cellCount; i++) { JSONObject td = new JSONObject(); myCell = myRow.getCell(i); if (IsCellNotNull(myCell)) { String strMyCell = myCell.toString().trim(); if (i == 0) { geneID.put("exp_locus_tag", strMyCell); gene.add(geneID); snapshot += strMyCell; } else { JSONObject s = getSample(sampleIDs.get(i-1)); //System.out.println("i=" + i + ", " + s.get("sampleUserGivenId")); td.put("exp_locus_tag", geneID.get("exp_locus_tag")); td.put("pid", s.get("pid")); if (myCell.getCellType() == Cell.CELL_TYPE_NUMERIC) { td.put("log_ratio", myCell.getNumericCellValue()); strMyCell = String.valueOf(myCell.getNumericCellValue()); } else { td.put("log_ratio", 0.0d); strMyCell = "0.0"; } expression.add(td); snapshot += "\t" + strMyCell; } } else { if (i == 0) { break; } else { // convert to zero JSONObject s = getSample(sampleIDs.get(i-1)); //System.out.println("i=" + i + ", " + s.get("sampleUserGivenId")); td.put("exp_locus_tag", geneID.get("exp_locus_tag")); td.put("pid", s.get("pid")); td.put("log_ratio", "0.0"); expression.add(td); snapshot += "\t0.0"; } } // System.out.println(td.toJSONString()); } // System.out.println(snapshot); } } /* * else if (orientation.equals("svg")) { int cellNullcounter = 0; while (cellIter.hasNext()) { myCell = cellIter.next(); if (rowCount == 0 * && cellCount > fcn) { // process header JSONObject b = new JSONObject(); if (myCell.getCellType() == Cell.CELL_TYPE_NUMERIC) { * b.put("sampleUserGivenId", String.format("%.0f", myCell.getNumericCellValue())); } else { b.put("sampleUserGivenId", * myCell.toString().trim()); } * * if (!samplefileThere) { b.put("pid", collectionID + samplePreTag + cellCount); * * if (myCell.getCellType() == Cell.CELL_TYPE_NUMERIC) { b.put("expname", String.format("%.0f", myCell.getNumericCellValue())); } else { * b.put("expname", myCell.toString().trim()); } sample.add(b); } samples_temp.add(b); } else if (rowCount > 0) { // process data * JSONObject geneA = new JSONObject(); * * if (cellCount == 0) { if (IsCellNotNull(myCell)) { geneA.put("exp_locus_tag", myCell.toString().trim()); gene.add(geneA); } else { * break; } } else { if (IsCellNotNull(myCell)) { JSONObject a = (JSONObject) gene.get(rowCount - 1); geneA.put("exp_locus_tag", * a.get("exp_locus_tag")); * * a = (JSONObject) samples_temp.get(cellCount - 1); a = getSample(a.get("sampleUserGivenId").toString().trim()); * * geneA.put("pid", a.get("pid")); geneA.put("log_ratio", getFloatValue(myCell.toString().trim())); expression.add(geneA); } else { * cellNullcounter += 1; if (cellNullcounter == samples_temp.size()) { gene.remove(gene.size() - 1); rowCount--; } } } } // add to * snapshot if (IsCellNotNull(myCell)) { if (cellCount == 0 && rowCount > 0 || cellCount > fcn && rowCount == 0) { if (cellCount > fcn) * snapshot += "\t"; snapshot += myCell.toString(); } else if (cellCount == 0 && rowCount == 0) snapshot += "Gene"; else if (cellCount > 0 * && rowCount > 0) snapshot += "\t" + getFloatValue(myCell.toString()); } else { snapshot += "\t"; } * * cellCount++; } } */ else if (orientation.equals("gvs")) { String sampleId = ""; cellCount = 0; while (cellIter.hasNext()) { JSONObject geneA = new JSONObject(); myCell = cellIter.next(); if (rowCount == 0) { JSONObject a = new JSONObject(); a.put("exp_locus_tag", myCell.toString().trim()); gene.add(a); } else if (rowCount > 0) { if (cellCount == 0) { String pid = ""; JSONObject b; if (!samplefileThere) { b = new JSONObject(); pid = collectionID + samplePreTag + (rowCount - 1); b.put("pid", pid); b.put("expname", myCell.toString().trim()); b.put("sampleUserGivenId", myCell.toString().trim()); sample.add(b); } else { b = getSample(myCell.toString().trim()); pid = b.get("pid").toString(); } sampleId = pid; } else { JSONObject a = (JSONObject) gene.get(cellCount - 1); geneA.put("exp_locus_tag", a.get("exp_locus_tag")); geneA.put("pid", sampleId); geneA.put("log_ratio", getFloatValue(myCell.toString().trim())); expression.add(geneA); } } snapshot += "\t" + myCell.toString().trim(); cellCount++; } } if (snapshot_array.size() < snapshot_size && snapshot.length() >= 1) { JSONObject snapshot_obj = new JSONObject(); snapshot_obj.put("line", snapshot); snapshot_array.add(snapshot_obj); } rowCount++; } return true; } public boolean IsCellNotNull(Cell cell) { return cell != null && (cell.toString().trim().equals("") == false); } public String getFloatValue(String number) { String op; String[] n; if (number.contains("e")) { n = number.split("e"); op = n[1].substring(0, 1); number = n[0]; if (op.equals("+")) { number = String.valueOf(1000 * Double.parseDouble(number)); } else { number = String.valueOf(0.0001 * Double.parseDouble(number)); } } String strFloatValue = ""; try { double a = Double.parseDouble(number); a = Math.round(a * 1000) / (double) 1000; strFloatValue = String.valueOf(a); } catch (NumberFormatException e) { strFloatValue = String.valueOf(0.0d); } return strFloatValue; } /* * If sample file is provided and it is in xls/xlsx format. processSample * * @param Iterator<Row> */ public boolean readXLSSampleFile(Iterator<Row> rowIter) { Row myRow; Cell myCell; Iterator<Cell> cellIter; int rowCount = 0; int columnCount = 0; sample_order_list = new JSONObject(); while (rowIter.hasNext()) { myRow = rowIter.next(); if (rowCount == 0) { // process header line cellIter = myRow.cellIterator(); while (cellIter.hasNext()) { myCell = cellIter.next(); String lcase = ""; if (IsCellNotNull(myCell) && myCell.getCellType() != Cell.CELL_TYPE_NUMERIC) { lcase = myCell.toString().trim().toLowerCase(); } if (lcase.equals("pid") || lcase.equals("comparison id")) { sample_order_list.put(columnCount, "pid"); } else if (lcase.equals("accession")) { sample_order_list.put(columnCount, "accession"); } else if (lcase.equals("title")) { sample_order_list.put(columnCount, "expname"); } else if (lcase.equals("pubmed")) { sample_order_list.put(columnCount, "pubmed"); } else if (lcase.equals("organism")) { sample_order_list.put(columnCount, "organism"); } else if (lcase.equals("strain")) { sample_order_list.put(columnCount, "strain"); } else if (lcase.equals("gene modification") || lcase.equals("mutant")) { sample_order_list.put(columnCount, "mutant"); } else if (lcase.equals("experiment condition")) { sample_order_list.put(columnCount, "condition"); } else if (lcase.equals("time point") || lcase.equals("timepoint")) { sample_order_list.put(columnCount, "timepoint"); } columnCount++; } } else { // process data line JSONObject a = new JSONObject(); for (int i = 0; i < columnCount; i++) { myCell = myRow.getCell(i); if (IsCellNotNull(myCell)) { if (sample_order_list.get(i) != null) { if (sample_order_list.get(i).equals("comparison id") || sample_order_list.get(i).equals("pid")) { if (myCell.getCellType() == Cell.CELL_TYPE_NUMERIC) { a.put("sampleUserGivenId", String.format("%.0f", myCell.getNumericCellValue())); } else { a.put("sampleUserGivenId", myCell.toString().trim()); } a.put("pid", collectionID + samplePreTag + (rowCount - 1)); } else { a.put(sample_order_list.get(i), myCell.toString().trim()); } } } else { if (i == 0) { break; } if (sample_order_list.get(i) != null) { a.put(sample_order_list.get(i), ""); } } } if (a.isEmpty() == false) { sample.add(a); } } rowCount++; } // System.out.println("readingXSLSample: " + sample.toJSONString()); return true; } /* * If sample file is provided and it is in txt/csv format. processSample * * @param Iterator<Row> */ public boolean readTXTSampleFile(BufferedReader in) { boolean success = true; sample_order_list = new JSONObject(); try { String strLine = ""; int rowCount = 0; int columnCount = 0; while ((strLine = in.readLine()) != null && strLine.trim().equals("") == false) { String[] separated = strLine.split(separator); JSONObject a = new JSONObject(); if (rowCount == 0) { // process header for (int i = 0; i < separated.length; i++) { String lcase = (separated[i] != null && separated[i].trim().equals("") == false) ? separated[i].trim().toLowerCase() : ""; if (lcase.equals("pid") || lcase.equals("comparison id")) { sample_order_list.put(i, "pid"); } else if (lcase.equals("accession")) { sample_order_list.put(i, "accession"); } else if (lcase.equals("title")) { sample_order_list.put(i, "expname"); } else if (lcase.equals("pubmed")) { sample_order_list.put(i, "pubmed"); } else if (lcase.equals("organism")) { sample_order_list.put(i, "organism"); } else if (lcase.equals("strain")) { sample_order_list.put(i, "strain"); } else if (lcase.equals("gene modification") || lcase.equals("mutant")) { sample_order_list.put(i, "mutant"); } else if (lcase.equals("experiment condition")) { sample_order_list.put(i, "condition"); } else if (lcase.equals("time point") || lcase.equals("timepoint")) { sample_order_list.put(i, "timepoint"); } columnCount++; } } else if (rowCount > 0) { // process data // System.out.println("strLine-> "+strLine); // for (int i = 0; i < separated.length; i++) { for (int i = 0; i < columnCount; i++) { if (separated[i] != null && separated[i].trim().equals("") == false) { if (sample_order_list.get(i) != null) { if (sample_order_list.get(i).equals("pid") || sample_order_list.get(i).equals("comparison id")) { a.put("pid", collectionID + samplePreTag + (rowCount - 1)); a.put("sampleUserGivenId", separated[i]); } else { a.put(sample_order_list.get(i), separated[i].trim()); } } } else { if (i == 0) { break; } if (sample_order_list.get(i) != null) { a.put(sample_order_list.get(i), ""); } } } sample.add(a); } rowCount++; } } catch (IOException e) { System.out.println("File read Exception thrown. Uncomment in readSampleFile(String fileName) to see stack trace."); return false; } System.out.println("readSampleFile: " + sample.toJSONString()); return success; } /* * Calls DBSearch.getIDSearchResult function */ public boolean runIDMappingStatistics() { DBSearch db = new DBSearch(); HashMap<String, String> key = new HashMap<String, String>(); String idList = ((JSONObject) (gene.get(0))).get("exp_locus_tag").toString(); for (int i = 1; i < gene.size(); i++) { idList += "," + ((JSONObject) (gene.get(i))).get("exp_locus_tag").toString(); } key.put("keyword", idList); key.put("to", "PATRIC Locus Tag"); key.put("from", idTypes.get(idType)); ArrayList<ResultType> items = db.getTranscriptomicsIDSearchResult(key, 0, -1); mapping.put("mapped_ids", items.size()); mapping.put("unmapped_ids", gene.size() - items.size()); JSONObject results = new JSONObject(); for (int i = 0; i < items.size(); i++) { ResultType g = items.get(i); JSONObject obj = new JSONObject(); obj.putAll(g); results.put(obj.get(idType), obj); } JSONArray mapped_list = new JSONArray(); JSONArray unmapped_list = new JSONArray(); /* * This for loop is to create mapping.json The output is written to idmapping_stat JSONObject mapped_list -> JSONArray of JSONObjects * (exp_locus_tag & patric_locus_tag) unmapped_list -> JSONArray of JSONObjects(exp_locus_tag) */ for (int j = 0; j < gene.size(); j++) { JSONObject b = (JSONObject) gene.get(j); JSONObject a = (JSONObject) results.get(b.get("exp_locus_tag")); JSONObject c = new JSONObject(); if (a != null) { c.put("exp_locus_tag", a.get(idType)); c.put("na_feature_id", a.get("na_feature_id")); mapped_list.add(c); } else { c.put("exp_locus_tag", b.get("exp_locus_tag")); unmapped_list.add(c); } } mapping.put("mapped_list", mapped_list); mapping.put("unmapped_list", unmapped_list); /* * This for loop is to update expression.json The output is in gene_sample_list pid - exp_locus_tag - refseq_locus_tag (if from == PATRIC * Locus Tag) - log_ratio - na_fature_id */ JSONArray temp_list = new JSONArray(); for (int j = 0; j < expression.size(); j++) { JSONObject b = (JSONObject) expression.get(j); JSONObject a = (JSONObject) results.get(b.get("exp_locus_tag")); if (a != null) { JSONObject c = new JSONObject(); c.put("na_feature_id", a.get("na_feature_id")); c.put("exp_locus_tag", b.get("exp_locus_tag")); if (a.get("refseq_source_id") != null) { c.put("refseq_locus_tag", a.get("refseq_source_id")); } c.put("pid", b.get("pid")); c.put("log_ratio", b.get("log_ratio")); c.put("z_score", b.get("z_score")); temp_list.add(c); } } expression = temp_list; return true; } /* * This function calculated expmean, expstddev and z_score for each gene */ public void calculateExpStats() { HashMap<String, String> sample_values = new HashMap<String, String>(); StdStats stats = new StdStats(); JSONArray temp_sample = new JSONArray(); JSONArray temp_gene_sample_list = new JSONArray(); JSONObject temp_stat = new JSONObject(); for (int i = 0; i < expression.size(); i++) { JSONObject a = (JSONObject) expression.get(i); String temp = ""; String pid = a.get("pid").toString(); String log_ratio = a.get("log_ratio").toString(); if (sample_values.containsKey(pid)) { temp = sample_values.get(pid) + "," + log_ratio; } else { temp = log_ratio; } sample_values.put(pid, temp); } for (int i = 0; i < sample.size(); i++) { JSONObject z = (JSONObject) sample.get(i); String pid = z.get("pid").toString(); String[] a = sample_values.get(pid).toString().split(","); double[] b = new double[a.length]; int count = 0; for (int j = 0; j < a.length; j++) { b[j] = Double.parseDouble(a[j]); if (Math.abs(b[j]) >= 1.0) { count++; } } String mean = getFloatValue(Double.toString(stats.mean(b))); String stddev = getFloatValue(Double.toString(stats.stddev(b))); z.put("expmean", mean); z.put("expstddev", stddev); z.put("sig_log_ratio", count); z.put("sig_z_score", 0); z.put("genes", a.length); temp_stat.put(pid, z); } for (int i = 0; i < expression.size(); i++) { JSONObject a = (JSONObject) expression.get(i); String pid = a.get("pid").toString(); String log_ratio = a.get("log_ratio").toString(); JSONObject b = (JSONObject) temp_stat.get(pid); String expmean = b.get("expmean").toString(); String expstddev = b.get("expstddev").toString(); String z_score = Double.toString((Double.parseDouble(log_ratio) - Double.parseDouble(expmean)) / Double.parseDouble(expstddev)); a.put("z_score", z_score); int count = Integer.parseInt(b.get("sig_z_score").toString()); if (Double.parseDouble(z_score) >= 2) { b.put("sig_z_score", ++count); } else { b.put("sig_z_score", count); } temp_stat.put(pid, b); temp_gene_sample_list.add(a); } for (int i = 0; i < sample.size(); i++) { JSONObject a = (JSONObject) sample.get(i); String pid = a.get("pid").toString(); JSONObject b = (JSONObject) temp_stat.get(pid); temp_sample.add(b); } expression = temp_gene_sample_list; sample = temp_sample; } public void writeData(String type) { String temp_url = "/tmp/"; String id = "", content = ""; try { FileWriter fwrite; if (type.equals(CONTENT_SAMPLE)) { id = "sample.json"; content = CONTENT_SAMPLE; } else if (type.equals(CONTENT_EXPRESSION)) { id = "expression.json"; content = CONTENT_EXPRESSION; } else if (type.equals(CONTENT_MAPPING)) { id = "mapping.json"; content = CONTENT_MAPPING; } File file = new File(temp_url + id); if (file.createNewFile()) { fwrite = new FileWriter(file); fwrite.write(this.get(content).toString()); fwrite.flush(); fwrite.close(); } else { System.err.println("Error creating file"); } } catch (Exception e) { System.err.println("Error: Unable to Write " + e.getMessage()); } } /* * public void writeDataAsList() { try { FileWriter fwrite; File file = new File("/tmp/test_from_patric_list.txt"); * * if (file.createNewFile()) { System.out.println("File is created!"); fwrite = new FileWriter(file); String as = ""; * * for (int i = 0; i < expression.size(); i++) { JSONObject ao = (JSONObject) expression.get(i); as += ao.get("exp_locus_tag").toString() + "\t"; * as += getSampleReverse(ao.get("pid").toString()).get("sampleUserGivenId").toString() + "\t"; as += ao.get("log_ratio").toString(); as += "\n"; * } * * fwrite.write(as); fwrite.flush(); fwrite.close(); } else { System.out.print("Error creating file"); } } catch (Exception e) {// Catch exception * if any System.err.println("Error: Unable to Write " + e.getMessage()); } } */ public JSONObject get(String type) { JSONObject temp = new JSONObject(); if (type.equals(CONTENT_SAMPLE)) { temp.put(CONTENT_SAMPLE, sample); } else if (type.equals(CONTENT_EXPRESSION)) { temp.put(CONTENT_EXPRESSION, expression); } else if (type.equals(CONTENT_MAPPING)) { temp.put(CONTENT_MAPPING, mapping); } else { temp.put("snapshot", snapshot_array); } return temp; } public JSONObject getSample(String sampleUserGivenId) { JSONObject a; for (int i = 0; i < sample.size(); i++) { a = (JSONObject) sample.get(i); if (!sampleUserGivenId.equals("") && a.get("sampleUserGivenId").equals(sampleUserGivenId)) { return a; } } return null; } public JSONObject getSampleReverse(String pid) { JSONObject a; for (int i = 0; i < sample.size(); i++) { a = (JSONObject) sample.get(i); if (!pid.equals("") && a.get("pid").equals(pid)) { return a; } } return null; } public JSONObject getGene(String exp_locus_tag) { JSONObject a; for (int i = 0; i < gene.size(); i++) { a = (JSONObject) gene.get(i); if (!exp_locus_tag.equals("") && a.get("exp_locus_tag").equals(exp_locus_tag)) { return a; } } return null; } public String AddToSampleJSONArray(String data) { JSONObject b = getSample(data); String pid; if (!samplefileThere && b == null) { // Add samples to jSONArray b = new JSONObject(); pid = collectionID + samplePreTag + sample.size(); b.put("pid", pid); b.put("expname", data); b.put("sampleUserGivenId", data); sample.add(b); } else { // Get sample object from samples JSONArray pid = b.get("pid").toString(); } return pid; } public int getCountGeneIDs() { return countGeneIDs; } public int getCountSamples() { return countSamples; } }