/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.viewer.metabologna.commandline;
import org.fhcrc.cpl.toolbox.commandline.arguments.*;
import org.fhcrc.cpl.toolbox.filehandler.TabLoader;
import org.fhcrc.cpl.toolbox.ApplicationContext;
import org.fhcrc.cpl.toolbox.gui.chart.PanelWithScatterPlot;
import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.MS2ExtraInfoDef;
import org.fhcrc.cpl.toolbox.commandline.CommandLineModuleExecutionException;
import org.fhcrc.cpl.toolbox.commandline.CommandLineModule;
import org.fhcrc.cpl.viewer.commandline.modules.BaseViewerCommandLineModuleImpl;
import org.apache.log4j.Logger;
import java.io.*;
import java.util.*;
/**
 * Command-line module that combines MS1 and MS2 metabolite identification spreadsheets by scan number.
 *
 * Rows of the MS1 spreadsheet are keyed by the scan number(s) in its "MS2Scans" column; rows of the MS2
 * spreadsheet are keyed by its "scan" column. For every scan present in both spreadsheets, the compound
 * names from the MS1 "Compound" column are intersected with the names from the MS2 "Metlin ID Name" column,
 * and one "scan / id" line is written to the output file for each name on which the two agree. Finally,
 * a scatterplot of MS1 mass ("FeatureMass") vs. the MS1-MS2 mass difference ("Metlin Mass") is displayed.
 */
public class CombineMS1MS2MetabCLM extends BaseViewerCommandLineModuleImpl
        implements CommandLineModule
{
    protected static Logger _log = Logger.getLogger(CombineMS1MS2MetabCLM.class);

    protected File ms1File;
    protected File ms2File;
    protected File outFile;

    // Not read anywhere in this class. Retained because it is protected and so may be referenced elsewhere.
    protected boolean shouldCollapseByMax = false;

    public CombineMS1MS2MetabCLM()
    {
        init();
    }

    /**
     * Sets the command name and help text, and declares the three required arguments: ms1, ms2, out.
     */
    protected void init()
    {
        mCommandName = "metabms1ms2";
        mShortDescription = "Combine MS1 and MS2 metabolite ID info";
        mHelpMessage = mShortDescription;
        CommandLineArgumentDefinition[] argDefs =
                {
                        new FileToReadArgumentDefinition("ms1",true,"input MS1 spreadsheet"),
                        new FileToReadArgumentDefinition("ms2",true,"input MS2 spreadsheet"),
                        new FileToWriteArgumentDefinition("out",true,"output file"),
                };
        addArgumentDefinitions(argDefs);
    }

    /**
     * Copies the parsed argument values into this module's fields.
     * @throws ArgumentValidationException declared by the module contract; not thrown directly here
     */
    public void assignArgumentValues()
            throws ArgumentValidationException
    {
        ms1File = getFileArgumentValue("ms1");
        ms2File = getFileArgumentValue("ms2");
        outFile = getFileArgumentValue("out");
    }

    /**
     * Loads both spreadsheets, writes the agreed-upon IDs per shared scan to the output file, reports how
     * many scans were shared and how many of those agreed, and displays the mass-difference scatterplot.
     * @throws CommandLineModuleExecutionException if an input can't be loaded, the output can't be opened
     * or written, or a mass value can't be parsed
     */
    public void execute() throws CommandLineModuleExecutionException
    {
        Map<Integer,List<Map<String,Object>>> ms1RowMap = loadRowsByScan(ms1File,"MS2Scans");
        Map<Integer,List<Map<String,Object>>> ms2RowMap = loadRowsByScan(ms2File,"scan");

        PrintWriter outPW;
        try
        {
            outPW = new PrintWriter(outFile);
        }
        catch (IOException e)
        {
            throw new CommandLineModuleExecutionException(
                    "Failed to open output file " + outFile.getAbsolutePath(), e);
        }

        int scansInCommon = 0;
        List<Integer> agreedScans = new ArrayList<Integer>();
        List<Float> ms1Masses = new ArrayList<Float>();
        List<Float> massDiffs = new ArrayList<Float>();

        //try/finally so that the output file is closed even if a row turns out to be malformed
        try
        {
            outPW.println("scan\tid");
            //Visit scans in ascending order so that the output doesn't depend on HashMap iteration order
            for (Integer scan : new TreeSet<Integer>(ms1RowMap.keySet()))
            {
                List<Map<String,Object>> ms2Rows = ms2RowMap.get(scan);
                if (ms2Rows == null)
                    continue;
                List<Map<String,Object>> ms1Rows = ms1RowMap.get(scan);
                scansInCommon++;

                Set<String> ms1IDs = collectIds(ms1Rows, "Compound");
                Set<String> ms2IDs = collectIds(ms2Rows, "Metlin ID Name");
                if (_log.isDebugEnabled())
                    _log.debug("scan " + scan + ": MS1 IDs " + ms1IDs + ", MS2 IDs " + ms2IDs);

                //When several rows share a scan, the mass from the last row with a value is the one plotted
                Float ms1Mass = lastMass(ms1Rows, "FeatureMass");
                Float ms2Mass = lastMass(ms2Rows, "Metlin Mass");
                //Only plot scans for which both masses are known, rather than plotting a bogus 0
                if (ms1Mass != null && ms2Mass != null)
                {
                    ms1Masses.add(ms1Mass);
                    massDiffs.add(ms1Mass - ms2Mass);
                }

                //Sorted, so that the IDs for a scan are written in a reproducible order
                Set<String> intersectionIds = new TreeSet<String>(ms1IDs);
                intersectionIds.retainAll(ms2IDs);
                if (!intersectionIds.isEmpty())
                {
                    agreedScans.add(scan);
                    for (String id : intersectionIds)
                        outPW.println(scan + "\t" + id);
                }
            }
            //PrintWriter never throws on write; checkError() flushes and reports any failure that occurred
            if (outPW.checkError())
                throw new CommandLineModuleExecutionException(
                        "Failed to write output file " + outFile.getAbsolutePath());
        }
        finally
        {
            outPW.close();
        }

        ApplicationContext.infoMessage(scansInCommon + " scans in common. Of those, " +
                agreedScans.size() + " agreed");
        new PanelWithScatterPlot(ms1Masses, massDiffs, "MS1 vs Mass Diff").displayInTab();
    }

    /**
     * Gathers the distinct, unquoted, non-blank values of one column across a list of rows.
     * @param rows rows to scan
     * @param columnName name of the column holding the IDs
     * @return the set of IDs found; empty if no row has a value in that column
     */
    private Set<String> collectIds(List<Map<String,Object>> rows, String columnName)
    {
        Set<String> ids = new HashSet<String>();
        for (Map<String,Object> row : rows)
        {
            Object value = row.get(columnName);
            if (value == null)
                continue;
            //toString() rather than a String cast: the cell value is not guaranteed to be a String
            String id = stripOuterQuotes(value.toString());
            if (id.length() > 0)
                ids.add(id);
        }
        return ids;
    }

    /**
     * Finds the mass value of the last row that has one in the given column.
     * @param rows rows to scan
     * @param columnName name of the column holding the mass
     * @return the mass, or null if no row has a value in that column
     * @throws CommandLineModuleExecutionException if a value is present but is not a parseable number
     */
    private Float lastMass(List<Map<String,Object>> rows, String columnName)
            throws CommandLineModuleExecutionException
    {
        Float result = null;
        for (Map<String,Object> row : rows)
        {
            Object value = row.get(columnName);
            if (value == null)
                continue;
            try
            {
                result = Float.parseFloat(value.toString());
            }
            catch (NumberFormatException e)
            {
                throw new CommandLineModuleExecutionException(
                        "Bad value '" + value + "' in column " + columnName, e);
            }
        }
        return result;
    }

    /**
     * Removes one pair of enclosing double quotes, if present.
     * @param input string to strip; may be null
     * @return input without its first and last character if both are double quotes; otherwise input
     * unchanged (including null, the empty string, and a lone quote character)
     */
    protected String stripOuterQuotes(String input)
    {
        //Need at least two characters for there to be a distinct opening and closing quote
        if (input == null || input.length() < 2)
            return input;
        if (input.charAt(0) == '"' && input.charAt(input.length()-1) == '"')
            return input.substring(1, input.length()-1);
        return input;
    }

    /**
     * Loads a tab-delimited spreadsheet and indexes its rows by scan number. A row is filed under every
     * scan listed in its scan column; rows with no value in that column are skipped.
     * @param file spreadsheet to load
     * @param scanColName name of the column holding either a single Integer scan number or a String
     * that MS2ExtraInfoDef.parseIntListString can parse into a list of scan numbers
     * @return map from scan number to all rows that reference that scan
     * @throws CommandLineModuleExecutionException if the file can't be read, or the scan column holds a
     * value that is neither a String nor an Integer
     */
    @SuppressWarnings("unchecked") //rows are cast from raw Map to Map<String,Object>, as the original code assumed
    protected Map<Integer,List<Map<String,Object>>> loadRowsByScan(File file, String scanColName)
            throws CommandLineModuleExecutionException
    {
        ApplicationContext.infoMessage("Loading info from file " + file.getAbsolutePath());
        FileReader reader = null;
        try
        {
            reader = new FileReader(file);
            TabLoader loader = new TabLoader(reader,true);
            Map<Integer,List<Map<String,Object>>> result = new HashMap<Integer,List<Map<String,Object>>>();
            for (Map row : (Map[]) loader.load())
            {
                Object scanObj = row.get(scanColName);
                if (scanObj == null)
                    continue;
                List<Integer> scans = new ArrayList<Integer>();
                if (scanObj instanceof String)
                    scans.addAll(MS2ExtraInfoDef.parseIntListString((String) scanObj));
                else if (scanObj instanceof Integer)
                    scans.add((Integer) scanObj);
                else
                    throw new CommandLineModuleExecutionException("Unexpected value '" + scanObj +
                            "' (" + scanObj.getClass().getName() + ") in column " + scanColName +
                            " of file " + file.getAbsolutePath());
                for (Integer scan : scans)
                {
                    List<Map<String,Object>> rowsThisScan = result.get(scan);
                    if (rowsThisScan == null)
                    {
                        rowsThisScan = new ArrayList<Map<String, Object>>();
                        result.put(scan, rowsThisScan);
                    }
                    rowsThisScan.add((Map<String,Object>) row);
                }
            }
            ApplicationContext.setMessage("Loaded " + result.size() + " scans");
            return result;
        }
        catch (IOException e)
        {
            throw new CommandLineModuleExecutionException("Failed to load file " + file.getAbsolutePath(),e);
        }
        finally
        {
            //Release the file handle. Harmless if the loader has already closed the reader.
            if (reader != null)
            {
                try
                {
                    reader.close();
                }
                catch (IOException ignored)
                {
                    //nothing useful to do about a failure to close a file that was only read
                }
            }
        }
    }
}