package org.ensembl.healthcheck.testcase.funcgen;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.generic.ComparePreviousVersionBase;
/**
 * Base class for funcgen checks built on {@link ComparePreviousVersionBase}
 * whose per-array counts are probe feature counts, normalised by the average
 * probe set size of the array.
 *
 * Only arrays that are organised into probe sets contribute a count; all
 * other arrays are left out of the result.
 */
public abstract class ComparePreviousVersionProbeFeaturesFromProbeSetsByArrayBase extends ComparePreviousVersionBase {

  /**
   * Registers the funcgen team as the team responsible for this check.
   */
  public ComparePreviousVersionProbeFeaturesFromProbeSetsByArrayBase() {
    setTeamResponsible(Team.FUNCGEN);
  }

  /**
   * Builds the normalised probe feature count for every array that is
   * organised into probe sets.
   *
   * The raw probe feature count of an array is divided (integer division) by
   * the average probe set size of that array.
   *
   * @param dbre the database to query
   * @return map from array name to normalised probe feature count; arrays
   *         without a usable average probe set size are not included
   */
  protected Map<String, Integer> getNormalisedProbePerArrayCounts(DatabaseRegistryEntry dbre) {
    Map<String, Integer> arrayNameToAverageProbeSetSize = getAverageProbeSetSizePerArrayCounts(dbre);
    Map<String, Integer> rawProbeFeaturePerArrayCounts = getRawProbeFeaturePerArrayCounts(dbre);

    // Normalise the counts for arrays with probe sets.
    // Arrays that don't have probe sets will be skipped.
    Map<String, Integer> normalisedResult = new HashMap<String, Integer>();
    for (Map.Entry<String, Integer> rawEntry : rawProbeFeaturePerArrayCounts.entrySet()) {
      String currentArray = rawEntry.getKey();
      Integer rawProbeFeatureCount = rawEntry.getValue();
      Integer averageProbeSetSize = arrayNameToAverageProbeSetSize.get(currentArray);

      // A missing, null or zero average cannot be used as a divisor. Skip the
      // array instead of aborting the whole check with a
      // NullPointerException or ArithmeticException.
      if (rawProbeFeatureCount == null
          || averageProbeSetSize == null
          || averageProbeSetSize.intValue() == 0) {
        continue;
      }

      normalisedResult.put(
          currentArray,
          rawProbeFeatureCount.intValue() / averageProbeSetSize.intValue());
    }
    return normalisedResult;
  }

  /**
   * Supplies the counts used by the base class: the normalised probe feature
   * counts per array.
   *
   * @param dbre the database to query
   * @return map from array name to normalised probe feature count
   */
  @Override
  protected Map<String, Integer> getCounts(DatabaseRegistryEntry dbre) {
    return getNormalisedProbePerArrayCounts(dbre);
  }

  /**
   * Counts the distinct probe features per array, restricted to analyses
   * whose logic name contains "transcript".
   *
   * NOTE(review): the query groups by analysis.logic_name as well as
   * array.name, so one array can yield several rows. How getCountsBySQL
   * treats repeated array names is not visible here -- confirm.
   *
   * @param dbre the database to query
   * @return map from array name to probe feature count
   */
  protected Map<String, Integer> getRawProbeFeaturePerArrayCounts(DatabaseRegistryEntry dbre) {
    String sql = "select array.name, count(distinct probe_feature.probe_feature_id) from array join array_chip using (array_id) join probe using (array_chip_id) join probe_feature using (probe_id) join analysis using (analysis_id) where analysis.logic_name like \"%transcript%\" group by analysis.logic_name, array.name";
    return getCountsBySQL(dbre, sql);
  }

  /**
   * Creates a map from all arrays that are organised into probe sets to the
   * average number of probes per probe set.
   *
   * This is used to normalise the total counts.
   *
   * Affymetrix uses probe sets, but calls them probes. So what is called a
   * "probe set" in Ensembl is actually a probe for Affymetrix. In order to
   * make the numbers reported by this test comparable to the manufacturer's
   * data sheet, the numbers reported are divided by the number of probes per
   * probe set for arrays that are organised into probe sets.
   *
   * NOTE(review): the SQL ratio can be fractional while the map holds
   * Integer values; the conversion is done by getCountsBySQL and is not
   * visible here -- confirm.
   *
   * @param dbre the database to query
   * @return map from array name to average probe set size
   */
  protected Map<String, Integer> getAverageProbeSetSizePerArrayCounts(DatabaseRegistryEntry dbre) {
    String sql =
        "select "
        + " array.name, "
        + " count(distinct probe_id)/count(distinct probe_set_id) "
        + "from "
        + " probe_set join probe using (probe_set_id) join array_chip on (array_chip.array_chip_id = probe.array_chip_id) join array using (array_id) "
        + "group by "
        + " array.name, array.vendor";
    return getCountsBySQL(dbre, sql);
  }

  /**
   * @return the threshold handed to the base class comparison, fixed at 1;
   *         its exact interpretation is defined by ComparePreviousVersionBase
   */
  @Override
  protected double threshold() {
    return 1;
  }

  /**
   * @return always true, so the base class is asked to test the upper
   *         threshold as well
   */
  @Override
  protected boolean testUpperThreshold() {
    return true;
  }

  /**
   * @return the minimum handed to the base class comparison, fixed at 0;
   *         its exact interpretation is defined by ComparePreviousVersionBase
   */
  @Override
  protected double minimum() {
    return 0;
  }
}