package happy.research.utils;

import happy.coding.io.FileIO;
import happy.coding.io.Lists;
import happy.coding.io.Logs;
import happy.coding.math.Gaussian;
import happy.coding.math.Randoms;
import happy.coding.math.Sims;
import happy.coding.math.Stats;
import happy.coding.system.Debug;
import happy.coding.system.Systems;
import happy.research.cf.ConfigParams;
import happy.research.cf.Dataset;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.math.linear.Array2DRowRealMatrix;
import org.apache.commons.math.linear.RealMatrix;
import org.apache.commons.math.stat.correlation.PearsonsCorrelation;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utility methods for computing user-user rating similarity in collaborative
 * filtering: cosine, PCC variants, mean-squared-distance style measures,
 * Spearman rank correlation, PIP, SM (singularities), kappa-based measures,
 * and the Bayesian similarity (IJCAI 2013), plus a set of significance
 * weighting functions.
 *
 * <p>NOTE(review): behavior of several methods is switched at compile time by
 * the {@code Debug.ON}/{@code Debug.OFF} flags, leaving some branches dead in
 * any given build — they are alternative experimental variants, kept on
 * purpose. Rating-scale parameters are read from mutable static state on
 * {@code Dataset} (maxScale, minScale, median, range, scaleSize); callers must
 * configure those before use.
 */
public class SimUtils {
	private final static Logger log = LoggerFactory.getLogger(SimUtils.class);
	// shared Commons-Math Pearson helper; PearsonsCorrelation is stateless here
	public final static PearsonsCorrelation pc = new PearsonsCorrelation();
	// mixing weights used by bsSim: alpha * distance + beta * singularity
	// + (1 - alpha - beta) * semantics
	public static double alpha = 0.2, beta = 0.2;

	/** Identifiers for the similarity methods implemented in this class. */
	public enum SimMethod {
		COS, PCC, MSD, CPC, SRC, BS, PIP, SM, iufCOS, caPCC
	};

	/**
	 * Basic (prior-only) Bayesian similarity used for the IJCAI paper's
	 * worked examples: buckets the absolute rating differences into levels of
	 * width {@code Dataset.minScale}, contrasts the posterior of each level
	 * against a fixed triangular prior, and converts the weighted mean
	 * difference into a similarity in [0, 1].
	 *
	 * @param a              user a's rating vector (co-rated items only)
	 * @param b              user b's rating vector, same length as {@code a}
	 * @param isRemoveChance whether to discount the probability that the
	 *                       observed agreement arose purely by chance
	 * @return similarity in [0, 1], or {@code Double.NaN} for null/empty or
	 *         unequal-length inputs
	 */
	private static double ijcai_bs_examples(List<Double> a, List<Double> b, boolean isRemoveChance) {
		if (a == null || b == null || a.size() < 1 || b.size() < 1 || a.size() != b.size()) return Double.NaN;
		int N = a.size();
		// R + 1 = number of distinct |difference| levels on the rating scale
		int R = (int) (Dataset.maxScale / Dataset.minScale) - 1;

		// {diff level, count} map
		Map<Integer, Integer> diffMap = new HashMap<>();
		for (int i = 0; i < R + 1; i++)
			diffMap.put(i, 0);

		for (int i = 0; i < N; i++) {
			double ar = a.get(i);
			double br = b.get(i);
			double diff = Math.abs(ar - br);
			int level = (int) (diff / Dataset.minScale);
			int count = diffMap.get(level);
			diffMap.put(level, count + 1);
		}
		// System.out.println(PrintUtils.printMap(diffMap));

		double sum = 0.0, weights = 0.0;
		double chance = 1.0;
		for (int i = 0; i < R + 1; i++) {
			int count = diffMap.get(i);
			// triangular Dirichlet prior: level 0 (exact agreement) gets R+1
			// pseudo-counts, level i gets 2*(R-i+1)
			// NOTE(review): this local alpha intentionally shadows the static
			// field of the same name
			int alpha = i == 0 ? (R + 1) : 2 * (R - i + 1);
			int M2 = (R + 1) * (R + 1);
			double post = (count + alpha + 0.0) / (N + M2);
			double prio = (alpha + 0.0) / M2;
			double byChance = Math.pow(prio, count);
			chance *= byChance;
			double weight = post - prio;
			try {
				if (Debug.ON) {
					// variant: weight by the full posterior
					weights += post;
					sum += post * i * Dataset.minScale;
				} else {
					// variant: only levels with positive evidence (post > prior)
					if (weight > 0) {
						weights += Math.abs(weight);
						sum += weight * i * Dataset.minScale;
					}
				}
			} catch (Exception e) {
				e.printStackTrace();
			}
		}
		// weighted mean rating difference, rescaled to a similarity in [0, 1]
		double d = sum / weights;
		double similarity = 1 - d / (Dataset.maxScale - Dataset.minScale);

		if (!isRemoveChance) chance = 0;
		double bias = 0.04; // empirical offset — presumably tuned; see paper
		if (Debug.OFF) bias = 0;
		return Math.max(similarity * (1 - chance) - bias, 0.0);
	}

	/** @return whether {@code rate} is at or above the scale median. */
	public static boolean isPositive(double rate) {
		return rate >= Dataset.median;
	}

	/** @return whether both ratings fall on the same side of the median. */
	public static boolean isConsistent(double r1, double r2) {
		return (r1 - Dataset.median) * (r2 - Dataset.median) >= 0;
	}

	/**
	 * Calculate the Bayesian Similarity, referring to
	 * "A Novel Bayesian Similairty for Recommender Systems" (IJCAI 2013).
	 *
	 * <p>Each co-rated item contributes an evidence weight combining a
	 * distance factor, a singularity factor and a semantic factor (mixed by
	 * the static {@code alpha}/{@code beta} fields); the evidence per
	 * difference level is then contrasted against item-derived priors.
	 * NOTE(review): which singularity/semantic variant runs is fixed by
	 * Debug.ON/Debug.OFF — several branches below are dead in any build.
	 *
	 * @param a      user a's rating vector
	 * @param b      user b's rating vector
	 * @param priors the prior value of each rating scales
	 * @param sd     standard deviation of ratings given on each item
	 * @param mu     mean of ratings given on each item
	 * @param histos per-item histogram {rating difference -> frequency}
	 * @param cf     per-item confidence values (used by one variant only)
	 * @return Bayesian similarity, or {@code Double.NaN} for null/empty or
	 *         unequal-length inputs
	 */
	public static double bsSim(List<Double> a, List<Double> b, List<Double> priors, List<Double> sd, List<Double> mu,
			Map<Integer, Map<Double, Double>> histos, List<Double> cf) throws Exception {
		if (a == null || b == null || a.size() < 1 || b.size() < 1 || a.size() != b.size()) return Double.NaN;
		int N = a.size();
		int R = Dataset.scaleSize;

		// {distance level, count} map
		Map<Double, Double> evidences = new HashMap<>();
		for (int i = 0; i < R; i++)
			evidences.put(i * Dataset.minScale, 0.0);

		double numEvidence = 0;
		for (int i = 0; i < N; i++) {
			double ar = a.get(i);
			double br = b.get(i);
			double di = Math.abs(ar - br);
			double sigma = sd.get(i);
			double mean = mu.get(i);
			double conf = cf.get(i);
			double evidence = evidences.get(di);

			double ei = 0; // evidence weight

			// ours: distance factor — the further apart within x*sigma, the
			// weaker the evidence
			double x = ConfigParams.defaultInstance().X_SIGMA;
			if (sigma <= 0 || x <= 0) ei = 1;
			else ei = 1 - di / (x * sigma);

			// new factor
			double singu = 0.0;
			if (Debug.OFF) {
				// Singularity (confidence-based variant)
				double asin = 0, bsin = 0;
				if (ar > Dataset.median) asin = 1 - conf;
				else asin = conf;
				if (br > Dataset.median) bsin = 1 - conf;
				else bsin = conf;
				singu = asin * bsin;
			} else if (Debug.ON) {
				// Gaussian: ratings unlikely under the item's rating
				// distribution are more singular
				double pa = Gaussian.pdf(ar, mean, sigma);
				double pb = Gaussian.pdf(br, mean, sigma);
				singu = (1 - pa) * (1 - pb);
			} else if (Debug.OFF) {
				// Discrete: singularity from the item's difference histogram
				Map<Double, Double> hist = histos.get(i);
				if (hist.containsKey(di)) singu = 1 - hist.get(di);
				else singu = 1;
			}

			double semantic = 0.0;
			if (Debug.ON) {
				// Semantics: PIP-style proximity/impact/popularity product
				// factor 1: proximity
				double pr = 0;
				double range = Dataset.range;
				if (isConsistent(ar, br)) pr = 1 - di / range;
				else pr = -di / range;

				// factor 2: impact
				double im = 0.0;
				double s = 0.5 * (ar + br);
				if (isPositive(ar) && isPositive(br)) im = s / Dataset.maxScale;
				else if (!isPositive(ar) && !isPositive(br)) im = 1 - s / Dataset.maxScale;
				else im = -s / Dataset.maxScale;

				// factor 3: popularity
				double po = 0;
				double dist = Math.abs(s - mean);
				boolean consistent = (ar - mean) * (br - mean) >= 0;
				if (consistent) po = dist / range;
				else po = -dist / range;

				semantic = pr * im * po;
			}

			// linear mix of the three factors; clamp below at -1
			ei = alpha * ei + beta * singu + (1 - alpha - beta) * semantic;
			if (ei < -1) ei = -1;
			numEvidence += ei;
			evidences.put(di, evidence + ei);
		}

		double sum = 0.0, weights = 0.0;
		double chance = 1.0;
		double[] x = Lists.toArray(priors);
		for (int i = 0; i < R; i++) {
			double num = evidences.get(i * Dataset.minScale);
			// NOTE(review): this local alpha intentionally shadows the static
			// field of the same name
			double alpha = prior(x, i);
			int M2 = R * R;
			double post = (num + alpha + 0.0) / (numEvidence + M2);
			double prio = (alpha + 0.0) / M2;
			double byChance = Math.pow(prio, num);
			chance *= byChance;
			double weight = post - prio;
			try {
				if (Debug.OFF) {
					// variant: weight by the full posterior
					weights += post;
					sum += post * i * Dataset.minScale;
				} else {
					// variant: only levels with positive evidence
					if (weight > 0) {
						weights += Math.abs(weight);
						sum += weight * i * Dataset.minScale;
					}
				}
			} catch (Exception e) {
				e.printStackTrace();
			}
		}
		double d = sum / weights;
		double similarity = 1 - d / (Dataset.maxScale - Dataset.minScale);

		if (Debug.OFF) chance = 0.0;
		double bias = 0.04; // empirical offset — presumably tuned; see paper
		if (Debug.ON) bias = 0;
		if (Debug.OFF) {
			return Math.max(similarity * (1 - chance) - bias, 0.0);
		} else {
			return Math.max(similarity - chance - bias, 0.0);
		}
	}

	/**
	 * Prior mass of difference level {@code i} derived from the rating-scale
	 * priors: level 0 sums squared priors (both users pick the same scale
	 * value); level i > 0 sums 2 * p_j * p_{j+i} over all pairs i apart.
	 *
	 * @param priors probability of each rating-scale value
	 * @param i      difference level (in units of {@code Dataset.minScale})
	 * @return prior evidence for the given difference level
	 */
	private static double prior(double[] priors, int i) {
		double evidence = 0;
		if (i == 0) {
			for (int j = 0; j < priors.length; j++)
				evidence += priors[j] * priors[j];
		} else {
			for (int j = 0; (i + j) < priors.length; j++)
				evidence += 2 * priors[j] * priors[j + i];
		}
		return evidence;
	}

	/**
	 * List-based convenience wrapper for
	 * {@link #pearsonSim(double[], double[], double[])}.
	 *
	 * @param a  user a's ratings
	 * @param b  user b's ratings
	 * @param ca confidences attached to user a's ratings
	 * @return confidence-aware PCC, or {@code Double.NaN} if either vector is
	 *         null, shorter than 2, or the lengths differ
	 */
	public static double pearsonSim(List<Double> a, List<Double> b, List<Double> ca) {
		if (a == null || b == null || a.size() < 2 || b.size() < 2 || a.size() != b.size()) return Double.NaN;
		double[] as = Lists.toArray(a);
		double[] bs = Lists.toArray(b);
		double[] cs = Lists.toArray(ca);
		return pearsonSim(as, bs, cs);
	}

	/**
	 * This example is used in my merge journal paper: compares
	 * confidence-aware PCC against plain PCC between a merged rating profile
	 * and a set of hand-crafted users u2..u9 over their co-rated items.
	 * Results are only logged — there are no assertions.
	 */
	@SuppressWarnings("unchecked")
	@Test
	public void kbs_examples() {
		Map<String, Double> merged_rates = new HashMap<>();
		Map<String, Double> merged_confs = new HashMap<>();

		// merged profile: item -> (rating, confidence)
		merged_rates.put("i1", 4.33);
		merged_confs.put("i1", 0.19);
		merged_rates.put("i2", 4.0);
		merged_confs.put("i2", 0.38);
		merged_rates.put("i3", 5.0);
		merged_confs.put("i3", 1.0);
		merged_rates.put("i4", 3.0);
		merged_confs.put("i4", 0.25);
		merged_rates.put("i5", 2.73);
		merged_confs.put("i5", 0.47);
		merged_rates.put("i8", 1.72);
		merged_confs.put("i8", 0.47);

		// hand-crafted example users; indices 0 and 1 are unused
		Map<String, Double>[] us = new HashMap[10];
		us[2] = new HashMap<>();
		us[2].put("i1", 5.0);
		us[2].put("i3", 4.0);
		us[2].put("i5", 3.0);
		us[2].put("i8", 2.0);

		us[3] = new HashMap<>();
		us[3].put("i2", 4.0);
		us[3].put("i4", 3.0);
		us[3].put("i8", 1.0);

		us[4] = new HashMap<>();
		us[4].put("i1", 3.0);
		us[4].put("i3", 5.0);
		us[4].put("i5", 2.0);

		us[5] = new HashMap<>();
		us[5].put("i2", 4.0);
		us[5].put("i3", 4.0);
		us[5].put("i5", 3.0);
		us[5].put("i8", 3.0);

		us[6] = new HashMap<>();
		us[6].put("i2", 3.0);
		us[6].put("i3", 3.0);
		us[6].put("i4", 5.0);
		us[6].put("i5", 5.0);

		// u7 rates only items absent from the merged profile (no overlap)
		us[7] = new HashMap<>();
		us[7].put("i7", 5.0);
		us[7].put("i9", 4.0);

		us[8] = new HashMap<>();
		us[8].put("i3", 4.0);
		us[8].put("i5", 2.0);
		us[8].put("i8", 1.0);

		us[9] = new HashMap<>();
		us[9].put("i3", 4.0);
		us[9].put("i5", 5.0);
		us[9].put("i8", 5.0);

		for (int i = 2; i < us.length; i++) {
			List<Double> u = new ArrayList<>();
			List<Double> v = new ArrayList<>();
			List<Double> c = new ArrayList<>();

			Map<String, Double> ux = us[i];
			// align the merged profile and user i over co-rated items
			for (String item : ux.keySet()) {
				if (merged_rates.containsKey(item)) {
					u.add(merged_rates.get(item));
					c.add(merged_confs.get(item));
					v.add(ux.get(item));
				}
			}
			double cpcc = SimUtils.pearsonSim(u, v, c);
			double pcc = Sims.pcc(u, v);
			Logs.debug("u" + i + " cpcc = " + cpcc);
			Logs.debug("u" + i + " pcc = " + pcc);
		}
	}

	/**
	 * Calculate the PIP similarity proposed by Hyung Jun Ahn [2008]:
	 *
	 * <i>A new similarity measure for collaborative filtering to alleviate the
	 * new user cold-starting problem</i>
	 *
	 * @param a     user a's ratings
	 * @param b     user b's ratings
	 * @param means the average ratings of items by all users
	 * @return PIP similarity (sum over items of proximity * impact *
	 *         popularity; unbounded above)
	 */
	public static double PIPSim(List<Double> a, List<Double> b, List<Double> means) {
		double score = 0;
		/*
		 * to compute median rating
		 *
		 * however, this is only useful for the number of rating scales is odd
		 * rather than even. But it is the way used in the paper.
		 */
		double r = (Dataset.maxScale + Dataset.minScale) / 2.0;

		for (int i = 0; i < a.size(); i++) {
			double r1 = a.get(i);
			double r2 = b.get(i);
			double ui = means.get(i);

			// the two ratings "agree" when both fall on the same side of the
			// scale midpoint
			double agreement = (r1 - r) * (r2 - r);
			boolean agree = false;
			if (agreement < 0) agree = false;
			else agree = true;

			/* compute proximity */
			double D = 0;
			if (agree) D = Math.abs(r1 - r2);
			else D = 2 * Math.abs(r1 - r2); // disagreement doubles the distance
			double prox = 2 * (Dataset.maxScale - Dataset.minScale) + 1 - D;
			double proximity = Math.pow(prox, 2);

			/* compute impact */
			double impact = (Math.abs(r1 - r) + 1) * (Math.abs(r2 - r) + 1);
			if (!agree) impact = 1.0 / impact;

			/* compute popularity */
			double val = (r1 - ui) * (r2 - ui);
			double popularity = 1.0;
			double pop = (r1 + r2) / 2.0 - ui;
			if (val > 0) popularity = 1 + Math.pow(pop, 2);

			/* aggregation PIP */
			score += proximity * impact * popularity;
		}
		return score;
	}

	/**
	 * Calculate confidence-aware PCC similarity: user u's deviations from a
	 * confidence-weighted mean are scaled per-item by the confidences before
	 * the usual Pearson normalization.
	 *
	 * @param u  user u's ratings
	 * @param v  user v's ratings
	 * @param uc user u's rating confidences
	 * @return confidence-aware PCC similarity, or {@code Double.NaN} if
	 *         either vector is null, shorter than 2, or the lengths differ
	 */
	public static double pearsonSim(double[] u, double[] v, double[] uc) {
		if (u == null || v == null || u.length < 2 || v.length < 2 || u.length != v.length) return Double.NaN;
		double meanA = Stats.weightedcMean(u, uc);
		double meanB = Stats.mean(v);

		double sumNum = 0.0, sumDen1 = 0.0, sumDen2 = 0.0;
		for (int i = 0; i < u.length; i++) {
			double ai = uc[i] * (u[i] - meanA);
			double bi = v[i] - meanB;

			sumNum += ai * bi;
			sumDen1 += ai * ai;
			// sumDen1 += uc[i] * (u[i] - meanA) * (u[i] - meanA);
			sumDen2 += bi * bi;
		}
		// NaN when either user has zero variance — callers appear to treat
		// NaN as "no similarity"
		return sumNum / (Math.sqrt(sumDen1) * Math.sqrt(sumDen2));
	}

	/**
	 * Calculate Spearman's Rank Correlation (SRC): Pearson correlation applied
	 * to pre-computed ranks.
	 *
	 * @param u user u's rating ranks (tied ratings get the average rank of
	 *          their spot)
	 * @param v user v's rating ranks
	 * @return Spearman's Rank Correlation (SRC); NaN for null inputs or
	 *         zero-variance ranks
	 */
	public static double SRCSim(List<Double> u, List<Double> v) {
		if (u == null || v == null) return Double.NaN;
		double meanU = Stats.mean(u);
		double meanV = Stats.mean(v);
		double sumNum = 0.0, sumDen1 = 0.0, sumDen2 = 0.0;
		for (int i = 0; i < u.size(); i++) {
			double ui = u.get(i) - meanU;
			double vi = v.get(i) - meanV;
			sumNum += ui * vi;
			sumDen1 += Math.pow(ui, 2);
			sumDen2 += Math.pow(vi, 2);
		}
		return sumNum / (Math.sqrt(sumDen1) * Math.sqrt(sumDen2));
	}

	/**
	 * Calculate Spearman's Rank Correlation (SRC), computing tie-averaged
	 * ranks from each user's full rating history first.
	 *
	 * @param a  user u's ratings (co-rated items)
	 * @param b  user v's ratings (co-rated items)
	 * @param ua user u's all ratings
	 * @param va user v's all ratings
	 * @return Spearman's Rank Correlation (SRC)
	 */
	public static double SRCSim(List<Double> a, List<Double> b, List<Double> ua, List<Double> va) {
		if (a == null || b == null) return Double.NaN;
		List<Double> usRatings = new ArrayList<>(ua);
		List<Double> vsRatings = new ArrayList<>(va);
		Collections.sort(usRatings);
		Collections.sort(vsRatings);

		// convert each co-rated rating to its average rank in the sorted
		// history (ties share the mean of their positions)
		List<Double> u = new ArrayList<>();
		List<Double> v = new ArrayList<>();
		for (Double ar : a) {
			double sum = 0;
			int count = 0;
			for (int i = 0; i < usRatings.size(); i++) {
				double usRating = usRatings.get(i);
				if (usRating == ar) {
					sum += (i + 1);
					count++;
				}
			}
			u.add(sum / count);
		}
		for (Double br : b) {
			double sum = 0;
			int count = 0;
			for (int i = 0; i < vsRatings.size(); i++) {
				double vsRating = vsRatings.get(i);
				if (vsRating == br) {
					sum += (i + 1);
					count++;
				}
			}
			v.add(sum / count);
		}

		// Pearson correlation over the rank vectors
		double meanU = Stats.mean(u);
		double meanV = Stats.mean(v);
		double sumNum = 0.0, sumDen1 = 0.0, sumDen2 = 0.0;
		for (int i = 0; i < u.size(); i++) {
			double ui = u.get(i) - meanU;
			double vi = v.get(i) - meanV;
			sumNum += ui * vi;
			sumDen1 += Math.pow(ui, 2);
			sumDen2 += Math.pow(vi, 2);
		}
		return sumNum / (Math.sqrt(sumDen1) * Math.sqrt(sumDen2));
	}

	/**
	 * Distance-based similarity: one minus the mean absolute rating
	 * difference normalized by the rating range.
	 *
	 * @param a user a's ratings
	 * @param b user b's ratings
	 * @return similarity in [0, 1], or NaN for null/empty/unequal-length input
	 */
	public static double distanceSim(List<Double> a, List<Double> b) {
		if (a == null || b == null || a.size() < 1 || b.size() < 1 || a.size() != b.size()) return Double.NaN;
		double maxRating = Dataset.maxScale, minRating = Dataset.minScale;
		double sumNum = 0.0, sumDen = 0.0;
		for (int i = 0; i < a.size(); i++) {
			sumNum += Math.abs(a.get(i) - b.get(i));
			sumDen += Math.abs(maxRating - minRating);
		}
		return 1 - sumNum / sumDen;
	}

	/**
	 * Confidence-aware distance similarity: per item, averages a normalized
	 * rating distance and a confidence distance (and, in the Debug.ON
	 * variant, an agreement weight based on the scale median).
	 *
	 * @param a  user a's ratings
	 * @param b  user b's ratings
	 * @param ac confidences of user a's ratings
	 * @param bc confidences of user b's ratings
	 * @return similarity, or NaN for null/empty/unequal-length rating vectors
	 */
	public static double distanceSim(List<Double> a, List<Double> b, List<Double> ac, List<Double> bc) {
		if (a == null || b == null || a.size() < 1 || b.size() < 1 || a.size() != b.size()) return Double.NaN;
		double sumNum = 0.0, sumDen = 0.0;
		for (int i = 0; i < a.size(); i++) {
			double ai = a.get(i), bi = b.get(i);
			double ca = ac.get(i), cb = bc.get(i);

			double dr = Math.abs(ai - bi);
			double dc = Math.abs(ca - cb);
			double dm = Dataset.maxScale - Dataset.minScale;
			if (Debug.ON) {
				// variant with a third term: median-side agreement weight
				double d = (ai - Dataset.median) * (bi - Dataset.median);
				double w = 1.0;
				if (d >= 0) w = 1.0 / (d + 1); // agreement
				else {
					d = -d;
					w = d / (d + 1);
				}
				sumNum += (w + dr / dm + dc / (dc + 1));
				sumDen += 3;
			} else {
				sumNum += dr / dm + dc / (dc + 1);
				sumDen += 2;
			}
		}
		return 1 - sumNum / sumDen;
	}

	/**
	 * Mean per-item distance-value similarity: averages
	 * {@code 1 - |a[i] - b[i]| / maxScale} over all items.
	 *
	 * @param a user a's ratings
	 * @param b user b's ratings
	 * @return similarity, or NaN for null/empty/unequal-length input
	 */
	public static double disValueSim(double[] a, double[] b) {
		if (a == null || b == null || a.length < 1 || b.length < 1 || a.length != b.length) return Double.NaN;
		double maxRating = Dataset.maxScale;
		double sum = 0.0;
		int count = 0;
		for (int i = 0; i < a.length; i++) {
			sum += 1 - Math.abs(a[i] - b[i]) / maxRating;
			count++;
		}
		return sum / count;
	}

	/** List-based convenience wrapper for {@link #disValueSim(double[], double[])}. */
	public static double disValueSim(List<Double> a, List<Double> b) {
		if (a == null || b == null || a.size() < 1 || b.size() < 1 || a.size() != b.size()) return Double.NaN;
		double[] as = Lists.toArray(a);
		double[] bs = Lists.toArray(b);
		return disValueSim(as, bs);
	}

	/**
	 * Calculate the cosine similarity between two vectors.
	 *
	 * @param a user a's ratings
	 * @param b user b's ratings
	 * @return cosine similarity; NaN for null/empty/unequal-length input or
	 *         when either vector has zero norm
	 */
	public static double cosineSim(double[] a, double[] b) {
		if (a == null || b == null || a.length < 1 || b.length < 1 || a.length != b.length) return Double.NaN;
		double sum = 0.0, sum_a = 0, sum_b = 0;
		for (int i = 0; i < a.length; i++) {
			sum += a[i] * b[i];
			sum_a += a[i] * a[i];
			sum_b += b[i] * b[i];
		}
		double val = Math.sqrt(sum_a) * Math.sqrt(sum_b);
		return sum / val;
	}

	/**
	 * Calculate the SM (singularities measure) similarity proposed by
	 * Bobadilla et al. [2012]: <i>A collaborative filtering similarity
	 * measure based on singularities</i>
	 *
	 * @param a  user a's ratings
	 * @param b  user b's ratings
	 * @param sp per-item positive-singularity values
	 * @param sn per-item negative-singularity values
	 * @return SM similarity (average over the three agreement cases),
	 *         or NaN when there are no co-rated items
	 */
	public static double SMSim(List<Double> a, List<Double> b, List<Double> sp, List<Double> sn) {
		double r = Dataset.median;
		double sumA = 0, sumB = 0, sumC = 0;
		int countA = 0, countB = 0, countC = 0;
		for (int i = 0; i < a.size(); i++) {
			double ai = a.get(i);
			double bi = b.get(i);
			double pi = sp.get(i);
			double ni = sn.get(i);

			// ratings normalized to [0, 1] by the max scale
			double ri = ai / Dataset.maxScale;
			double rj = bi / Dataset.maxScale;
			if (ai > r && bi > r) {
				// A: positive agreement
				countA++;
				sumA += (1 - Math.pow(ri - rj, 2)) * Math.pow(pi, 2);
			} else if (ai <= r && bi <= r) {
				// B: negative agreement
				countB++;
				sumB += (1 - Math.pow(ri - rj, 2)) * Math.pow(ni, 2);
			} else {
				// C: disagreement
				countC++;
				sumC += (1 - Math.pow(ri - rj, 2)) * pi * ni;
			}
		}
		double score = 0;
		if (countA > 0) score += sumA / countA;
		if (countB > 0) score += sumB / countB;
		if (countC > 0) score += sumC / countC;

		if (countA + countB + countC == 0) return Double.NaN;
		else return score / 3.0;
	}

	/**
	 * Implement Weng's approach: a weighted-kappa-style agreement measure
	 * over a fixed 5x5 rating confusion matrix.
	 *
	 * <p>NOTE(review): hard-coded to integer ratings 1..5 — ratings are
	 * truncated via {@code intValue()} and index a 5x5 matrix, so other
	 * scales would throw or misbehave. A column with zero marginal C[j]
	 * yields a division by zero (Infinity/NaN); kept as-is to match the
	 * published formulation — confirm against the source paper.
	 *
	 * @param a user a's ratings (values in 1..5)
	 * @param b user b's ratings (values in 1..5)
	 * @return kappa-style similarity, or NaN for null/empty/unequal-length
	 *         input
	 */
	public static double kappa2Similarity(List<Double> a, List<Double> b) {
		if (a == null || b == null || a.size() < 1 || b.size() < 1 || a.size() != b.size()) return Double.NaN;

		// linear disagreement weights: 1 on the diagonal, decaying by 0.25
		// per step of rating difference
		double weight[][] = { { 1.00, 0.75, 0.50, 0.25, 0.00 }, { 0.75, 1.00, 0.75, 0.50, 0.25 },
				{ 0.50, 0.75, 1.00, 0.75, 0.50 }, { 0.25, 0.50, 0.75, 1.00, 0.75 },
				{ 0.00, 0.25, 0.50, 0.75, 1.00 } };
		double data[][] = { { 0.0, 0.0, 0.0, 0.0, 0.0 }, { 0.0, 0.0, 0.0, 0.0, 0.0 }, { 0.0, 0.0, 0.0, 0.0, 0.0 },
				{ 0.0, 0.0, 0.0, 0.0, 0.0 }, { 0.0, 0.0, 0.0, 0.0, 0.0 } };
		RealMatrix weightMatrix = new Array2DRowRealMatrix(weight);
		RealMatrix observationMatrix = new Array2DRowRealMatrix(data);
		int R[] = new int[5];
		int C[] = new int[5];
		int size = a.size();

		// tally the observed (a-rating, b-rating) confusion matrix
		for (int i = 0; i < size; i++) {
			int aRating = a.get(i).intValue();
			int bRating = b.get(i).intValue();
			int idx = aRating - 1;
			int jdx = bRating - 1;
			double count = observationMatrix.getRow(idx)[jdx];
			count += 1;
			observationMatrix.setEntry(idx, jdx, count);
		}

		// row marginals (user a) and column marginals (user b)
		for (int i = 0; i < 5; i++) {
			R[i] = 0;
			for (int j = 0; j < 5; j++)
				R[i] += (int) observationMatrix.getRow(i)[j];
		}
		for (int j = 0; j < 5; j++) {
			C[j] = 0;
			for (int i = 0; i < 5; i++)
				C[j] += (int) observationMatrix.getColumn(j)[i];
		}

		double similarity = 0.0;
		double observation = 0.0, expectation = 0.0;
		for (int i = 0; i < 5; i++) {
			for (int j = 0; j < 5; j++) {
				observation += weightMatrix.getRow(i)[j] * observationMatrix.getRow(i)[j] / C[j];
			}
		}
		observation *= size;
		for (int i = 0; i < 5; i++) {
			expectation += R[i] * R[i];
		}
		similarity = (observation - expectation) / (size * size - expectation);
		return similarity;
	}

	/**
	 * Compare the trends of different similarity methods: for each vector
	 * length 1..items, draws {@code size} random rating-vector pairs,
	 * computes the chosen similarity, and writes the per-length mean and
	 * standard deviation to files on the desktop. Commented-out lines are
	 * the other measures that were compared in earlier runs.
	 *
	 * @throws Exception on file-writing failure
	 */
	@Test
	// @Ignore
	public void ijcai_trends() throws Exception {
		Dataset.minScale = 1;
		Dataset.maxScale = 5;
		Dataset.scaleSize = 5;
		int size = 1_000_000;
		int items = 20;
		// double[] pccs = new double[size];
		// double[] coss = new double[size];
		List<Double> bys = new ArrayList<>();

		String dirPath = Systems.getDesktop();
		// List<Double> ps = new ArrayList<>();
		// List<Double> cs = new ArrayList<>();
		List<Double> bs = new ArrayList<>();

		// List<Double> pstd = new ArrayList<>();
		// List<Double> cstd = new ArrayList<>();
		List<Double> bstd = new ArrayList<>();
		/*
		 * FileUtils.deleteFile(dirPath + "pcc.txt");
		 * FileUtils.deleteFile(dirPath + "cos.txt");
		 * FileUtils.deleteFile(dirPath + "by.txt");
		 * FileUtils.deleteFile(dirPath + "pcc-std.txt");
		 * FileUtils.deleteFile(dirPath + "cos-std.txt");
		 * FileUtils.deleteFile(dirPath + "by-std.txt");
		 */
		for (int i = 1; i < items + 1; i++) {
			System.out.println("Progress = " + i + "/" + items);
			bys.clear();
			for (int j = 0; j < size; j++) {
				// random rating vectors of length i, values in [1, 6)
				double[] a = Randoms.doubles(1, 6, i);
				double[] b = Randoms.doubles(1, 6, i);
				List<Double> u = Lists.toList(a);
				List<Double> v = Lists.toList(b);

				// double pcc = SimUtils.pearsonSim(a, b);
				// double cos = SimUtils.cosineSim(a, b);
				double by = SimUtils.ijcai_bs_examples(u, v, true);
				// double by = SimUtils.MSDSim(MathUtils.array2Col(a),
				// MathUtils.array2Col(b));
				// double by = SimUtils.CPCSim(u, v, 3);

				// pccs[j] = pcc;
				// coss[j] = cos;
				bys.add(by);
			}
			// double mp = MathUtils.mean(pccs);
			// mp = (1 + mp) / 2;
			// double mc = MathUtils.mean(coss);
			double mb = Stats.mean(bys);
			//mb = (1 + mb) / 2;

			// ps.add(mp);
			// cs.add(mc);
			bs.add(mb);

			// replace NaN samples by the mean before computing the sd
			for (int j = 0; j < size; j++) {
				// if (Double.isNaN(pccs[j])) pccs[j] = mp;
				if (Double.isNaN(bys.get(j))) bys.set(j, mb);
			}

			// double dp = MathUtils.sd(pccs);
			// double dc = MathUtils.sd(coss);
			double db = Stats.sd(bys);

			// pstd.add(dp);
			// cstd.add(dc);
			bstd.add(db);
		}
		// FileUtils.writeCollection(dirPath + "pcc.txt", ps, true);
		// FileUtils.writeCollection(dirPath + "cos.txt", cs, true);
		FileIO.writeList(dirPath + "cpc.txt", bs, null, true);

		// FileUtils.writeCollection(dirPath + "pcc-std.txt", pstd, true);
		// FileUtils.writeCollection(dirPath + "cos-std.txt", cstd, true);
		FileIO.writeList(dirPath + "cpc-std.txt", bstd, null, true);
	}

	/**
	 * Logs PCC/COS/BS comparisons for either random vectors (Debug.OFF
	 * branch) or a fixed set of hand-picked example pairs — the worked
	 * examples from the IJCAI paper. Output is logged only; no assertions.
	 *
	 * @throws Exception propagated from the similarity computations
	 */
	@Test
	public void ijcai_examples() throws Exception {
		Dataset.minScale = 1;
		Dataset.maxScale = 5;
		Dataset.scaleSize = 5;

		double[] a, b;
		if (Debug.OFF) {
			// randomized sweep over vector lengths 1..items
			int size = 1_00;
			int items = 10;
			for (int i = 1; i < items + 1; i++) {
				for (int j = 0; j < size; j++) {
					a = Randoms.doubles(1, 6, i);
					b = Randoms.doubles(1, 6, i);
					singleComparison(a, b);
				}
			}
		} else {
			// fixed example pairs
			a = new double[] { 1, 1, 1 };
			b = new double[] { 1, 1, 1 };
			singleComparison(a, b);

			a = new double[] { 1, 1, 1 };
			b = new double[] { 2, 2, 2 };
			singleComparison(a, b);

			a = new double[] { 1, 1, 1 };
			b = new double[] { 5, 5, 5 };
			singleComparison(a, b);

			a = new double[] { 1, 5, 1 };
			b = new double[] { 5, 1, 5 };
			singleComparison(a, b);

			a = new double[] { 2, 4, 4 };
			b = new double[] { 4, 2, 2 };
			singleComparison(a, b);

			a = new double[] { 2, 4, 4, 1 };
			b = new double[] { 4, 2, 2, 5 };
			singleComparison(a, b);

			a = new double[] { 1 };
			b = new double[] { 1 };
			singleComparison(a, b);

			a = new double[] { 1 };
			b = new double[] { 2 };
			singleComparison(a, b);

			a = new double[] { 1 };
			b = new double[] { 5 };
			singleComparison(a, b);

			a = new double[] { 1, 5 };
			b = new double[] { 5, 1 };
			singleComparison(a, b);

			a = new double[] { 1, 3 };
			b = new double[] { 4, 2 };
			singleComparison(a, b);

			a = new double[] { 5, 1 };
			b = new double[] { 5, 4 };
			singleComparison(a, b);

			a = new double[] { 4, 3 };
			b = new double[] { 3, 1 };
			singleComparison(a, b);
		}
	}

	/**
	 * Logs PCC, cosine, and both Bayesian-similarity variants (with and
	 * without chance removal) for one pair of rating vectors.
	 *
	 * @param a1 user a's ratings
	 * @param b1 user b's ratings
	 */
	private static void singleComparison(double[] a1, double[] b1) {
		List<Double> u = Lists.toList(a1);
		List<Double> v = Lists.toList(b1);

		float pcc = (float) Sims.pcc(u, v);
		float cos = (float) Sims.cos(u, v);
		float bs = (float) SimUtils.ijcai_bs_examples(u, v, true);
		float bs2 = (float) SimUtils.ijcai_bs_examples(u, v, false);

		log.info("u = {}, v = {}", u, v);
		log.info("PCC = {}, COS = {}, BS = {}, BS-1 = {}", new Object[] { pcc, cos, bs, bs2 });
	}

	/**
	 * Weighted Cohen's-kappa similarity over a fixed 5x5 rating confusion
	 * matrix: observed weighted agreement against the agreement expected
	 * from the row/column marginals.
	 *
	 * <p>NOTE(review): like {@link #kappa2Similarity(List, List)}, this is
	 * hard-coded to integer ratings 1..5.
	 *
	 * @param a user a's ratings (values in 1..5)
	 * @param b user b's ratings (values in 1..5)
	 * @return weighted kappa, or NaN for null/empty/unequal-length input
	 */
	public static double kappaSim(List<Double> a, List<Double> b) {
		if (a == null || b == null || a.size() < 1 || b.size() < 1 || a.size() != b.size()) return Double.NaN;

		double weight[][] = { { 1.00, 0.75, 0.50, 0.25, 0.00 }, { 0.75, 1.00, 0.75, 0.50, 0.25 },
				{ 0.50, 0.75, 1.00, 0.75, 0.50 }, { 0.25, 0.50, 0.75, 1.00, 0.75 },
				{ 0.00, 0.25, 0.50, 0.75, 1.00 } };
		double data[][] = { { 0.0, 0.0, 0.0, 0.0, 0.0 }, { 0.0, 0.0, 0.0, 0.0, 0.0 }, { 0.0, 0.0, 0.0, 0.0, 0.0 },
				{ 0.0, 0.0, 0.0, 0.0, 0.0 }, { 0.0, 0.0, 0.0, 0.0, 0.0 } };
		RealMatrix weightMatrix = new Array2DRowRealMatrix(weight);
		RealMatrix observationMatrix = new Array2DRowRealMatrix(data);
		RealMatrix expectationMatrix = new Array2DRowRealMatrix(data);
		int R[] = new int[5];
		int C[] = new int[5];
		int size = a.size();

		// tally the observed (a-rating, b-rating) confusion matrix
		for (int i = 0; i < size; i++) {
			int aRating = a.get(i).intValue();
			int bRating = b.get(i).intValue();
			int idx = aRating - 1;
			int jdx = bRating - 1;
			double count = observationMatrix.getRow(idx)[jdx];
			count += 1;
			observationMatrix.setEntry(idx, jdx, count);
		}

		// row and column marginals
		for (int i = 0; i < 5; i++) {
			R[i] = 0;
			for (int j = 0; j < 5; j++)
				R[i] += (int) observationMatrix.getRow(i)[j];
		}
		for (int j = 0; j < 5; j++) {
			C[j] = 0;
			for (int i = 0; i < 5; i++)
				C[j] += (int) observationMatrix.getColumn(j)[i];
		}

		// expected counts under independence of the marginals
		for (int i = 0; i < 5; i++) {
			for (int j = 0; j < 5; j++) {
				double value = R[i] * C[j] / (size + 0.0);
				expectationMatrix.setEntry(i, j, value);
			}
		}

		double similarity = 0.0;
		double observation = 0.0, expectation = 0.0;
		for (int i = 0; i < 5; i++) {
			for (int j = 0; j < 5; j++) {
				observation += weightMatrix.getRow(i)[j] * observationMatrix.getRow(i)[j];
				expectation += weightMatrix.getRow(i)[j] * expectationMatrix.getRow(i)[j];
			}
		}
		similarity = (observation - expectation) / (size - expectation);
		return similarity;
	}

	/*
	 * Below are a list of significance computation methods
	 */

	/** Overlap significance: common ratings over the smaller profile size. */
	public static double overlapSig(int aSize, int uSize, int auCommon) {
		return auCommon / (Math.min(aSize, uSize) + 0.0);
	}

	/** Significance capped at 1 once {@code size} reaches {@code gamma}. */
	public static double aSig(int size, int gamma) {
		return Math.min(size, gamma) / (gamma + 0.0);
	}

	/** Saturating significance: size / (size + gamma), asymptotically 1. */
	public static double bSig(int size, int gamma) {
		return size / (size + gamma + 0.0);
	}

	/**
	 * Harmonic mean of the {@code aSig}-style weight and the similarity.
	 *
	 * @param size       number of co-rated items
	 * @param gamma      saturation threshold
	 * @param similarity raw similarity value
	 * @return significance-weighted similarity
	 */
	public static double cSig(int size, int gamma, double similarity) {
		double SW = (size < gamma) ? size / (0.0 + gamma) : 1.0;
		return 2 * SW * similarity / (SW + similarity);
	}

	/**
	 * Confidence-based significance delegated to
	 * {@code TrustUtils.confidence}.
	 *
	 * @param r presumably the count of positive evidence — confirm against
	 *          TrustUtils
	 * @param s presumably the count of negative evidence — confirm against
	 *          TrustUtils
	 */
	public static double dSig(double r, double s) throws Exception {
		return TrustUtils.confidence(r, s);
	}

	/** Alias for {@link #overlapSig(int, int, int)}. */
	public static double eSig(int aSize, int uSize, int auCommon) {
		return overlapSig(aSize, uSize, auCommon);
	}

	/** Harmonic mean of the overlap significance and the similarity. */
	public static double fSig(int aSize, int uSize, int auCommon, double similarity) {
		double os = overlapSig(aSize, uSize, auCommon);
		return 2 * os * similarity / (os + similarity);
	}
}