package pearsonCorrelationScore;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
/**
* @author shenchao
*
* 皮尔逊相关度评价
*
* 以《集体智慧编程》一书用户评价相似度数据集做测试
*/
public class PearsonCorrelationScore {
private Map<String, Map<String, double>> dataset = null;
public PearsonCorrelationScore() {
initDataSet();
}
/**
* 初始化数据集
*/
private void initDataSet() {
dataset = new HashMap<String, Map<String, double>>();
// 初始化Lisa Rose 数据集
Map<String, double> roseMap = new HashMap<String, double>();
roseMap.put("Lady in the water", 2.5);
roseMap.put("Snakes on a Plane", 3.5);
roseMap.put("Just My Luck", 3.0);
roseMap.put("Superman Returns", 3.5);
roseMap.put("You, Me and Dupree", 2.5);
roseMap.put("The Night Listener", 3.0);
dataset.put("Lisa Rose", roseMap);
// 初始化Jack Matthews 数据集
Map<String, double> jackMap = new HashMap<String, double>();
jackMap.put("Lady in the water", 3.0);
jackMap.put("Snakes on a Plane", 4.0);
jackMap.put("Superman Returns", 5.0);
jackMap.put("You, Me and Dupree", 3.5);
jackMap.put("The Night Listener", 3.0);
dataset.put("Jack Matthews", jackMap);
// 初始化Jack Matthews 数据集
Map<String, double> geneMap = new HashMap<String, double>();
geneMap.put("Lady in the water", 3.0);
geneMap.put("Snakes on a Plane", 3.5);
geneMap.put("Just My Luck", 1.5);
geneMap.put("Superman Returns", 5.0);
geneMap.put("You, Me and Dupree", 3.5);
geneMap.put("The Night Listener", 3.0);
dataset.put("Gene Seymour", geneMap);
}
public Map<String, Map<String, double>> getDataSet() {
return dataset;
}
/**
* @param person1
* name
* @param person2
* name
* @return 皮尔逊相关度值
*/
public double sim_pearson(String person1, String person2) {
// 找出双方都评论过的电影,(皮尔逊算法要求)
List<String> list = new ArrayList<String>();
for (Entry<String, double> p1 : dataset.get(person1).entrySet()) {
if (dataset.get(person2).containsKey(p1.getKey())) {
list.add(p1.getKey());
}
}
double sumX = 0.0;
double sumY = 0.0;
double sumX_Sq = 0.0;
double sumY_Sq = 0.0;
double sumXY = 0.0;
int N = list.size();
for (String name : list) {
Map<String, double> p1Map = dataset.get(person1);
Map<String, double> p2Map = dataset.get(person2);
sumX += p1Map.get(name);
sumY += p2Map.get(name);
sumX_Sq += Math.pow(p1Map.get(name), 2);
sumY_Sq += Math.pow(p2Map.get(name), 2);
sumXY += p1Map.get(name) * p2Map.get(name);
}
double numerator = sumXY - sumX * sumY / N;
double denominator = Math.sqrt((sumX_Sq - sumX * sumX / N)
* (sumY_Sq - sumY * sumY / N));
// 分母不能为0
if (denominator == 0) {
return 0;
}
return numerator / denominator;
}
public static void main(String[] args) {
PearsonCorrelationScore pearsonCorrelationScore = new PearsonCorrelationScore();
System.out.println(pearsonCorrelationScore.sim_pearson("Lisa Rose",
"Jack Matthews"));
}
}
机械节能产品生产企业官网模板...
大气智能家居家具装修装饰类企业通用网站模板...
礼品公司网站模板
宽屏简约大气婚纱摄影影楼模板...
蓝白WAP手机综合医院类整站源码(独立后台)...苏ICP备2024110244号-2 苏公网安备32050702011978号 增值电信业务经营许可证编号:苏B2-20251499 | Copyright 2018 - 2025 源码网商城 (www.ymwmall.com) 版权所有