# Revised version: the data is now loaded from a CSV file with pandas.
import numpy as np
import pandas as pd
def pearson_correlation(x, y):
    """
    Compute the Pearson correlation coefficient between two vectors.

    :param x: first vector (any array-like of numbers)
    :param y: second vector (any array-like of numbers)
    :return: the Pearson correlation coefficient as a float, or 0.0 when
        either vector has zero variance (the coefficient is undefined then)
    """
    # Coerce to float arrays so lists and object-dtype columns behave the
    # same as plain numeric ndarrays.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # Deviations from the mean.
    x_dev = x - np.mean(x)
    y_dev = y - np.mean(y)

    numerator = np.sum(x_dev * y_dev)
    # Take the square roots *before* multiplying: the product of the two
    # sums of squares can underflow to 0 or overflow to inf even when each
    # sum is representable, which would wrongly yield a correlation of 0.
    denominator = np.sqrt(np.sum(x_dev ** 2)) * np.sqrt(np.sum(y_dev ** 2))

    # A constant vector has no variance; report 0 instead of dividing by 0.
    if denominator == 0:
        return 0.0
    return numerator / denominator
# Load the dataset. A raw string keeps the Windows path's backslashes
# literal; the non-raw form relied on invalid escape sequences (\l, \m),
# which newer Python versions warn about.
data = pd.read_csv(r'E:\lunwenpython\measures_v2.csv')

# Split into features (every column except the last) and label (last column).
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Report the Pearson correlation between each feature column and the label.
# Iterating over X.T yields one feature column per step.
for i, column in enumerate(X.T):
    corr = pearson_correlation(column, y)
    print("Feature %d correlation with label: %.4f" % (i + 1, corr))