# 先利用 xlrd_read_data 函数读取数据,然后用 pca 函数降维;降维后的数据由 pca 返回(函数内部变量名为 lowDDataMat),在脚本中保存为 data_pca。
# NOTE(review): this code relies on `xlrd`, `numpy as np` and
# `pandas.DataFrame` being imported elsewhere in the file -- the imports are
# not visible in this section; confirm they exist.


def xlrd_read_data(path):
    """Load the first worksheet of an Excel workbook into a float array.

    Args:
        path: Location of the workbook; passed unchanged to
            ``xlrd.open_workbook``.

    Returns:
        A ``numpy.ndarray`` of shape ``(nrows, ncols)`` holding the cell
        values of the first sheet.

    Raises:
        ValueError: If a cell value cannot be converted to ``float`` when it
            is stored into the (float) result array.
    """
    # NOTE(review): xlrd >= 2.0 only reads legacy ``.xls`` workbooks, while
    # the script below passes an ``.xlsx`` path -- confirm the installed xlrd
    # version supports it.
    table = xlrd.open_workbook(path).sheets()[0]  # first sheet only
    row = table.nrows  # number of rows
    col = table.ncols  # number of columns
    # Start from an all-zero (row x col) array and fill it column by column.
    datamatrix = np.zeros((row, col))
    for x in range(col):
        datamatrix[:, x] = np.asarray(table.col_values(x))
    return datamatrix


def pca(dataMat, topNfeat=999999):
    """Project data onto its leading principal components.

    Args:
        dataMat: 2-D array-like, one observation per row and one feature per
            column.
        topNfeat: Number of principal components to keep. Values larger than
            the number of features keep every component.

    Returns:
        A tuple ``(lowDDataMat, reconMat)`` of ``numpy.matrix`` objects:
        ``lowDDataMat`` is the mean-centred data expressed in the kept
        components (ordered by decreasing eigenvalue), and ``reconMat`` is
        the data mapped back into the original feature space from those
        components.
    """
    samples = np.asarray(dataMat)
    meanVals = samples.mean(axis=0)
    meanRemoved = samples - meanVals
    # np.cov collapses to a 0-d value for a single feature; eigh needs 2-D.
    covMat = np.atleast_2d(np.cov(meanRemoved, rowvar=False))
    # The covariance matrix is symmetric, so use the symmetric solver: it
    # always yields real eigenvalues/eigenvectors, whereas the general
    # solver may return complex values caused by round-off.
    eigVals, eigVects = np.linalg.eigh(covMat)
    # Indices of the topNfeat largest eigenvalues, largest first.
    eigValInd = np.argsort(eigVals)[:-(topNfeat + 1):-1]
    redEigVects = eigVects[:, eigValInd]
    lowDDataMat = np.dot(meanRemoved, redEigVects)
    reconMat = np.dot(lowDDataMat, redEigVects.T) + meanVals
    # Results are returned as numpy.matrix, as callers received before.
    return np.asmatrix(lowDDataMat), np.asmatrix(reconMat)


if __name__ == "__main__":
    # Read the indicator sheet, keep 5 principal components and save them.
    path = r'c:\Users\Liugengxin\Desktop\数学建模\2018E模拟\Data_For_Oneyear\16indicators.xlsx'
    data = xlrd_read_data(path)
    data_pca, recon = pca(data, 5)
    DataFrame(data_pca).to_excel(r'c:\Users\Liugengxin\Desktop\testpca.xlsx')