當前位置：首頁 > 编程语言 > python >内容正文

python

数据科学与python语言实验——NumPy数值计算基础

發布時間：2023/12/4 python 24 豆豆

生活随笔收集整理的這篇文章主要介紹了数据科学与python语言实验——NumPy数值计算基础，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

NumPy數值計算基礎

實驗數據：
鏈接：https://pan.baidu.com/s/1-E2ShVTdI0X5lwDtMLFFsQ
提取碼：0929

代碼實現：

之前不會的地方：

1．讀取文件
使用numpy內置的loadtxt()函數以及這個函數的參數：fname：要讀取的文件，dtype：讀取后的數據類型，delimiter：讀取文件中的數據分隔符

#參數列表：fname要讀取的文件，dtype讀取后的數據類型，delimiter讀取文件中數據的分隔符self.data=np.loadtxt(fname=path,dtype=str,delimiter=',')

2．數據的切分索引

# Capture the header (label) row first; once the first row is dropped,
# row 0 would be the first data record rather than the labels.
self.colindex = self.data[0, :]
# Drop the first row (the label row) so only data records remain.
self.data = self.data[1:, :]

3．將數據中年份和季度帶小數的浮點數轉化為不帶小數的整數，使用了numpy中的np.char.replace()函數

#需要將數據中的年份和季度中的小數部分去掉 self.data[:,:2]=np.char.replace(self.data[:,:2],'.0','') #data[:,:2]是數據中的前兩列

4.查找滿足條件的行索引

index = np.where((self.data[:, 0] == year) & (self.data[:, 1] == quarter)) # 使用where方法返回符合給定年份和季度的行索引

5.實現變量的展平

import numpy as np

fp = './macrodata.csv'  # input file to read
op = './test.csv'  # output file for the flattened table


class processdata:
    """Load a comma-separated table as strings and query it by year/quarter.

    The first row of the input is kept in ``colindex`` (the column labels);
    the remaining rows are kept in ``data`` as a 2-D array of strings.
    """

    # Maps column names to their column index in ``data``.
    colmap = {'year': 0, 'quarter': 1, 'gdp': 2, 'realcons': 3, 'realinv': 4,
              'realgovt': 5, 'realdpi': 6, 'cpi': 7, 'm1': 8, 'tbilrate': 9,
              'unemp': 10, 'pop': 11, 'infl': 12, 'realint': 13}

    def __init__(self, path):
        """Read ``path`` with ``np.loadtxt`` and split header from records.

        Args:
            path: Anything ``np.loadtxt`` accepts as ``fname`` (a file path,
                an open file, or a sequence of text lines).
        """
        # fname: what to read, dtype: element type after reading,
        # delimiter: field separator. ndmin=2 keeps the table 2-D even when
        # the input has only a single line.
        table = np.loadtxt(fname=path, dtype=str, delimiter=',', ndmin=2)
        # Take the label row BEFORE dropping it; doing it afterwards would
        # store the first data record instead of the labels.
        self.colindex = table[0, :]
        self.data = table[1:, :]
        # Strip the '.0' suffix from the first two columns (year, quarter)
        # so they compare equal to str(int(...)) in the lookups below.
        self.data[:, :2] = np.char.replace(self.data[:, :2], '.0', '')

    def _find_row(self, year, quarter):
        """Locate the first row matching ``year`` and ``quarter``.

        Both arguments must already be strings in the same form as the first
        two columns of ``data``.

        Returns:
            ``(row, None)`` when a row matches, otherwise ``(None, message)``
            where ``message`` says whether the year or the quarter is missing.
        """
        year_mask = self.data[:, 0] == year
        rows = np.flatnonzero(year_mask & (self.data[:, 1] == quarter))
        if rows.size:
            return int(rows[0]), None
        if not year_mask.any():  # no row at all for that year
            return None, 'The given year ' + year + ' is out of range...'
        # The year exists but this quarter does not.
        return None, ('The given quarter ' + quarter
                      + ' is not found for the given year ' + year + '...')

    def lookupdata(self, year, quarter, col):
        """Print and return the value of column ``col`` at a point in time.

        Args:
            year: Year to look up (converted with ``int``).
            quarter: Quarter to look up (converted with ``int``).
            col: A key of ``colmap``, e.g. ``'gdp'`` or ``'pop'``.

        Returns:
            The stored string value on success; otherwise the error message
            that was printed.
        """
        year = str(int(year))
        quarter = str(int(quarter))
        result = 'The ' + col + ' in quarter ' + quarter + ', year ' + year + ' is '
        row, error = self._find_row(year, quarter)
        if error is not None:
            print(error)
            return error
        value = self.data[row][processdata.colmap[col]]
        print(result + str(value) + '...')
        return value

    def calPerCapitaGDP(self, year, quarter):
        """Print and return gdp divided by pop for the given year and quarter.

        Returns:
            The ratio as a ``float`` on success; otherwise the error message
            that was printed.
        """
        year = str(int(year))
        quarter = str(int(quarter))
        result = 'The Per Capita GDP in quarter ' + quarter + ', year ' + year + ' is '
        row, error = self._find_row(year, quarter)
        if error is not None:
            print(error)
            return error
        # Use the name-to-index map rather than hard-coded column numbers.
        gdp = float(self.data[row][processdata.colmap['gdp']])
        pop = float(self.data[row][processdata.colmap['pop']])
        ratio = gdp / pop
        print(result + str(ratio) + '...')
        return ratio

    def flattendata(self, col=('gdp', 'pop')):
        """Flatten the selected columns into a long-format table.

        Args:
            col: Column names (keys of ``colmap``) to flatten.

        Returns:
            A 2-D string array whose first row is the header
            ``year, quarter, values, columns number`` and which then holds one
            row per (record, selected column) pair.
        """
        collist = [processdata.colmap[name] for name in col]  # names -> indices
        n_rows = self.data.shape[0]
        # Row-major flatten yields rec0/col0, rec0/col1, rec1/col0, ...
        values = self.data[:, collist].flatten().reshape(-1, 1)
        # Repeat each (year, quarter) pair once per selected column so it
        # lines up with the flattened values above.
        indexdata = self.data[:, [0, 1]].repeat(len(collist), axis=0)
        # Tile the column numbers in the same order as the flattened values;
        # build them as strings so the stacked table has a single dtype.
        newcol = np.array(collist * n_rows, dtype=str).reshape(-1, 1)
        body = np.hstack((indexdata, values, newcol))
        newcolindex = np.array(['year', 'quarter', 'values', 'columns number'])
        return np.vstack((newcolindex, body))  # prepend the new header row

    def printdata(self):
        """Print the shape of the gdp column stacked on top of the pop column."""
        gdp = self.data[:, processdata.colmap['gdp']].reshape(-1, 1)
        pop = self.data[:, processdata.colmap['pop']].reshape(-1, 1)
        data1 = np.vstack((gdp, pop))
        print(data1.shape)


def main():
    """Run the lab's queries on ``fp`` and save the flattened table to ``op``."""
    prdata = processdata(fp)
    print('(a):')
    prdata.lookupdata(2000, 1, 'gdp')
    print('(b):')
    prdata.lookupdata(2000, 1, 'pop')
    print('(c):')
    prdata.lookupdata(2020, 1, 'gdp')
    print('(d):')
    prdata.lookupdata(2000, 6, 'pop')
    print('(e):')
    prdata.calPerCapitaGDP(2000, 1)
    print('(f):')
    flatdata = prdata.flattendata()
    np.savetxt(op, flatdata, delimiter=',', fmt='%s')  # save the flattened result
    print('End!')
    input('按回車鍵結束')


if __name__ == '__main__':
    main()

總結

以上是生活随笔為你收集整理的数据科学与python语言实验——NumPy数值计算基础的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： 618维权丨淘宝一网店被投诉挂名牌卖仿鞋
下一篇：数据科学与python语言——Matpl