當前位置：首頁 > 人工智能 > pytorch >内容正文

pytorch

深度学习-Tensorflow2.2-图像处理{10}-图像定位/优化/图运算/及GPU优化等-22

發布時間：2024/9/15 pytorch 30 豆豆

生活随笔收集整理的這篇文章主要介紹了深度学习-Tensorflow2.2-图像处理{10}-图像定位/优化/图运算/及GPU优化等-22，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

常見圖像處理的任務

圖像定位

圖像定位網絡架構

Oxford-IIIT數據集

代碼

import tensorflow as tf import matplotlib.pyplot as plt %matplotlib inline from lxml import etree import numpy as np import glob from matplotlib.patches import Rectangle import os gpus = tf.config.experimental.list_physical_devices('GPU') if gpus:tf.config.experimental.set_virtual_device_configuration(gpus[0],[tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2000)]) gpu_ok = tf.test.is_gpu_available() print("tf version:", tf.__version__) print("use GPU", gpu_ok) # 判斷是否使用gpu進行訓練

圖像定位示例

# Read one sample picture from disk and decode the JPEG bytes into a tensor.
sample_bytes = tf.io.read_file("F:/py/ziliao/數據集/圖片定位與分割數據集/images/Abyssinian_1.jpg")
img = tf.image.decode_jpeg(sample_bytes)
img.shape

# Display the decoded picture.
plt.imshow(img)

# Read the annotation file that stores the head bounding box of the sample
# image. The context manager closes the file as soon as its text is read.
with open("F:/py/ziliao/數據集/圖片定位與分割數據集/annotations/xmls/Abyssinian_1.xml") as xml_file:
    xml = xml_file.read()
sel = etree.HTML(xml)

# Image width and height as recorded in the annotation.
width = int(sel.xpath('//size/width/text()')[0])
height = int(sel.xpath('//size/height/text()')[0])

# Corners of the annotated head bounding box.
xmin = int(sel.xpath('//bndbox/xmin/text()')[0])
xmax = int(sel.xpath('//bndbox/xmax/text()')[0])
ymin = int(sel.xpath('//bndbox/ymin/text()')[0])
ymax = int(sel.xpath('//bndbox/ymax/text()')[0])
print((width,height),(xmin,xmax,ymin,ymax))

# Draw the annotated bounding box on top of the picture.
plt.imshow(img)

# Rectangle takes the (x, y) anchor followed by the box width and height:
# width = xmax - xmin, height = ymax - ymin. fill=False leaves the box
# hollow and color="red" draws a red outline.
box_width = xmax - xmin
box_height = ymax - ymin
rect = Rectangle((xmin, ymin), box_width, box_height, fill=False, color="red")

ax = plt.gca()  # axes of the current figure, i.e. the picture just shown
ax.axes.add_patch(rect)  # attach the rectangle to those axes

改變圖片大小并繪制頭像框

# Read the sample picture again into its own variable for the resize demo.
resize_bytes = tf.io.read_file("F:/py/ziliao/數據集/圖片定位與分割數據集/images/Abyssinian_1.jpg")
img1 = tf.image.decode_jpeg(resize_bytes)
img1.shape


def _first_int(query):
    """Return the first match of an XPath *query* on `sel` as an int."""
    return int(sel.xpath(query)[0])


# Original image size and head bounding box, taken from the parsed annotation.
width = _first_int('//size/width/text()')
height = _first_int('//size/height/text()')
xmin = _first_int('//bndbox/xmin/text()')
xmax = _first_int('//bndbox/xmax/text()')
ymin = _first_int('//bndbox/ymin/text()')
ymax = _first_int('//bndbox/ymax/text()')
print((width,height),(xmin,xmax,ymin,ymax))

# The pictures come in different sizes, so resize to a fixed 224x224 shape
# and divide by 255 to normalise the pixel values.
img1 = tf.image.resize(img1, [224, 224])
img1 = img1 / 255
plt.imshow(img1)  # show the resized picture

# The picture was resized, so the box coordinates must be rescaled too:
# each coordinate is divided by the original extent on its axis and
# multiplied by the new side length (224).
xmin, xmax = (xmin/width)*224, (xmax/width)*224
ymin, ymax = (ymin/height)*224, (ymax/height)*224

# Draw the rescaled box over the resized picture.
plt.imshow(img1)
box_width = xmax - xmin
box_height = ymax - ymin
# fill=False leaves the box hollow; color="red" draws a red outline.
rect = Rectangle((xmin, ymin), box_width, box_height, fill=False, color="red")

ax = plt.gca()  # axes of the current figure
ax.axes.add_patch(rect)  # attach the rectangle to those axes

創建輸入管道

# Collect the path of every .jpg file in the images directory.
image_pattern = "F:/py/ziliao/數據集/圖片定位與分割數據集/images/*.jpg"
images = glob.glob(image_pattern)
len(images)

# Peek at the first five paths.
images[:5]

# Collect the annotation (.xml) files that hold the head bounding boxes.
# Author's note: only 3686 images are annotated. An annotation shares its
# file name with its image, so the annotation names are used later to pick
# out the matching image paths.
xml_pattern = "F:/py/ziliao/數據集/圖片定位與分割數據集/annotations/xmls/*.xml"
xmls = glob.glob(xml_pattern)
len(xmls)

# Peek at the last five paths.
xmls[-5:]

# Base name (no directory, no extension) of every annotation file.
# os.path is used instead of splitting on "\\" and "." so the extraction
# works with either path separator and with file names that contain dots.
names = [os.path.splitext(os.path.basename(x))[0] for x in xmls]

# Keep only the images that have an annotation with the same base name.
# Membership is tested against a set so the filter is a single linear pass.
annotated_names = set(names)
imgs_train = [
    path for path in images
    if os.path.splitext(os.path.basename(path))[0] in annotated_names
]
len(imgs_train)

# Peek at the last five retained paths.
imgs_train[-5:]

def _stem(path):
    """Return the file name of *path* without its directory or extension."""
    return os.path.splitext(os.path.basename(path))[0]


# Sort both lists by base name so that the i-th image and the i-th
# annotation refer to the same file.
imgs_train.sort(key=_stem)
xmls.sort(key=_stem)


def to_labels(path):
    """Return the head bounding box stored in the annotation file at *path*.

    The result is ``[xmin/width, ymin/height, xmax/width, ymax/height]``:
    the box corners divided by the image size recorded in the same file.
    """
    # The context manager closes the file as soon as its text has been read.
    with open(path) as xml_file:
        xml = xml_file.read()
    sel = etree.HTML(xml)

    # Image size as recorded in the annotation.
    width = int(sel.xpath('//size/width/text()')[0])
    height = int(sel.xpath('//size/height/text()')[0])

    # Corners of the annotated head bounding box.
    xmin = int(sel.xpath('//bndbox/xmin/text()')[0])
    xmax = int(sel.xpath('//bndbox/xmax/text()')[0])
    ymin = int(sel.xpath('//bndbox/ymin/text()')[0])
    ymax = int(sel.xpath('//bndbox/ymax/text()')[0])
    return [xmin/width, ymin/height, xmax/width, ymax/height]


labels = [to_labels(path) for path in xmls]

# Transpose the list of 4-element labels into four per-coordinate tuples
# (in to_labels order: xmin, ymin, xmax, ymax), then show the first sample.
out1, out2, out3, out4 = zip(*labels)
out1[0], out2[0], out3[0], out4[0]

# Convert each coordinate column to a NumPy array.
out1, out2, out3, out4 = (np.array(column) for column in (out1, out2, out3, out4))

# Dataset of labels: every element is a 4-tuple with one value per column.
label_dataset = tf.data.Dataset.from_tensor_slices((out1, out2, out3, out4))

def load_image(path):
    """Read the JPEG at *path*, resize it to 224x224 and scale it to [-1, 1]."""
    raw = tf.io.read_file(path)
    pixels = tf.image.decode_jpeg(raw, channels=3)
    pixels = tf.image.resize(pixels, [224, 224])
    # Pixel values span 0-255: dividing by 127.5 maps them to 0-2 and
    # subtracting 1 shifts that to the range -1 to 1.
    return pixels / 127.5 - 1


# Dataset of decoded images, in the same order as imgs_train.
image_dataset = tf.data.Dataset.from_tensor_slices(imgs_train).map(load_image)

# Pair every image with its label.
dataset = tf.data.Dataset.zip((image_dataset, label_dataset))

# Split sizes: 20% of the samples are held out for evaluation.
test_count = int(len(imgs_train)*0.2)
train_count = len(imgs_train)-test_count
print((test_count, train_count))

# Split per sample BEFORE shuffling, repeating and batching. Calling
# take()/skip() on an already repeated, shuffled and batched stream counts
# batches instead of samples and lets the same picture appear in both
# subsets. Every 5th sample goes to the test set; striding (rather than
# taking a contiguous slice) spreads the hold-out over the whole name-sorted
# file list.
indexed = dataset.enumerate()
test_samples = indexed.filter(
    lambda index, sample: tf.equal(index % 5, 4)
).map(lambda index, sample: sample)
train_samples = indexed.filter(
    lambda index, sample: tf.not_equal(index % 5, 4)
).map(lambda index, sample: sample)

# Training data is reshuffled on every pass; both streams repeat forever, so
# the number of batches consumed is controlled by the step arguments of fit().
dataset_train = train_samples.shuffle(train_count).repeat().batch(32)
dataset_test = test_samples.repeat().batch(32)

# Sanity check: take one training batch and draw the label box of its first
# picture. Labels are fractions of the image size, hence the * 224.
for batch_images, batch_boxes in dataset_train.take(1):
    plt.imshow(tf.keras.preprocessing.image.array_to_img(batch_images[0]))
    xmin, ymin, xmax, ymax = (column[0].numpy()*224 for column in batch_boxes)
    rect = Rectangle((xmin, ymin), (xmax-xmin), (ymax-ymin), fill=False, color="red")
    ax = plt.gca()  # axes of the current figure
    ax.axes.add_patch(rect)  # attach the rectangle to those axes
    plt.show()

創建圖像定位的模型

# Convolutional base: Xception with ImageNet weights and without its
# classification head (include_top=False).
xception = tf.keras.applications.Xception(
    weights="imagenet",
    include_top=False,
    input_shape=(224, 224, 3),
)

# Regression head: pooled features -> two dense layers -> four single-unit
# outputs, one per label value.
inputs = tf.keras.layers.Input(shape=(224, 224, 3))
x = xception(inputs)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(2048, activation="relu")(x)
x = tf.keras.layers.Dense(256, activation="relu")(x)

out1 = tf.keras.layers.Dense(1)(x)
out2 = tf.keras.layers.Dense(1)(x)
out3 = tf.keras.layers.Dense(1)(x)
out4 = tf.keras.layers.Dense(1)(x)

prediction = [out1, out2, out3, out4]
model = tf.keras.models.Model(inputs=inputs, outputs=prediction)

# Compile: mean squared error as the loss, mean absolute error as a metric.
model.compile(
    tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss="mse",
    metrics=["mae"],
)

# Training schedule. BATCH_SIZE must equal the size passed to .batch() in
# the input pipeline (32); otherwise the step counts below do not correspond
# to one pass over the data.
EPOCHS = 20
BATCH_SIZE = 32
STEPS_PER_EPOCH = train_count//BATCH_SIZE
VALIDATION_STEPS = test_count//BATCH_SIZE

# Train.
history = model.fit(
    dataset_train,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_steps=VALIDATION_STEPS,
    validation_data=dataset_test,
)

# Save the model. Author's note: this stores 1. the weights, 2. the model
# configuration (architecture) and 3. the optimizer configuration.
# The target directory is created first because saving to an .h5 path fails
# when the parent directory does not exist.
os.makedirs("./save", exist_ok=True)
model.save("./save/txfg.h5")
new_model = tf.keras.models.load_model("./save/txfg.h5")

# Predict with the reloaded model and draw the predicted box on the first
# three pictures of one test batch. Author's note: the model was trained for
# only a few epochs because of limited GPU memory, so the error is large.
plt.figure(figsize=(8, 24))
for img, _ in dataset_test.take(1):
    out1, out2, out3, out4 = new_model.predict(img)
    for i in range(3):
        plt.subplot(3, 1, i+1)
        plt.imshow(tf.keras.preprocessing.image.array_to_img(img[i]))
        # Each output row is a 1-element array; convert it to a plain float
        # before scaling back to the 224-pixel image size.
        xmin, ymin, xmax, ymax = (
            float(out[i][0])*224 for out in (out1, out2, out3, out4)
        )
        rect = Rectangle((xmin, ymin), (xmax-xmin), (ymax-ymin), fill=False, color="red")
        ax = plt.gca()  # axes of the current subplot
        ax.axes.add_patch(rect)  # attach the rectangle to those axes

圖像定位優化

圖像定位的應用

圖運算模式

我們只需要在最后一個函數上添加 @tf.function 裝飾器，使其以圖運算模式執行，就能提高讀取圖片的效率。

GPU的使用與分配

獲得當前主機上運算設備列表

列出當前設備的所有gpu，gpus[0:2]表示只對前兩塊可見

顯存使用策略

總結

以上是生活随笔為你收集整理的深度学习-Tensorflow2.2-图像处理{10}-图像定位/优化/图运算/及GPU优化等-22的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇：深度学习-Tensorflow2.2-模
下一篇：深度学习-Tensorflow2.2-图