PyTorch


Basics

Importing the core libraries

import numpy as np
np.__version__
'1.20.1'
import torch
torch.__version__
'1.11.0'
import matplotlib.pyplot as plt

Testing for a GPU

torch.cuda.is_available()
False

If this returns False:

  • Confirm that the machine actually has a CUDA-capable GPU;
  • Check the driver with nvidia-smi:
    • if the nvidia-smi command is missing, no driver is installed;
    • if the driver version is too old for the CUDA version, update the driver;
  • If the PyTorch and CUDA versions do not match, update PyTorch or CUDA (see the snippet below).
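
A minimal sketch for diagnosing a version mismatch: torch.version.cuda reports the CUDA version the installed PyTorch build was compiled against (None for CPU-only builds), which can be compared with the driver's CUDA version shown by nvidia-smi.

print(torch.__version__)        # PyTorch version
print(torch.version.cuda)       # CUDA version this build expects; None for a CPU-only build
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))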

NumPy basics

Creating an ndarray

Copying

Deep copy

a = np.array([[1, 2], [3, 4]])
a
array([[1, 2],
       [3, 4]])

Shallow copy

np.asarray(a)
array([[1, 2],
       [3, 4]])

np.ones()

np.ones((2, 2), dtype='int8')*6
array([[6, 6],
       [6, 6]], dtype=int8)

np.zeros()

np.zeros((2, 2), dtype='int8')+3
array([[3, 3],
       [3, 3]], dtype=int8)

np.arange()

np.arange(5)
array([0, 1, 2, 3, 4])
np.arange(2, 5)
array([2, 3, 4])
np.arange(2, 9, 3)
array([2, 5, 8])
np.arange(2, 9, 0.3)
array([2. , 2.3, 2.6, 2.9, 3.2, 3.5, 3.8, 4.1, 4.4, 4.7, 5. , 5.3, 5.6,
       5.9, 6.2, 6.5, 6.8, 7.1, 7.4, 7.7, 8. , 8.3, 8.6, 8.9])

np.linspace()

np.linspace(start=2, stop=10, num=3)
array([ 2.,  6., 10.])

Attributes

ndim

a.ndim
2

shape

a.shape
(2, 2)
np.reshape(a, (1, 4))
array([[1, 2, 3, 4]])

size

a.size
4

dtype

Examples: int8, int16, int32, float32, float64, bool, etc.

a.dtype
dtype('int64')
a.astype('float32')
array([[1., 2.],
       [3., 4.]], dtype=float32)

Common methods

The axis argument specifies the dimension being aggregated (reduced), as in the example below.
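
On the 2x2 array a defined above, axis=0 sums down each column and axis=1 across each row:

np.sum(a, axis=0)
array([4, 6])
np.sum(a, axis=1)
array([3, 7])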

view()

Shallow copy

a.view()
array([[1, 2],
       [3, 4]])

copy()

Deep copy

a.copy()
array([[1, 2],
       [3, 4]])

np.max()&np.argmax()

np.max(a, axis=1)
array([2, 4])
np.argmax(a, axis=1)
array([1, 1])

np.min()&np.argmin()

np.min(a, axis=1)
array([1, 3])
np.argmin(a, axis=1)
array([0, 0])

np.sort()&np.argsort()

np.sort(a, axis=1)
array([[1, 2],
       [3, 4]])
np.argsort(a, axis=1)
array([[0, 1],
       [0, 1]])

np.sum()

np.sum(a, axis=1)
array([3, 7])

np.concatenate()

np.concatenate((a, a), axis=1)
array([[1, 2, 1, 2],
       [3, 4, 3, 4]])

np.newaxis

a[:,:,np.newaxis]
array([[[1],
        [2]],

       [[3],
        [4]]])

Tensor basics

Scalars, vectors, matrices, and higher-order tensors are all Tensors

Common dtypes: torch.float32, torch.float64, torch.uint8, torch.int64

Creation

torch.tensor()

torch.tensor(data, dtype=None, device=None, requires_grad=False)

t = torch.tensor([[1, 2], [3, 4]])
t
tensor([[1, 2],
        [3, 4]])

torch.Tensor

Always creates a floating-point (torch.float32) tensor

torch.Tensor([3, 4])
tensor([3., 4.])

torch.from_numpy()

torch.from_numpy(ndarray)

torch.from_numpy(a)
tensor([[1, 2],
        [3, 4]])

torch.zeros()

torch.zeros(*size, dtype=None…)

torch.zeros(3, 3)
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

torch.eye()

torch.eye(size, dtype=None…)

torch.eye(3, 3)
tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

torch.ones()

torch.ones(size, dtype=None…)

torch.ones(3, 3)
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

torch.rand()

Uniform distribution on [0, 1)

torch.rand(3, 3)
tensor([[0.4539, 0.1813, 0.1875],
        [0.6278, 0.4813, 0.1640],
        [0.9915, 0.4590, 0.1938]])

torch.randn()

Standard normal distribution

torch.randn(3, 3)
tensor([[ 0.2320, -0.0949, -1.0982],
        [ 1.0619,  2.2982, -0.0982],
        [-0.3590, -1.4509,  1.8511]])

torch.normal()

torch.normal(mean, std, size)
Normal distribution with the given mean and standard deviation

torch.normal(10, 3, (3, 3))
tensor([[ 8.9341, 14.7398, 10.1189],
        [ 9.6228,  9.5262, 12.9245],
        [ 7.1244, 10.4619,  4.8592]])

torch.randint()

Random integers in the range [low, high)

torch.randint(10, (5,))
tensor([6, 7, 1, 4, 3])
torch.randint(-10, 10, (5,))
tensor([-9,  6, -2, -5, -5])

Extraction

item()

torch.tensor(2).item()
2

numpy()

t.numpy()
array([[1, 2],
       [3, 4]])

tolist()

t.numpy().tolist()
[[1, 2], [3, 4]]
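
Tensors also provide tolist() directly, so the intermediate numpy() call is not required:

t.tolist()
[[1, 2], [3, 4]]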

cuda()

t.cuda()  # moves the tensor to GPU memory; only works on a machine with a CUDA GPU

cpu()

t.cpu()
tensor([[1, 2],
        [3, 4]])

Shape and shape operations

shape

t.shape
torch.Size([2, 2])

size()

t.size()
torch.Size([2, 2])

numel()

Total number of elements

t.numel()
4

permute()

Can permute any number of dimensions at once;
the transposition may leave the data non-contiguous in memory

t.permute(1, 0)
tensor([[1, 3],
        [2, 4]])

transpose()

Can only swap two dimensions at a time

t.transpose(1, 0)
tensor([[1, 3],
        [2, 4]])

view()

Reshapes the tensor; only works when the data is stored contiguously

t.view(1,4)
tensor([[1, 2, 3, 4]])
t.transpose(1, 0).view(1, 4)
---------------------------

RuntimeError                              Traceback (most recent call last)

<ipython-input-53-9e55d885aeb2> in <module>
----> 1 t.transpose(1, 0).view(1, 4)


RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
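
Besides reshape() (shown next), another way around this error is to make the data contiguous first; contiguous() copies it into a new, contiguous layout that view() can then handle:

t.transpose(1, 0).contiguous().view(1, 4)
tensor([[1, 3, 2, 4]])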

reshape()

Behaves like view() for contiguous data; for non-contiguous data it creates a new copy

t.transpose(1, 0).reshape(1, 4)
tensor([[1, 3, 2, 4]])

squeeze()

torch.rand(3,1,3).squeeze(0).shape
torch.Size([3, 1, 3])
torch.rand(3,1,3).squeeze(1).shape
torch.Size([3, 3])

unsqueeze()

torch.rand(3,3).unsqueeze(2).shape
torch.Size([3, 3, 1])
torch.rand(3,3).unsqueeze(0).unsqueeze(2).shape
torch.Size([1, 3, 1, 3])

Common methods

torch.cat()

torch.cat(tensors, dim=0, out=None)

torch.cat((torch.rand(3,3),torch.rand(3,3)),dim=0).shape
torch.Size([6, 3])
torch.cat((torch.rand(3,3),torch.rand(3,3)),dim=1).shape
torch.Size([3, 6])

torch.stack()

torch.stack(inputs, dim=0)

torch.stack((torch.rand(3,3),torch.rand(3,3)),dim=0).shape
torch.Size([2, 3, 3])
torch.stack((torch.rand(3,3),torch.rand(3,3)),dim=2).shape
torch.Size([3, 3, 2])

torch.chunk()

torch.chunk(input, chunks, dim=0)
chunks is the number of pieces; the last chunk may be smaller when the dimension does not divide evenly

torch.chunk(torch.rand(7,3), 3, dim=0)
(tensor([[0.0078, 0.0176, 0.2839],
         [0.1064, 0.8287, 0.6433],
         [0.3821, 0.2781, 0.1552]]),
 tensor([[0.5985, 0.4576, 0.4888],
         [0.1841, 0.3461, 0.1023],
         [0.7858, 0.2326, 0.2193]]),
 tensor([[0.1304, 0.2903, 0.9171]]))

torch.split()

torch.split(tensor, split_size_or_sections, dim=0)
split_size_or_sections is either a single chunk size or a list of per-chunk sizes

torch.split(torch.rand(7,3), 3, dim=0)
(tensor([[0.8669, 0.7608, 0.4389],
         [0.7666, 0.1656, 0.5473],
         [0.5230, 0.2720, 0.0568]]),
 tensor([[0.0176, 0.3546, 0.8935],
         [0.7100, 0.9868, 0.9970],
         [0.3909, 0.3361, 0.6061]]),
 tensor([[0.5709, 0.4006, 0.3961]]))
torch.split(torch.rand(7,3), 2, dim=0)
(tensor([[0.5106, 0.2453, 0.9660],
         [0.3012, 0.6863, 0.7366]]),
 tensor([[0.5543, 0.5786, 0.6808],
         [0.9212, 0.9496, 0.5799]]),
 tensor([[0.5509, 0.8643, 0.6333],
         [0.4321, 0.4174, 0.4292]]),
 tensor([[0.5197, 0.9226, 0.5475]]))
torch.split(torch.rand(7,4), (4, 3), dim=0)
(tensor([[0.1663, 0.2489, 0.9874, 0.7570],
         [0.8650, 0.9872, 0.4446, 0.3961],
         [0.1678, 0.0448, 0.8555, 0.9096],
         [0.9019, 0.0997, 0.2572, 0.0637]]),
 tensor([[0.0106, 0.4245, 0.2285, 0.2700],
         [0.4306, 0.1111, 0.4926, 0.5459],
         [0.4850, 0.4326, 0.0123, 0.9814]]))

unbind()

torch.unbind(input, dim=0)

Removes the given dimension, returning a tuple of all slices along it

torch.unbind(torch.rand(3,4), dim=0)
(tensor([0.0992, 0.3381, 0.6153, 0.9314]),
 tensor([0.7412, 0.9560, 0.3918, 0.7036]),
 tensor([0.8900, 0.5912, 0.4202, 0.4862]))

index_select

torch.index_select(tensor, dim, index)

A handy substitute for plain indexing when a tensor has many dimensions

torch.index_select(torch.rand(3,3,3), dim=0, index=torch.tensor([0,2]))
tensor([[[0.7024, 0.0431, 0.5123],
         [0.3802, 0.1955, 0.8054],
         [0.5484, 0.9898, 0.4589]],

        [[0.7791, 0.0389, 0.8322],
         [0.1915, 0.6176, 0.9362],
         [0.7712, 0.2995, 0.0981]]])

masked_select

torch.masked_select(input, mask, out=None)

test = torch.rand(10)
torch.masked_select(test, test>0.5)
tensor([0.7480, 0.5427, 0.5509, 0.5230, 0.6500, 0.7832, 0.9810, 0.7644])
test[test>0.5]
tensor([0.7480, 0.5427, 0.5509, 0.5230, 0.6500, 0.7832, 0.9810, 0.7644])

Working with data

Dataset

from torch.utils.data import Dataset

A custom dataset

class MyDataset(Dataset):
    # constructor
    def __init__(self, data_tensor, target_tensor):
        self.data_tensor = data_tensor
        self.target_tensor = target_tensor
    # return the size of the dataset
    def __len__(self):
        return self.data_tensor.size(0)
    # return the sample and label at the given index
    def __getitem__(self, index):
        return self.data_tensor[index], self.target_tensor[index]
my_dataset = MyDataset(torch.randn(10, 3), torch.randn(10))
my_dataset.__len__()
10
my_dataset[2]
(tensor([-0.0947, -0.5704,  0.1172]), tensor(-0.2017))

DataLoader

  • An iterator over the dataset
  • Handles multi-process loading, shuffling, and batching
from torch.utils.data import DataLoader
tensor_dataloader = DataLoader(dataset=my_dataset, # the dataset to load from (required)
                               batch_size=3,       # batch size
                               shuffle=True,       # whether to shuffle the data
                               num_workers=0)      # number of worker processes; 0 means load in the main process
# iterate over the batches
for data, target in tensor_dataloader: 
    print(data, target)
tensor([[-2.1315, -0.3849, -0.0614],
        [-1.2755, -0.0085,  1.1822],
        [-0.3820,  1.0813,  1.3424]]) tensor([0.8899, 0.4688, 1.5528])
tensor([[-0.0947, -0.5704,  0.1172],
        [ 0.3214, -2.5288,  0.4268],
        [-1.8927, -0.8425, -1.3250]]) tensor([-0.2017,  0.4900,  1.3065])
tensor([[ 1.2582,  0.3975,  0.1769],
        [-0.4028, -0.5276,  1.0662],
        [ 0.0277,  1.3796,  0.4556]]) tensor([ 0.8842,  0.6022, -0.8440])
tensor([[-0.9969,  1.3454,  0.0579]]) tensor([-1.5651])
# fetch a single batch
print('One batch tensor data: ', iter(tensor_dataloader).next())  # in newer PyTorch versions: next(iter(tensor_dataloader))
One batch tensor data:  [tensor([[-0.0947, -0.5704,  0.1172],
        [ 0.0277,  1.3796,  0.4556],
        [ 0.3214, -2.5288,  0.4268]]), tensor([-0.2017, -0.8440,  0.4900])]

Torchvision

Common datasets + common network architectures + common image-processing transforms

torchvision.datasets

Common datasets

import torchvision
from torchvision import transforms
# MNIST as an example
mnist_dataset = torchvision.datasets.MNIST(root='./data',
                                       train=True,
                                       transform=None,
                                       target_transform=None,
                                       download=True)
test = list(mnist_dataset)[0][0]
test

(image output)

torchvision.transforms

Data preprocessing

tt = transforms.ToTensor()
tt(test)
test = transforms.Resize((200,200))(test)
test

(image output)

transforms.CenterCrop((100, 100))(test)

(image output)

torchvision.transforms.RandomCrop((100, 100))(test)

(image output)

torchvision.transforms.FiveCrop((100, 100))(test)[0]

(image output)

# p sets the probability of flipping
transforms.RandomHorizontalFlip(p=0.8)(test)

(image output)

torchvision.transforms.RandomVerticalFlip(p=0.5)(test)

(image output)

# transforms that operate only on Tensors
transforms.ToPILImage()(transforms.Normalize(-1, 1)(tt(test)))

(image output)

# define a composed transform
composed = transforms.Compose([transforms.Resize((200, 200)),
                               transforms.RandomCrop(80)])
# the image after the composed transform
composed(test)

(image output)

transforms.Compose([
    transforms.RandomResizedCrop((200, 200)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
Compose(
    RandomResizedCrop(size=(200, 200), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR)
    RandomHorizontalFlip(p=0.5)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)

torchvision.models

import torchvision.models as models
googlenet = models.googlenet(pretrained=True)
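
Note: in more recent torchvision releases (0.13+) the pretrained flag is deprecated in favor of a weights argument; the roughly equivalent call is:

googlenet = models.googlenet(weights=models.GoogLeNet_Weights.DEFAULT)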

torchvision.utils

# load the MNIST dataset
mnist_dataset = torchvision.datasets.MNIST(root='./data',
                               train=False,
                               transform=transforms.ToTensor(),
                               target_transform=None,
                               download=True)
# take a batch of 32 image tensors
tensor_dataloader = DataLoader(dataset=mnist_dataset,
                               batch_size=32)
data_iter = iter(tensor_dataloader)
img_tensor, label_tensor = data_iter.next()
print(img_tensor.shape)
torch.Size([32, 1, 28, 28])
googlenet(img_tensor.repeat(1,3,1,1))
# arrange the 32 images in a single grid
grid_tensor = torchvision.utils.make_grid(img_tensor, nrow=8, padding=2)
transforms.ToPILImage()(grid_tensor)

(image output)

# input is a single image tensor: save it directly
torchvision.utils.save_image(grid_tensor, 'grid.jpg')

# input is a batch of images: save_image applies make_grid internally before saving
torchvision.utils.save_image(img_tensor, 'grid2.jpg', nrow=5, padding=2)

Neural networks

Convolution

The Conv2d class

import torch.nn as nn
input_feat = torch.tensor([[4, 1, 7, 5], [4, 4, 2, 5], [7, 7, 2, 4], [1, 0, 2, 4]], dtype=torch.float32).unsqueeze(0).unsqueeze(0)
print(input_feat)
print(input_feat.shape)
tensor([[[[4., 1., 7., 5.],
          [4., 4., 2., 5.],
          [7., 7., 2., 4.],
          [1., 0., 2., 4.]]]])
torch.Size([1, 1, 4, 4])
conv2d = nn.Conv2d(1, 1, (2, 2), stride=1, padding=1, bias=False)
# the kernel weight needs four dimensions: (output channels, input channels, height, width)
kernels = torch.tensor([[[[1, 0], [2, 1]]]], dtype=torch.float32)
conv2d.weight = nn.Parameter(kernels, requires_grad=False)
print(conv2d.weight)
print(conv2d.bias)
Parameter containing:
tensor([[[[1., 0.],
          [2., 1.]]]])
None
conv2d(input_feat)
tensor([[[[ 4.,  9.,  9., 19., 10.],
          [ 4., 16., 11., 16., 15.],
          [ 7., 25., 20., 10., 13.],
          [ 1.,  9.,  9., 10., 12.],
          [ 0.,  1.,  0.,  2.,  4.]]]])
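
The output spatial size follows the usual convolution formula: out = floor((H + 2*padding - kernel_size) / stride) + 1 = (4 + 2*1 - 2) / 1 + 1 = 5, which is why the 4x4 input with padding=1 and a 2x2 kernel yields the 5x5 map above.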

Depthwise Separable Convolution

  • The groups argument controls how the input and output feature maps are split into groups
  • When groups is not 1, it must evenly divide both in_channels and out_channels
  • When groups equals in_channels, the layer is a depthwise (DW) convolution (see the parameter-count comparison after the example below)
# create a three-channel 5x5 feature map
x = torch.rand((3, 5, 5)).unsqueeze(0)
print(x.shape)
# output:
# torch.Size([1, 3, 5, 5])
# note: in a DW convolution the number of output channels equals the number of input channels
in_channels_dw = x.shape[1]
out_channels_dw = x.shape[1]
# DW convolutions typically use a 3x3 kernel
kernel_size = 3
stride = 1
# for a DW convolution, groups equals the number of input channels
dw = nn.Conv2d(in_channels_dw, out_channels_dw, kernel_size, stride, groups=in_channels_dw)
torch.Size([1, 3, 5, 5])
in_channels_pw = out_channels_dw
out_channels_pw = 4
kernel_size_pw = 1
pw = nn.Conv2d(in_channels_pw, out_channels_pw, kernel_size_pw, stride)
out = pw(dw(x))
print(out.shape)
torch.Size([1, 4, 3, 3])
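
A rough parameter-count comparison, using the dw and pw layers defined above, showing why the depthwise separable factorization is cheaper than a single standard convolution with the same input and output channels:

# a standard 3->4 convolution with a 3x3 kernel: 4*3*3*3 = 108 weights (+ 4 biases)
standard = nn.Conv2d(3, 4, 3)
n_standard = sum(p.numel() for p in standard.parameters())
# depthwise (3*1*3*3 = 27 weights) + pointwise (4*3*1*1 = 12 weights), plus their biases
n_separable = sum(p.numel() for p in dw.parameters()) + sum(p.numel() for p in pw.parameters())
print(n_standard, n_separable)
112 46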

Dilated convolution (dilation)

nn.Conv2d(2,2,3,dilation=2)
Conv2d(2, 2, kernel_size=(3, 3), stride=(1, 1), dilation=(2, 2))
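
With dilation d, a k x k kernel covers an effective window of k + (k-1)(d-1) pixels per side; here a 3x3 kernel with dilation=2 spans a 5x5 region while still using only 9 weights.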

Distributed training

Basic information

# is a GPU available?
torch.cuda.is_available()
False
# number of GPUs
torch.cuda.device_count()
0
# wrap the CPU and each GPU as a device object; multiple GPUs are distinguished by index
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

Moving between devices

  • Both data and models can be moved between the CPU and GPUs with to()
data = torch.ones((3, 3))
print(data.device)

# push data to the GPU
data_gpu = data.to(device)
print(data_gpu.device)
cpu
cpu

DataParallel

  • torch.nn.DataParallel(module, device_ids=None, output_device=None, dim=0)
  • Single-process, automatic data parallelism across GPUs
import os
class ASimpleNet(nn.Module):
    def __init__(self, layers=3):
        super(ASimpleNet, self).__init__()
        self.linears = nn.Sequential(*[nn.Linear(3, 3, bias=False) for i in range(layers)])
    def forward(self, x):
        print("forward batchsize is: {}".format(x.size()[0]))
        x = self.linears(x)
        x = torch.relu(x)
        return x
        
batch_size = 16
inputs = torch.randn(batch_size, 3)
labels = torch.randn(batch_size, 3)
inputs, labels = inputs.to(device), labels.to(device)
net = ASimpleNet()
net = nn.DataParallel(net)
net.to(device)
print("CUDA_VISIBLE_DEVICES :{}".format(os.environ["CUDA_VISIBLE_DEVICES"]))

for epoch in range(1):
    outputs = net(inputs)
forward batchsize is: 16

Visualization

Tensorboard

import random
import numpy as np
import torch
from torch import nn

# model definition
class LinearModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(1))
        self.bias = nn.Parameter(torch.randn(1))

    def forward(self, input):
        return (input * self.weight) + self.bias

# data
w = 2
b = 3
xlim = [-10, 10]
x_train = np.random.randint(low=xlim[0], high=xlim[1], size=30)
y_train = [w * x + b + random.randint(0,2) for x in x_train]
# Tensorboard
from tensorboardX import SummaryWriter
# from torch.utils.tensorboard import SummaryWriter

# training
model = LinearModel()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, weight_decay=1e-2, momentum=0.9)
y_train = torch.tensor(y_train, dtype=torch.float32)

writer = SummaryWriter()

for n_iter in range(500):
    input = torch.from_numpy(x_train)
    output = model(input)
    loss = nn.MSELoss()(output, y_train)
    model.zero_grad()
    loss.backward()
    optimizer.step()
    writer.add_scalar('Loss/train', loss, n_iter)
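
To view the logged curve, start the TensorBoard server in a terminal (SummaryWriter writes to ./runs by default) and open the printed URL in a browser:

tensorboard --logdir=runs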

Visdom

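Visdom needs its server running before the client can connect (by default on localhost:8097); start it in a separate terminal:

python -m visdom.server
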
from visdom import Visdom
import numpy as np
import time

# instantiate the Visdom client
viz = Visdom()
# create and initialize a window
viz.line([0.], [0], win='train_loss', opts=dict(title='train_loss'))

for n_iter in range(10):
    # generate a random loss value
    loss = 0.2 * np.random.randn() + 1
    # append the value to the window's plot
    viz.line([loss], [n_iter], win='train_loss', update='append')
    time.sleep(0.5)

img = np.zeros((3, 100, 100))
img[0] = np.arange(0, 10000).reshape(100, 100) / 10000
img[1] = 1 - np.arange(0, 10000).reshape(100, 100) / 10000
# display the image
viz.image(img)
Setting up a new session...
'window_3ac152bac92404'
from visdom import Visdom
import numpy as np

# training
model = LinearModel()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, weight_decay=1e-2, momentum=0.9)
y_train = torch.tensor(y_train, dtype=torch.float32)

# instantiate a Visdom client
viz = Visdom(port=8097)
# initialize the window
viz.line([0.], [0.], win='train_loss', opts=dict(title='train loss'))

for n_iter in range(500):
    input = torch.from_numpy(x_train)
    output = model(input)
    loss = nn.MSELoss()(output, y_train)
    model.zero_grad()
    loss.backward()
    optimizer.step()
    # append the latest loss value to the plot
    viz.line([loss.item()], [n_iter], win='train_loss', update='append')
<ipython-input-128-20de72c468e0>:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  y_train = torch.tensor(y_train, dtype=torch.float32)
Setting up a new session...
viz = Visdom()
viz.line([[0., 0.]], [0], win='train', opts=dict(title='loss_accuracy', legend=['loss', 'acc']))

for n_iter in range(10):
    loss = 0.2 * np.random.randn() + 1
    accuracy = 0.1 * np.random.randn() + 1
    viz.line([[loss, accuracy]], [n_iter], win='train', update='append')
    time.sleep(0.5)
Setting up a new session...
False
