Basics
Import the essential libraries
import numpy as np
np.__version__
'1.20.1'
import torch
torch.__version__
'1.11.0'
import matplotlib.pyplot as plt
Check for a GPU
torch.cuda.is_available()
False
If this returns False:
- Confirm that the machine has a CUDA-capable GPU;
- Check the driver version with nvidia-smi:
- If the nvidia-smi command is missing, no driver is installed;
- If the driver version is too old to match CUDA, update the driver;
- If the PyTorch and CUDA versions do not match, update PyTorch or CUDA (see the version-check sketch below);
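A quick cross-check from Python (a minimal sketch; torch.version.cuda is standard PyTorch and is None for CPU-only builds):
print(torch.__version__)   # PyTorch version
print(torch.version.cuda)  # CUDA version this PyTorch build was compiled against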
NumPy basics
Creating an ndarray
Copying
Deep copy
a = np.array([[1, 2], [3, 4]])
a
array([[1, 2],
[3, 4]])
Shallow copy
np.asarray(a)
array([[1, 2],
[3, 4]])
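The difference is easy to verify (a small sketch with throwaway names m, m_view, m_copy): np.asarray on an existing ndarray returns the same buffer, while np.array makes an independent copy.
m = np.array([[1, 2], [3, 4]])
m_view = np.asarray(m)  # shallow: shares m's buffer
m_copy = np.array(m)    # deep: independent data
m_view[0, 0] = 99
m[0, 0]       # 99, the change is visible through m
m_copy[0, 0]  # 1, the copy is unaffected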
np.ones()
np.ones((2, 2), dtype='int8')*6
array([[6, 6],
[6, 6]], dtype=int8)
np.zeros()
np.zeros((2, 2), dtype='int8')+3
array([[3, 3],
[3, 3]], dtype=int8)
np.arange()
np.arange(5)
array([0, 1, 2, 3, 4])
np.arange(2, 5)
array([2, 3, 4])
np.arange(2, 9, 3)
array([2, 5, 8])
np.arange(2, 9, 0.3)
array([2. , 2.3, 2.6, 2.9, 3.2, 3.5, 3.8, 4.1, 4.4, 4.7, 5. , 5.3, 5.6,
5.9, 6.2, 6.5, 6.8, 7.1, 7.4, 7.7, 8. , 8.3, 8.6, 8.9])
np.linspace()
np.linspace(start=2, stop=10, num=3)
array([ 2., 6., 10.])
Attributes
ndim
a.ndim
2
shape
a.shape
(2, 2)
np.reshape(a, (1, 4))
array([[1, 2, 3, 4]])
size
a.size
4
dtype
e.g. int8, int16, int32, float32, float64, bool, etc.
a.dtype
dtype('int64')
a.astype('float32')
array([[1., 2.],
[3., 4.]], dtype=float32)
Common methods
axis specifies the dimension that gets reduced (aggregated)
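A small illustration of the axis convention on the 2x2 array a (sketch): axis=0 reduces down the rows (one result per column), axis=1 reduces across the columns (one result per row).
np.sum(a, axis=0)  # array([4, 6]), column sums
np.sum(a, axis=1)  # array([3, 7]), row sums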
view()
Shallow copy
a.view()
array([[1, 2],
[3, 4]])
copy()
Deep copy
a.copy()
array([[1, 2],
[3, 4]])
np.max()&np.argmax()
np.max(a, axis=1)
array([2, 4])
np.argmax(a, axis=1)
array([1, 1])
np.min()&np.argmin()
np.min(a, axis=1)
array([1, 3])
np.argmin(a, axis=1)
array([0, 0])
np.sort()&np.argsort()
np.sort(a, axis=1)
array([[1, 2],
[3, 4]])
np.argsort(a, axis=1)
array([[0, 1],
[0, 1]])
np.sum()
np.sum(a, axis=1)
array([3, 7])
np.concatenate()
np.concatenate((a, a), axis=1)
array([[1, 2, 1, 2],
[3, 4, 3, 4]])
np.newaxis
a[:,:,np.newaxis]
array([[[1],
[2]],
[[3],
[4]]])
Tensor basics
Scalars, vectors, matrices, and higher-order tensors are all Tensors
Common dtypes: torch.float32, torch.float64, torch.uint8, torch.int64
Creation
torch.tensor()
torch.tensor(data, dtype=None, device=None, requires_grad=False)
t = torch.tensor([[1, 2], [3, 4]])
t
tensor([[1, 2],
[3, 4]])
torch.Tensor
Creates a tensor with the default float dtype
torch.Tensor([3, 4])
tensor([3., 4.])
torch.from_numpy()
torch.from_numpy(ndarray)
torch.from_numpy(a)
tensor([[1, 2],
[3, 4]])
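Worth remembering: torch.from_numpy shares memory with the source ndarray, so in-place changes on either side show up on the other. A minimal sketch with a throwaway array arr:
arr = np.ones(3)
t_shared = torch.from_numpy(arr)
arr[0] = 5
t_shared  # tensor([5., 1., 1.], dtype=torch.float64), the change is visible in the tensor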
torch.zeros()
torch.zeros(*size, dtype=None…)
torch.zeros(3, 3)
tensor([[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.]])
torch.eye()
torch.eye(n, m=None, dtype=None…)
torch.eye(3, 3)
tensor([[1., 0., 0.],
[0., 1., 0.],
[0., 0., 1.]])
torch.ones()
torch.ones(size, dtype=None…)
torch.ones(3, 3)
tensor([[1., 1., 1.],
[1., 1., 1.],
[1., 1., 1.]])
torch.rand()
Uniform distribution on [0, 1)
torch.rand(3, 3)
tensor([[0.4539, 0.1813, 0.1875],
[0.6278, 0.4813, 0.1640],
[0.9915, 0.4590, 0.1938]])
torch.randn()
Standard normal distribution
torch.randn(3, 3)
tensor([[ 0.2320, -0.0949, -1.0982],
[ 1.0619, 2.2982, -0.0982],
[-0.3590, -1.4509, 1.8511]])
torch.normal()
torch.normal(mean, std, size)
Normal distribution with the given mean and standard deviation
torch.normal(10, 3, (3, 3))
tensor([[ 8.9341, 14.7398, 10.1189],
[ 9.6228, 9.5262, 12.9245],
[ 7.1244, 10.4619, 4.8592]])
torch.randint()
Random integers in the range [low, high)
torch.randint(10, (5,))
tensor([6, 7, 1, 4, 3])
torch.randint(-10, 10, (5,))
tensor([-9, 6, -2, -5, -5])
Extracting values
item()
torch.tensor(2).item()
2
numpy()
t.numpy()
array([[1, 2],
[3, 4]])
tolist()
t.numpy().tolist()
[[1, 2], [3, 4]]
cuda()
t.cuda()
cpu()
t.cpu()
tensor([[1, 2],
[3, 4]])
Shape and shape operations
shape
t.shape
torch.Size([2, 2])
size()
t.size()
torch.Size([2, 2])
numel()
Total number of elements
t.numel()
4
permute()
Transposing may leave the data non-contiguous in memory
Can reorder all dimensions at once
t.permute(1, 0)
tensor([[1, 3],
[2, 4]])
transpose()
Can only swap two dimensions
t.transpose(1, 0)
tensor([[1, 3],
[2, 4]])
view()
Reshapes a tensor; only works when the underlying storage is contiguous
t.view(1,4)
tensor([[1, 2, 3, 4]])
t.transpose(1, 0).view(1, 4)
---------------------------
RuntimeError  Traceback (most recent call last)
<ipython-input-53-9e55d885aeb2> in <module>
----> 1 t.transpose(1, 0).view(1, 4)
RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
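As the error message says, reshape() (covered next) handles this case, and so does making an explicit contiguous copy first (sketch):
t.transpose(1, 0).is_contiguous()          # False, which is why view() failed
t.transpose(1, 0).contiguous().view(1, 4)  # tensor([[1, 3, 2, 4]])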
reshape()
Behaves like view for contiguous tensors; for non-contiguous ones it copies the data into a new layout
t.transpose(1, 0).reshape(1, 4)
tensor([[1, 3, 2, 4]])
squeeze()
torch.rand(3,1,3).squeeze(0).shape
torch.Size([3, 1, 3])
torch.rand(3,1,3).squeeze(1).shape
torch.Size([3, 3])
unsqueeze()
torch.rand(3,3).unsqueeze(2).shape
torch.Size([3, 3, 1])
torch.rand(3,3).unsqueeze(0).unsqueeze(2).shape
torch.Size([1, 3, 1, 3])
Common methods
torch.cat()
torch.cat(tensors, dim = 0, out = None)
torch.cat((torch.rand(3,3),torch.rand(3,3)),dim=0).shape
torch.Size([6, 3])
torch.cat((torch.rand(3,3),torch.rand(3,3)),dim=1).shape
torch.Size([3, 6])
torch.stack()
torch.stack(inputs, dim=0)
torch.stack((torch.rand(3,3),torch.rand(3,3)),dim=0).shape
torch.Size([2, 3, 3])
torch.stack((torch.rand(3,3),torch.rand(3,3)),dim=2).shape
torch.Size([3, 3, 2])
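The difference between cat and stack in one place (a sketch with throwaway tensors x1 and x2): cat joins along an existing dimension, while stack first inserts a new dimension whose size equals the number of tensors.
x1, x2 = torch.rand(3, 3), torch.rand(3, 3)
torch.cat((x1, x2), dim=0).shape    # torch.Size([6, 3]), the existing dim grows
torch.stack((x1, x2), dim=0).shape  # torch.Size([2, 3, 3]), a new dim is inserted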
torch.chunk()
torch.chunk(input, chunks, dim=0)
torch.chunk(torch.rand(7,3), 3, dim=0)
(tensor([[0.0078, 0.0176, 0.2839],
[0.1064, 0.8287, 0.6433],
[0.3821, 0.2781, 0.1552]]),
tensor([[0.5985, 0.4576, 0.4888],
[0.1841, 0.3461, 0.1023],
[0.7858, 0.2326, 0.2193]]),
tensor([[0.1304, 0.2903, 0.9171]]))
torch.split()
torch.split(tensor, split_size_or_sections, dim=0)
torch.split(torch.rand(7,3), 3, dim=0)
(tensor([[0.8669, 0.7608, 0.4389],
[0.7666, 0.1656, 0.5473],
[0.5230, 0.2720, 0.0568]]),
tensor([[0.0176, 0.3546, 0.8935],
[0.7100, 0.9868, 0.9970],
[0.3909, 0.3361, 0.6061]]),
tensor([[0.5709, 0.4006, 0.3961]]))
torch.split(torch.rand(7,3), 2, dim=0)
(tensor([[0.5106, 0.2453, 0.9660],
[0.3012, 0.6863, 0.7366]]),
tensor([[0.5543, 0.5786, 0.6808],
[0.9212, 0.9496, 0.5799]]),
tensor([[0.5509, 0.8643, 0.6333],
[0.4321, 0.4174, 0.4292]]),
tensor([[0.5197, 0.9226, 0.5475]]))
torch.split(torch.rand(7,4), (4, 3), dim=0)
(tensor([[0.1663, 0.2489, 0.9874, 0.7570],
[0.8650, 0.9872, 0.4446, 0.3961],
[0.1678, 0.0448, 0.8555, 0.9096],
[0.9019, 0.0997, 0.2572, 0.0637]]),
tensor([[0.0106, 0.4245, 0.2285, 0.2700],
[0.4306, 0.1111, 0.4926, 0.5459],
[0.4850, 0.4326, 0.0123, 0.9814]]))
unbind()
torch.unbind(input, dim=0)
Removes a dimension, returning a tuple of slices along it
torch.unbind(torch.rand(3,4), dim=0)
(tensor([0.0992, 0.3381, 0.6153, 0.9314]),
tensor([0.7412, 0.9560, 0.3918, 0.7036]),
tensor([0.8900, 0.5912, 0.4202, 0.4862]))
index_select
torch.index_select(tensor, dim, index)
A convenient alternative to indexing when there are many dimensions
torch.index_select(torch.rand(3,3,3), dim=0, index=torch.tensor([0,2]))
tensor([[[0.7024, 0.0431, 0.5123],
[0.3802, 0.1955, 0.8054],
[0.5484, 0.9898, 0.4589]],
[[0.7791, 0.0389, 0.8322],
[0.1915, 0.6176, 0.9362],
[0.7712, 0.2995, 0.0981]]])
masked_select
torch.masked_select(input, mask, out=None)
test = torch.rand(10)
torch.masked_select(test, test>0.5)
tensor([0.7480, 0.5427, 0.5509, 0.5230, 0.6500, 0.7832, 0.9810, 0.7644])
test[test>0.5]
tensor([0.7480, 0.5427, 0.5509, 0.5230, 0.6500, 0.7832, 0.9810, 0.7644])
Data handling
Dataset
from torch.utils.data import Dataset
A custom dataset
class MyDataset(Dataset):
    # Constructor
    def __init__(self, data_tensor, target_tensor):
        self.data_tensor = data_tensor
        self.target_tensor = target_tensor
    # Return the size of the dataset
    def __len__(self):
        return self.data_tensor.size(0)
    # Return the sample and label at the given index
    def __getitem__(self, index):
        return self.data_tensor[index], self.target_tensor[index]
my_dataset = MyDataset(torch.randn(10, 3), torch.randn(10))
my_dataset.__len__()
10
my_dataset[2]
(tensor([-0.0947, -0.5704, 0.1172]), tensor(-0.2017))
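For a simple tensor/label pair like this, torch.utils.data.TensorDataset gives the same behaviour without writing a class (sketch; tensor_dataset is an illustrative name):
from torch.utils.data import TensorDataset
tensor_dataset = TensorDataset(torch.randn(10, 3), torch.randn(10))
len(tensor_dataset)  # 10
tensor_dataset[2]    # a (data, label) pair, just like my_dataset[2]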
DataLoader
- An iterable over the dataset
- Handles multi-process loading, shuffling, and batching
from torch.utils.data import DataLoader
tensor_dataloader = DataLoader(dataset=my_dataset,  # the dataset to load (required)
                               batch_size=3,        # batch size
                               shuffle=True,        # shuffle the data
                               num_workers=0)       # number of worker processes; 0 means main process only
# Iterate over batches in a loop
for data, target in tensor_dataloader:
    print(data, target)
tensor([[-2.1315, -0.3849, -0.0614],
[-1.2755, -0.0085, 1.1822],
[-0.3820, 1.0813, 1.3424]]) tensor([0.8899, 0.4688, 1.5528])
tensor([[-0.0947, -0.5704, 0.1172],
[ 0.3214, -2.5288, 0.4268],
[-1.8927, -0.8425, -1.3250]]) tensor([-0.2017, 0.4900, 1.3065])
tensor([[ 1.2582, 0.3975, 0.1769],
[-0.4028, -0.5276, 1.0662],
[ 0.0277, 1.3796, 0.4556]]) tensor([ 0.8842, 0.6022, -0.8440])
tensor([[-0.9969, 1.3454, 0.0579]]) tensor([-1.5651])
# Fetch a single batch
print('One batch tensor data: ', iter(tensor_dataloader).next())
One batch tensor data: [tensor([[-0.0947, -0.5704, 0.1172],
[ 0.0277, 1.3796, 0.4556],
[ 0.3214, -2.5288, 0.4268]]), tensor([-0.2017, -0.8440, 0.4900])]
Torchvision
Common datasets + common network models + common image transforms
torchvision.datasets
Common datasets
import torchvision
from torchvision import transforms
# MNIST as an example
mnist_dataset = torchvision.datasets.MNIST(root='./data',
train=True,
transform=None,
target_transform=None,
download=True)
test = list(mnist_dataset)[0][0]
test
torchvision.transforms
Data preprocessing
tt = transforms.ToTensor()
tt(test)
test = transforms.Resize((200,200))(test)
test
transforms.CenterCrop((100, 100))(test)
torchvision.transforms.RandomCrop((100, 100))(test)
torchvision.transforms.FiveCrop((100, 100))(test)[0]
# p sets the flip probability
transforms.RandomHorizontalFlip(p=0.8)(test)
torchvision.transforms.RandomVerticalFlip(p=0.5)(test)
# Transforms that operate only on Tensors
transforms.ToPILImage()(transforms.Normalize(-1, 1)(tt(test)))
# Define a composed transform pipeline
composed = transforms.Compose([transforms.Resize((200, 200)),
transforms.RandomCrop(80)])
# Image after the composed transform
composed(test)
transforms.Compose([
transforms.RandomResizedCrop((200, 200)),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
Compose(
RandomResizedCrop(size=(200, 200), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR)
RandomHorizontalFlip(p=0.5)
ToTensor()
Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)
torchvision.models
import torchvision.models as models
googlenet = models.googlenet(pretrained=True)
torchvision.utils
# Load the MNIST dataset
mnist_dataset = torchvision.datasets.MNIST(root='./data',
train=False,
transform=transforms.ToTensor(),
target_transform=None,
download=True)
# Take a batch of 32 image tensors
tensor_dataloader = DataLoader(dataset=mnist_dataset,
batch_size=32)
data_iter = iter(tensor_dataloader)
img_tensor, label_tensor = data_iter.next()
print(img_tensor.shape)
torch.Size([32, 1, 28, 28])
googlenet(img_tensor.repeat(1,3,1,1))
# Arrange the 32 images into a single grid
grid_tensor = torchvision.utils.make_grid(img_tensor, nrow=8, padding=2)
transforms.ToPILImage()(grid_tensor)
# Input is a single image tensor; save it directly
torchvision.utils.save_image(grid_tensor, 'grid.jpg')
# Input is a batch of tensors; make_grid is applied internally before saving
torchvision.utils.save_image(img_tensor, 'grid2.jpg', nrow=5, padding=2)
Neural networks
Convolution
The Conv2d class
import torch.nn as nn
input_feat = torch.tensor([[4, 1, 7, 5], [4, 4, 2, 5], [7, 7, 2, 4], [1, 0, 2, 4]], dtype=torch.float32).unsqueeze(0).unsqueeze(0)
print(input_feat)
print(input_feat.shape)
tensor([[[[4., 1., 7., 5.],
[4., 4., 2., 5.],
[7., 7., 2., 4.],
[1., 0., 2., 4.]]]])
torch.Size([1, 1, 4, 4])
conv2d = nn.Conv2d(1, 1, (2, 2), stride=1, padding=1, bias=False)
# The kernel must be 4-D: (out_channels, in_channels, height, width)
kernels = torch.tensor([[[[1, 0], [2, 1]]]], dtype=torch.float32)
conv2d.weight = nn.Parameter(kernels, requires_grad=False)
print(conv2d.weight)
print(conv2d.bias)
Parameter containing:
tensor([[[[1., 0.],
[2., 1.]]]])
None
conv2d(input_feat)
tensor([[[[ 4., 9., 9., 19., 10.],
[ 4., 16., 11., 16., 15.],
[ 7., 25., 20., 10., 13.],
[ 1., 9., 9., 10., 12.],
[ 0., 1., 0., 2., 4.]]]])
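The 5x5 output follows the usual size formula H_out = (H + 2*padding - kernel) // stride + 1; a quick check for this example (sketch):
H, k, p, s = 4, 2, 1, 1
(H + 2*p - k) // s + 1  # 5, matching the 5x5 output above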
Depthwise separable convolution
- The groups parameter controls how the input and output feature maps are split into groups
- When groups is not 1, it must divide both in_channels and out_channels
- When groups equals in_channels, this is a depthwise (DW) convolution
# Create a 3-channel 5x5 feature map (with a batch dimension)
x = torch.rand((3, 5, 5)).unsqueeze(0)
print(x.shape)
# Output:
# torch.Size([1, 3, 5, 5])
# Note: in a DW convolution the number of input and output channels is the same
in_channels_dw = x.shape[1]
out_channels_dw = x.shape[1]
# DW convolutions typically use a kernel size of 3
kernel_size = 3
stride = 1
# For DW convolution, groups equals the number of input channels
dw = nn.Conv2d(in_channels_dw, out_channels_dw, kernel_size, stride, groups=in_channels_dw)
torch.Size([1, 3, 5, 5])
in_channels_pw = out_channels_dw
out_channels_pw = 4
kernel_size_pw = 1
pw = nn.Conv2d(in_channels_pw, out_channels_pw, kernel_size_pw, stride)
out = pw(dw(x))
print(out.shape)
torch.Size([1, 4, 3, 3])
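The point of the DW + PW factorization is the parameter saving. Comparing against a standard 3x3 convolution mapping 3 channels to 4 (a sketch reusing dw and pw from above; standard_conv is just an illustrative name):
standard_conv = nn.Conv2d(3, 4, 3, stride=1)
sum(p.numel() for p in standard_conv.parameters())  # 112 = 4*3*3*3 weights + 4 biases
sum(p.numel() for p in dw.parameters()) + sum(p.numel() for p in pw.parameters())
# 46 = (3*1*3*3 + 3) + (4*3*1*1 + 4)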
Dilated (atrous) convolution: dilation
nn.Conv2d(2,2,3,dilation=2)
Conv2d(2, 2, kernel_size=(3, 3), stride=(1, 1), dilation=(2, 2))
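With dilation d, the kernel's effective extent becomes d*(k-1)+1, so the output size is (H + 2*p - d*(k-1) - 1) // s + 1; a quick shape check (sketch):
nn.Conv2d(2, 2, 3, dilation=2)(torch.rand(1, 2, 7, 7)).shape
# torch.Size([1, 2, 3, 3]), since the effective kernel extent is 2*(3-1)+1 = 5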
Distributed training
Basic information
# Is a GPU available?
torch.cuda.is_available()
False
# Number of GPUs
torch.cuda.device_count()
0
# Create a device object for the CPU or a GPU; multiple GPUs are distinguished by index
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
Moving between devices
- Both data and models can be moved between the CPU and GPUs with to()
data = torch.ones((3, 3))
print(data.device)
# Move data to the GPU (it stays on the CPU here because no GPU is available)
data_gpu = data.to(device)
print(data_gpu.device)
cpu
cpu
DataParallel
- torch.nn.DataParallel(module, device_ids=None, output_device=None, dim=0)
- Single-process data parallelism: the input batch is automatically split across GPUs
import os
class ASimpleNet(nn.Module):
    def __init__(self, layers=3):
        super(ASimpleNet, self).__init__()
        self.linears = nn.Sequential(*[nn.Linear(3, 3, bias=False) for i in range(layers)])
    def forward(self, x):
        print("forward batchsize is: {}".format(x.size()[0]))
        x = self.linears(x)
        x = torch.relu(x)
        return x
batch_size = 16
inputs = torch.randn(batch_size, 3)
labels = torch.randn(batch_size, 3)
inputs, labels = inputs.to(device), labels.to(device)
net = ASimpleNet()
net = nn.DataParallel(net)
net.to(device)
print("CUDA_VISIBLE_DEVICES :{}".format(os.environ["CUDA_VISIBLE_DEVICES"]))
for epoch in range(1):
    outputs = net(inputs)
forward batchsize is: 16
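Note: with only the CPU (or a single GPU) visible, the whole batch of 16 goes through one replica, hence the single printed line; with N GPUs, DataParallel would split the batch and each replica would print its own share (e.g. two GPUs would each report a batch size of 8).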
Visualization
Tensorboard
- Launch command: tensorboard --logdir=runs
- Serves at: http://127.0.0.1:6006/
import random
import numpy as np
import torch
from torch import nn
# Model definition
class LinearModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(1))
        self.bias = nn.Parameter(torch.randn(1))
    def forward(self, input):
        return (input * self.weight) + self.bias
# Data
w = 2
b = 3
xlim = [-10, 10]
x_train = np.random.randint(low=xlim[0], high=xlim[1], size=30)
y_train = [w * x + b + random.randint(0,2) for x in x_train]
# Tensorboard
from tensorboardX import SummaryWriter
# from torch.utils.tensorboard import SummaryWriter
# Training
model = LinearModel()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, weight_decay=1e-2, momentum=0.9)
y_train = torch.tensor(y_train, dtype=torch.float32)
writer = SummaryWriter()
for n_iter in range(500):
    input = torch.from_numpy(x_train)
    output = model(input)
    loss = nn.MSELoss()(output, y_train)
    model.zero_grad()
    loss.backward()
    optimizer.step()
    writer.add_scalar('Loss/train', loss, n_iter)
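One detail worth adding after the loop so that the last events are flushed to disk (both SummaryWriter implementations provide it):
writer.close()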
Visdom
- Launch command: python -m visdom.server
- Serves at: http://127.0.0.1:8097/
from visdom import Visdom
import numpy as np
import time
# Instantiate the Visdom client
viz = Visdom()
# Create and initialize a window
viz.line([0.], [0], win='train_loss', opts=dict(title='train_loss'))
for n_iter in range(10):
    # Generate a random loss value
    loss = 0.2 * np.random.randn() + 1
    # Append the new point to the window
    viz.line([loss], [n_iter], win='train_loss', update='append')
    time.sleep(0.5)
img = np.zeros((3, 100, 100))
img[0] = np.arange(0, 10000).reshape(100, 100) / 10000
img[1] = 1 - np.arange(0, 10000).reshape(100, 100) / 10000
# Visualize the image
viz.image(img)
Setting up a new session...
'window_3ac152bac92404'
from visdom import Visdom
import numpy as np
# Training
model = LinearModel()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, weight_decay=1e-2, momentum=0.9)
y_train = torch.tensor(y_train, dtype=torch.float32)
# Instantiate a Visdom client
viz = Visdom(port=8097)
# Initialize the window
viz.line([0.], [0.], win='train_loss', opts=dict(title='train loss'))
for n_iter in range(500):
    input = torch.from_numpy(x_train)
    output = model(input)
    loss = nn.MSELoss()(output, y_train)
    model.zero_grad()
    loss.backward()
    optimizer.step()
    # Append the monitored value
    viz.line([loss.item()], [n_iter], win='train_loss', update='append')
<ipython-input-128-20de72c468e0>:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
y_train = torch.tensor(y_train, dtype=torch.float32)
Setting up a new session...
viz = Visdom()
viz.line([[0., 0.]], [0], win='train', opts=dict(title='loss_accuracy', legend=['loss', 'acc']))
for n_iter in range(10):
    loss = 0.2 * np.random.randn() + 1
    accuracy = 0.1 * np.random.randn() + 1
    viz.line([[loss, accuracy]], [n_iter], win='train', update='append')
    time.sleep(0.5)
Setting up a new session...
False