nackta blog: Pytorch tutorial

nackta

socar 부트캠프의 강의 중 일부를 정리한 내용입니다.

import numpy as np

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

from torchvision import datasets, transforms

import matplotlib.pyplot as plt

torch tensor

tensor는 numpy의 ndarray처럼 다차원 데이터 배열입니다. tensor를 생성할 때는 list를 사용할 수도 있지만 ndarray를 사용할 수도 있습니다.

ndarray와 tensor의 차이점은, tensor는 back propagation(역전파)을 다루기 위해 forward pass(순전파)에서 전달된 값과 연산의 종류를 기억할 수 있다는 점입니다.(gradient 값 저장)

tensor의 parameter

data : list나 ndarray 등 array데이터
dtype : 데이터 타입
device : tensor가 위치해있는 device 지정
requires_grad : gradient 값 저장 유무

tensor의 생성

ndarray와 비슷한 생성 방법입니다.

a = torch.tensor([[1.0, 4.0], [4.0, 3.0]])
b = torch.tensor([[4, 3], [1, 4], [1, 2]])
print(a)

tensor([[1., 4.],
        [4., 3.]])

print(b)

tensor([[4, 3],
        [1, 4],
        [1, 2]])

tensor.dtype : tensor의 자료형 확인
tensor.shape : tensor의 size 확인

print(a.dtype, a.shape)

torch.float32 torch.Size([2, 2])

print(b.dtype, b.shape)

torch.int64 torch.Size([3, 2])

torch.ones(*size) : 모든 원소가 1인 tensor
torch.zeros(*size) : 모든 원소가 0인 tensor
torch.eye(n, m) : 대각 원소가 1이고 나머지가 0인 \(n \times m\) tensor를 생성. m = None이면 \(n \times n\) tensor를 생성
torch.rand(*size) : 모든 원소가 [0, 1) 구간의 균등분포에서 뽑은 랜덤한 값으로 채워진 tensor. dtype을 int로 지정시 에러가 발생

a = torch.ones([2, 3])
b = torch.zeros([3, 2], dtype=torch.int64)
c = torch.eye(4,3)
d = torch.rand([2, 4, 3], dtype=torch.float)

print(a)

tensor([[1., 1., 1.],
        [1., 1., 1.]])

print(b)

tensor([[0, 0],
        [0, 0],
        [0, 0]])

print(c)

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 0., 0.]])

print(d)

tensor([[[0.1721, 0.5773, 0.8588],
         [0.8192, 0.3488, 0.5716],
         [0.3033, 0.4843, 0.1258],
         [0.6734, 0.4262, 0.9605]],

        [[0.0436, 0.7960, 0.0340],
         [0.6968, 0.8373, 0.5634],
         [0.0413, 0.0030, 0.7226],
         [0.0329, 0.8479, 0.7469]]])

앞서 언급했듯이 ndarray로도 tensor를 생성할 수 있습니다. tensor()는 원본의 값을 복사하는 반면 as_tensor()와 from_numpy()는 원본의 값을 참조하기 때문에 원본의 값을 바꾸면 같이 변하는 것을 알 수 있습니다.

import numpy as np

d = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])

a = torch.tensor(d)
b = torch.as_tensor(d)
c = torch.from_numpy(d)

print(a, id(a))

tensor([[1, 1, 1],
        [2, 2, 2],
        [3, 3, 3]], dtype=torch.int32) 883303344

print(b, id(b))

tensor([[1, 1, 1],
        [2, 2, 2],
        [3, 3, 3]], dtype=torch.int32) 883288720

print(c, id(c))

tensor([[1, 1, 1],
        [2, 2, 2],
        [3, 3, 3]], dtype=torch.int32) 883073808

print(d, id(d))

[[1 1 1]
 [2 2 2]
 [3 3 3]] 883007184

d[0,0] = 0

print(a)

tensor([[1, 1, 1],
        [2, 2, 2],
        [3, 3, 3]], dtype=torch.int32)

print(b)

tensor([[0, 1, 1],
        [2, 2, 2],
        [3, 3, 3]], dtype=torch.int32)

print(c)

tensor([[0, 1, 1],
        [2, 2, 2],
        [3, 3, 3]], dtype=torch.int32)

print(d)

[[0 1 1]
 [2 2 2]
 [3 3 3]]

tensor.shape을 통해 동일한 size의 tensor를 만들 수도 있지만 _like(tensor) 방법으로도 동일한 size의 tensor를 만들 수 있습니다.

a = torch.ones(a.shape)
print(a)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

b = torch.ones_like(a)
c = torch.zeros_like(a, dtype=torch.float)
d = torch.rand_like(a, dtype=torch.float)

print(b)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

print(c)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

print(d)

tensor([[0.0823, 0.9254, 0.6186],
        [0.4919, 0.9770, 0.6451],
        [0.4601, 0.9505, 0.9724]])

device 지정

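gpu연산을 위해서는 tensor의 device를 cuda로 지정해줘야합니다. torch.cuda.is_available()를 통해 gpu가 사용 가능한지 확인 후 tensor.to('cuda')를 통해 device를 지정할 수 있습니다. 단, tensor.to()는 원본을 바꾸지 않고 해당 device에 있는 tensor를 반환하므로, 결과를 계속 사용하려면 a = a.to(device)처럼 다시 할당해야 합니다.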

torch.cuda.is_available() # 저는 gpu가 없어요 ㅠㅜ

False

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
    
a.to(device)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

a.device

device(type='cpu')

tensor 다루기

a = torch.ones([2, 3])
b = torch.zeros([2, 3])

torch.cat() : 텐서 합치기

c = torch.cat([a, b], dim=0) # 열방향으로 합치기
d = torch.cat([a, b], dim=1) # 행방향으로 합치기
print(c)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [0., 0., 0.],
        [0., 0., 0.]])

print(c.shape)

torch.Size([4, 3])

print(d)

tensor([[1., 1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0., 0.]])

print(d.shape)

torch.Size([2, 6])

torch.stack : 텐서 쌓기

새로운 차원(dim)에 따라 tensor들을 쌓아줍니다. dim=0일때에는 tensor 전체를 기준으로 쌓고, dim=1일때는 tensor의 다음 차원을 기준으로 쌓는 식입니다. 그러므로 dim은 0 이상, 입력 tensor의 차원 수 이하여야 하고(예: 2차원 tensor라면 0, 1, 2), stack 안의 tensor들은 서로 size가 같아야 합니다.

c = torch.stack([a, b], dim=0)
d = torch.stack([a, b], dim=1)
e = torch.stack([a, b], dim=2)
print(c)

tensor([[[1., 1., 1.],
         [1., 1., 1.]],

        [[0., 0., 0.],
         [0., 0., 0.]]])

print(c.shape)

torch.Size([2, 2, 3])

print(d)

tensor([[[1., 1., 1.],
         [0., 0., 0.]],

        [[1., 1., 1.],
         [0., 0., 0.]]])

print(d.shape)

torch.Size([2, 2, 3])

print(e)

tensor([[[1., 0.],
         [1., 0.],
         [1., 0.]],

        [[1., 0.],
         [1., 0.],
         [1., 0.]]])

print(e.shape)

torch.Size([2, 3, 2])

hstack : torch.cat([a, b], dim=1) (2차원 이상의 tensor 기준)
vstack : torch.cat([a, b], dim=0)

c = torch.hstack([a, b])
print(c)

tensor([[1., 1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0., 0.]])

print(c.shape)

torch.Size([2, 6])

d = torch.vstack([a, b])
print(d)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [0., 0., 0.],
        [0., 0., 0.]])

print(d.shape)

torch.Size([4, 3])

torch.unsqueeze() : 더미차원 추가
torch.squeeze() : 더미차원 삭제

# unsqueeze and squeeze
print(a)

tensor([[1., 1., 1.],
        [1., 1., 1.]])

a = torch.unsqueeze(a, dim=1)
print(a)

tensor([[[1., 1., 1.]],

        [[1., 1., 1.]]])

print(a.shape)

torch.Size([2, 1, 3])

print(torch.squeeze(a))

tensor([[1., 1., 1.],
        [1., 1., 1.]])

print(a.shape)

torch.Size([2, 1, 3])

tensor의 연산

행렬 곱

a = torch.tensor(np.array(list(range(12)))).reshape(3, 4)
print(a)

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]], dtype=torch.int32)

b = torch.tensor(np.array(list(range(8)))).reshape(4, 2)
print(b)

tensor([[0, 1],
        [2, 3],
        [4, 5],
        [6, 7]], dtype=torch.int32)

c = a @ b
print(c)

tensor([[ 28,  34],
        [ 76,  98],
        [124, 162]], dtype=torch.int32)

d = torch.matmul(a, b)
print(d)

tensor([[ 28,  34],
        [ 76,  98],
        [124, 162]], dtype=torch.int32)

element-wise product

a = torch.tensor(np.array(list(range(6)))).reshape(2, 3)
print(a)

tensor([[0, 1, 2],
        [3, 4, 5]], dtype=torch.int32)

b = torch.tensor(list(range(10, 13))).reshape(1, 3)
print(b)

tensor([[10, 11, 12]])

c = a * b
print(c)

tensor([[ 0, 11, 24],
        [30, 44, 60]])

d = a.mul(b)
print(d)

tensor([[ 0, 11, 24],
        [30, 44, 60]])

item() : tensor의 원소를 반환합니다. tensor 안에 원소가 1개만 있어야 합니다.

# item
agg = d.sum()
v = agg.item()
print(v, type(v))

169 <class 'int'>

inplace operations

print(a)

tensor([[0, 1, 2],
        [3, 4, 5]], dtype=torch.int32)

a.add(5)

tensor([[ 5,  6,  7],
        [ 8,  9, 10]], dtype=torch.int32)

print(a)

tensor([[0, 1, 2],
        [3, 4, 5]], dtype=torch.int32)

a.add_(5) # inplace operations

tensor([[ 5,  6,  7],
        [ 8,  9, 10]], dtype=torch.int32)

print(a)

tensor([[ 5,  6,  7],
        [ 8,  9, 10]], dtype=torch.int32)

Dataset

파이토치에서 Dataset은 전체 데이터를 sample 단위로 처리해주는 역할을 합니다. Dataset을 상속받아 오버라이딩을 통해 커스텀 Dataset을 만들어보겠습니다.

커스텀 Dataset 구조

class CustomDataset(torch.utils.data.Dataset):
    """Skeleton of a custom dataset; fill in each method for real use.

    The placeholder bodies keep the template syntactically valid: a method
    whose body is only a comment does not parse.
    """

    def __init__(self):
        # Load and preprocess the dataset here.
        pass

    def __len__(self):
        # Return the number of samples in the dataset.
        raise NotImplementedError

    def __getitem__(self, idx):
        # Extract and return the sample at position ``idx``.
        raise NotImplementedError

다음 커스텀 데이터셋을 확인해보면 __init__에서 features와 target 데이터, 전처리 함수를 저장하도록 정의되어 있고, __len__에서 data의 길이를 반환해주도록 정의되어 있습니다. 마지막으로 __getitem__에서 저장된 데이터를 인덱싱 후 정의된 전처리 함수를 거쳐 반환되도록 구현되어 있습니다.

class LionDataset(Dataset):
    """Dataset pairing feature samples with their targets.

    Args:
        data: Indexable, sized collection of feature samples.
        target: Collection of targets, indexed in step with ``data``.
        transform: Optional callable applied to the feature value returned
            by ``__getitem__``.
        target_transform: Optional callable applied to the target value
            returned by ``__getitem__``.
    """

    def __init__(self, data, target, transform=None, target_transform=None):
        self.data = data  # feature data
        self.target = target  # target data
        self.transform = transform  # preprocessing for features
        self.target_transform = target_transform  # preprocessing for targets

    def __len__(self):
        """Return the number of samples, i.e. ``len(self.data)``."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(feature, target)`` pair selected by ``idx``.

        ``idx`` is forwarded unchanged to both containers, so whatever
        indexing they support (an int, a slice, ...) is supported here too.
        """
        x = self.data[idx]
        y = self.target[idx]

        # Compare against None instead of relying on truthiness: a callable
        # that happens to be falsy (e.g. it defines ``__len__`` returning 0)
        # must still be applied.
        if self.transform is not None:
            x = self.transform(x)
        if self.target_transform is not None:
            y = self.target_transform(y)

        return x, y

data = np.array(list(range(100))).reshape(-1, 2)
target = np.array([[i] * 5 for i in range(10)]).reshape(-1)

print(data)

[[ 0  1]
 [ 2  3]
 [ 4  5]
 [ 6  7]
 [ 8  9]
 [10 11]
 [12 13]
 [14 15]
 [16 17]
 [18 19]
 [20 21]
 [22 23]
 [24 25]
 [26 27]
 [28 29]
 [30 31]
 [32 33]
 [34 35]
 [36 37]
 [38 39]
 [40 41]
 [42 43]
 [44 45]
 [46 47]
 [48 49]
 [50 51]
 [52 53]
 [54 55]
 [56 57]
 [58 59]
 [60 61]
 [62 63]
 [64 65]
 [66 67]
 [68 69]
 [70 71]
 [72 73]
 [74 75]
 [76 77]
 [78 79]
 [80 81]
 [82 83]
 [84 85]
 [86 87]
 [88 89]
 [90 91]
 [92 93]
 [94 95]
 [96 97]
 [98 99]]

print(target)

[0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3 4 4 4 4 4 5 5 5 5 5 6 6 6 6 6 7 7
 7 7 7 8 8 8 8 8 9 9 9 9 9]

lion = LionDataset(data=data, target=target)
print(lion[0:4])

(array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]]), array([0, 0, 0, 0]))

print(len(lion))

DataLoader

DataLoader는 dataset을 batch 단위로 묶어주는 역할을 합니다.

batch_size : batch_size
shuffle : True시 epoch마다 데이터가 학습되는 순서가 섞임

loader = DataLoader(dataset=lion, batch_size=10, shuffle=True)

for i, batch in enumerate(loader):
    x, y = batch
    if i == 0:
        print(x)
        print(y)
    print(x.shape)

tensor([[38, 39],
        [ 4,  5],
        [88, 89],
        [62, 63],
        [84, 85],
        [44, 45],
        [40, 41],
        [20, 21],
        [66, 67],
        [68, 69]], dtype=torch.int32)
tensor([3, 0, 8, 6, 8, 4, 4, 2, 6, 6], dtype=torch.int32)
torch.Size([10, 2])
torch.Size([10, 2])
torch.Size([10, 2])
torch.Size([10, 2])
torch.Size([10, 2])

torch.nn.Module

pytorch 모델은 parameters를 추적하며 forward pass를 진행한 뒤 back propagation을 통해 학습을 진행합니다. torch.nn.Module은 여러 층의 layer로 이뤄진 모델을 쉽게 관리할 수 있는 class입니다.

pytorch 모델의 기본구조

class Model_Name(nn.Module):
    """Skeleton of a PyTorch model; replace the placeholders with real layers."""

    def __init__(self):
        """Define the building blocks of the model.

        Declare the layers (nn.Linear, nn.Conv2d) and activation functions
        (nn.functional.relu, nn.functional.sigmoid) that the model will use.
        """
        super().__init__()

    def forward(self, x):
        """Define the computation the model performs on ``x``.

        The template returns ``x`` unchanged.
        """
        return x

example

class LionLinear(nn.Module):
    """Flatten the input, apply one linear layer, then ReLU.

    Args:
        input_dim: Number of features per sample after flattening.
        output_dim: Number of output features.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # Keep the layer sizes around for later inspection.
        self.input_dim, self.output_dim = input_dim, output_dim

        # Sub-modules, registered in the order forward() applies them.
        self.flatten = nn.Flatten()
        self.classifier = nn.Linear(in_features=input_dim, out_features=output_dim)
        self.act = nn.ReLU()

    def forward(self, x):
        """Return ``ReLU(Linear(Flatten(x)))``."""
        flat = self.flatten(x)
        scores = self.classifier(flat)
        return self.act(scores)

pytorch 모델은 모델의 구조를 쉽게 파악할 수 있습니다.

linear_model = LionLinear(28*28, 10).to(device)
print(linear_model)

LionLinear(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (classifier): Linear(in_features=784, out_features=10, bias=True)
  (act): ReLU()
)

print(linear_model.classifier)

Linear(in_features=784, out_features=10, bias=True)

다음은 MLP를 구현하는 module입니다.

코드를 더 간결하게 하기 위해 다음과 같이 일부 layer 등을 따로 모듈로 구현 후 전체 모듈에 합쳐서 구현할 수 있습니다.

class LionLayer(nn.Module):
    """Thin wrapper around ``nn.Linear`` that checks the input width first.

    Args:
        input_dim: Expected size of the last dimension of the input.
        output_dim: Size of the last dimension of the output.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.input_dim, self.output_dim = input_dim, output_dim
        self.layer = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x``.

        Raises:
            AssertionError: If the last dimension of ``x`` is not
                ``input_dim``.
        """
        last_dim = x.shape[-1]
        assert last_dim == self.input_dim, "Input dimension mismatch"
        return self.layer(x)

class LionMLP(nn.Module):
    """Two-layer perceptron: Flatten -> Linear -> ReLU -> Linear -> Softmax.

    Args:
        input_dim: Number of features per sample after flattening.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of output features.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.flatten = nn.Flatten()
        self.linear_1 = LionLayer(self.input_dim, self.hidden_dim)
        self.linear_2 = LionLayer(self.hidden_dim, self.output_dim)
        self.act_1 = nn.ReLU()
        # Name the softmax dimension explicitly. Omitting ``dim`` relies on a
        # deprecated implicit choice and emits a warning on every forward
        # pass. After Flatten the activations are 2-D, for which the implicit
        # choice is dim=1, so the numerical result is unchanged.
        # print(model) now reports Softmax(dim=1) instead of Softmax(dim=None).
        self.act_2 = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the softmax output of the network for input ``x``."""
        x = self.flatten(x)
        x = self.act_1(self.linear_1(x))
        x = self.act_2(self.linear_2(x))
        return x

mlp = LionMLP(28*28, 50, 10)
print(mlp)

LionMLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_1): LionLayer(
    (layer): Linear(in_features=784, out_features=50, bias=True)
  )
  (linear_2): LionLayer(
    (layer): Linear(in_features=50, out_features=10, bias=True)
  )
  (act_1): ReLU()
  (act_2): Softmax(dim=None)
)

nn.Sequential()을 통해 forward() 부분을 짧게 작성할 수 있습니다.

class LionMLP(nn.Module):
    """Two-layer perceptron whose forward pass is a single ``nn.Sequential``.

    The sub-modules stay registered as individual attributes too, so the
    parameter names (``linear_1.layer.weight``, ...) are unchanged.

    Args:
        input_dim: Number of features per sample after flattening.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of output features.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.flatten = nn.Flatten()
        self.linear_1 = LionLayer(self.input_dim, self.hidden_dim)
        self.linear_2 = LionLayer(self.hidden_dim, self.output_dim)
        self.act_1 = nn.ReLU()
        # Name the softmax dimension explicitly. Omitting ``dim`` relies on a
        # deprecated implicit choice and emits a warning on every forward
        # pass. After Flatten the activations are 2-D, for which the implicit
        # choice is dim=1, so the numerical result is unchanged.
        # print(model) now reports Softmax(dim=1) instead of Softmax(dim=None).
        self.act_2 = nn.Softmax(dim=1)
        # Chain the modules once so forward() is a single call.
        self.model = nn.Sequential(
            self.flatten,
            self.linear_1,
            self.act_1,
            self.linear_2,
            self.act_2,
        )

    def forward(self, x):
        """Run ``x`` through the sequential pipeline and return the result."""
        return self.model(x)

mlp = LionMLP(28 * 28, 40, 10)
print(mlp)

LionMLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_1): LionLayer(
    (layer): Linear(in_features=784, out_features=40, bias=True)
  )
  (linear_2): LionLayer(
    (layer): Linear(in_features=40, out_features=10, bias=True)
  )
  (act_1): ReLU()
  (act_2): Softmax(dim=None)
  (model): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): LionLayer(
      (layer): Linear(in_features=784, out_features=40, bias=True)
    )
    (2): ReLU()
    (3): LionLayer(
      (layer): Linear(in_features=40, out_features=10, bias=True)
    )
    (4): Softmax(dim=None)
  )
)

모델 파라미터 확인

for name, param in mlp.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values: {param[:2]} \n")

Layer: linear_1.layer.weight | Size: torch.Size([40, 784]) | Values: tensor([[ 0.0178, -0.0108, -0.0017,  ...,  0.0248, -0.0011,  0.0218],
        [-0.0343,  0.0235, -0.0201,  ...,  0.0321, -0.0141,  0.0180]],
       grad_fn=<SliceBackward0>) 

Layer: linear_1.layer.bias | Size: torch.Size([40]) | Values: tensor([ 0.0207, -0.0041], grad_fn=<SliceBackward0>) 

Layer: linear_2.layer.weight | Size: torch.Size([10, 40]) | Values: tensor([[ 0.0301, -0.0820, -0.1123, -0.0777, -0.0678, -0.1305, -0.0293,  0.0596,
         -0.0557, -0.1315, -0.1003,  0.0239,  0.1351,  0.0894, -0.0559, -0.0034,
          0.0682,  0.1417,  0.0025, -0.0303, -0.0004, -0.0029,  0.0281,  0.0950,
          0.0631, -0.0958, -0.0276, -0.1566,  0.1130, -0.0361,  0.0906, -0.0657,
          0.1251,  0.0872, -0.1033,  0.0821,  0.0856, -0.1505,  0.1350,  0.1250],
        [ 0.0562,  0.0557,  0.0730, -0.1076,  0.0850, -0.1426,  0.1005,  0.1108,
          0.0231,  0.1560,  0.1185, -0.0472,  0.0049,  0.0836,  0.0008, -0.1055,
          0.1363,  0.1266, -0.0864,  0.1325,  0.1444,  0.1412,  0.1253, -0.0832,
          0.0536,  0.0351, -0.1228, -0.0855, -0.0909,  0.0840, -0.0335,  0.0978,
         -0.0824,  0.1048,  0.0254, -0.0287,  0.0238,  0.1337, -0.1085,  0.1532]],
       grad_fn=<SliceBackward0>) 

Layer: linear_2.layer.bias | Size: torch.Size([10]) | Values: tensor([ 0.0235, -0.0153], grad_fn=<SliceBackward0>)

reference

PyTorch로 시작하는 딥 러닝 입문 : https://wikidocs.net/57165

https://anweh.tistory.com/21

Pytorch tutorial