# 연습장

JiyunLim  
2023-08-28

In [1]:
try:
    from tqdm import tqdm
except ImportError:
    def tqdm(iterable):
        return iterable

In [2]:
import torch
import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import AGCRN

from torch_geometric_temporal.dataset import ChickenpoxDatasetLoader
from torch_geometric_temporal.signal import temporal_signal_split

In [3]:
loader = ChickenpoxDatasetLoader()

dataset = loader.get_dataset(lags=8)

train_dataset, test_dataset = temporal_signal_split(dataset, train_ratio=0.2)

In [15]:
class RecurrentGCN(torch.nn.Module):
    def __init__(self, node_features):
        super(RecurrentGCN, self).__init__()
        self.recurrent = AGCRN(number_of_nodes = 20,
                              in_channels = node_features, # 노드특징 수
                              out_channels = 16,  # 필터 수
                              K = 2,
                              embedding_dimensions = 4)
        self.linear = torch.nn.Linear(16, 1)

    def forward(self, x, e, h):
        h_0 = self.recurrent(x, e, h)
        y = F.relu(h_0)
        y = self.linear(y)
        return y, h_0

In [16]:
model = RecurrentGCN(node_features = 8)
model

In [17]:
model = RecurrentGCN(node_features = 8)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

model.train()

e = torch.empty(20, 4) # 노드수 x 임베딩 차원 수

torch.nn.init.xavier_uniform_(e) # 임베딩 벡터 초기화

for epoch in tqdm(range(30)):
    cost = 0
    h = None
    for time, snapshot in enumerate(train_dataset):
        x = snapshot.x.view(1, 20, 8)
        y_hat, h = model(x, e, h)
        cost = cost + torch.mean((y_hat-snapshot.y)**2)
    cost = cost / (time+1)
    cost.backward()
    optimizer.step()
    optimizer.zero_grad()

100%|██████████| 30/30 [00:58<00:00,  1.96s/it]

In [18]:
model.eval()
cost = 0
for time, snapshot in enumerate(test_dataset):
    x = snapshot.x.view(1, 20, 8)
    y_hat, h = model(x, e, h)
    cost = cost + torch.mean((y_hat-snapshot.y)**2)
cost = cost / (time+1)
cost = cost.item()
print("MSE: {:.4f}".format(cost))

MSE: 1.0056

AGCRN 모델은 그래프 데이터에서 노드 간의 동적 상호 작용을 학습하여
시계열 데이터의 패턴을 모델링한다. 이를 통해 시간적 상관 관계와 노드
간의 영향력을 효과적으로 표현한다. 모델 내부의 가중치와 임베딩 벡터들은
시간 스텝별로 입력 데이터와 상호 작용하면서 데이터의 변화와 관계를
이해하는 데 사용된다. 따라서 AGCRN 모델은 시계열 데이터의 특징을
추출하고 상관 관계를 모델링하는데 필터와 유사한 역할을 수행한다.

요약하면, self.recurrent에 해당하는 AGCRN 모델은 그래프 내에서 노드 간의
상호 작용을 표현하고 시계열 데이터의 패턴과 상관 관계를 추출하는데
필터와 유사한 역할을 한다.

------------------------------------------------------------------------

In [19]:
# modules 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# pytorch geometric temporal
import torch_geometric_temporal
from torch_geometric_temporal.signal import temporal_signal_split
from torch_geometric_temporal.signal.static_graph_temporal_signal import StaticGraphTemporalSignal
from torch_geometric_temporal.nn.recurrent import AGCRN

# torch
import torch
import torch.nn.functional as F


# read data
def makedict(FX,W=None,node_ids=None):
    T,N = np.array(FX).shape
    if W==None:
        W = pd.DataFrame(FX).corr().to_numpy().reshape(-1).tolist() # corr 로 weights
    if node_ids==None:
        node_ids = ['node'+str(n) for n in range(N)]
    _dict={
        'edges':[[i,j] for i in range(N) for j in range(N)], 
        'node_ids': node_ids,
        'weights':W,
        'FX':FX
    }
    return _dict

class Loader(object):
    def __init__(self, data_dict):
        self._dataset = data_dict
    
    def _get_edges(self):
        self._edges = np.array(self._dataset["edges"]).T

    def _get_edge_weights(self):
        edge_weights = np.array(self._dataset["weights"]).T
        self._edge_weights = edge_weights

    def _get_targets_and_features(self):
        stacked_target = np.stack(self._dataset["FX"])
        self.features = np.stack([
            stacked_target[i : i + self.lags, :].T
            for i in range(stacked_target.shape[0] - self.lags)
        ])
        self.targets = np.stack([
            stacked_target[i + self.lags, :].T
            for i in range(stacked_target.shape[0] - self.lags)
        ])


    def get_dataset(self, lags: int = 4) -> StaticGraphTemporalSignal:
        self.lags = lags
        self._get_edges()
        self._get_edge_weights()
        self._get_targets_and_features()
        dataset = StaticGraphTemporalSignal(
            self._edges, self._edge_weights, self.features, self.targets
        )
        dataset.node_ids = self._dataset['node_ids']
        return dataset
    

# device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device = torch.device('cpu')

`-` solar data

In [20]:
# read dataframe 
df = pd.read_csv('../SOLAR/data_eng_230710.csv')

# make y, y_upper, y_period, time, regions 
y = df.loc[:,'Bukchoncheon':'Gyeongju-si'].to_numpy()
yU = df.loc[:,'Bukchoncheon_Upper':'Gyeongju-si_Upper'].to_numpy()
yP = np.divide(y, yU+1e-10)

t = df.loc[:,'date']
regions = list(df.loc[:, 'Bukchoncheon':'Gyeongju-si'].columns)

In [21]:
dict1 = makedict(y)
loader = Loader(dict1)
dataset = loader.get_dataset(lags=4)
train_dataset, test_dataset = temporal_signal_split(dataset, train_ratio=0.8)

In [22]:
np.array(train_dataset.features).shape, np.array(test_dataset.features).shape

In [56]:
class RecurrentGCN(torch.nn.Module):
    def __init__(self, node_features):
        super(RecurrentGCN, self).__init__()
        self.recurrent = AGCRN(number_of_nodes = 44,
                              in_channels = 4, # 노드특징 수
                              out_channels = 16,  # 필터 수
                              K = 2,
                              embedding_dimensions = 20)
        self.linear = torch.nn.Linear(16, 1)

    def forward(self, x, e, h):
        h_0 = self.recurrent(x, e, h)
        y = F.relu(h_0)
        y = self.linear(y)
        return y, h_0

In [42]:
# model = RecurrentGCN(node_features = 4)

# optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# model.train()

# e = torch.empty(44, 4) # 노드수 x 임베딩 차원 수

# torch.nn.init.xavier_uniform_(e) # 임베딩 벡터 초기화

# for epoch in tqdm(range(1)):
#     cost = 0
#     h = None
#     for time, snapshot in enumerate(train_dataset):
#         x = snapshot.x.view(1, 44, 4)
#         print(x.shape)
    #     y_hat, h = model(x, e, h)
    #     cost = cost + torch.mean((y_hat-snapshot.y)**2)
    # cost = cost / (time+1)
    # cost.backward()
    # optimizer.step()
    # optimizer.zero_grad()

In [57]:
def get_batches(dataset, len_tr, batch_size=256):
    num_batches =  len_tr // batch_size + (1 if len_tr % batch_size != 0 else 0)
    batches = []
    for i in range(num_batches):
        start_idx = i * batch_size
        end_idx = start_idx + batch_size
        batches.append(train_dataset[start_idx:end_idx])
    return batches

In [58]:
len_tr = train_dataset.snapshot_count

In [59]:
batches = get_batches(train_dataset, len_tr, batch_size=128)

In [62]:
model = RecurrentGCN(node_features = 4)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

model.train()

e = torch.empty(44, 20) # 노드수 x 임베딩 차원 수

torch.nn.init.xavier_uniform_(e) # 임베딩 벡터 초기화

for epoch in tqdm(range(5)):
    losses_batch = []
    for b, batch in enumerate(batches):
        loss = 0
        h = None
        for time, snapshot in enumerate(batch):
            x = snapshot.x.view(1, 44, 4)
            y_hat, h = model(x, e, h)
            loss = loss + torch.mean((y_hat.reshape(-1)-snapshot.y.reshape(-1))**2)
            print(f'\rbatch={b}\t t={time+1}\t loss={loss/(time+1)}\t', end='', flush=True)
        loss = loss / (time+1)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        losses_batch.append(loss.item())
    print(f'\repoch={epoch}\t loss={np.mean(losses_batch)}\n', end='', flush=True)

  0%|          | 0/5 [00:00<?, ?it/s] 20%|██        | 1/5 [01:37<06:31, 97.91s/it] 40%|████      | 2/5 [03:28<05:16, 105.52s/it] 60%|██████    | 3/5 [05:42<03:56, 118.42s/it] 80%|████████  | 4/5 [07:33<01:55, 115.62s/it]100%|██████████| 5/5 [10:49<00:00, 129.97s/it]

epoch=0  loss=1.244255702285205747827       4       
epoch=1  loss=0.4966041831409239243774  94  
epoch=2  loss=0.2456967795596403263489  68  
epoch=3  loss=0.1712142917163231950659  23  
epoch=4  loss=0.1428124369943843393677      5   

In [63]:
model.eval()
cost = 0
for time, snapshot in enumerate(test_dataset):
    x = snapshot.x.view(1, 44, 4)
    y_hat, h = model(x, e, h)
    cost = cost + torch.mean((y_hat-snapshot.y)**2)
cost = cost / (time+1)
cost = cost.item()
print("MSE: {:.4f}".format(cost))

MSE: 0.2140

------------------------------------------------------------------------

In [64]:
import torch
import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import A3TGCN

from torch_geometric_temporal.dataset import ChickenpoxDatasetLoader
from torch_geometric_temporal.signal import temporal_signal_split

In [65]:
loader = ChickenpoxDatasetLoader()

dataset = loader.get_dataset()

train_dataset, test_dataset = temporal_signal_split(dataset, train_ratio=0.2)

In [88]:
next(iter(dataset))

In [66]:
class RecurrentGCN(torch.nn.Module):
    def __init__(self, node_features, periods):
        super(RecurrentGCN, self).__init__()
        self.recurrent = A3TGCN(node_features, 32, periods)
        self.linear = torch.nn.Linear(32, 1)

    def forward(self, x, edge_index, edge_weight):
        h = self.recurrent(x.view(x.shape[0], 1, x.shape[1]), edge_index, edge_weight)
        h = F.relu(h)
        h = self.linear(h)
        return h

In [67]:
model = RecurrentGCN(node_features = 1, periods = 4)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

model.train()

for epoch in tqdm(range(10)):
    cost = 0
    for time, snapshot in enumerate(train_dataset):
        y_hat = model(snapshot.x, snapshot.edge_index, snapshot.edge_attr)
        cost = cost + torch.mean((y_hat-snapshot.y)**2)
    cost = cost / (time+1)
    cost.backward()
    optimizer.step()
    optimizer.zero_grad()

100%|██████████| 10/10 [03:40<00:00, 22.07s/it]

In [68]:
model.eval()
cost = 0
for time, snapshot in enumerate(test_dataset):
    y_hat = model(snapshot.x, snapshot.edge_index, snapshot.edge_attr)
    cost = cost + torch.mean((y_hat-snapshot.y)**2)
cost = cost / (time+1)
cost = cost.item()
print("MSE: {:.4f}".format(cost))

MSE: 1.0110

`-` solar data

In [69]:
# read dataframe 
df = pd.read_csv('../SOLAR/data_eng_230710.csv')

# make y, y_upper, y_period, time, regions 
y = df.loc[:,'Bukchoncheon':'Gyeongju-si'].to_numpy()
yU = df.loc[:,'Bukchoncheon_Upper':'Gyeongju-si_Upper'].to_numpy()
yP = np.divide(y, yU+1e-10)

t = df.loc[:,'date']
regions = list(df.loc[:, 'Bukchoncheon':'Gyeongju-si'].columns)

In [82]:
dict1 = makedict(y)
loader = Loader(dict1)
dataset = loader.get_dataset(lags=1)
train_dataset, test_dataset = temporal_signal_split(dataset, train_ratio=0.8)

In [83]:
np.array(train_dataset.features).shape, np.array(test_dataset.features).shape

In [84]:
len_tr = train_dataset.snapshot_count

In [85]:
batches = get_batches(train_dataset, len_tr, batch_size=128)

In [86]:
class RecurrentGCN(torch.nn.Module):
    def __init__(self, node_features, periods):
        super(RecurrentGCN, self).__init__()
        self.recurrent = A3TGCN(node_features, 32, periods)
        self.linear = torch.nn.Linear(32, 1)

    def forward(self, x, edge_index, edge_weight):
        h = self.recurrent(x.view(x.shape[0], 1, x.shape[1]), edge_index, edge_weight)
        h = F.relu(h)
        h = self.linear(h)
        return h

In [87]:
model = RecurrentGCN(node_features = 1, periods = 4)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

model.train()

for epoch in tqdm(range(5)):
    losses_batch = []
    for b, batch in enumerate(batches):
        loss = 0
        h = None
        for time, snapshot in enumerate(batch):
            y_hat = model(snapshot.x, snapshot.edge_index, snapshot.edge_attr)
            loss = loss + torch.mean((y_hat.reshape(-1)-snapshot.y.reshape(-1))**2)
            print(f'\rbatch={b}\t t={time+1}\t loss={loss/(time+1)}\t', end='', flush=True)
        loss = loss / (time+1)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        losses_batch.append(loss.item())
    print(f'\repoch={epoch}\t loss={np.mean(losses_batch)}\n', end='', flush=True)

  0%|          | 0/5 [00:00<?, ?it/s]