Dynamic Factor Graphs (动态因子图模型)
DFG-related papers
1. "Dynamic Factor Graphs for Time Series Modeling"

Paper: https://link.springer.com/chapter/10.1007%2F978-3-642-04174-7_9
This 2009 paper proposes the Dynamic Factor Graph model for time series modeling. The author is a student of Turing Award winner Yann LeCun.
2. "DYNAMIC FACTOR GRAPHS – A NEW WIND POWER FORECASTING APPROACH"

This 2014 paper applies the DFG model to wind power forecasting, comparing DFG against an ARIMA model on very-short-term (six hours ahead) wind power prediction.
3. Other extensions
The spatio-temporal neural network (STNN) extends DFG to spatio-temporal sequence modeling.
DFG principles
Model structure
DFG is essentially a latent-variable model, but aimed at settings where the latent variables are continuous and (possibly) high-dimensional, the underlying dynamics are deterministic and (possibly) highly nonlinear, and the mapping between latent and observed variables may also be nonlinear.

Why do we need latent-variable models?
Two common approaches to time series modeling:
- Without latent variables: $Y_t = f(Y_{t-1}, \ldots, Y_{t-p}) + \epsilon_t$. The current value is determined only by the $p$ most recent observed values.
- With latent variables: $Z_t = f(\mathbf{Z}_{t-p}^{t-1}) + \epsilon_t$, $Y_t = g(Z_t) + w_t$. Latent-variable models are better suited to partially observed data: the observations $Y$ do not necessarily reflect the full dynamics and may only be a low-dimensional projection of a higher-dimensional state $Z$.
The DFG model consists of two parts: an observation model and a dynamic model.
- Observation model:
$$Y_t^* \equiv g(W_o, Z_t), \qquad Y_t = Y_t^* + \omega_t$$
$g(\cdot)$ can be a simple linear observation model; $\omega_t$ is Gaussian noise.
- Dynamic model:
$$Z_t^* \equiv f(W_d, \mathbf{Z}_{t-p}^{t-1}), \qquad Z_t = Z_t^* + \epsilon_t$$
$\mathbf{Z}_{t-p}^{t-1}$ denotes the $p$ previous latent states; $f(\cdot)$ can be a multivariate autoregressive model, or a multilayer neural network to capture nonlinear dynamics; $\epsilon_t$ is Gaussian noise. A minimal sketch of the two components follows.
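To make the two equations concrete, here is a minimal, illustrative sketch (not the paper's exact parameterization) with a linear observation map $g$ and a one-step ($p=1$) linear dynamic $f$; the full implementation later in this post uses an MLP for $f$:

import torch
import torch.nn as nn

nz, nx = 6, 3                       # latent and observed dimensions (illustrative values)
g = nn.Linear(nz, nx, bias=False)   # observation model: Y_t* = g(W_o, Z_t)
f = nn.Linear(nz, nz)               # dynamic model:     Z_t* = f(W_d, Z_{t-1}), i.e. p = 1

z_prev = torch.randn(1, nz)         # previous latent state Z_{t-1}
z_star = f(z_prev)                  # deterministic latent prediction Z_t*
y_star = g(z_star)                  # deterministic observation prediction Y_t*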
Model inference
Since Yann LeCun is a strong advocate of energy-based machine learning, learning in this model is also framed in energy-based terms: probability distributions are set aside and everything is cast as an optimization problem.
The goal of inference is to find the optimal latent state sequence $\mathbf{Z}_{t_a}^{t_b}$ that minimizes the energy over the observation window $[t_a, t_b]$:
$$E(W_d, W_o, Y_{t_a}^{t_b}) = \sum_{t=t_a}^{t_b} \big( E_o(t) + E_d(t) \big)$$
where
$$E_d(t) \equiv \min_{Z_t} E_d(W_d, \mathbf{Z}_{t-p}^{t-1}, Z_t) = \min \|Z^*_t - Z_t\|^2$$
$$E_o(t) \equiv \min_{Z_t} E_o(W_o, Z_t, Y_t) = \min \|Y^*_t - Y_t\|^2$$
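As a worked sketch of these energies, here is a hypothetical helper (the name `energy` and the $p=1$ window are assumptions) written against the PyTorch DFG class implemented later in this post, where the latent states live in `model.factors`:

import torch

def energy(model, Y):
    # total energy E_o + E_d over t = 1 .. T-1, with p = 1 (regularizers omitted)
    T = Y.size(0)
    t = torch.arange(1, T, device=Y.device)
    e_obs = (model.dec_closure(t) - Y[t]).pow(2).sum()                  # sum_t ||Y_t* - Y_t||^2
    e_dyn = (model.dyn_closure(t - 1) - model.factors[t]).pow(2).sum()  # sum_t ||Z_t* - Z_t||^2
    return e_obs + e_dyn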
Model training
Training looks for the optimal parameters $\mathbf{W} = [W_o, W_d]$ that minimize the loss function:
$$L(\mathbf{W}, Y, Z) = E(\mathbf{W}, Y) + R_z(Z) + R(\mathbf{W})$$
Here $R(\mathbf{W})$ regularizes the parameters and $R_z(Z)$ regularizes the latent states, e.g. a smoothness constraint $R_z(Z_t^{t+1}) = \sum_i (z_{i,t+1} - z_{i,t})^2$.
Training alternates between the following two steps:
- $\tilde{Z} = \arg\min_{Z} L(\tilde{\mathbf{W}}, Y, Z)$
- $\tilde{\mathbf{W}} = \arg\min_{\mathbf{W}} L(\mathbf{W}, Y, \tilde{Z})$
This can be viewed as the two steps of an EM algorithm:
- E-step: infer the optimal latent variables $Z$;
- M-step: optimize the parameters $W$ so that the energy is minimized (the probability is maximized). A sketch of this alternating scheme is given below.
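A minimal sketch of the alternating minimization, reusing the hypothetical `energy(model, Y)` helper from above (regularizers omitted) and assuming a model object with latent `factors` and `dynamic`/`decoder` weights like the DFG class implemented below; `n_epochs` and `Y` are placeholders:

opt_z = torch.optim.SGD(model.factors_parameters(), lr=1e-2)           # updates Z only
opt_w = torch.optim.SGD(list(model.dynamic.parameters())
                        + list(model.decoder.parameters()), lr=1e-3)   # updates W only

for epoch in range(n_epochs):
    # "E-step": minimize the loss over the latent states Z, holding W fixed
    opt_z.zero_grad()
    energy(model, Y).backward()
    opt_z.step()
    # "M-step": minimize the loss over the parameters W, holding Z fixed
    opt_w.zero_grad()
    energy(model, Y).backward()
    opt_w.step()

Note that the training loop later in this post does not alternate strictly over $Z$ and $W$; instead it alternates between a reconstruction (decoder) objective and a dynamics objective, updating the factors together with the relevant weights using a single Adam optimizer.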
PyTorch implementation
import
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np  # needed by the lorentz data loader below
import os
import random
import json
from collections import defaultdict, OrderedDict
import shutil
utils
def rmse(x_pred, x_target, reduce=True):
if reduce:
return x_pred.sub(x_target).pow(2).sum(-1).sqrt().mean().item()
return x_pred.sub(x_target).pow(2).sum(-1).sqrt().squeeze()
def identity(input):
return input
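Both the dataset loader and the experiment configuration below build their options with a DotDict that is not defined in this snippet. A minimal sketch, assuming it is simply a dict whose keys can also be read and written as attributes:

class DotDict(dict):
    # dict with attribute-style access, used for experiment / dataset configs
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__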
class Logger(object):
def __init__(self, log_dir, name, chkpt_interval):
super(Logger, self).__init__()
os.makedirs(os.path.join(log_dir, name))
self.log_path = os.path.join(log_dir, name, 'logs.json')
self.model_path = os.path.join(log_dir, name, 'model.pt')
self.logs = defaultdict(list)
self.logs['epoch'] = 0
self.chkpt_interval = chkpt_interval
def log(self, key, value):
if isinstance(value, dict):
for k, v in value.items():
self.log('{}.{}'.format(key, k), v)
else:
self.logs[key].append(value)
def checkpoint(self, model):
if (self.logs['epoch'] + 1) % self.chkpt_interval == 0:
self.save(model)
self.logs['epoch'] += 1
def save(self, model):
with open(self.log_path, 'w') as f:
json.dump(self.logs, f, sort_keys=True, indent=4)
torch.save(model.state_dict(), self.model_path)
dataset
def dataset_factory(data_dir, name):
# get dataset
if name == 'lorentz':
opt, data = lorentz(data_dir, '{}.csv'.format(name))
else:
        raise ValueError('No dataset named `{}`.'.format(name))
# split train / test
train_data = data[:opt.nt_train]
test_data = data[opt.nt_train:]
return opt, (train_data, test_data)
def lorentz(data_dir, file='lorentz.csv'):
# dataset configuration
opt = DotDict()
opt.nt = 200
opt.nt_train = 100
opt.nx = 3
opt.periode = opt.nt
# loading data
data = torch.Tensor(np.genfromtxt(os.path.join(data_dir, file))).view(opt.nt, opt.nx)
return opt, data
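The loader above expects a whitespace-separated file data/lorentz.csv containing 200 rows of the 3-dimensional Lorenz system. That file is not included here; a simple way to generate a compatible one (an assumption about the data, not necessarily how the original series was produced) is to Euler-integrate the Lorenz equations:

def make_lorentz_csv(path='data/lorentz.csv', nt=200, dt=0.01,
                     sigma=10.0, rho=28.0, beta=8.0 / 3.0):
    # integrate the Lorenz system with a simple Euler scheme and save nt states of (x, y, z)
    state = np.array([1.0, 1.0, 1.0])
    rows = []
    for _ in range(nt):
        x, y, z = state
        state = state + dt * np.array([sigma * (y - x),
                                       x * (rho - z) - y,
                                       x * y - beta * z])
        rows.append(state.copy())
    os.makedirs(os.path.dirname(path), exist_ok=True)
    np.savetxt(path, np.array(rows))  # space-separated, read back by np.genfromtxt as (nt, 3)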
modules
class MLP(nn.Module):
def __init__(self, ninp, nhid, nout, nlayers, dropout):
super(MLP, self).__init__()
self.ninp = ninp
# modules
if nlayers == 1:
self.module = nn.Linear(ninp, nout)
else:
modules = [nn.Linear(ninp, nhid), nn.ReLU(), nn.Dropout(dropout)]
nlayers -= 1
while nlayers > 1:
modules += [nn.Linear(nhid, nhid), nn.ReLU(), nn.Dropout(dropout)]
nlayers -= 1
modules.append(nn.Linear(nhid, nout))
self.module = nn.Sequential(*modules)
def forward(self, input):
return self.module(input)
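Note that with nlayers=1 (and nhid=0) this "MLP" is just a single linear layer, which corresponds to the multivariate autoregressive dynamic discussed above; with nlayers > 1 it becomes a nonlinear network. A quick illustrative check:

linear_dyn = MLP(ninp=6, nhid=0, nout=6, nlayers=1, dropout=0)    # linear (AR-style) map
mlp_dyn = MLP(ninp=6, nhid=32, nout=6, nlayers=3, dropout=0.1)    # two hidden layers + output
z = torch.randn(4, 6)
print(linear_dyn(z).shape, mlp_dyn(z).shape)  # torch.Size([4, 6]) torch.Size([4, 6])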
DFG model
class DFG(nn.Module):
def __init__(self, nx, nt, nz, nhid=0, nlayers=1,
activation='identity', device='cuda'):
super(DFG, self).__init__()
assert (nhid > 0 and nlayers > 1) or (nhid == 0 and nlayers == 1)
# attributes
self.nt = nt
self.nx = nx
self.nz = nz
# kernel
        self.activation = torch.tanh if activation == 'tanh' else identity if activation == 'identity' else None
# modules
self.factors = nn.Parameter(torch.Tensor(nt, nz))
self.dynamic = MLP(nz, nhid, nz, nlayers, 0)
self.decoder = nn.Linear(nz, nx, bias=False)
# init
self.factors.data.uniform_(-0.1, 0.1)
def update_z(self, z):
z = z.view(-1,self.nz)
z_next = self.dynamic(z)
return self.activation(z_next)
def decode_z(self, z):
x_rec = self.decoder(z)
return x_rec
def dec_closure(self, t_idx):
z_inf = self.factors[t_idx]
x_rec = self.decoder(z_inf)
return x_rec
def dyn_closure(self, t_idx):
z_input = self.factors[t_idx]
z_input = z_input.view(-1, self.nz)
z_gen = self.dynamic(z_input)
return self.activation(z_gen)
def generate(self, nsteps):
z = self.factors[-1]
z_gen = []
for t in range(nsteps):
z = self.update_z(z)
z_gen.append(z)
z_gen = torch.stack(z_gen).view(-1,self.nz)
x_gen = self.decode_z(z_gen)
return x_gen, z_gen
def factors_parameters(self):
yield self.factors
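A quick, illustrative sanity check of the class on CPU (the device argument is not used inside the class as written), with the same hyper-parameters as the Lorenz experiment below:

model = DFG(nx=3, nt=100, nz=6, nhid=0, nlayers=1, activation='identity')
t = torch.arange(1, 10)
print(model.dec_closure(t).shape)      # torch.Size([9, 3]): reconstructed observations
print(model.dyn_closure(t - 1).shape)  # torch.Size([9, 6]): one-step latent predictions
x_gen, z_gen = model.generate(5)
print(x_gen.shape, z_gen.shape)        # torch.Size([5, 3]) torch.Size([5, 6])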
load data
# parse
opt = DotDict()
opt.datadir = 'data'
opt.dataset = 'lorentz'
opt.outputdir = 'output_lorentz'
opt.xp = 'dfg'
opt.lr = 0.001
opt.beta1 = 0.9
opt.beta2 = 0.999
opt.eps = 1e-9
opt.wd = 1e-6
opt.wd_z = 1e-7
opt.device = 0
opt.patience = 300
opt.l2_z = 0.01
opt.l1_rel = 0.01
opt.nz = 6
opt.nhid = 0
opt.nlayers = 1
opt.lambd = 1
opt.activation = 'identity'
opt.batch_size = 100
opt.nepoch = 10000
opt.manualSeed = random.randint(1, 10000)
# cudnn
if opt.device > -1:
os.environ["CUDA_VISIBLE_DEVICES"] = str(opt.device)
device = torch.device('cuda:0')
else:
device = torch.device('cpu')
random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)
if opt.device > -1:
torch.cuda.manual_seed_all(opt.manualSeed)
#######################################################################################################################
# Data
#######################################################################################################################
# -- load data
setup, (train_data, test_data) = dataset_factory(opt.datadir, opt.dataset)
train_data = train_data.to(device)
test_data = test_data.to(device)
for k, v in setup.items():
opt[k] = v
# -- train inputs
t_idx = torch.arange(1, opt.nt_train, dtype=torch.long, device=device)  # time steps 1..nt_train-1 (step 0 has no predecessor)
train
if os.path.exists(opt.outputdir):
shutil.rmtree(opt.outputdir)
#######################################################################################################################
# Model
#######################################################################################################################
model = DFG(opt.nx, opt.nt_train, opt.nz, opt.nhid, opt.nlayers,
opt.activation).to(device)
#######################################################################################################################
# Optimizer
#######################################################################################################################
params = [{'params': model.factors_parameters(), 'weight_decay': opt.wd_z},
{'params': model.dynamic.parameters()},
{'params': model.decoder.parameters()}]
optimizer = optim.Adam(params, lr=opt.lr, betas=(opt.beta1, opt.beta2), eps=opt.eps, weight_decay=opt.wd)
if opt.patience > 0:
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=opt.patience)
#######################################################################################################################
# Logs
#######################################################################################################################
logger = Logger(opt.outputdir, opt.xp, opt.nt_train)
with open(os.path.join(opt.outputdir, opt.xp, 'config.json'), 'w') as f:
json.dump(opt, f, sort_keys=True, indent=4)
#######################################################################################################################
# Training
#######################################################################################################################
lr = opt.lr
from tqdm.notebook import tqdm
pb = tqdm(range(opt.nepoch))
for e in pb:
# ------------------------ Train ------------------------
model.train()
# --- decoder ---
idx_perm = torch.randperm(opt.nt_train-1).to(device)
batches = idx_perm.split(opt.batch_size)
logs_train = defaultdict(float)
for i, batch in enumerate(batches):
optimizer.zero_grad()
# data
input_t = t_idx[batch]
x_target = train_data[input_t]
# closure
x_rec = model.dec_closure(input_t)
mse_dec = F.mse_loss(x_rec, x_target)
# backward
mse_dec.backward()
# step
optimizer.step()
# log
logger.log('train_iter.mse_dec', mse_dec.item())
logs_train['mse_dec'] += mse_dec.item() * len(batch)
# --- dynamic ---
idx_perm = torch.randperm(opt.nt_train-1).to(device)
batches = idx_perm.split(opt.batch_size)
for i, batch in enumerate(batches):
optimizer.zero_grad()
# data
input_t = t_idx[batch]
# closure
z_inf = model.factors[input_t]
z_pred = model.dyn_closure(input_t - 1)
# loss
mse_dyn = z_pred.sub(z_inf).pow(2).mean()
loss_dyn = mse_dyn * opt.lambd
if opt.l2_z > 0:
loss_dyn += opt.l2_z * model.factors[input_t - 1].sub(model.factors[input_t]).pow(2).mean()
# backward
loss_dyn.backward()
# step
optimizer.step()
# log
logger.log('train_iter.mse_dyn', mse_dyn.item())
logs_train['mse_dyn'] += mse_dyn.item() * len(batch)
logs_train['loss_dyn'] += loss_dyn.item() * len(batch)
# --- logs ---
logs_train['mse_dec'] /= opt.nt_train
logs_train['mse_dyn'] /= opt.nt_train
logs_train['loss_dyn'] /= opt.nt_train
logs_train['loss'] = logs_train['mse_dec'] + logs_train['loss_dyn']
logger.log('train_epoch', logs_train)
# ------------------------ Test ------------------------
model.eval()
with torch.no_grad():
x_pred, _ = model.generate(opt.nt - opt.nt_train)
score_ts = rmse(x_pred, test_data, reduce=False)
score = rmse(x_pred, test_data)
logger.log('test_epoch.rmse', score)
logger.log('test_epoch.ts', {t: {'rmse': scr.item()} for t, scr in enumerate(score_ts)})
# checkpoint
logger.log('train_epoch.lr', lr)
pb.set_postfix(loss=logs_train['loss'], rmse_test=score, lr=lr)
logger.checkpoint(model)
# schedule lr
if opt.patience > 0 and score < 10:
lr_scheduler.step(score)
lr = optimizer.param_groups[0]['lr']
if lr <= 1e-5:
break
logger.save(model)
result
"""
result
"""
model.eval()
with torch.no_grad():
prediction, _ = model.generate(100)
mse = rmse(prediction, test_data)
import matplotlib.pyplot as plt
plt.figure('Test plots', figsize=(17, 4), dpi=90)
with open(os.path.join(opt.outputdir, opt.xp, 'logs.json'), 'r') as f:
logs = json.load(f)
plt.plot([logs['test_epoch.ts.{}.rmse'.format(ts)][-1] for ts in range(100)], label=opt.xp, alpha=0.8)
plt.grid()
plt.title('Prediction RMSE')
plt.xlabel('timestep')
plt.legend()

plt.figure('Results',figsize=(15,6))
plt.subplot(1, 3, 1)
plt.imshow(test_data.squeeze().cpu().numpy().T, aspect='auto', cmap='jet')
plt.colorbar()
plt.title('ground truth')
plt.subplot(1, 3, 2)
plt.imshow(prediction.squeeze().cpu().numpy().T, aspect='auto', cmap='jet')
plt.colorbar()
plt.title('{} prediction'.format('DFG'))
plt.subplot(1, 3, 3)
plt.imshow(test_data.sub(prediction).abs().cpu().numpy().T, aspect='auto')
plt.colorbar()
plt.title('absolute error')

plt.figure()
plt.plot(model.decode_z(model.factors).squeeze().detach().cpu().numpy())
plt.plot(range(100,200),prediction.squeeze().detach().cpu().numpy())
plt.plot(range(100,200),test_data.squeeze().cpu().numpy())
plt.show()
