2021年1月15日星期五

self-defined stacked LSTM has different output than nn.LSTM

I implemented multi layer LSTM, however the result is different from nn.LSTM if init_state is not None. I loaded weights from an LSTM model to both my self defined model and pytorch's nn.LSTM model. I suspect I may do something wrong in forward function. Any help will be greatly appreciated. Many thanks!

  class StackedLSTMs(nn.Module):  def __init__(self, input_sz:int, hidden_sz: int, num_layers: int):      super().__init__()      self.num_layers = num_layers      self.hidden_sz = hidden_sz      self.LSTMs = nn.ModuleList()      for layer in range(num_layers):          if layer == 0:              self.LSTMs.append(nn.LSTMCell(input_sz, hidden_sz))              #self.LSTMs.append(NaiveCustomLSTMCell(input_sz, hidden_sz))          else:              self.LSTMs.append(nn.LSTMCell(hidden_sz, hidden_sz))              #self.LSTMs.append(NaiveCustomLSTMCell(hidden_sz, hidden_sz))      def forward(self, x, h: Optional[Tuple[torch.Tensor, torch.Tensor]] = None):      seq_size, bs, _ = x.size()      outputs = []      if h is None:          hn = torch.zeros(self.num_layers, bs, self.hidden_sz)          cn = torch.zeros(self.num_layers, bs, self.hidden_sz)      else:          (hn, cn) = h        for t in range(seq_size):          for layer, lstm in enumerate(self.LSTMs):              if layer == 0:                  hn[layer, :, :], cn[layer, :, :] = lstm(x[t, :, :], (hn[layer, :, :], cn[layer, :, :]))              else:                  hn[layer, :, :], cn[layer, :, :] = lstm(hn[layer-1, :, :], (hn[layer, :, :], cn[layer, :, :]))          temp = hn[self.num_layers - 1, :, :].detach().clone()          outputs.append(temp)      outputs = torch.stack(outputs, dim=0)      h = (hn, cn)      #outputs = outputs.transpose(0, 1).contiguous()      return outputs, h    torch.manual_seed(999)  lstms = nn.LSTM(320, 320, 2)  stackedlstms = StackedLSTMs(320, 320, 2)    stackedlstms.LSTMs[0].weight_ih = oldmodel.prediction.dec_rnn.lstm.weight_ih_l0  stackedlstms.LSTMs[0].weight_hh = oldmodel.prediction.dec_rnn.lstm.weight_hh_l0  stackedlstms.LSTMs[0].bias_ih = oldmodel.prediction.dec_rnn.lstm.bias_ih_l0  stackedlstms.LSTMs[0].bias_hh = oldmodel.prediction.dec_rnn.lstm.bias_hh_l0    stackedlstms.LSTMs[1].weight_ih = oldmodel.prediction.dec_rnn.lstm.weight_ih_l1  stackedlstms.LSTMs[1].weight_hh = oldmodel.prediction.dec_rnn.lstm.weight_hh_l1  stackedlstms.LSTMs[1].bias_ih = oldmodel.prediction.dec_rnn.lstm.bias_ih_l1  stackedlstms.LSTMs[1].bias_hh = oldmodel.prediction.dec_rnn.lstm.bias_ih_l1    lstms.weight_ih_l0 = oldmodel.prediction.dec_rnn.lstm.weight_ih_l0  lstms.weight_hh_l0 = oldmodel.prediction.dec_rnn.lstm.weight_hh_l0  lstms.bias_ih_l0 = oldmodel.prediction.dec_rnn.lstm.bias_ih_l0  lstms.bias_hh_l0 = oldmodel.prediction.dec_rnn.lstm.bias_ih_l0    lstms.weight_ih_l1 = oldmodel.prediction.dec_rnn.lstm.weight_ih_l1  lstms.weight_hh_l1 = oldmodel.prediction.dec_rnn.lstm.weight_hh_l1  lstms.bias_ih_l1 = oldmodel.prediction.dec_rnn.lstm.bias_ih_l1  lstms.bias_hh_l1 = oldmodel.prediction.dec_rnn.lstm.bias_ih_l1    hidden = torch.load('hidden.pt')  newembedt = torch.load('newembed_t.pt')    lstms_res = lstms(newembedt, hidden)  stackedlstms_res = stackedlstms(newembedt, hidden)    print(torch.sum(abs(lstms_res[0]-stackedlstms_res[0])))  print(torch.sum(abs(lstms_res[1][0]-stackedlstms_res[1][0])))  
https://stackoverflow.com/questions/65745434/self-defined-stacked-lstm-has-different-output-than-nn-lstm January 16, 2021 at 08:51AM

没有评论:

发表评论