A multi-head attention block can be added after the GRU layers, with one GRU branch supplying the query and the other the key and value. The code is as follows (the original snippet breaks off midway through the score computation, so everything from that point on, including the model head, is a plausible reconstruction of the standard scaled dot-product attention steps rather than the author's exact code):
```python
import keras.backend as K
from keras.layers import (Input, Conv1D, CuDNNGRU, Dropout, concatenate, Dense,
                          Lambda, add, Activation, Permute, Reshape, multiply)
from keras.models import Model

def LSTNet(trainX1, trainX2, trainY, config):
    input1 = Input(shape=(trainX1.shape[1], trainX1.shape[2]))
    input2 = Input(shape=(trainX2.shape[1], trainX2.shape[2]))
    # One Conv1D per input. Calling a single Conv1D instance on both inputs
    # would share the weights properly; here the two layers merely start from
    # the same initial values (copied below) and diverge during training.
    conv1 = Conv1D(filters=48, kernel_size=6, strides=1, activation='relu')  # for input1
    conv2 = Conv1D(filters=48, kernel_size=6, strides=1, activation='relu')  # for input2
    conv1out = conv1(input1)
    gru1out = CuDNNGRU(64)(conv1out)            # last state only: (batch, 64)
    gru1out = Dropout(config.dropout)(gru1out)
    conv2out = conv2(input2)
    gru2out = CuDNNGRU(64)(conv2out)
    gru2out = Dropout(config.dropout)(gru2out)
    # A layer has weights only once it is built (i.e. called), so the copy has
    # to happen after both convs are applied; before that it is a silent no-op.
    conv2.set_weights(conv1.get_weights())

    # Multi-head attention: branch 1 provides the query, branch 2 key/value.
    # Because the GRUs return only their last state, the length axis below is
    # 1; use CuDNNGRU(64, return_sequences=True) to attend over time steps.
    attention_size = 16
    head_num = 4
    d_k = attention_size // head_num
    query = Dense(attention_size)(gru1out)
    key = Dense(attention_size)(gru2out)
    value = Dense(attention_size)(gru2out)
    # Split into heads: (batch, len, head_num * d_k) -> (batch, head_num, len, d_k).
    query = Permute((2, 1, 3))(Reshape((-1, head_num, d_k))(query))
    key = Permute((2, 1, 3))(Reshape((-1, head_num, d_k))(key))
    value = Permute((2, 1, 3))(Reshape((-1, head_num, d_k))(value))
    # Scaled dot-product attention per head: softmax(Q·K^T / sqrt(d_k))·V.
    # With Keras 2.2.x / TF 1.x (implied by CuDNNGRU), batch_dot(axes=[3, 3])
    # contracts the d_k axes, giving scores of shape (batch, head_num, len, len).
    score = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=[3, 3]) / d_k ** 0.5)(
        [query, key])
    score = Activation('softmax')(score)
    context = Lambda(lambda x: K.batch_dot(x[0], x[1]))([score, value])
    # Merge heads back: (batch, head_num, len, d_k) -> (batch, attention_size).
    context = Reshape((attention_size,))(Permute((2, 1, 3))(context))
    # The source snippet is truncated from here on; this head is a plausible
    # reconstruction and assumes a 2-D trainY of shape (samples, horizon).
    out = Dense(trainY.shape[1])(concatenate([gru1out, context]))
    return Model(inputs=[input1, input2], outputs=out)
```
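
For a quick smoke test, the function can be driven with random arrays shaped like the expected inputs. The `SimpleNamespace` config and all shapes below are illustrative stand-ins, not from the original post, and `CuDNNGRU` needs a GPU (swap in `GRU` for a CPU-only check):

```python
import numpy as np
from types import SimpleNamespace

# Hypothetical data: 8 samples, 24 time steps, 10 features per branch,
# one-step-ahead target; LSTNet only reads config.dropout.
trainX1 = np.random.rand(8, 24, 10).astype('float32')
trainX2 = np.random.rand(8, 24, 10).astype('float32')
trainY = np.random.rand(8, 1).astype('float32')
config = SimpleNamespace(dropout=0.2)

model = LSTNet(trainX1, trainX2, trainY, config)
model.compile(optimizer='adam', loss='mse')
model.summary()
model.fit([trainX1, trainX2], trainY, epochs=2, batch_size=4)
```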