-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathfw.py
122 lines (103 loc) · 5.54 KB
/
fw.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from __future__ import print_function
import tensorflow as tf
import numpy as np
import time
from associative_retrieval import read_data
from subprocess import call
ar_data = read_data()
STEP_NUM = 11
ELEM_NUM = 26 + 10 + 1
class FastWeightsRecurrentNeuralNetworks(object):
def __init__(self, step_num, elem_num, hidden_num):
self.x = tf.placeholder(tf.float32, [None, step_num, elem_num])
self.y = tf.placeholder(tf.float32, [None, elem_num])
self.l = tf.placeholder(tf.float32, [])
self.e = tf.placeholder(tf.float32, [])
self.w1 = tf.Variable(tf.random_uniform([elem_num, 50], -np.sqrt(0.02), np.sqrt(0.02)), dtype=tf.float32)
self.b1 = tf.Variable(tf.zeros([1, 50]), dtype=tf.float32)
self.w2 = tf.Variable(tf.random_uniform([50, 100], -np.sqrt(0.01), np.sqrt(0.01)), dtype=tf.float32)
self.b2 = tf.Variable(tf.zeros([1, 100]), dtype=tf.float32)
self.w3 = tf.Variable(tf.random_uniform([hidden_num, 100], -np.sqrt(0.01), np.sqrt(0.01)), dtype=tf.float32)
self.b3 = tf.Variable(tf.zeros([1, 100]), dtype=tf.float32)
self.w4 = tf.Variable(tf.random_uniform([100, elem_num], -np.sqrt(1.0 / elem_num), np.sqrt(1.0 / elem_num)),
dtype=tf.float32)
self.b4 = tf.Variable(tf.zeros([1, elem_num]), dtype=tf.float32)
self.w = tf.Variable(initial_value=0.05 * np.identity(hidden_num), dtype=tf.float32)
self.c = tf.Variable(tf.random_uniform([100, hidden_num], -np.sqrt(hidden_num), np.sqrt(hidden_num)),
dtype=tf.float32)
self.g = tf.Variable(tf.ones([1, hidden_num]), dtype=tf.float32)
self.b = tf.Variable(tf.zeros([1, hidden_num]), dtype=tf.float32)
batch_size = tf.shape(self.x)[0]
a = tf.zeros(tf.pack([batch_size, hidden_num, hidden_num]), dtype=tf.float32)
h = tf.zeros([batch_size, hidden_num], dtype=tf.float32)
la = []
for t in range(0, step_num):
z = tf.nn.relu(tf.matmul(
tf.nn.relu(tf.matmul(self.x[:, t, :], self.w1) + self.b1),
self.w2) + self.b2
)
h = tf.nn.relu(
tf.matmul(h, self.w) + tf.matmul(z, self.c)
)
hs = tf.reshape(h, tf.pack([batch_size, 1, hidden_num]))
hh = hs
a = tf.add(tf.scalar_mul(self.l, a),
tf.scalar_mul(self.e, tf.batch_matmul(tf.transpose(hs, [0, 2, 1]), hs)))
la.append(tf.reduce_mean(tf.square(a)))
for s in range(0, 1):
hs = tf.reshape(tf.matmul(h, self.w), tf.shape(hh)) + \
tf.reshape(tf.matmul(z, self.c), tf.shape(hh)) + \
tf.batch_matmul(hs, a)
mu = tf.reduce_mean(hs, reduction_indices=0)
sig = tf.sqrt(tf.reduce_mean(tf.square(hs - mu), reduction_indices=0))
hs = tf.nn.relu(tf.div(tf.mul(self.g, (hs - mu)), sig) + self.b)
h = tf.reshape(hs, tf.pack([batch_size, hidden_num]))
h = tf.nn.relu(tf.matmul(h, self.w3) + self.b3)
logits = tf.matmul(h, self.w4) + self.b4
self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, self.y))
self.trainer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(self.loss)
correct = tf.equal(tf.argmax(logits, dimension=1), tf.argmax(self.y, dimension=1))
self.acc = tf.reduce_mean(tf.cast(correct, tf.float32))
self.summary = tf.merge_summary([
tf.scalar_summary('loss', self.loss),
tf.scalar_summary('acc', self.acc)
])
self.sess = tf.Session()
def train(self, save=0, verbose=0):
call('rm -rf ./summary'.split(' '))
self.sess.run(tf.initialize_all_variables())
writer = tf.train.SummaryWriter('./summary')
batch_size = 100
start_time = time.time()
saver = tf.train.Saver(tf.all_variables())
for epoch in range(0, 500):
batch_idxs = 600
for idx in range(0, batch_idxs):
bx, by = ar_data.train.next_batch(batch_size=batch_size)
loss, acc, summary, _ = self.sess.run([self.loss, self.acc, self.summary, self.trainer],
feed_dict={self.x: bx, self.y: by, self.l: 0.9, self.e: 0.5})
writer.add_summary(summary, global_step=epoch * batch_idxs + idx)
if verbose > 0 and idx % verbose == 0:
print('Epoch: [{:4d}] [{:4d}/{:4d}] time: {:.4f}, loss: {:.8f}, acc: {:.2f}'.format(
epoch, idx, batch_idxs, time.time() - start_time, loss, acc
))
if save > 0 and (epoch+1) % save == 0:
saver.save(self.sess, 'log/model', global_step=epoch)
saver.save(self.sess, 'log/moodel-final')
def test(self, val=True):
batch_idxs = 100
batch_size = 100
tot = 0.0
data = ar_data.val if val else ar_data.test
name = 'Validation' if val else 'Test'
for idx in range(0, batch_idxs):
bx, by = data.next_batch(batch_size=batch_size)
acc = self.sess.run(self.acc, feed_dict={self.x: bx, self.y: by, self.l: 0.9, self.e: 0.5})
tot += acc / batch_idxs
print('{}: {:.4f}'.format(name, tot))
def load(self, save_path='log/model-final'):
saver = tf.train.Saver(tf.all_variables())
saver.restore(self.sess, save_path=save_path)
if __name__ == '__main__':
c = FastWeightsRecurrentNeuralNetworks(STEP_NUM, ELEM_NUM, 20)
c.train(verbose=10)