-
Notifications
You must be signed in to change notification settings - Fork 0
/
environment.py
82 lines (64 loc) · 2.72 KB
/
environment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import numpy as np
import pandas as pd
from tqdm import tqdm
from bandit import ContextualBandit
from agent import KerasAgent
from utils import Mean_Log_Loss, High_Order_Iterative_Knockout
class Environment(object):
def __init__(self, features, labels, policy):
self.features = features
self.labels = labels
# init label rewards (adapt freely)
m = np.zeros((len(self.labels), max(self.labels)+1))
for i in range(0, len(self.labels)):
m[i][self.labels[i]] = 1
# label frame
self.y = pd.DataFrame(m)
# frame of features
self.X = pd.DataFrame(features)
self.X = self.X.reset_index().drop(columns=['index'])
self.cBandit = ContextualBandit(self.X, self.y)
self.myAgent = KerasAgent(lr=0.001,
a_size=self.cBandit.num_actions,
n_states=self.cBandit.num_state_features)
self.policy = policy
self.policy.setBandit(self.cBandit)
self.policy.setAgent(self.myAgent)
def iter(self):
# classical bandit interaction
# a) get state, b) perform action, c) get reward and update
s, t = self.cBandit.getInputState()
action = self.policy.select(s)
reward = self.cBandit.pullArm(action)
# Update the network.
y = self.policy.qval[:]
y[0][action] = reward
self.myAgent.model.fit(s, y, batch_size=1, epochs=1, verbose=0)
return t, action, reward
def experiment(self, total_rounds=1000000):
i = 0
pbar = tqdm(total=total_rounds)
while i < total_rounds:
t, action, reward = self.iter()
i += 1
pbar.update(1)
pbar.close()
inputs = self.myAgent.model.predict(self.cBandit.X)
probas = inputs.reshape(self.cBandit.num_samples, -1)
predictions = np.argmax(probas, axis=1)
accuracy = Mean_Log_Loss(predictions=predictions, labels=self.labels)
self.output(accuracy, predictions)
return predictions
def output(self, accuracy, predictions):
print("baseline accuracy: ", accuracy)
print("predicitons: ", predictions)
high_order_knockout, index = High_Order_Iterative_Knockout(
features_knockout=np.array(self.features),
model=self.myAgent.model,
baseline=accuracy,
labels=self.labels)
print("high-order knockout accuracy change: ")
Z = [(y, x) for y, x in sorted(zip(high_order_knockout, index),
reverse=True, key=lambda l:(l[0], -len(l[1])))]
for z in Z:
print(z)