-
Notifications
You must be signed in to change notification settings - Fork 81
/
user_simulator.py
370 lines (312 loc) · 16.5 KB
/
user_simulator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
from dialogue_config import usersim_default_key, FAIL, NO_OUTCOME, SUCCESS, usersim_required_init_inform_keys, \
no_query_keys
from utils import reward_function
import random, copy
class UserSimulator:
    """Simulates a real user, to train the agent with reinforcement learning.

    The simulator draws a goal (a dict with 'inform_slots' and 'request_slots') at the start of each
    episode and answers agent actions with rule-based, partly random responses. Its working memory is
    the ``state`` dict:

        history_slots: every slot informed so far, by either the agent or the user sim
        inform_slots:  informs of the user action currently being built (cleared every turn)
        request_slots: slots the user sim is currently requesting
        rest_slots:    goal informs/requests that have not been dealt with yet
        intent:        intent of the user action currently being built
    """

    def __init__(self, goal_list, constants, database):
        """
        The constructor for UserSimulator. Sets dialogue config variables.

        Parameters:
            goal_list (list): User goals loaded from file
            constants (dict): Dict of constants loaded from file
            database (dict): The database in the format dict(long: dict)
        """

        self.goal_list = goal_list
        self.max_round = constants['run']['max_round_num']
        self.default_key = usersim_default_key
        # Inform keys that are REQUIRED to be in the first user action (when the goal has them)
        self.init_informs = usersim_required_init_inform_keys
        # Goal inform keys that are skipped when comparing the goal against a match
        self.no_query = no_query_keys

        # TEMP ----
        # Only used by the sanity check in _response_to_done
        self.database = database
        # ---------

    def reset(self):
        """
        Resets the user sim. by emptying the state and returning the initial action.

        Returns:
            dict: The initial action of an episode
        """

        # Work on a private copy: the default request slot is written into the goal below (and popped
        # and re-added while building the first action), which must not leak into the caller's goal list
        self.goal = copy.deepcopy(random.choice(self.goal_list))
        # Add default slot to requests of goal
        self.goal['request_slots'][self.default_key] = 'UNK'

        # Init. rest slots with all informs and requests of the goal (requests win on a key clash);
        # slots are removed from it as informs are made by the user sim or the agent
        rest_slots = {}
        rest_slots.update(self.goal['inform_slots'])
        rest_slots.update(self.goal['request_slots'])

        self.state = {
            'history_slots': {},
            'inform_slots': {},
            'request_slots': {},
            'rest_slots': rest_slots,
            'intent': '',
        }
        # Outcome of the last match_found check, init. to failure
        self.constraint_check = FAIL

        return self._return_init_action()

    def _build_user_response(self):
        """Snapshots the current state as a user action dict (slots are copied, not shared)."""

        return {
            'intent': self.state['intent'],
            'request_slots': copy.deepcopy(self.state['request_slots']),
            'inform_slots': copy.deepcopy(self.state['inform_slots']),
        }

    def _return_init_action(self):
        """
        Returns the initial action of the episode.

        The initial action has an intent of request, required init. inform slots and a single request slot.

        Returns:
            dict: Initial user response
        """

        state = self.state
        goal_informs = self.goal['inform_slots']
        goal_requests = self.goal['request_slots']

        # Always request
        state['intent'] = 'request'

        if goal_informs:
            # Pick all the required init. informs, and add if they exist in goal inform slots
            for inform_key in self.init_informs:
                if inform_key in goal_informs:
                    state['inform_slots'][inform_key] = goal_informs[inform_key]
                    state['rest_slots'].pop(inform_key)
                    state['history_slots'][inform_key] = goal_informs[inform_key]
            # If nothing was added then pick a random one to add
            if not state['inform_slots']:
                key, value = random.choice(list(goal_informs.items()))
                state['inform_slots'][key] = value
                state['rest_slots'].pop(key)
                state['history_slots'][key] = value

        # Now add a request, do a random one if something other than def. available.
        # The default key is taken out only for the duration of the choice and then put back.
        goal_requests.pop(self.default_key)
        if goal_requests:
            req_key = random.choice(list(goal_requests.keys()))
        else:
            req_key = self.default_key
        goal_requests[self.default_key] = 'UNK'
        state['request_slots'][req_key] = 'UNK'

        return self._build_user_response()

    def step(self, agent_action):
        """
        Return the response of the user sim. to the agent by using rules that simulate a user.

        Given the agent action craft a response by using deterministic rules that simulate (to some extent) a user.
        Some parts of the rules are stochastic. Check if the agent has succeeded or lost or still going.

        Parameters:
            agent_action (dict): The agent action that the user sim. responds to. Reads the keys 'intent',
                                 'inform_slots', 'request_slots' and 'round'

        Returns:
            dict: User sim. response
            int: Reward, as computed by reward_function(success, max_round)
            bool: Done flag
            bool: True only if the episode ended in success, False otherwise (loss or still going)
        """

        # Assertions -----
        # No UNK or PLACEHOLDER in agent action informs
        for value in agent_action['inform_slots'].values():
            assert value != 'UNK'
            assert value != 'PLACEHOLDER'
        # No PLACEHOLDER in agent requests either
        for value in agent_action['request_slots'].values():
            assert value != 'PLACEHOLDER'
        # ----------------

        self.state['inform_slots'].clear()
        self.state['intent'] = ''

        done = False
        success = NO_OUTCOME
        # First check round num, if equal to max then fail
        if agent_action['round'] == self.max_round:
            done = True
            success = FAIL
            self.state['intent'] = 'done'
            self.state['request_slots'].clear()
        else:
            agent_intent = agent_action['intent']
            if agent_intent == 'request':
                self._response_to_request(agent_action)
            elif agent_intent == 'inform':
                self._response_to_inform(agent_action)
            elif agent_intent == 'match_found':
                self._response_to_match_found(agent_action)
            elif agent_intent == 'done':
                success = self._response_to_done()
                self.state['intent'] = 'done'
                self.state['request_slots'].clear()
                done = True
            # Any other agent intent leaves the user intent empty, which the check below rejects

        self._assert_state_consistent()

        user_response = self._build_user_response()
        reward = reward_function(success, self.max_round)

        # Compare by value against the outcome constant (identity checks on ints are unreliable)
        return user_response, reward, done, success == SUCCESS

    def _assert_state_consistent(self):
        """Debug assertions on the state after a user response has been decided."""

        state = self.state
        intent = state['intent']
        rest_slots = state['rest_slots']
        history_slots = state['history_slots']
        goal_informs = self.goal['inform_slots']
        goal_requests = self.goal['request_slots']

        # If request intent, then make sure request slots
        if intent == 'request':
            assert state['request_slots']
        # If inform intent, then make sure inform slots and NO request slots
        if intent == 'inform':
            assert state['inform_slots']
            assert not state['request_slots']
        assert 'UNK' not in state['inform_slots'].values()
        assert 'PLACEHOLDER' not in state['request_slots'].values()
        # No overlap between rest and hist
        for key in rest_slots:
            assert key not in history_slots
        for key in history_slots:
            assert key not in rest_slots
        # Every goal slot must be tracked in either rest or hist. Membership is tested (not the
        # truthiness of the stored value) so that falsy slot values such as '' or 0 are accepted.
        for inf_key in goal_informs:
            assert inf_key in history_slots or inf_key in rest_slots
        for req_key in goal_requests:
            assert req_key in history_slots or req_key in rest_slots, req_key
        # Anything in the rest should be in the goal
        for key in rest_slots:
            assert key in goal_informs or key in goal_requests
        assert intent != ''

    def _response_to_request(self, agent_action):
        """
        Augments the state in response to the agent action having an intent of request.

        There are 4 main cases for responding.

        Parameters:
            agent_action (dict): Agent action with an intent of request; only the first key of its
                                 'request_slots' is considered
        """

        state = self.state
        goal_informs = self.goal['inform_slots']
        goal_requests = self.goal['request_slots']
        agent_request_key = list(agent_action['request_slots'].keys())[0]

        # First Case: if agent requests for something that is in the user sims goal inform slots, then inform it
        if agent_request_key in goal_informs:
            state['intent'] = 'inform'
            state['inform_slots'][agent_request_key] = goal_informs[agent_request_key]
            state['request_slots'].clear()
            state['rest_slots'].pop(agent_request_key, None)
            state['history_slots'][agent_request_key] = goal_informs[agent_request_key]
        # Second Case: if the agent requests for something in user sims goal request slots and it has already been
        # informed, then inform it
        elif agent_request_key in goal_requests and agent_request_key in state['history_slots']:
            state['intent'] = 'inform'
            state['inform_slots'][agent_request_key] = state['history_slots'][agent_request_key]
            state['request_slots'].clear()
            assert agent_request_key not in state['rest_slots']
        # Third Case: if the agent requests for something in the user sims goal request slots and it HASN'T been
        # informed, then request it with a random inform
        elif agent_request_key in goal_requests and agent_request_key in state['rest_slots']:
            state['request_slots'].clear()
            state['intent'] = 'request'
            state['request_slots'][agent_request_key] = 'UNK'
            # Rest slots with a concrete value are goal informs that were not given yet
            rest_informs = {key: value for key, value in state['rest_slots'].items() if value != 'UNK'}
            if rest_informs:
                key_choice, value_choice = random.choice(list(rest_informs.items()))
                state['inform_slots'][key_choice] = value_choice
                state['rest_slots'].pop(key_choice)
                state['history_slots'][key_choice] = value_choice
        # Fourth and Final Case: otherwise the user sim does not care about the slot being requested, then inform
        # 'anything' as the value of the requested slot
        else:
            assert agent_request_key not in state['rest_slots']
            state['intent'] = 'inform'
            state['inform_slots'][agent_request_key] = 'anything'
            state['request_slots'].clear()
            state['history_slots'][agent_request_key] = 'anything'

    def _response_to_inform(self, agent_action):
        """
        Augments the state in response to the agent action having an intent of inform.

        There are 2 main cases for responding. Add the agent inform slots to history slots,
        and remove the agent inform slots from the rest and request slots.

        Parameters:
            agent_action (dict): Agent action with an intent of inform; only the first key of its
                                 'inform_slots' is considered and it must not be the default key
        """

        state = self.state
        goal_informs = self.goal['inform_slots']
        agent_inform_key = list(agent_action['inform_slots'].keys())[0]
        agent_inform_value = agent_action['inform_slots'][agent_inform_key]

        assert agent_inform_key != self.default_key

        # Add all informs (by agent too) to hist slots
        state['history_slots'][agent_inform_key] = agent_inform_value
        # Remove from rest slots if in it
        state['rest_slots'].pop(agent_inform_key, None)
        # Remove from request slots if in it
        state['request_slots'].pop(agent_inform_key, None)

        # First Case: If agent informs something that is in goal informs and the value it informed doesnt match,
        # then inform the correct value (a key missing from the goal never counts as a mismatch)
        if agent_inform_value != goal_informs.get(agent_inform_key, agent_inform_value):
            state['intent'] = 'inform'
            state['inform_slots'][agent_inform_key] = goal_informs[agent_inform_key]
            state['request_slots'].clear()
            state['history_slots'][agent_inform_key] = goal_informs[agent_inform_key]
        # Second Case: Otherwise pick a random action to take
        # - If anything in state requests then request it
        elif state['request_slots']:
            state['intent'] = 'request'
        # - Else if something to say in rest slots, pick something
        elif state['rest_slots']:
            # Keep the default key out of the random pick; it is only requested when nothing else is left
            def_in = state['rest_slots'].pop(self.default_key, False)
            if state['rest_slots']:
                key, value = random.choice(list(state['rest_slots'].items()))
                if value != 'UNK':
                    state['intent'] = 'inform'
                    state['inform_slots'][key] = value
                    state['rest_slots'].pop(key)
                    state['history_slots'][key] = value
                else:
                    state['intent'] = 'request'
                    state['request_slots'][key] = 'UNK'
            else:
                state['intent'] = 'request'
                state['request_slots'][self.default_key] = 'UNK'
            # Put the default key back if it was taken out above
            if def_in == 'UNK':
                state['rest_slots'][self.default_key] = 'UNK'
        # - Otherwise respond with 'nothing to say' intent
        else:
            state['intent'] = 'thanks'

    def _response_to_match_found(self, agent_action):
        """
        Augments the state in response to the agent action having an intent of match_found.

        Check if there is a match in the agent action that works with the current goal.

        Parameters:
            agent_action (dict): Agent action with an intent of match_found; its 'inform_slots' must
                                 contain the default key
        """

        state = self.state
        agent_informs = agent_action['inform_slots']

        state['intent'] = 'thanks'
        self.constraint_check = SUCCESS

        assert self.default_key in agent_informs
        state['rest_slots'].pop(self.default_key, None)
        state['history_slots'][self.default_key] = str(agent_informs[self.default_key])
        state['request_slots'].pop(self.default_key, None)

        if agent_informs[self.default_key] == 'no match available':
            self.constraint_check = FAIL

        # Check to see if all goal informs are in the agent informs, and that the values match
        for key, value in self.goal['inform_slots'].items():
            assert value is not None
            # For items that cannot be in the queries don't check to see if they are in the agent informs here
            if key in self.no_query:
                continue
            # Fails if key not in agent informs OR if value does not match value of agent informs[key]
            if value != agent_informs.get(key, None):
                self.constraint_check = FAIL
                break

        if self.constraint_check == FAIL:
            state['intent'] = 'reject'
            state['request_slots'].clear()

    def _response_to_done(self):
        """
        Augments the state in response to the agent action having an intent of done.

        The agent succeeds in this episode/conversation only if the constraint_check is SUCCESS and the
        rest slots of the state are empty (the request slots are then asserted to be empty as well).

        Returns:
            int: FAIL or SUCCESS
        """

        if self.constraint_check == FAIL:
            return FAIL

        # Goal slots that were never handled mean the agent ended the conversation too early
        if self.state['rest_slots']:
            return FAIL
        assert not self.state['request_slots']

        # TEMP: ----
        # Sanity check: the accepted match must really satisfy the goal informs
        matched_id = self.state['history_slots'][self.default_key]
        assert matched_id != 'no match available'

        # The database entry is only read here, so no copy is needed
        match_entry = self.database[int(matched_id)]

        for key, value in self.goal['inform_slots'].items():
            assert value is not None
            if key in self.no_query:
                continue
            assert value == match_entry.get(key, None), 'match: {}\ngoal: {}'.format(match_entry, self.goal)
        # ----------

        return SUCCESS