-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtrainer.py
426 lines (325 loc) · 10.5 KB
/
trainer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
from AI import *
import random
import time
import sys
from statistics import mean
# graphing imports
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
# constants: mode selectors compared against the first command-line argument in main()
TIMECONST = 's'  # train for a number of seconds (main() calls train_for)
NUMCONST = 'n'  # train for a number of iterations (main() calls train)
MAINCONT = 'm'  # run train_main(); takes no second argument
def train_main():
    """
    Test and train a fresh AI using train_test(...), then plot and print the results.

    Runs train_test(100, 100, 50, ai1) without a second ai, plots the
    returned percentage series (ai1 wins in red, ai2/trainer wins in blue,
    draws in green) and prints the average of each series.

    :return: None
    """
    ai1 = AI()
    wins_ai1, wins_ai2, draws_all = train_test(100, 100, 50, ai1)

    # plot one line per outcome; each point is the result of one test round
    plt.clf()
    plt.plot(wins_ai1, c='r')
    plt.plot(wins_ai2, c='b')
    plt.plot(draws_all, c='g')
    axis = plt.gca()
    axis.set_xlabel("Number of Iterations")
    axis.set_ylabel("Percentage of wins")

    # add legends (colours match the plot calls above)
    red_patch = mpatches.Patch(color='red', label='AI_1 wins')
    blue_patch = mpatches.Patch(color='blue', label='AI_2 wins')
    green_patch = mpatches.Patch(color='green', label='Draws')
    plt.legend(handles=[red_patch, blue_patch, green_patch])

    # print average values:
    print("Average AI_1 wins: {}%".format(mean(wins_ai1)))
    print("Average AI_2/Trainer wins: {}%".format(mean(wins_ai2)))
    print("Average draws: {}%".format(mean(draws_all)))

    # display the figure last so the averages are printed before any
    # blocking window opens; without this the plot is built but never shown
    plt.show()
def train_test(train_iter, test_iter, num_rounds, ai1, ai2=None):
    """
    Alternate training and testing phases between ai1 and its opponent.

    Each round lets the ai's learn for train_iter games, then switches
    learning off and plays test_iter games whose results are recorded as
    percentages. This is repeated num_rounds times. Self-saving is switched
    off for the whole run to make training faster, then switched back on
    and the ai's are saved once at the end.

    :param train_iter: num of games to train for in each round
    :param test_iter: num of games to test for in each round
    :param num_rounds: num of train/test rounds to run
    :param ai1: the ai to train/test
    :param ai2: the second ai to train/test; when omitted, train(...) is
                called with no second ai
    :return: [wins_ai_1, wins_ai_2, draws_all], one percentage per round in each list
    """
    def broadcast(method_name):
        # call the named no-argument method on ai1, and on ai2 when one was given
        getattr(ai1, method_name)()
        if ai2:
            getattr(ai2, method_name)()

    # per-round percentages are collected here
    wins_ai_1, wins_ai_2, draws_all = [], [], []

    # stop saving to make training faster
    broadcast("stop_self_saving")

    for _ in range(num_rounds):
        # training phase: learning switched on
        broadcast("start_learning")
        train(train_iter, ai1, ai2)

        # testing phase: learning switched off
        broadcast("stop_learning")
        # train(...) reports the trainer side (ai2) first, then ai1, then draws
        ai2_wins, ai1_wins, draws = train(test_iter, ai1, ai2)
        for series, count in ((wins_ai_1, ai1_wins),
                              (wins_ai_2, ai2_wins),
                              (draws_all, draws)):
            series.append(count/test_iter*100)

    # start saving again and persist what was learnt
    ai1.start_self_saving()
    ai1.save()
    if ai2:
        ai2.start_self_saving()
        ai2.save()

    return [wins_ai_1, wins_ai_2, draws_all]
def train_for(duration):
    """
    Train the ai for a given amount of time.

    Games are played one at a time through train(1) until duration seconds
    have elapsed; a game that is already running when the time is up is
    played to the end. The accumulated score is printed once training stops.

    :param duration: number of seconds to train for; zero or a negative
                     value plays no games
    :return: None
    """
    # use the monotonic clock with full precision: it cannot jump when the
    # system time is adjusted, and not truncating to whole seconds avoids
    # overshooting the requested duration by up to a second
    deadline = time.monotonic() + duration
    trainer_wins = 0
    ai_wins = 0
    draws = 0
    while time.monotonic() < deadline:
        # train(1) returns [trainer_wins, ai_wins, draws] for a single game
        # NOTE(review): train(1) builds a new AI() for every game; presumably
        # AI persists what it learns between instances - confirm
        game_trainer, game_ai, game_draws = train(1)
        trainer_wins += game_trainer
        ai_wins += game_ai
        draws += game_draws
    # print out stats
    print("Score[ Trainer : {}, AI : {}, Draws: {} ]\n".format(trainer_wins, ai_wins, draws))
def train(num, ai1=None, vs_ai=None):
    """
    Play num games between the ai and a trainer, reporting each result to the ai.

    The trainer side is played by the default trainer (get_trainer_move)
    unless vs_ai is given, in which case that second ai plays it. On the
    board the trainer's marks are 1 and the ai's marks are 2. The trainer
    moves first in the first game and the starting side alternates after
    every game.

    :param num: number of games to play
    :param ai1: the ai to train; a new AI() is created when omitted
    :param vs_ai: optional second ai that replaces the default trainer
    :return: [trainer_wins, ai_wins, draws]
    """
    ai = ai1 if ai1 is not None else AI()
    # a single "is not None" test decides everywhere whether a second ai plays
    ai_trainer = vs_ai
    has_trainer_ai = ai_trainer is not None

    # get some stats
    trainer_wins = 0
    ai_wins = 0
    draws = 0

    trainer_starts = True
    for _ in range(num):
        # every game starts from an empty board
        board = [0, 0, 0, 0, 0, 0, 0, 0, 0]
        turn = trainer_starts

        # while the game is not over
        while game_over(board) == -1:
            if turn:
                # trainer's turn: get move from other ai or trainer
                if has_trainer_ai:
                    # swap the marks so the trainer ai sees its own marks
                    # as 2, the same value the ai under training plays with
                    reflected_board = reflect_board(board)
                    index = ai_trainer.move(reflected_board)
                    print("Trainer AI played in :", index)
                else:
                    index = get_trainer_move(board)
                    print("Trainer played in :", index)
                board[index] = 1
            else:
                # ai's turn
                index = ai.move(board)
                board[index] = 2
                print("AI played in :", index)
            turn = not turn
            print(show_board(board))

        # figure out who won; evaluate the finished board only once
        outcome = game_over(board)
        if outcome == 1:
            print("Trainer won!")
            ai.has_lost()
            trainer_wins += 1
            # notify the ai trainer if needed
            if has_trainer_ai:
                ai_trainer.has_won()
        elif outcome == 2:
            print("AI won!")
            ai.has_won()
            ai_wins += 1
            # notify the ai trainer if needed
            if has_trainer_ai:
                ai_trainer.has_lost()
        elif outcome == 0:
            print("it was a draw!")
            ai.has_drawn()
            draws += 1
            # notify the ai trainer if needed
            if has_trainer_ai:
                ai_trainer.has_drawn()

        # print out the running score
        print("Score[ Trainer : {}, AI : {}, Draws: {} ]\n".format(trainer_wins, ai_wins, draws))

        # the other side starts the next game
        trainer_starts = not trainer_starts

    return [trainer_wins, ai_wins, draws]
def reflect_board(board):
    """
    Return a copy of the board with the two players' marks swapped.

    Every 1 becomes a 2 and every 2 becomes a 1; any other value
    becomes 0. The board passed in is not modified.

    :board: lst[]
    :return: lst[]
    """
    return [2 if cell == 1 else 1 if cell == 2 else 0 for cell in board]
def get_trainer_move(board, error=5):
    """
    Choose the default trainer's next move.
    The trainer see's itself as 1.

    A square that wins the game for the trainer is taken straight away.
    Otherwise a square where the ai (2) would win is blocked, except that
    1 time out of error the block is skipped on purpose. In every other
    case a random empty square is played.

    :param board: the current board
    :param error: the trainer makes a mistake 1 out of error times (default 5)
    :return: index of the square to play
    """
    block_at = -1
    for own_outcome, rival_outcome, square in check_moves(board):
        # winning beats everything else
        if own_outcome == 1:
            return square
        # remember the square to counter; a later one overrides an earlier one
        if rival_outcome == 2:
            block_at = square

    # the mistake roll is drawn whether or not there is something to block
    roll = random.randint(1, error)
    if block_at != -1 and roll != 1:
        return block_at

    # make a random selection from all the possible moves
    empty_squares = [square for square, cell in enumerate(board) if cell == 0]
    return random.choice(empty_squares)
def check_moves(board):
    """
    Work out what each possible move on the board would lead to.
    The trainer see's itself as 1.

    For every empty square the result of game_over is recorded twice:
    once with the trainer (1) playing there and once with the opponent (2)
    playing there. The board passed in is not modified.

    :param board: the current board
    :return: list of [outcome_if_1_plays, outcome_if_2_plays, index],
             one entry per empty square
    """
    assessments = []
    for square, cell in enumerate(board):
        if cell != 0:
            continue
        trial = board[:]
        outcomes = []
        # try the trainer's mark first, then the opponent's, on the same copy
        for player in (1, 2):
            trial[square] = player
            outcomes.append(game_over(trial))
        assessments.append(outcomes + [square])
    return assessments
def game_over(board):
    """
    Report the state of the game on the given board.
    1: the winner is the trainer
    2: the winner is the ai
    0: there is a draw
    -1: the game is still going on

    :param board: list[] of 9 squares
    :return: int
    """
    # checked in this order: horizontals, verticals, then diagonals
    winning_lines = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )
    for first, second, third in winning_lines:
        if board[first] == board[second] == board[third] != 0:
            return board[first]

    # no winner: an empty square left means the game is still going on
    if 0 in board:
        return -1
    # a full board without a winner is a draw
    return 0
def main(sys):
    """
    Get data from arguments and use trainer.

    Usage:
    pythonX trainer.py [ s | n | m ] [ time_in_seconds | num_in_iterations ]

    Example:
    $> python trainer.py s 30
    Training for 30 seconds
    ...
    $> python trainer.py n 30
    Training for 30 iterations
    ...

    The m mode takes no second argument and runs train_main().

    :param sys: object whose argv attribute holds the command line
                (the sys module when run as a script)
    :return: None after a successful run, -1 when the arguments are invalid
             (the usage message is printed in that case)
    """
    # build the usage text from the mode constants so the two cannot drift apart
    format_msg = "trainer.py [ {}/{}/{} ] [ time_in_seconds | num_in_iterations ]".format(
        TIMECONST, NUMCONST, MAINCONT)
    args = sys.argv[1:]

    # treat m separately: it is the only mode without a second argument
    if len(args) == 1 and args[0] == MAINCONT:
        train_main()
        return

    if len(args) != 2:
        print(format_msg)
        return -1

    train_type, raw_amount = args
    try:
        train_duration = int(raw_amount)
    except ValueError:
        # a non-numeric amount is a usage error, not a crash
        print(format_msg)
        return -1

    if train_type == TIMECONST:
        print('Training for {} seconds'.format(train_duration))
        # train for train_duration seconds
        train_for(train_duration)
    elif train_type == NUMCONST:
        print('Training for {} iterations'.format(train_duration))
        # train for train_duration iterations
        train(train_duration)
    else:
        # unknown mode: report it the same way as a wrong argument count
        print(format_msg)
        return -1
# script entry point
if __name__ == "__main__":
    if len(sys.argv) > 1:
        # arguments given: honour the command-line usage documented in main()
        # and hand its result (None or -1) to the shell as the exit status
        sys.exit(main(sys))
    else:
        # no arguments: keep the previous default of testing out the trainer
        train_main()