<!DOCTYPE html>
<html lang="en">
<head>
<link rel="shortcut icon" href="favicon.ico?">
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description" content="Workshop on Reinforcement Learning at ICML 2021">
<title> Workshop on Reinforcement Learning Theory </title>
<!-- Bootstrap Core CSS -->
<link href="css/bootstrap.min.css" rel="stylesheet">
<!-- Custom CSS -->
<link href="css/agency.css" rel="stylesheet">
<!-- Custom Fonts -->
<link href="css/font-awesome.min.css" rel="stylesheet" type="text/css">
<link href="https://fonts.googleapis.com/css?family=Montserrat:400,700" rel="stylesheet" type="text/css">
<link href='https://fonts.googleapis.com/css?family=Kaushan+Script' rel='stylesheet' type='text/css'>
<link href='https://fonts.googleapis.com/css?family=Droid+Serif:400,700,400italic,700italic' rel='stylesheet' type='text/css'>
<link href='https://fonts.googleapis.com/css?family=Roboto+Slab:400,100,300,700' rel='stylesheet' type='text/css'>
</head>
<body id="page-top" class="index">
<style>
/*********************************
The list of publication items
*********************************/
/* The list of items */
.abslist { }
/* The item */
.abslist li { }
/* You can define custom styles for plstyle field here. */
/*************************************
The box that contain BibTeX code
*************************************/
div.noshow { display: none; }
div.abstract{
margin-left:5%;
margin-right:5%;
margin-top:1.2em;
margin-bottom:1em;
border:1px solid silver;
padding: 0em 1em;
background: #ffffee;
}
</style>
<script type="text/javascript">
function toggleAbstract(articleid) {
var abs = document.getElementById('abs_'+articleid);
if (abs) {
if(abs.className.indexOf('abstract') != -1) {
abs.className.indexOf('noshow') == -1?abs.className = 'abstract noshow':abs.className = 'abstract';
}
} else {
return;
}
}
</script>
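<!--
Usage sketch (hypothetical; the paper entries on this page only link to PDFs and do not
currently include abstract divs): toggleAbstract expects a trigger that passes an article
id and a matching div whose id is 'abs_' + articleid, styled by the .abstract and .noshow
rules above. The id '2' and the onclick link below are illustrative assumptions.

<li>
  <strong>Example Paper Title</strong>
  <a href="javascript:void(0);" onclick="toggleAbstract('2');">[Abstract]</a>
  <div id="abs_2" class="abstract noshow">
    Abstract text goes here; each click on the link toggles the 'noshow' class to show or hide this box.
  </div>
</li>
-->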
<!--
<style>
div.bibtex {
  margin-right: 0%;
  margin-top: 1.2em;
  margin-bottom: 1em;
  border: 1px solid silver;
  padding: 0em 1em;
  background: #ffffee;
}
div.bibtex pre { font-size: 75%; text-align: left; overflow: auto; width: 100%; padding: 0em 0em; }
</style>
<script type="text/javascript">
// Toggle display of BibTeX for a paper entry (currently disabled).
function toggleBibtex(articleid) {
  var bib = document.getElementById('bib_' + articleid);
  if (!bib) {
    return;
  }
  if (bib.className.indexOf('bibtex') != -1) {
    bib.className = (bib.className.indexOf('noshow') == -1) ? 'bibtex noshow' : 'bibtex';
  }
}
</script>
-->
<!-- Navigation -->
<nav class="navbar navbar-default navbar-fixed-top">
<div class="container">
<!-- Brand and toggle get grouped for better mobile display -->
<div class="navbar-header page-scroll">
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
</div>
<!-- Collect the nav links, forms, and other content for toggling -->
<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
<ul class="nav navbar-nav navbar-middle">
<li class="hidden">
<a href="#page-top"></a>
</li>
<!-- <li>
<a class="page-scroll" href="#shutdownstem">#ShutDownSTEM</a>
</li> -->
<li>
<a class="page-scroll" href="#overview">Overview</a>
</li>
<li>
<a class="page-scroll" href="#program">Schedule</a>
</li>
<li>
<a class="page-scroll" href="#keynote">Keynote Speakers</a>
</li>
<li>
<a class="page-scroll" href="#papers">Contributed Papers</a>
</li>
<!-- <li>
<a class="page-scroll" href="#call">Call</a>
</li> -->
<li>
<a class="page-scroll" href="#Program Committee">Program Committee</a>
</li>
<li>
<a class="page-scroll" href="#dates">Important Dates</a>
</li>
<li>
<a class="page-scroll" href="#organization">Organizers</a>
</li>
</ul>
</div>
<!-- /.navbar-collapse -->
</div>
<!-- /.container-fluid -->
</nav>
<!-- Header -->
<header >
<style>
header {
background-image:url(img/black.png);
filter: brightness(80%); color:#fff
}
</style>
<div class="container">
<div class="intro-text">
<div class="intro-heading"> Workshop on Reinforcement Learning Theory </br></div>
<div class="intro-lead-in"> @ <a href="https://icml.cc/Conferences/2021" target=_blank>ICML 2021</a></br></div>
<!--a href="#call" target=_blank class="page-scroll btn btn-xl">Submit a Paper</a-->
</div>
</div>
</header>
<!-- Introduction Section -->
<section id="overview">
<div class="container">
<div class="row">
<div class="col-lg-12 text-center">
<h2 class="section-heading">Overview</h2>
<!--h3 class="section-subheading text-muted">Lorem ipsum dolor sit amet consectetur.</h3-->
</div>
</div>
<div class="row text-justify">
<div class="col-md-12">
<p class="large text-muted">
While over the years we have witnessed numerous impressive demonstrations of the power of various reinforcement learning (RL) algorithms, and while much progress has been made on the theoretical side as well, the theoretical understanding of the challenges that underlie RL is still rather limited. The best-studied problem settings, such as learning and acting in finite state-action Markov decision processes or simple linear control systems, fail to capture the essential characteristics of seemingly more practically relevant problem classes, where the size of the state-action space is often astronomical, the planning horizon is huge, the dynamics are complex, interaction with the controlled system is not permitted, or learning has to happen from heterogeneous offline data. To tackle these diverse issues, more and more theoreticians with a wide range of backgrounds have come to study RL and have proposed numerous new models along with exciting novel developments in both algorithm design and analysis. The workshop's goal is to highlight advances in theoretical RL and bring together researchers from different backgrounds to discuss RL theory from different perspectives: modeling, algorithms, analysis, etc.
</p>
<p class ="large text-muted">
This workshop will feature seven keynote speakers from computer science, operations research, control, and statistics to highlight recent progress, identify key challenges, and discuss future directions. Invited keynotes will be augmented by contributed talks, poster presentations, panel discussions, and virtual social events.
</p>
</div>
</div>
</div>
</section>
<!-- Program Section -->
<section id="program" class="bg-mid-gray">
<div class="container">
<div class="row">
<div class="col-lg-12 text-center">
<h2 class="section-heading">Schedule</h2>
<h3 class="section-subheading text-muted">
<!-- 6:30 am - 4:45 pm PDT <br>
June 17, 2020<br> -->
<tr>
<td>UTC 16:00 - 16:25 </td>
<td> <strong>Emily Kaufmann <em>(Invited Talk) </em></strong>
</td>
</tr>
<br>
<tr>
<td>UTC 16:25 - 16:50 </td>
<td> <strong>Christian Kroer <em>(Invited Talk) </em></strong>
</td>
</tr>
<br>
<tr>
<td>UTC 17:00 - 17:50</td>
<td> <strong>Short Contributed Talks: </strong>
<br>Sparsity in the Partially Controllable LQR
<br> On the Theory of Reinforcement Learning with Once-per-Episode Feedback
<br> Implicit Finite-Horizon Approximation for Stochastic Shortest Path
<br> Provable Benefits of Actor-Critic Methods for Offline Reinforcement Learning
</tr>
<br>
<tr>
<td>UTC 18:00 - 18:25 </td>
<td> <strong>Animashree Anandkumar <em>(Invited Talk) </em></strong>
</td>
</tr>
<br>
<td>UTC 18:25 - 18:50 </td>
<td> <strong>Shie Mannor <em>(Invited Talk) </em></strong>
</td>
<br>
<tr>
<td> UTC 19:00 - 19:30 </td>
<td><strong><a href="https://eventhosts.gather.town/aRbJAjii62VNi2Nu/worlt-lounge-room"> Social Session </a> </strong></em>
</td>
</tr>
<br>
<tr>
<td>UTC 19:30 - 21:00 </td>
<td><strong><a href="https://eventhosts.gather.town/bJEZJHY6WDAGJ1UX/worlt-social-session"> Poster Session </a></strong>
</td>
</tr>
<br>
<tr>
<td>UTC 21:00 - 21:25 </td>
<td><strong>Bo Dai <em>(Invited Talk)</em></strong>
</td>
</tr>
<br>
<tr>
<td>UTC 21:25 - 21:50 </td>
<td><strong>Qiaomin Xie <em>(Invited Talk)</em></strong>
</td>
</tr>
<br>
<tr>
<td>UTC 22:00 - 22:50 </td>
<td><strong>Short Contributed Talks:</strong></em>
<br> Bad-Policy Density: A Measure of Reinforcement-Learning Hardness
<br> Sample-Efficient Learning of Stackelberg Equilibria in General-Sum Games
<br> Solving Multi-Arm Bandit Using a Few Bits of Communication
<br> CRPO: A New Approach for Safe Reinforcement Learning with Convergence Guarantee
</td>
</tr><br>
<tr>
<td>UTC 23:00 - 23:25 </td>
<td><strong>Art Owen <em>(Invited Talk)</em></strong>
</td>
</tr><br>
<tr>
<td>UTC 23:30 - 0:00 </td>
<td><strong>Panel Discussion</strong>
</tr>
<br>
<tr>
<td>UTC 0:00 - 0:30 </td>
<td><strong><a href="https://eventhosts.gather.town/aRbJAjii62VNi2Nu/worlt-lounge-room"> Social Session </a> </strong></em>
</td>
</tr>
<br>
<tr>
<td>UTC 0:30 - 2:00 </td>
<td><strong><a href="https://eventhosts.gather.town/bJEZJHY6WDAGJ1UX/worlt-social-session"> Poster Session </a></strong>
</td>
</tr>
</h3>
</div>
</div>
</div>
</div>
</section>
<!-- Keynote Section -->
<section id="keynote">
<div class="container">
<div class="row text-justify">
<div class="col-lg-12 text-center">
<h2 class="section-heading">Keynote Speakers</h2>
</div>
</div>
<div class="row centered">
<div class="col-sm-2">
<div class="team-member">
<img src="img/anima.jpeg" height="150" width="150" class="img-responsive img-circle">
<a href="http://tensorlab.cms.caltech.edu/users/anima/"><h4>Anima Anandkumar</h4></a>
<p class="text-muted">Professor<br/>California Institute of Technology</p>
</div>
</div>
<div class="col-sm-2">
<div class="team-member">
<img src="img/bo.png" height="150" width="150" class="img-responsive img-circle">
<a href="https://sites.google.com/site/daibohr/"><h4>Bo Dai</h4></a>
<p class="text-muted">Senior Research Scientist<br/>Google Brain</p>
</div>
</div>
<div class="col-sm-2">
<div class="team-member">
<img src="img/emilie.png" height="150" width="150" class="img-responsive img-circle">
<a href="http://chercheurs.lille.inria.fr/ekaufman/"><h4>Emilie Kaufmann</h4></a>
<p class="text-muted">Principal Researcher<br/>CNRS Junior Researcher</p>
</div>
</div>
</div>
<div class="row centered">
<div class="col-sm-2">
<div class="team-member">
<img src="img/chris.jpeg" height="150" width="150" class="img-responsive img-circle">
<a href="http://www.columbia.edu/~ck2945/"><h4>Christian Kroer</h4></a>
<p class="text-muted">Assistant Professor<br/>Columbia University</p>
</div>
</div>
<div class="col-sm-2">
<div class="team-member">
<img src="img/shie.jpeg" height="150" width="150" class="img-responsive img-circle">
<a href="https://webee.technion.ac.il/Sites/People/shie/"><h4>Shie Mannor</h4></a>
<p class="text-muted">Professor<br/>Technion</p>
</div>
</div>
<div class="col-sm-2">
<div class="team-member">
<img src="img/art.png" height="150" width="150" class="img-responsive img-circle">
<a href="http://statweb.stanford.edu/~owen/"><h4>Art Owen</h4></a>
<p class="text-muted">Professor<br/>Stanford University</p>
</div>
</div>
<div class="col-sm-2">
<div class="team-member">
<img src="img/qiaomin.jpeg" height="150" width="150" class="img-responsive img-circle">
<a href="https://sites.coecis.cornell.edu/qiaominxie/"><h4>Qiaomin Xie</h4></a>
<p class="text-muted">Visiting Assistant Professor<br/>Cornell University</p>
</div>
</div>
</div>
</div>
</section>
<!-- Papers -->
<section id="papers" class="bg-mid-gray">
<div class="container">
<div class="row">
<div class="col-lg-12 text-center">
<h2 class="section-heading">Papers</h2>
</div>
</div>
<div class="row text-justify">
<div class="col-md-12">
<p class="large text-muted">
<ul class = "abslist text-left">
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Bad-Policy Density: A Measure of Reinforcement-Learning Hardness </strong>
<br><em>David Abel (DeepMind); Cameron S Allen (Brown University); Dilip Arumugam (Stanford University); D Ellis Hershkowitz (Carnegie Mellon University); Michael L. Littman (Brown University); Lawson L.S. Wong (Northeastern University)</em><br>
<a href="camera_ready/2.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Finding the Near Optimal Policy via Reductive Regularization in MDPs </strong>
<br><em>Wenhao Yang (Peking University); Xiang Li (Peking University); Guangzeng Xie (Peking University); Zhihua Zhang (Peking University)</em><br>
<a href="camera_ready/3.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Finite Sample Analysis of Average-Reward TD Learning and $Q$-Learning </strong>
<br><em>Sheng Zhang (Georgia Institute of Technology); Zhe Zhang (Georgia Institute of Technology); Siva Theja Maguluri (Georgia Tech)</em><br>
<a href="camera_ready/4.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Sample Complexity of Offline Reinforcement Learning with Deep ReLU Networks</strong>
<br><em>Thanh Nguyen-Tang (Deakin University); Sunil Gupta (Deakin University, Australia); Hung Tran-The (Deakin University); Svetha Venkatesh (Deakin University)</em><br>
<a href="camera_ready/5.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Triple-Q: A Model-Free Algorithm for Constrained Reinforcement Learning with Sublinear Regret and Zero Constraint Violation</strong>
<br><em>Honghao Wei (University of Michigan); Xin Liu (University of Michigan); Lei Ying (University of Michigan)</em><br>
<a href="camera_ready/6.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Subgaussian Importance Sampling for Off-Policy Evaluation and Learning</strong>
<br><em>Alberto Maria Metelli (Politecnico di Milano); Alessio Russo (Politecnico di Milano); Marcello Restelli (Politecnico di Milano) </em><br>
<a href="camera_ready/7.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Minimax Regret for Stochastic Shortest Path </strong>
<br><em>Alon Cohen (Technion and Google Inc.); Yonathan Efroni (Microsoft Research); Yishay Mansour (Tel Aviv University and Google Research); Aviv Rosenberg (Tel Aviv University)</em><br>
<a href="camera_ready/8.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Collision Resolution in Multi-player Bandits Without Observing Collision Information </strong>
<br><em>Eleni Nisioti (Inria); Nikolaos Thomos (U of Essex); Boris Bellalta (Pompeu Fabra University); Anders Jonsson (UPF) </em><br>
<a href="camera_ready/9.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Marginalized Operators for Off-Policy Reinforcement Learning </strong>
<br><em>Yunhao Tang (Columbia University); Mark Rowland (DeepMind); Remi Munos (DeepMind); Michal Valko (DeepMind)</em><br>
<a href="camera_ready/10.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Nonstationary Reinforcement Learning with Linear Function Approximation </strong>
<br><em>Huozhi Zhou (UIUC); Jinglin Chen (University of Illinois at Urbana-Champaign); Lav Varshney (UIUC: ECE); Ashish Jagmohan (IBM Research)</em><br>
<a href="camera_ready/12.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> CRPO: A New Approach for Safe Reinforcement Learning with Convergence Guarantee</strong>
<br><em>Tengyu Xu (The Ohio State University); Yingbin Liang (The Ohio State University); Guanghui Lan (Georgia Tech)</em><br>
<a href="camera_ready/13.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Sparsity in the Partially Controllable LQR</strong>
<br><em>Yonathan Efroni (Microsoft Research); Sham Kakade (University of Washington); Akshay Krishnamurthy (Microsoft); Cyril Zhang (Microsoft Research)</em><br>
<a href="camera_ready/14.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Finite-Sample Analysis of Off-Policy TD-Learning via Generalized Bellman Operators </strong>
<br><em>Zaiwei Chen (Georgia Institute of Technology); Siva Theja Maguluri (Georgia Tech); Sanjay Shakkottai (University of Texas at Austin); Karthikeyan Shanmugam (IBM Research NY)</em><br>
<a href="camera_ready/15.pdf">[Paper]</a>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Derivative-Free Policy Optimization for Linear Risk-Sensitive and Robust Control Design: Implicit Regularization and Sample Complexity</strong>
<br><em>Kaiqing Zhang (University of Illinois at Urbana-Champaign (UIUC)/MIT); Xiangyuan Zhang (University of Illinois at Urbana-Champaign); Bin Hu (University of Illinois at Urbana-Champaign); Tamer Basar (University of Illinois at Urbana-Champaign) </em><br>
<a href="camera_ready/16.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> When Is Generalizable Reinforcement Learning Tractable?</strong>
<br><em>Dhruv Malik (Carnegie Mellon University); Yuanzhi Li (CMU); Pradeep Ravikumar (Carnegie Mellon University)</em><br>
<a href="camera_ready/17.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Finite-Sample Analysis of Off-Policy Natural Actor-Critic With Linear Function Approximation</strong>
<br><em>Zaiwei Chen (Georgia Institute of Technology); Sajad khodadadian (Georgia Tech); Siva Theja Maguluri (Georgia Tech) </em><br>
<a href="camera_ready/19.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>The Importance of Non-Markovianity in Maximum State Entropy Exploration</strong>
<br><em>Mirco Mutti (Politecnico di Milano, Università di Bologna); Riccardo De Santi (ETH Zurich ); Marcello Restelli (Politecnico di Milano) </em><br>
<a href="camera_ready/20.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Global Convergence of Multi-Agent Policy Gradient in Markov Potential Games </strong>
<br><em>Stefanos Leonardos (Singapore University of Technology and Design); Will Overman (University of California, Irvine); Ioannis Panageas (UC Irvine); Georgios Piliouras (Singapore University of Technology and Design)</em><br>
<a href="camera_ready/21.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Efficient Inverse Reinforcement Learning of Transferable Rewards</strong>
<br><em>Giorgia Ramponi (Politecnico di Milano); Alberto Maria Metelli (Politecnico di Milano); Marcello Restelli (Politecnico di Milano)</em><br>
<a href="camera_ready/22.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Learning to Observe with Reinforcement Learning </strong>
<br><em>Mehmet Koseoglu (Hacettepe University); Ece Kunduracioglu (Hacetttepe University); Ayca Ozcelikkale (Uppsala University) </em><br>
<a href="camera_ready/23.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Sample Efficient Reinforcement Learning In Continuous State Spaces: A Perspective Beyond Linearity </strong>
<br><em>Dhruv Malik (Carnegie Mellon University); Aldo Pacchiano (UC Berkeley); Vishwak Srinivasan (Carnegie Mellon University); Yuanzhi Li (CMU) </em><br>
<a href="camera_ready/25.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Bagged Critic for Continuous Control </strong>
<br><em>Payal Bawa (University of Sydney)</em><br>
<a href="camera_ready/26.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Reinforcement Learning in Linear MDPs: Constant Regret and Representation Selection </strong>
<br><em>Matteo Papini (Politecnico di Milano); Andrea Tirinzoni (Inria); Aldo Pacchiano (UC Berkeley); Marcello Restelli (Politecnico di Milano); Alessandro Lazaric (FAIR); Matteo Pirotta (Facebook AI Research) </em><br>
<a href="camera_ready/27.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> A Fully Problem-Dependent Regret Lower Bound for Finite-Horizon MDPs</strong>
<br><em>Andrea Tirinzoni (Inria); Matteo Pirotta (Facebook AI Research); Alessandro Lazaric (FAIR)</em><br>
<a href="camera_ready/28.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Optimal and instance-dependent oracle inequalities for policy evaluation </strong>
<br><em>Wenlong Mou (UC Berkeley); Ashwin Pananjady (Georgia Institute of Technology); Martin Wainwright (UC Berkeley) </em><br>
<a href="camera_ready/29.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Optimistic Exploration with Backward Bootstrapped Bonus for Deep Reinforcement Learning </strong>
<br><em>Chenjia Bai (Harbin Institute of Technology); Lingxiao Wang (Northwestern University); Lei Han (Tencent AI Lab); Jianye Hao (Tianjin University); Animesh Garg (University of Toronto, Vector Institute, Nvidia); Peng Liu (Harbin Institute of Technology); Zhaoran Wang (Northwestern U)</em><br>
<a href="camera_ready/30.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Provable Benefits of Actor-Critic Methods for Offline Reinforcement Learning </strong>
<br><em>Andrea Zanette (Stanford University); Martin Wainwright (UC Berkeley); Emma Brunskill (Stanford University)</em><br>
<a href="camera_ready/31.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Reward-Weighted Regression Converges to a Global Optimum </strong>
<br><em>Miroslav Strupl (IDSIA); Francesco Faccio (The Swiss AI Lab IDSIA); Dylan Ashley (IDSIA); Rupesh Kumar Srivastava (NNAISENSE); Jürgen Schmidhuber (IDSIA - Lugano)</em><br>
<a href="camera_ready/32.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Solving Multi-Arm Bandit Using a Few Bits of Communication </strong>
<br><em>Osama A Hanna (UCLA); Lin Yang (UCLA); Christina Fragouli</em><br>
<a href="camera_ready/33.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Comparison and Unification of Three Regularization Methods in Batch Reinforcement Learning</strong>
<br><em>Sarah Rathnam (Harvard University); Susan Murphy (Harvard University); Finale Doshi-Velez (Harvard)</em><br>
<a href="camera_ready/34.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Oracle-Efficient Regret Minimization in Factored MDPs with Unknown Structure</strong>
<br><em>Aviv Rosenberg (Tel Aviv University); Yishay Mansour (Tel Aviv University and Google Research) </em><br>
<a href="camera_ready/35.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Learning Adversarial Markov Decision Processes with Delayed Feedback </strong>
<br><em>Tal Lancewicki (Tel-Aviv University); Aviv Rosenberg (Tel Aviv University); Yishay Mansour (Tel Aviv University and Google Research)</em><br>
<a href="camera_ready/36.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Why Generalization in RL is Difficult: Epistemic POMDPs and Implicit Partial Observability</strong>
<br><em>Dibya Ghosh (UC Berkeley); Jad Rahme (Princeton University); Aviral Kumar (UC Berkeley); Amy Zhang (McGill University); Ryan P Adams (Princeton University); Sergey Levine (UC Berkeley)</em><br>
<a href="camera_ready/37.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Statistical Inference with M-Estimators on Adaptively Collected Data </strong>
<br><em>Kelly W Zhang (Harvard University); Lucas Janson (Harvard University); Susan Murphy (Harvard University)</em><br>
<a href="camera_ready/38.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Randomized Least Squares Policy Optimization </strong>
<br><em>Haque Ishfaq (Mila, McGill University); Zhuoran Yang (Princeton University); Andrei Lupu (Mila, McGill University); Viet Nguyen (Mila, McGill University); Lewis Liu (Mila & DIRO); Riashat Islam (Mila, McGill University); Zhaoran Wang (Northwestern); Doina Precup (McGill University)</em><br>
<a href="camera_ready/39.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Gap-Dependent Unsupervised Exploration for Reinforcement Learning</strong>
<br><em>Jingfeng Wu (Johns Hopkins University); Vladimir Braverman (Johns Hopkins University); Lin Yang (UCLA)</em><br>
<a href="camera_ready/40.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Online Learning for Stochastic Shortest Path Model via Posterior Sampling</strong>
<br><em>Mehdi Jafarnia Jahromi (University of Southern California); Liyu Chen (USC); Rahul Jain (University of Southern California); Haipeng Luo (USC)</em><br>
<a href="camera_ready/41.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Provable Model-based Nonlinear Bandit and Reinforcement Learning: Shelve Optimism, Embrace Virtual Curvature </strong>
<br><em>Kefan Dong (Stanford University); Jiaqi Yang (Tsinghua University); Tengyu Ma (Stanford University)</em><br>
<a href="camera_ready/42.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Linear Convergence of Entropy-Regularized Natural Policy Gradient with Linear Function Approximation </strong>
<br><em>Semih Cayci (University of Illinois at Urbana-Champaign); Niao He (ETH Zurich); R Srikant (UIUC) </em><br>
<a href="camera_ready/43.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Decentralized Q-Learning in Zero-sum Markov Games </strong>
<br><em>Muhammed Sayin (MIT); Kaiqing Zhang (University of Illinois at Urbana-Champaign (UIUC)/MIT); David S Leslie (Lancaster University); Tamer Basar (University of Illinois at Urbana-Champaign); Asuman Ozdaglar (MIT)</em><br>
<a href="camera_ready/44.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Implicit Finite-Horizon Approximation for Stochastic Shortest Path</strong>
<br><em>Liyu Chen (USC); Mehdi Jafarnia Jahromi (University of Southern California); Rahul Jain (University of Southern California); Haipeng Luo (USC)</em><br>
<a href="camera_ready/45.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>On the Theory of Reinforcement Learning with Once-per-Episode Feedback </strong>
<br><em>Niladri S Chatterji (UC Berkeley); Aldo Pacchiano (UC Berkeley); Peter Bartlett; Michael Jordan (UC Berkeley)</em><br>
<a href="camera_ready/46.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Model-based Offline Reinforcement Learning with Local Misspecification</strong>
<br><em>Kefan Dong (Stanford University); Ramtin Keramati (Stanford University); Emma Brunskill (Stanford University)</em><br>
<a href="camera_ready/47.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Nearly Minimax Optimal Regret for Learning Infinite-horizon Average-reward MDPs with Linear Function Approximation</strong>
<br><em>Yue Wu (University of California, Los Angeles); Dongruo Zhou (UCLA); Quanquan Gu (University of California, Los Angeles)</em><br>
<a href="camera_ready/48.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Learning from an Exploring Demonstrator: Optimal Reward Estimation for Bandits</strong>
<br><em>Wenshuo Guo (UC Berkeley); Kumar Krishna Agrawal (UC Berkeley); Aditya Grover (Facebook AI Research); Vidya Muthukumar (UC Berkeley); Ashwin Pananjady (UC Berkeley)</em><br>
<a href="camera_ready/49.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Model-Free Approach to Evaluate Reinforcement Learning Algorithms </strong>
<br><em>Denis Belomestny (Universitaet Duisburg-Essen); Ilya Levin (National Research University "Higher School of Economics"); Eric Moulines (Ecole Polytechnique); Alexey Naumov (National Research University Higher School of Economics); Sergey Samsonov (National Research University Higher School of Economics); Veronika Zorina (National Research University Higher School of Economics)</em><br>
<a href="camera_ready/50.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Provable RL with Exogenous Distractors via Multistep Inverse Dynamics </strong>
<br><em>Yonathan Efroni (Microsoft Research); Dipendra Misra (Microsoft); Akshay Krishnamurthy (Microsoft); Alekh Agarwal (Microsoft); John Langford (Microsoft)</em><br>
<a href="camera_ready/51.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Learning Pareto-Optimal Policies in Low-Rank Cooperative Markov Games </strong>
<br><em>Abhimanyu Dubey (Massachusetts Institute of Technology); Alex 'Sandy' Pentland (MIT)</em><br>
<a href="camera_ready/52.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Optimal Uniform OPE and Model-based Offline Reinforcement Learning in Time-Homogeneous, Reward-Free and Task-Agnostic Settings</strong>
<br><em>Ming Yin (UC Santa Barbara); Yu-Xiang Wang (UC Santa Barbara)</em><br>
<a href="camera_ready/53.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Bridging The Gap between Local and Joint Differential Privacy in RL</strong>
<br><em>Evrard Garcelon (Facebook AI Research ); Vianney Perchet (ENS Paris-Saclay & Criteo AI Lab); Ciara Pike-Burke (Imperial College London); Matteo Pirotta (Facebook AI Research)</em><br>
<a href="camera_ready/54.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Sample-Efficient Learning of Stackelberg Equilibria in General-Sum Games</strong>
<br><em>Yu Bai (Salesforce Research); Chi Jin (Princeton University); Huan Wang (Salesforce Research); Caiming Xiong (Salesforce Research)</em><br>
<a href="camera_ready/55.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Near-Optimal Offline Reinforcement Learning via Double Variance Reduction </strong>
<br><em>Ming Yin (UC Santa Barbara); Yu Bai (Salesforce Research); Yu-Xiang Wang (UC Santa Barbara)</em><br>
<a href="camera_ready/56.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Mixture of Step Returns in Bootstrapped DQN</strong>
<br><em>Po-Han Chiang (National Tsing Hua University); Hsuan-Kung Yang (National Tsing Hua University); Zhang-Wei Hong (Preferred Networks); Chun-Yi Lee (National Tsing Hua University)</em><br>
<a href="camera_ready/57.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Nearly Optimal Regret for Learning Adversarial MDPs with Linear Function Approximation</strong>
<br><em>Jiafan He (UCLA); Dongruo Zhou (UCLA); Quanquan Gu (University of California, Los Angeles)</em><br>
<a href="camera_ready/58.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Provably efficient exploration-free transfer RL for near-deterministic latent dynamics</strong>
<br><em>Yao Liu (Stanford University); Dipendra Misra (Microsoft); Miroslav Dudik (Microsoft); Robert Schapire (Microsoft)</em><br>
<a href="camera_ready/59.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Stochastic Shortest Path: Minimax, Parameter-Free and Towards Horizon-Free Regret </strong>
<br><em>Jean Tarbouriech (FAIR & Inria); Runlong Zhou (Tsinghua University); Simon Du (University of Washington); Matteo Pirotta (Facebook AI Research); Michal Valko (DeepMind); Alessandro Lazaric (FAIR)</em><br>
<a href="camera_ready/60.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> A Spectral Approach to Off-Policy Evaluation for POMDPs</strong>
<br><em>Yash Nair (Harvard College); Nan Jiang (University of Illinois at Urbana-Champaign)</em><br>
<a href="camera_ready/61.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Mind the Gap: Safely Bridging Offline and Online Reinforcement Learning </strong>
<br><em>Wanqiao Xu (University of Michigan); Kan Xu (University of Pennsylvania); Hamsa Bastani (Wharton); Osbert Bastani (University of Pennsylvania)</em><br>
<a href="camera_ready/62.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Learning Nash Equilibria in Zero-Sum Stochastic Games via Entropy-Regularized Policy Approximation </strong>
<br><em>Yue Guan (Georgia Institute of Technology); Qifan Zhang (Georgia Institute of Technology); Panagiotis Tsiotras (Georgia Institute of Technology) </em><br>
<a href="camera_ready/63.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Invariant Policy Learning: A Causal Perspective</strong>
<br><em>Sorawit Saengkyongam (University of Copenhagen); Nikolaj Thams (University of Copenhagen); Jonas Peters (University of Copenhagen); Niklas Pfister (University of Copenhagen)</em><br>
<a href="camera_ready/64.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>A functional mirror ascent view of policy gradient methods with function approximation </strong>
<br><em>Sharan Vaswani (Amii, University of Alberta); Olivier Bachem (Google Brain); Simone Totaro (Mila, Université de Montréal); Robert Mueller (TU Munich); Matthieu Geist (Google Brain); Marlos C. Machado (Amii, University of Alberta, and DeepMind); Pablo Samuel Castro (Google Brain); Nicolas Le Roux (MILA, Université de Montréal and McGill University)</em><br>
<a href="camera_ready/65.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Policy Finetuning: Bridging Sample-Efficient Offline and Online Reinforcement Learning</strong>
<br><em>Tengyang Xie (University of Illinois at Urbana-Champaign); Nan Jiang (University of Illinois at Urbana-Champaign); Huan Wang (Salesforce Research); Caiming Xiong (Salesforce Research); Yu Bai (Salesforce Research) </em><br>
<a href="camera_ready/66.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Robust online control with model misspecification</strong>
<br><em>Xinyi Chen (Google); Udaya Ghai (Princeton University); Elad Hazan (Princeton University); Alexandre Megretsky (Massachusetts Institute of Technology) </em><br>
<a href="camera_ready/67.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Online Sub-Sampling for Reinforcement Learning with General Function Approximation </strong>
<br><em>Dingwen Kong (Peking University); Ruslan Salakhutdinov (Carnegie Mellon University); Ruosong Wang (Carnegie Mellon University); Lin Yang (UCLA) </em><br>
<a href="camera_ready/68.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Is Pessimism Provably Efficient for Offline RL? </strong>
<br><em>Ying Jin (Stanford University); Zhuoran Yang (Princeton University); Zhaoran Wang (Northwestern U) </em><br>
<a href="camera_ready/69.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Topological Experience Replay for Fast Q-Learning </strong>
<br><em>Zhang-Wei Hong (Massachusetts Institute of Technology); Tao Chen (MIT); Yen-Chen Lin (MIT); Joni Pajarinen (Aalto University); Pulkit Agrawal (MIT) </em><br>
<a href="camera_ready/70.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Nearly Minimax Optimal Reinforcement Learning for Discounted MDPs </strong>
<br><em>Jiafan He (UCLA); Dongruo Zhou (UCLA); Quanquan Gu (University of California, Los Angeles)</em><br>
<a href="camera_ready/71.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> A general sample complexity analysis of vanilla policy gradient</strong>
<br><em>Rui YUAN (Facebook AI Research); Robert M Gower (Telecom Paris Tech); Alessandro Lazaric (FAIR) </em><br>
<a href="camera_ready/72.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> The Power of Exploiter: Provable Multi-Agent RL in Large State Spaces</strong>
<br><em>Chi Jin (Princeton University); Qinghua Liu (Princeton University); Tiancheng Yu (MIT)</em><br>
<a href="camera_ready/73.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Bellman Eluder Dimension: New Rich Classes of RL Problems, and Sample-Efficient Algorithms </strong>
<br><em>Chi Jin (Princeton University); Qinghua Liu (Princeton University); Sobhan Miryoosefi (Princeton University) </em><br>
<a href="camera_ready/74.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Estimating Optimal Policy Value in Linear Contextual Bandits beyond Gaussianity </strong>
<br><em>Jonathan Lee (Stanford University); Weihao Kong (University of Washington); Aldo Pacchiano (UC Berkeley); Vidya K Muthukumar (Georgia Institute of Technology); Emma Brunskill (Stanford University)</em><br>
<a href="camera_ready/75.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> A Short Note on the Relationship of Information Gain and Eluder Dimension</strong>
<br><em>Kaixuan Huang (Princeton University); Sham Kakade (University of Washington); Jason Lee (Princeton); Qi Lei (Princeton University)</em><br>
<a href="camera_ready/76.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Convergence and Optimality of Policy Gradient Methods in Weakly Smooth Settings </strong>
<br><em>Shunshi Zhang (University of Toronto, Vector Institute); Murat A Erdogdu (University of Toronto, Vector Institute); Animesh Garg (University of Toronto, Vector Institute, Nvidia)</em><br>
<a href="camera_ready/77.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Almost Optimal Algorithms for Two-player Markov Games with Linear Function Approximation </strong>
<br><em>Zixiang Chen (UCLA); Dongruo Zhou (UCLA); Quanquan Gu (University of California, Los Angeles)</em><br>
<a href="camera_ready/78.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Improved Estimator Selection for Off-Policy Evaluation </strong>
<br><em>George Tucker (Google Brain); Jonathan Lee (Stanford)</em><br>
<a href="camera_ready/79.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>A Boosting Approach to Reinforcement Learning </strong>
<br><em>Nataly Brukhim (Princeton University); Elad Hazan (Princeton University); Karan Singh (Microsoft Research)</em><br>
<a href="camera_ready/80.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Learning Stackelberg Equilibria in Sequential Price Mechanisms </strong>
<br><em>Gianluca Brero (Harvard University); Darshan Chakrabarti (Harvard University); Alon Eden (Harvard University); Matthias Gerstgrasser (Harvard University); Vincent Li (Harvard University); David Parkes (Harvard University)</em><br>
<a href="camera_ready/81.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Refined Policy Improvement Bounds for MDPs </strong>
<br><em>Jim Dai (Cornell University); Mark Gluzman (Cornell University)</em><br>
<a href="camera_ready/82.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Meta Learning MDPs with linear transition models</strong>
<br><em>Robert Müller (Technical University of Munich); Aldo Pacchiano (UC Berkeley); Jack Parker-Holder (University of Oxford)</em><br>
<a href="camera_ready/83.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> The best of both worlds: stochastic and adversarial episodic MDPs with unknown transition</strong>
<br><em>Tiancheng Jin (University of Southern California); Longbo Huang (IIIS, Tsinghua University); Haipeng Luo (USC)</em><br>
<a href="camera_ready/84.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Identification and Adaptive Control of Markov Jump Systems: Sample Complexity and Regret Bounds</strong>
<br><em>Yahya Sattar (University of California Riverside); Zhe Du (University of Michigan); Davoud Ataee Tarzanagh (Michigan); Necmiye Ozay (University of Michigan); Laura Balzano (University of Michigan); Samet Oymak (University of California, Riverside)</em><br>
<a href="camera_ready/85.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Non-Stationary Representation Learning in Sequential Multi-Armed Bandits</strong>
<br><em>Qin Yuzhen (University of California, Riverside); Tommaso Menara (University of California Riverside); Samet Oymak (University of California, Riverside); ShiNung Ching (Washington University in St. Louis); Fabio Pasqualetti (University of California, Riverside)</em><br>
<a href="camera_ready/86.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Value-Based Deep Reinforcement Learning Requires Explicit Regularization</strong>
<br><em>Aviral Kumar (UC Berkeley); Rishabh Agarwal (Google Research, Brain Team); Aaron Courville (University of Montreal); Tengyu Ma (Stanford); George Tucker (Google Brain); Sergey Levine (UC Berkeley)</em><br>
<a href="camera_ready/87.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong>Policy Optimization in Adversarial MDPs: Improved Exploration via Dilated Bonuses</strong>
<br><em>Haipeng Luo (USC); Chen-Yu Wei (University of Southern California); Chung-Wei Lee (University of Southern California) </em><br>
<a href="camera_ready/88.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> On the Sample Complexity of Average-reward MDPs</strong>
<br><em>Yujia Jin (Stanford University); Aaron Sidford (Stanford)</em><br>
<a href="camera_ready/89.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Finite time analysis of temporal difference learning with linear function approximation: the tail averaged case</strong>
<br><em>Gandharv Patil (McGill University); Prashanth L.A. (IIT Madras); Doina Precup (McGill University)</em><br>
<a href="camera_ready/90.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Multi-Task Offline Reinforcement Learning with Conservative Data Sharing</strong>
<br><em>Tianhe Yu (Stanford University); Aviral Kumar (UC Berkeley); Yevgen Chebotar (Google); Karol Hausman (Google Brain); Sergey Levine (UC Berkeley); Chelsea Finn (Stanford)</em><br>
<a href="camera_ready/91.pdf">[Paper]</a>
</li>
<li style = "margin-top:1px;margin-bottom:1px"> <strong> Provably Efficient Multi-Task Reinforcement Learning with Model Transfer</strong>
<br><em>Chicheng Zhang (University of Arizona); Zhi Wang (University of California, San Diego) </em><br>
<a href="camera_ready/92.pdf">[Paper]</a>
<div class="row centered">
<ul>
</ul>
</div>
</div>
</section>
<!-- call for papers -->
<!-- <section id="call" class="bg-mid-gray">
<div class="container">
<div class="row">
<div class="col-lg-12 text-center">
<h2 class="section-heading">Call for Papers</h2>
<div class="row text-justify">
<div class="col-md-12">
<p class="large text-muted" >
We will invite submissions on topics such as, but not limited to:
<ul style="list-style: none; float:left; width: 100%">
<li> Sample complexity of RL</li>
<li> RL with function approximation</li>
<li> Model-based RL</li>
<li> Model-free RL</li>
<li> Computation efficiency of RL</li>
<li> Exploration</li>
<li> Causality and reinforcement learning</li>
<li> Game theory in RL</li>
<li> Multi-agent reinforcement learning </li>
<li> Partially observed RL setting</li>
<li> RL under constraints</li>
</ul>
<p class="large text-muted" >
We encourage participants to submit a 4-page extended abstract using ICML submission template. Please submit a single PDF in ICML format that includes the main paper and supplementary material. Submissions must be anonymized. All submissions will be reviewed and will be evaluated on the basis of their technical content and relevance to the workshop. Accepted papers will be selected for either a short virtual poster session or a spotlight presentation. <a href="https://cmt3.research.microsoft.com/WORLT2021/">Submission Link</a>
<p class="large text-muted" >
This workshop will not have a conference proceedings, so we welcome the submission of work currently under review at other archival ML venues.
</div>
</div>
</div>
</section> -->
<!-- Dates Section -->
<section id="dates">
<div class="container">
<div class="row">
<div class="col-lg-12 text-center">
<h2 class="section-heading">Important Dates</h2>
<!--h3 class="section-subheading text-muted">Lorem ipsum dolor sit amet consectetur.</h3-->
</div>
</div>
<div class="row">
<div class="col-lg-4 text-left">
</div>
<div class="col-lg-6 text-left">
<div class="col-md-12">
<p class="large text-muted">
<b>Paper Submission Deadline:</b> June 7th, 2021, 11:59 PM UTC (<a href='https://cmt3.research.microsoft.com/WORLT2021'>[CMT]</a>)
</p>
<p class="large text-muted">
<b>Author Notification:</b> July 7th, 2021
</p>
<p class="large text-muted">
<b>Final Version:</b> July 14th, 2021
</p>
<p class="large text-muted">
<b>Workshop:</b> July 24th, 2021, 4:00 PM UTC - July 25th, 2021, 2:00 AM UTC
</p>
</div>
</div>
</div>
</div>
</section>
<!-- Programe Committee Section -->
<section id="Program Committee" class="bg-mid-gray">
<div class="container">
<div class="row">
<div class="col-lg-12 text-center">
<h2 class="section-heading">Program Committee</h2>
<ul style="list-style: none; float:left; width: 50%">
<li>David Abel (DeepMind)</li>
<li>Sanae Amani (UCLA) </li>
<li>Zaiwei Chen (Georgia Tech) </li>
<li>Yifang Chen (University of Washington) </li>
<li> Xinyi Chen (Princeton) </li>
<li> Qiwen Cui (Peking University) </li>
<li> Yaqi Duan (Princeton) </li>
<li> Vikranth Dwaracherla (Stanford) </li>
<li> Fei Feng (UCLA) </li>
<li> Dylan Foster (MIT) </li>
<li> Botao Hao (DeepMind) </li>
<li> Ying Jin (Stanford) </li>
<li> Sajad Khodadadian (Georgia Tech) </li>
<li> Tor Lattimore (DeepMind) </li>
<li> Qinghua Liu (Princeton) </li>
<li> Thodoris Lykouris (MSR) </li>
<li> Gaurav Mahajan (UCSD) </li>
<li> Sobhan Miryoosefi (Princeton)</li>
</ul>
<ul style="list-style: none; float:right; width: 50%">
<li> Aditya Modi (UMich) </li>
<li> Vidya Muthukumar (Georgia Tech) </li>
<li> Gergely Neu (Pompeu Fabra University) </li>
<li> Nived Rajaraman (UC Berkeley) </li>
<li> Max Simchowitz (UC Berkeley) </li>
<li> Yi Su (Cornell) </li>
<li> Jean Tarbouriech (Inria Lille) </li>
<li> Masatoshi Uehara (Cornell) </li>
<li> Ruosong Wang (CMU) </li>
<li> Jingfeng Wu (JHU) </li>
<li> Tengyang Xie (UIUC) </li>
<li> Jiaqi Yang (Tsinghua University) </li>
<li> Ming Yin (UCSB) </li>
<li> Andrea Zanette (Stanford University) </li>
<li> Zihan Zhang (Tsinghua University) </li>
<li> Kaiqing Zhang (UIUC) </li>
<li> Angela Zhou (Cornell) </li> </ul>
<!-- <div class="row text-justify"> -->
<!-- <div class="col-md-12"> -->
<!-- <p class="large text-muted" -->
<!-- </div> -->
</div>
</div>
</div>
</section>
<!-- Organization Section -->
<section id="organization" >
<div class="container">
<div class="row">
<div class="col-lg-12 text-center">
<h2 class="section-heading">Workshop Organizers </h2>
<!--h3 class="section-subheading text-muted">Lorem ipsum dolor sit amet consectetur.</h3-->
</div>
</div>