-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathindex.html
748 lines (711 loc) · 33.9 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="description" content="⚡️: Symbolic Regression with a Learned Concept Library">
  <meta name="keywords"
        content="LaSR, Symbolic Regression, PySR, Scientific Discovery, Neurosymbolic Learning, Program Synthesis">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>LaSR: Symbolic Regression with a Learned Concept Library</title>
  <!-- Google Analytics bootstrap.
       NOTE(review): the gtag.js loader script
       (https://www.googletagmanager.com/gtag/js?id=G-PYVRSFMDRL) is never included,
       so these calls only queue events into dataLayer and nothing is sent —
       confirm whether analytics is actually intended. -->
  <script>
    window.dataLayer = window.dataLayer || [];
    function gtag() {
      dataLayer.push(arguments);
    }
    gtag('js', new Date());
    gtag('config', 'G-PYVRSFMDRL');
  </script>
  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">
  <link rel="stylesheet" href="./static/css/bulma.min.css">
  <link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="./static/css/index.css">
  <link rel="stylesheet" href="./static/css/scrollytelling.css">
  <link rel="icon" href="https://fav.farm/⚡️">
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <!-- Security: polyfill.io was hijacked in a 2024 supply-chain attack and now
       serves malicious code; Cloudflare hosts a safe drop-in mirror. -->
  <script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script>
  <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
  <script defer src="./static/js/fontawesome.all.min.js"></script>
  <script src="./static/js/bulma-carousel.min.js"></script>
  <script src="./static/js/bulma-slider.min.js"></script>
  <script src="./static/js/index.js"></script>
</head>
<body>
<section class="hero">
<div class="hero-body">
<div class="container is-max-desktop">
<div class="columns is-centered">
<div class="column has-text-centered">
<h1 class="title is-1 publication-title"><span class="lasr">LaSR</span>: Symbolic Regression
with a Learned Concept Library</h1>
<div class="is-size-5 publication-authors">
<span class="author-block">
<a href="https://www.linkedin.com/in/aryagrayeli">Arya
Grayeli</a><sup>1,4,*</sup>,</span>
<span class="author-block">
<a href="https://atharvas.net">Atharva Sehgal</a><sup>1,*</sup>,</span>
<span class="author-block">
<a href="https://omarcostilla.mit.edu">Omar Costilla Reyes</a><sup>2</sup>,
</span>
<span class="author-block">
<a href="https://astroautomata.com">Miles Cranmer</a><sup>3</sup>,
</span>
<span class="author-block">
<a href="https://www.cs.utexas.edu/~swarat">Swarat Chaudhuri</a><sup>1</sup>,
</span>
</div>
<!-- Affiliation line. Fixed: "</br>" is invalid (br is a void element with no
     closing tag) and the affiliation list was missing separator commas. -->
<div class="is-size-5 publication-authors">
  <span class="author-block"><sup>1</sup>UT Austin,</span>
  <span class="author-block"><sup>2</sup>MIT CSAIL,</span>
  <span class="author-block"><sup>3</sup>University of Cambridge,</span>
  <span class="author-block"><sup>4</sup>Foundry Technologies</span>
  <br>
  <span class="author-block"><sup>*</sup>Equal Contribution</span>
</div>
<div class="column has-text-centered">
<div class="publication-links">
<!-- PDF Link. -->
<span class="link-block">
<a href="https://arxiv.org/abs/2409.09359"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<i class="fas fa-file-pdf"></i>
</span>
<span>Arxiv</span>
</a>
</span>
<!-- Video Link. -->
<!-- <span class="link-block">
<a href="https://www.youtube.com/watch?v=dQw4w9WgXcQ"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<i class="fab fa-youtube"></i>
</span>
<span>Video</span>
</a>
</span> -->
<!-- Code Link. -->
<span class="link-block">
<a href="https://github.com/trishullab/LibraryAugmentedSymbolicRegression.jl"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<i class="fab fa-github"></i>
</span>
<span>Code</span>
</a>
</span>
<!-- Slide deck link. Fixed: the opening <span class="link-block"> was never
     closed — its closing tag had been swallowed into the comment below. -->
<span class="link-block">
  <a href="static/lasr-slides.pdf"
     class="external-link button is-normal is-rounded is-dark">
    <span class="icon">
      <i class="fas fa-external-link-alt"></i>
    </span>
    <span>Short Slide Deck</span>
  </a>
</span>
<!-- <span class="link-block">
  <a href="static/presentation.pdf"
     class="external-link button is-normal is-rounded is-dark">
    <span class="icon">
      <i class="fas fa-file-pdf"></i>
    </span>
    <span>Brief Slide Deck</span>
  </a>
</span> -->
</div>
</div>
</div>
</div>
</div>
</div>
</section>
<section class="hero teaser">
  <div class="container is-max-desktop">
    <div class="hero-body">
      <!-- Fixed: the original <img> tag was unterminated, so the following
           "<h2" was parsed as attributes of the image. Also added alt text. -->
      <img src="./static/images/teaser.svg"
           alt="Overview of LaSR's concept-guided symbolic regression loop"
           style="max-width: 100%; height: auto;" loading="eager">
      <h2 class="subtitle has-text-centered">
        <span class="lasr">LaSR</span> leverages <em>concept guidance</em>
        to accelerate symbolic regression for scientific discovery.
        It iteratively refines a library of interpretable textual
        concepts which are used to bias the search for hypotheses
        for scientific discovery tasks. This involves three distinct
        phases: (<strong>Top</strong>) finding optimal hypotheses within
        a concept-directed hypothesis evolution, (<strong>Right</strong>)
        leveraging the optimal hypotheses to find new concept abstractions,
        and (<strong>Left</strong>) iterating on learned concepts to discover
        new concepts to accelerate hypothesis evolution. LaSR introduces an
        orthogonal direction of improvement over
        <a href="https://arxiv.org/abs/2305.01582">current symbolic regression algorithms</a>
        (in gray).
      </h2>
    </div>
  </div>
</section>
<section class="section">
<div class="container is-max-desktop">
<!-- Abstract. -->
<div class="columns is-centered has-text-centered">
<div class="column is-four-fifths">
<h2 class="title is-3">Abstract</h2>
<div class="content has-text-justified">
<p>
<span class="lasr">LaSR</span> is a novel method for
symbolic regression (SR), the task of searching for
compact programmatic hypotheses that best explain a dataset.
The problem is commonly solved using genetic algorithms;
we show that we can enhance such methods by inducing a
library of abstract textual concepts.
</p>
<p>
LaSR (pronounced 'Laser') uses zero-shot queries to a large language model (LLM)
to discover and evolve concepts occurring in known
high-performing hypotheses. We discover new hypotheses
using a mix of standard evolutionary steps and
LLM-guided steps (obtained through zero-shot LLM queries)
conditioned on discovered concepts. Once discovered, hypotheses
are used in a new round of concept abstraction and evolution.
</p>
<p>
We validate LaSR on the Feynman equations, a popular
SR benchmark, as well as a set of synthetic tasks.
On these benchmarks, LaSR substantially outperforms
a variety of state-of-the-art SR approaches based on
deep learning and evolutionary algorithms.
</p>
</div>
</div>
</div>
<!--/ Abstract. -->
<div class="container is-hidden-tablet is-max-desktop">
<!-- Abstract. -->
<h2 class="title has-text-centered is-3">⚠️Warning⚠️</h2>
<div class="content has-text-justified">
<p>
The next sections might not render correctly on mobile devices. Please view this page on a
desktop or enable "desktop mode!"
</p>
</div>
</div>
</div>
</section>
<!-- Removed the scientific discovery scrollytelling section @ static/scientific-discovery.html.txt -->
<section class="section">
<div class="container">
<div class="columns is-vcentered">
<div class="column is-max-mobile is-max-tablet is-max-desktop is-max-widescreen article">
<h3 class="title is-size-6-mobile is-size-4-tablet">Symbolic Regression</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left">
<p>
Symbolic regression is the task of finding a mathematical
expression that best fits a given dataset. The goal is to find a compact, interpretable
expression that can be used to make predictions and infer new relationships. Symbolic
regression is a key tool in scientific discovery, as it can help uncover hidden
relationships in the data and suggest new hypotheses to test.
</p>
<p>
One of the earliest known examples of symbolic regression is Johannes Kepler's discovery of
the empirical laws of planetary motion. Kepler fit various geometric shapes to the ground
truth celestial data (Rudolphine Tables) and found that elliptical orbits best explained the data. Kepler's work
in transforming the empirical data into a set of mathematical relationships paved
the way for broader interpretation. For instance, Newton validated his laws of motion and
his law of universal gravitation by showing that the empirical relationships discovered by
Kepler are a consequence of his laws. In this way, the interpretation of an empirical law in
one phenomenon often generalizes to and explains other phenomena that didn't have a-priori
data collection mechanisms.
</p>
</div>
</div>
<!-- Image. -->
<div class="column content">
<img src="static/scientific-discovery-frames/7.svg" loading="eager">
<!-- <video src="static/pysr.webm" autoplay loop muted playsinline></video> -->
</div>
</div>
</div>
</section>
<section class="section">
<div class="container">
<div class="columns is-vcentered">
<div class="column is-max-mobile is-max-tablet is-max-desktop is-max-widescreen article">
<h3 class="title is-size-6-mobile is-size-4-tablet">PySR: evolutionary algorithm for Symbolic
Regression</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left">
<p>
PySR is a framework for symbolic regression that implements search using multi-population
genetic programming.
Intuitively, for each population, PySR runs a tournament to select the programs with the
highest fitness score.
These programs are randomly mutated / crossed with each other to
produce new programs. The best-performing new programs replace the oldest candidates. This process is
repeated for a fixed number of iterations and, in implementation, can be parallelized across multiple cores. This
enables scalable exploration of a large search space, and is crucial for PySR's success.
</p>
<p>
However, PySR does not allow for the incorporation of prior knowledge or domain-specific
concepts. This can make it difficult to discover complex relationships, especially in
scientific
domains where the data is noisy and/or high-dimensional but supplemented with rich
background knowledge.
</p>
<p>
This fundamental limitation surfaces as an <em>exploration bottleneck</em>.
</p>
</div>
</div>
<!-- Image. -->
<div class="column content">
<video src="static/pysr.webm" autoplay loop muted playsinline></video>
</div>
</div>
</div>
</section>
<section class="section">
<div class="container">
<h2 class="title is-2">PySR's Exploration bottleneck</h2>
<!-- Method. -->
<div class="columns is-centered" id="pysr">
<div class="column is-max-mobile is-max-tablet is-max-desktop is-max-widescreen article">
<h3 class="title is-size-6-mobile is-size-4-tablet">Sketch of PySR's search space</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
PySR's search strategy is essentially a form of "local search." That is, each population
starts
with a randomly sampled set of programs and iteratively optimizes them by sampling programs
that are <strong>syntactically close</strong> to the best programs in the population. This
process continues until a local optimum is reached. Intuitively, this forms an "island"
in the search space of programs.
</p>
<p>
As PySR runs multiple populations in parallel, we end up with a disjoint set of "islands" in
the search space. Now, PySR has many heuristics to deal with such "islands" -- for instance,
by migrating programs between populations. However, there is no obvious way to overcome this
fundamental exploration bottleneck.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">Our Hypothesis</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
Consequently, <strong>our main goal is to understand if we can overcome this exploration
bottleneck and increase exploration in relevant parts of the search space.</strong>
</p>
<p>
Our hypothesis is that the islands emerge because PySR (or any symbolic algorithm) can only
sample programs that are <strong>syntactically close</strong> to the best program in each
population.
But syntactic closeness does not necessarily imply semantic closeness. Can we instead sample
programs that are <strong>semantically close</strong> to the best programs in each
population?
</p>
</div>
</div>
<!-- Image. -->
<!-- Make sure image fits in the container. -->
<!-- NOTE(review): id="updateableFigure" is reused by the figure columns in the
     #lasr-learning-loop and #lasr-results sections below; duplicate ids are
     invalid HTML. scrollytelling.js presumably scopes its lookup per section
     (init("#pysr") etc.) — confirm before renaming. The <img> is also missing
     alt text. -->
<div class="column content">
<img src="static/pysr-frames/1.svg" id="updateableFigure" loading="eager">
</div>
</div>
</div>
</section>
<section class="section">
<div class="container">
<h2 class="title is-2">LaSR: Symbolic Regression with a Learned Concept Library</h2>
<!-- Method. -->
<div class="columns is-centered" id="lasr-learning-loop">
<div class="column is-max-mobile is-max-tablet is-max-desktop is-max-widescreen article">
<h3 class="title is-size-6-mobile is-size-4-tablet">Concept Library Desiderata: (1) Symbolic
Abstraction</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
We're going to achieve this by abstracting programs into relevant concepts. A concept is a
natural language representation of a program. In symbolic regression, our programs are
equations.
Hence, we desire two properties from a concept: (1) Symbolic Abstraction and (2) Symbolic
guidance.
</p>
<p>
<strong>Symbolic Abstraction:</strong> A concept captures an abstract property that is
common to
a set of programs. For instance, many empirical trends such as Zipf's law (Linguistics),
Moore's law (Computer Science),
and Arrhenius' equation (Chemistry) can be represented by the equation sketch:
$$y = a x^k + \epsilon$$
This trend is pervasive across many domains and is commonly referred to as a "power law."
This is just one example of how a concept can "summarize" an important property of a set of
programs.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">Concept Library Desiderata: (2) Symbolic
Guidance</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
<strong>Symbolic Guidance</strong>: A concept can also guide the search for new programs.
This arises naturally
from the abstraction property: if a concept is a good abstraction of a set of equations, we
should be able to
use it to sample new equations that are similar to the ones in the set.
</p>
<p>
This is particularly useful in scientific discovery, where scientists often have a
preconception of how certain
variables will interact with each other, but may not know the exact form of the
relationship<em>
(The example with gravitational waves is a little misleading as the theoretical
prediction prompted the
experimental discovery).
</em>
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">LaSR's Three phases</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
LaSR consists of three phases: <em>Hypothesis Evolution</em>, <em>Concept Abstraction</em>,
and <em>Concept Evolution</em>. In
<em>Hypothesis Evolution</em>, we search for programs that best fit the data and are guided
by concepts discovered in the previous iteration.
<em>Concept Abstraction</em> deals with updating the concept library with new concepts
derived from the best
performing programs. Finally, <em>Concept Evolution</em> involves updating the concept
library with new concepts that are implications of the discovered concepts.
</p>
<p>
This is a self-amplifying loop: better programs lead to better concepts, which can, in turn,
guide the search for even better programs.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">LaSR: Phase 1 Hypothesis Evolution</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
The hypothesis evolution searches for programs that best fit the data and are guided by
concepts discovered in the previous iteration. To do this, we will <em>extend</em> PySR's
search process to incorporate concept guidance.
The next few slides will detail how we do this.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">Phase 1: Base Algorithm</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
We build upon PySR's multi-population genetic programming framework to integrate concept
guidance.
Let's begin by understanding PySR's main loop first. Assuming \(k\) programs in a single
population, PySR starts with
a randomly initialized set of programs \( \left( \{\Pi_1^1, \dots, \Pi_1^k \} \right) \),
from which the best program is selected \( \left( \pi_1^\star \right) \)
by evaluating its fitness on a supervised dataset. This program undergoes either mutation (a
part of the program is resampled),
crossover (a part of the program is swapped with a part of the second best program),
simplification (the program is simplified), or
optimization (the constants in the program are optimized). The generated program replaces
the oldest program in the population, and the process
repeats for a fixed number of iterations. Each iteration takes less than a second, and many
such populations can be run in parallel.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">Phase 1: LLM Operations </h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
Integrating concept guidance into PySR involves augmenting all symbolic operations that
generate new programs with
an LLM zero shot query that mimics the symbolic operation it is replacing
</p>
<p>
Instead of fully replacing the symbolic operations, we propose a hybrid approach where we
replace the symbolic operation with the LLM
zero shot query with probability \(p\) (usually 1%). This allows us to incorporate the
concept guidance into the search process without
sacrificing the 'local search' nature of PySR.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">Phase 1: Concept Library Integration</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
Each LLM zero shot query is conditioned on concepts discovered in the previous iteration.
These concepts are stored in a concept library
and ensure that the LLM zero shot query is relevant to the current search space.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">LaSR: Phase 2 Concept Abstraction</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
Next, the best performing programs are abstracted into concepts. We rely on another LLM zero
shot query
to "summarize" the programs into a natural language representation. This concept is then
stored in the concept library.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">LaSR: Phase 3 Concept Evolution</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
Finally, the concept library is updated with new concepts that are implications of the
discovered concepts. This is done by
another LLM zero shot query that is conditioned on the discovered concepts.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">LaSR: Intuition (1)</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
Let's take a step back and understand the intuition behind LaSR. We noted that
PySR's ends up with "islands" in the search space because it can only sample programs
that are syntactically close to the best programs in each population. As LaSR builds upon
PySR,
we will also end up with these "islands" in the search space at the end of Phase 1.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">LaSR: Intuition (2)</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
In Phase 2, LaSR abstracts the best performing programs into concepts. These concepts serve
as "bridges" between the "islands" in the search space.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">LaSR: Intuition (3)</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
We can now sample new programs conditioned on the discovered concepts. This allows us to
explore
new parts of the search space that were previously inaccessible. Since we retain the "local
search"
capabilities of PySR, intuitively, this allows us to explore more "islands" in the search
space.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">LaSR: Intuition (4)</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
  <!-- Fixed: the paragraph was never closed before </div>, and a sentence
       started in lowercase. -->
  <p>
    Furthermore, in Phase 3, we can sample more concepts that are derived from the discovered
    concepts.
    These concepts, in turn, increase the exploration of the search space in areas that were
    previously
    inaccessible.
  </p>
</div>
</div>
<!-- Image. -->
<div class="column content">
<img src="static/lasr-frames/1.svg" id="updateableFigure" loading="eager">
</div>
</div>
</div>
</section>
<section class="section">
<div class="container">
<h2 class="title is-2">LaSR: Results</h2>
<!-- Method. -->
<div class="columns is-centered" id="lasr-results">
<div class="column is-max-mobile is-max-tablet is-max-desktop is-max-widescreen article">
<h3 class="title is-size-6-mobile is-size-4-tablet">LaSR: Feynman Equations</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
LaSR doesn't require human provided concepts. Hence, we can test LaSR against other
symbolic regression algorithms on the Feynman equations; a collection of equations that
describe empirical relationships from the <a
href="https://www.feynmanlectures.caltech.edu/">Feynman Lectures Series</a> .
We found that LaSR outperforms other symbolic regression algorithms on this benchmark.
</p>
<p>
LaSR's performance is ultimately bottlenecked by the quality of the language guidance. To
evaluate this,
we conducted a set of cascading experiments where we varied the quality of the language
guidance by (1) increasing
the probability of replacing symbolic operations with LLM zero shot queries and (2) changing
the backend LLM model.
We found that even a small model like <a href="https://github.com/meta-llama/llama3">Llama 3
8B</a> can provide
sufficient guidance for LaSR to outperform other symbolic regression algorithms.
</p>
<p>
More details on this set of experiments (and a set of experiments on an unseen synthetic
dataset) can be found in our paper.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">LaSR: Feynman Equations with Hints</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
We also conducted an enhancement study where we provided LaSR with hints for the equations
in the Feynman equations dataset. We found that LaSR with hints accelerated the search
process and found the correct equation faster than LaSR without hints, even discovering
some new equations.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">LaSR: Feynman Equations Qualitative Study</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
We also conducted a qualitative study on the equations discovered by LaSR. Here, we showcase
Equation #10 from the Feynman
equations dataset: Coulomb's law. LaSR and PySR both discover a high performing program for
this equation. Coulomb's law
is interesting because it embodies multiple concepts: (1) The force and the distance of the
charges are inversely proportional and follow a
"power law" trend, (2) In the scalar form, the force is proportional to the product of the
charges and since multiplication is commutative,
the order of the charges does not matter.
</p>
<p>
<strong>PySR's equation:</strong> PySR's equation is unwieldy and simplifies to the correct
form after about 10 manual steps of simplification. Also,
as this equation requires more constants, it is more prone to optimization errors.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">LaSR: Feynman Equations Qualitative Study</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
<strong>LaSR's equation:</strong> LaSR's equation is much simpler and reduces to ground
truth after four manual steps of simplification. Surprisingly,
we notice that smaller models tend to produce simpler equations.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">LaSR: Feynman Equations Qualitative Study</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
LaSR produces two artifacts: an equation of best fit and a concept library. Here, we
showcase snippets of the concept library generated for
Coulomb's law. As a consequence of LLM training, the concepts are rather verbose and small
relevant concepts are often buried in the middle of the
generations.
</p>
<p>
<strong>Limitations:</strong> As a consequence of using LLM zero shot queries, LaSR cannot
guarantee the factuality or the correctness of the concepts in the concept library.
Furthermore, concepts deemed to be "important" may be a consequence of the LLM model's
training data and may mislead scientists.
Addressing these concerns is an exciting direction for future work in LLM guided program
induction.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">LLM Scaling Laws: Present methodology</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
To investigate LaSR's utility in finding novel and practical empirical trends,
we investigate whether LaSR can discover novel LLM scaling laws on the BigBench dataset.
</p>
<p>
Traditionally, to identify an LLM scaling law, practitioners must first manually posit a "skeleton
equation" with a fixed set of known variables and unknown free parameters, and then optimize
the unknown parameters based on a dataset of model hyperparameters and resulting dataset fitness.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">LLM Scaling Laws: Methodology with LaSR</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
Instead of starting with a predefined equation, we use LaSR to discover the skeleton
equation that best fits various subsets of the BigBench dataset.
</p>
<p>
Removing the need to manually posit a skeleton equation simplifies the methodology for finding scaling laws in many ways. (1) It removes human preconceptions about the expected relationships between hyperparameters. (2) It increases the number of variables and the type of variables human practitioners can jointly reason about. (3) It enables positing equations of much higher complexity and variable interdependence than otherwise possible.
</p>
</div>
<h3 class="title is-size-6-mobile is-size-4-tablet">LaSR's LLM Scaling Law</h3>
<div class="content is-size-7-mobile is-size-6-tablet has-text-left step">
<p>
LaSR discovers the following scaling law on the subset of BigBench:
$$\texttt{score} = \frac{A}{\left( \frac{\texttt{train\_steps}}{B}\right)^{\texttt{\#shots}}} + E$$
More details are in the paper but, in essence, this scaling law suggests that increasing the number of shots exponentially increases the model's performance for instances with less training data, while having diminishing gains as the number of training steps of the model increase.
</p>
<p>
As the output artifacts of LaSR are interpretable, we can even augment existing scaling laws with the discovered empirical trends! More details/limitations are in the paper!
</p>
</div>
</div>
<!-- Image. -->
<div class="column content">
<img src="static/results-frames/1.svg" id="updateableFigure" loading="eager">
</div>
</div>
</div>
</section>
<section class="section">
<div class="container is-max-desktop">
<div class="columns is-centered">
<div class="column is-full-width">
<h2 class="title is-3">Related Links</h2>
<div class="content has-text-left">
<p>
This project would not be possible without the excellent work of the community. These are
some relevant papers to better understand the
premise of our work:
</p>
<ul>
<li><a href="https://www.nature.com/articles/s41586-023-06924-6">FunSearch: Making new
discoveries in mathematical sciences using Large Language Models</a> </li>
<li><a href="https://arxiv.org/abs/2305.01582">Interpretable Machine Learning for Science
with PySR and SymbolicRegression.jl</a> </li>
<li><a href="https://arxiv.org/abs/2310.19791">LILO: Learning Interpretable Libraries by
Compressing and Documenting Code</a> </li>
<li><a href="https://arxiv.org/abs/2404.18400">LLM-SR: Scientific Equation Discovery via
Programming with Large Language Models</a> </li>
<li><a href="https://arxiv.org/abs/2210.05050">Neurosymbolic Programming for Science</a>
</li>
</ul>
</div>
</div>
</div>
</div>
</section>
<section class="section" id="BibTeX">
<div class="container is-max-desktop content">
<h2 class="title">BibTeX</h2>
<p>
If you found this post interesting, please read <a href="https://arxiv.org/abs/2409.09359">our
paper</a> for mathematical details and
experimental results. You can cite our paper as follows:
</p>
<pre><code>@misc{grayeli2024symbolicregressionlearnedconcept,
title={Symbolic Regression with a Learned Concept Library},
author={Arya Grayeli and Atharva Sehgal and Omar Costilla-Reyes and Miles Cranmer and Swarat Chaudhuri},
year={2024},
eprint={2409.09359},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2409.09359},
}</code></pre>
</div>
</section>
<footer class="footer">
  <div class="container">
    <div class="content has-text-centered">
      <a class="icon-link" href="https://arxiv.org/abs/2409.09359" aria-label="Paper on arXiv">
        <i class="fas fa-file-pdf" aria-hidden="true"></i>
      </a>
      <!-- Fixed: this anchor carried two class attributes (invalid HTML) and a
           "disabled" attribute, which has no meaning on <a>. -->
      <a class="icon-link external-link"
         href="https://github.com/trishullab/LibraryAugmentedSymbolicRegression.jl/tree/lasr-experiments"
         aria-label="Code on GitHub">
        <i class="fab fa-github" aria-hidden="true"></i>
      </a>
    </div>
    <div class="columns is-centered">
      <div class="column is-8">
        <div class="content">
          <p>
            This template is based on the <a href="https://nerfies.github.io/">Nerfies</a> project
            page.
            The source code is available <a href="https://github.com/nerfies/nerfies.github.io">here</a>
            and is licensed under a
            <a rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/">Creative
            Commons Attribution-ShareAlike 4.0 International License</a>. I also make heavy use of
            the
            <a href="https://github.com/russellsamora/scrollama">Scrollama.js</a> package. Please
            remember
            to cite either the <a href="https://nerfies.github.io/">Nerfies</a> website or
            <a href="https://github.com/trishullab/lasr-web">this website</a> if you use this
            template!
          </p>
        </div>
      </div>
    </div>
  </div>
</footer>
<script src="https://unpkg.com/d3@5.9.1/dist/d3.min.js"></script>
<!-- Scrolly Storytelling -->
<!-- <script src="https://unpkg.com/scrollama"></script> -->
<script src="./static/scrollama.js"></script>
<script src="./static/js/scrollytelling.js"></script>
<script>
// Init scrollable sections.
mobileCorrections();
// init("#scientific-discovery");
init("#pysr");
init("#lasr-learning-loop");
init("#lasr-results");
</script>
</body>
</html>