-
Notifications
You must be signed in to change notification settings - Fork 1
/
proofs.lean
2547 lines (2211 loc) · 94.5 KB
/
proofs.lean
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
---------------------------------------------------------------------
-- AAAI 2024 Submission 12181
-- What do Hebbian Learners Learn?
-- Reduction Axioms for Unstable Hebbian Learning
--
-- This file contains the Lean code for our submission. As explained
-- in the technical appendix, the main point of using the proof
-- assistant Lean was to get the proofs of
-- - theorem hebb_extens
-- - theorem hebb_updated_path
-- - theorem reach_of_hebb_prop
-- - theorem hebb_reduction
-- right. Our vision was to have all of the supporting lemmas
-- verified as well, but time ran out. (We plan on fully verifying
-- these claims in Lean by the AAAI-24 author feedback window.)
--
-- This file is searchable. If you want to find the Lean definition
-- and theorem corresponding to the one in the paper, just press
-- Ctrl+F and type, e.g.,
-- - "Definition 4"
-- - "Lemma 2"
-- - "Theorem 3"
-- and so on. Note that not everything was defined/included in
-- Lean (e.g., the semantics and completeness). Conversely, not
-- everying in this file was included in the paper --- Lean often
-- needs very basic things spelled out for it that are fairly obvious
-- to a theoretical AAAI audience (e.g., Hebbian update preserves
-- predecessors)
---------------------------------------------------------------------
import Mathlib.Mathport.Syntax
open Set
open Classical
-------------------------------------------------
-- Weighted Directed Graphs
--
-- These definitions are so basic, they're not
-- spelled out in the paper. Skip to [Definition 1]
-- to see our definition of a feed-forward net.
--
-- NOTE: For hygiene, we expect that the vertices
-- are natural numbers given in order, i.e. 0, 1, 2, ...
-- In principle, you can pick any other labels for your
-- vertices, but we will rely on the fact that they come
-- in this order.
--
-- WARNING: We assume that graphs are acyclic and
-- fully connected. Nothing in this file direclty
-- enforces 'acyclic' except for hygiene (just please
-- don't use cycles in graphs!)
--
-- As for 'fully connected', this condition is captured
-- by the axiom 'connected' (which we state after we
-- define the layers of the graph.)
--
-- These graphs are based on Peter Kementzey's
-- implementation, but modified slightly. See
-- https://lean-forward.github.io/pubs/kementzey_bsc_thesis.pdf
-------------------------------------------------
-- α is the type of the nodes
-- β is the type of the weights
structure Vertex (α β : Type) where
label : α
successors : List (ℕ × β)
deriving Repr
instance [Inhabited α] : Inhabited (Vertex α β) :=
⟨ { label := default, successors := default } ⟩
structure Graph (α : Type) (β : Type) where
vertices : List (Vertex α β) := []
deriving Repr
-- Notice that this graph is acyclic, since each predecessor
-- list only refers to nodes above the current node.
def graphA : Graph ℕ ℚ :=
⟨[⟨0, [⟨1, 0.5⟩, ⟨2, 0.6⟩, ⟨3, 0.7⟩]⟩,
⟨1, [⟨2, 0.8⟩, ⟨3, 0.9⟩]⟩,
⟨2, [⟨3, 1⟩, ⟨3, 3.0⟩]⟩,
⟨3, []⟩]⟩
#check graphA
#eval graphA
namespace Graph
def get_vertices (g : Graph ℕ ℚ) : List ℕ :=
List.map (fun ⟨label, _⟩ => label) g.vertices
-- Helper function to collect the List of pairs of n's successors
def successor_pairs (vertices : List (Vertex ℕ ℚ)) (n : ℕ) : List (ℕ × ℚ) :=
match vertices with
| [] => []
| ⟨vertex, succ⟩ :: rest =>
if vertex = n then succ
else successor_pairs rest n
-- We get all vertices that are in n's successor list
def successors (g : Graph ℕ ℚ) (n : ℕ) : List ℕ :=
List.filter
(fun m => m ∈ (successor_pairs g.vertices n).unzip.1)
g.get_vertices
def predecessors (g : Graph ℕ ℚ) (n : ℕ) : List ℕ :=
List.filter (fun v => n ∈ (g.successors v)) g.get_vertices
-- Using 'predecessors' is slower than 'successors',
-- but we will tend to look backwards from a node rather
-- than forwards.
def hasEdge (g : Graph ℕ ℚ) (m n : ℕ) : Bool :=
m ∈ (g.predecessors n)
-- Returns the weight of the edge m ⟶ n, if it exists.
-- If it does not exist, we say the weight is 0
-- TODO: In the future, it's better to use Option here
-- and return none if none!!!
def getEdgeWeight (g : Graph ℕ ℚ) (m n : ℕ) : ℚ :=
match g.vertices[m]!.successors.find? (fun ⟨label, _⟩ => label = n) with
| some ⟨_, weight⟩ => weight
| none => 0
-- Some sanity checks
#eval get_vertices graphA
#eval successors graphA 0
#eval successors graphA 1
#eval successors graphA 2
#eval successors graphA 3
#eval predecessors graphA 0
#eval predecessors graphA 1
#eval predecessors graphA 2
#eval predecessors graphA 3
#eval hasEdge graphA 1 2
#eval hasEdge graphA 1 3
#eval hasEdge graphA 4 2
#eval getEdgeWeight graphA 1 2
#eval getEdgeWeight graphA 1 3
#eval getEdgeWeight graphA 4 2
inductive hasPath (g : Graph ℕ ℚ) : ℕ → ℕ → Prop where
| trivial {u : ℕ} :
hasPath g u u
| from_path {u v w : ℕ} :
hasPath g u v → hasEdge g v w → hasPath g u w
--------------------------------------------------------------------
theorem hasPath_trans {u v w : ℕ} (g : Graph ℕ ℚ) :
hasPath g u v → hasPath g v w → hasPath g u w := by
--------------------------------------------------------------------
intro (h₁ : hasPath g u v)
intro (h₂ : hasPath g v w)
induction h₂
case trivial => exact h₁
case from_path x y _ edge_xy path_ux =>
exact hasPath.from_path path_ux edge_xy
def is_refl (g : Graph ℕ ℚ) : Prop := ∀ (u : ℕ),
u ∈ g.get_vertices → g.hasEdge u u
def is_symm (g : Graph ℕ ℚ) : Prop := ∀ (u v : ℕ),
g.hasEdge u v → g.hasEdge v u
def is_trans (g : Graph ℕ ℚ) : Prop := ∀ (u v w : ℕ),
g.hasEdge u v → g.hasEdge v w → g.hasEdge u w
def is_acyclic (g : Graph ℕ ℚ) : Prop := ∀ (u v : ℕ),
g.hasPath u v → g.hasPath v u → u = v
end Graph
/-══════════════════════════════════════════════════════════════════
Feed-forward Neural Nets
══════════════════════════════════════════════════════════════════-/
-- [Definition 1] in our paper.
structure Net where
graph : Graph ℕ ℚ
activation : ℚ → ℚ
rate : ℚ -- learning rate
-- The restrictions on our nets. Note that 'acyclic' is
-- not explitly included here, but depends on the user
-- of our code not using graphs with cycles. The
-- fully-connected assumption is captured by
-- - axiom connected
-- stated later.
structure BFNN extends Net where
-- The activation function is binary
binary : ∀ (x : ℚ),
(activation x = 0) ∨ (activation x = 1)
-- The activation function is nondecreasing
activ_nondecr : ∀ (x₁ x₂ : ℚ),
x₁ ≤ x₂ → activation x₁ ≤ activation x₂
-- There is *some* input for which the activation is 1
active_input : ℚ
activ_pos : activation (is_active) = 1
-- [Not included in paper]
-- Because our activation function is bounded above by 1,
-- if act(x₁) = 1 then any act(x₂) greater than act(x₁) also = 1
--------------------------------------------------------------------
lemma activation_from_inequality (net : BFNN) (x₁ x₂ : ℚ) :
net.activation x₁ ≤ net.activation x₂
→ net.activation x₁ = 1 → net.activation x₂ = 1 := by
--------------------------------------------------------------------
intro h₁ h₂
cases net.binary x₂
case inr actx₂_is_one => exact actx₂_is_one
case inl actx₂_is_zero =>
-- This case is impossible! 1 is not ≤ 0!
rw [h₂] at h₁
rw [actx₂_is_zero] at h₁
exact absurd h₁ (by native_decide)
variable (net : BFNN)
/-══════════════════════════════════════════════════════════════════
Forward propagation in a net
══════════════════════════════════════════════════════════════════-/
-- I would like to write
-- List.sum [w * x | for w in weights, for x in lst],
-- but list comprehensions are harder to reason about.
def weighted_sum (weights : List ℚ) (lst : List ℚ) : ℚ :=
List.foldr (· + ·) 0 (List.zipWith (· * ·) weights lst)
-- Sanity checks
#eval weighted_sum [] []
#eval weighted_sum [1] [3.0]
#eval weighted_sum [1, 2.0, 3.0] [5.0, 5.0, 5.0]
-- Not well-defined behavior (we expect the weights and lst to be of equal size,
-- but this is left implicit.)
#eval weighted_sum [1, 2.0] [3.0]
-- [Not included in paper]
-- This is a technical lemma that need in order to prove
-- [GOTO]
--------------------------------------------------------------------
lemma weighted_sum_eq (fw₁ fw₂ fx₁ fx₂ : ℕ → ℚ) (ls : List ℕ) :
let x₁ : List ℚ := List.map (fun i => fx₁ i) (List.range ls.length)
let x₂ : List ℚ := List.map (fun i => fx₂ i) (List.range ls.length)
let w₁ : List ℚ := List.map (fun i => fw₁ i) (List.range ls.length)
let w₂ : List ℚ := List.map (fun i => fw₂ i) (List.range ls.length)
(∀ i, (fw₁ i) * (fx₁ i) = (fw₂ i) * (fx₂ i))
→ weighted_sum w₁ x₁ = weighted_sum w₂ x₂ := by
--------------------------------------------------------------------
-- Simplify the weighted sum down to
-- fw₁ i * fx₁ j = fw₂ i * fx₂ j
intro x₁ x₂ w₁ w₂ h₁
simp only [weighted_sum]
rw [List.zipWith_map_left]
rw [List.zipWith_map_left]
rw [List.zipWith_map_right]
rw [List.zipWith_map_right]
simp
congr 2
-- Now we just need to show
-- fw₁ i * fx₁ i = fw₂ i * fx₂ i,
-- but this was exactly our assumption.
exact funext fun i => h₁ i
-- [Not included in paper]
-- This is a technical lemma that we need in order to prove
-- [GOTO]
--------------------------------------------------------------------
lemma weighted_sum_le (fw₁ fw₂ fx₁ fx₂ : ℕ → ℚ) (ls : List ℕ) :
let x₁ : List ℚ := List.map (fun i => fx₁ i) (List.range ls.length)
let x₂ : List ℚ := List.map (fun i => fx₂ i) (List.range ls.length)
let w₁ : List ℚ := List.map (fun i => fw₁ i) (List.range ls.length)
let w₂ : List ℚ := List.map (fun i => fw₂ i) (List.range ls.length)
(∀ i, (fw₁ i) * (fx₁ i) ≤ (fw₂ i) * (fx₂ i))
→ weighted_sum w₁ x₁ ≤ weighted_sum w₂ x₂ := by
--------------------------------------------------------------------
intro x₁ x₂ w₁ w₂ h₁
simp only [weighted_sum]
rw [List.zipWith_map_left]
rw [List.zipWith_map_left]
rw [List.zipWith_map_right]
rw [List.zipWith_map_right]
simp
rw [List.foldr_map]
rw [List.foldr_map]
-- By induction on the length of the list we're foldr-ing
induction List.range (List.length ls)
case nil => simp only [List.foldr]
case cons i is IH =>
simp only [List.foldr]
exact add_le_add (h₁ i) IH
-- [Not included in paper]
-- This just says that for any list, the ith element of
-- that list is in fact a member of that list.
--
-- WARNING: This is a hack! For in infinite sets, l[i] is
-- not provably in l. Our sets are finite, so we don't
-- need to worry about this (we take this as an axiom).
-- The fix for this is to replace all uses of 'Set' with
-- 'FinSet' (finite sets), and all get!_mem with get_mem
-- (that takes an explicit upper bound).
axiom get!_mem {α : Type} [Inhabited α] :
∀ (l : List α) i, (l.get! i) ∈ l
@[simp]
def preds (net : BFNN) (n : ℕ): List ℕ :=
net.graph.predecessors n
-- [Not included in paper]
-- m is a predecessor of n iff (m, n) is an edge in the graph.
-- (this is trivial to see, but two steps for Lean)
--------------------------------------------------------------------
theorem edge_from_preds (net : BFNN) (m n : ℕ) :
m ∈ preds net n ↔ net.graph.hasEdge m n := by
--------------------------------------------------------------------
simp only [preds, Graph.hasEdge]
rw [Bool.decide_iff]
-- [Definition 2] in our paper, although we define
-- it in a way that makes it easier to do induction on.
--
-- Accumulator-style helper function for 'layer'
-- Defined recursively on the *reverse* of the vertex list
-- (this means we are looking at vertices backwards -- each
-- vertex can only "see" the vertices preceding it.)
def layer_acc (net : BFNN) (n : ℕ) (ls : List ℕ) : ℕ :=
match ls with
| [] => 0
| v :: rest =>
if v = n then
let layers := List.map (fun x => layer_acc net x rest) (preds net n)
let max := layers.maximum
match max with
| some L => L + 1
| none => 0
else layer_acc net n rest
-- [Definition 2] in our paper.
-- The layer of n in the net
def layer (net : BFNN) (n : ℕ) : ℕ :=
layer_acc net n (net.graph.get_vertices.reverse)
-- [Not included in paper]
-- The layer relation layer[m] ≤ layer[n] is well-founded
-- (i.e. it has a least element). This is obvious, since
-- we only have finitely many nodes in the net. But
-- since we're using Set instead of Finset, it's hard
-- to convince Lean.
--------------------------------------------------------------------
lemma layer_wellfounded (net : BFNN) :
WellFounded (fun x y => layer net x ≤ layer net y) := by
--------------------------------------------------------------------
exact WellFounded.wellFounded_iff_has_min.mpr
(fun S hS => sorry)
-- This is our assumption that the net is fully connected!
-- Note that we're actually using the form from [Proposition 1]
-- in the paper --- but this follows from our definition of
-- fully connected in the paper.
axiom connected : ∀ (net : BFNN) (m n : ℕ),
layer net m < layer net n → net.graph.hasEdge m n
-- The other direction of [Proposition 1] in our paper.
--------------------------------------------------------------------
lemma preds_decreasing (net : BFNN) (m n : ℕ) :
m ∈ preds net n
→ layer net m < layer net n := by
--------------------------------------------------------------------
intro h₁
simp only [layer]
generalize hls : (List.reverse (Graph.get_vertices net.toNet.graph)) = ls
induction ls
case nil =>
-- This case is impossible;
-- m ∈ preds(n) means that there is *something* in the graph.
-- This contradicts the fact that the graph is empty!
simp [preds, Graph.predecessors, Graph.get_vertices] at h₁
simp [Graph.get_vertices] at hls
rw [hls] at h₁
rw [List.map_nil] at h₁
rw [List.filter_nil] at h₁
exact False.elim ((List.mem_nil_iff _).mp h₁)
case cons v rest IH =>
sorry
-- Essentially, A(∑ wᵢ xᵢ) = 1.
-- (the inner part of [Definition 4] in our paper.)
-- Note that we do not need to define [Definition 3] in our
-- Lean code.
noncomputable
def activ (net : BFNN) (prev_activ : List ℚ) (n : ℕ) : Prop :=
let preds := preds net n
let weights := List.map (fun i =>
let m := preds.get! i
net.graph.getEdgeWeight m n)
(List.range preds.length)
let weight_sum := weighted_sum weights prev_activ
let curr_activ := net.activation weight_sum
curr_activ = 1
-- [Definition 4] in our paper.
-- The forward propagation operator.
-- By recursion on the layers of our feedforward network.
-- (_acc indicates that we are using the layer as an accumulator
-- to do recursion.)
--
-- n ∈ propagate_acc(S) iff either:
-- Base Case (L=0): n ∈ S
-- Rcrs Case (L≥0): n ∈ S, or the nodes m preceding n activate n.
-- (We check their activation values via propagate_acc on m)
--
-- NOTE: Although 'do' notation might be more readable here,
-- I avoid it because it's hard to reason about.
@[simp]
def propagate_acc (net : BFNN) (S : Set ℕ) (n : ℕ) (L : ℕ) : Prop :=
match L with
| Nat.zero => n ∈ S
| Nat.succ _ =>
let preds := preds net n
let prev_activ := List.map (fun i =>
let m := preds.get! i
if propagate_acc net S m (layer net m) then 1 else 0)
(List.range preds.length)
n ∈ S ∨ activ net prev_activ n
termination_by propagate_acc net S n L => layer net n
decreasing_by exact preds_decreasing net m n (get!_mem preds i)
-- [Definition 4] in our paper.
-- Set variation of propagate
@[simp]
def propagate (net : BFNN) (S : Set ℕ) : Set ℕ :=
fun n => propagate_acc net S n (layer net n)
-------------------------------------------------
-- Some helper simplification lemmas
-- (just to clean up the monstrous proofs ahead!)
-------------------------------------------------
-- [Not included in paper]
--------------------------------------------------------------------
lemma simp_propagate_acc (net : BFNN) :
n ∉ S
→ propagate_acc net S n (Nat.succ L) =
let preds := preds net n
let prev_activ := List.map (fun i =>
let m := preds.get! i
if propagate_acc net S m (layer net m) then 1 else 0)
(List.range preds.length)
activ net prev_activ n := by
--------------------------------------------------------------------
intro (h₁ : n ∉ S)
apply Iff.to_eq
apply Iff.intro
case mp =>
intro h₂
simp only [propagate_acc]
simp only [propagate_acc] at h₂
cases h₂
case inl h₃ => contradiction
case inr h₃ => exact h₃
case mpr =>
intro h₂
simp only [propagate_acc]
simp only [propagate_acc] at h₂
exact Or.inr h₂
-- [Not included in paper]
-- If A and B agree on all the predecessors of n, then they agree on n.
-- This lemma is extremely inefficient to use. In order to make it
-- remotely usable, we've simplified everything down to unreadable
-- garbage.
--------------------------------------------------------------------
lemma activ_agree (net : BFNN) (A B : Set ℕ) (n : ℕ) :
(∀ (m : ℕ), m ∈ preds net n → (m ∈ A ↔ m ∈ B))
→ activ net (List.map (fun i =>
let m := (preds net n).get! i
if A m then 1 else 0)
(List.range (preds net n).length)) n
→ activ net (List.map (fun i =>
let m := (preds net n).get! i
if B m then 1 else 0)
(List.range (preds net n).length)) n := by
--------------------------------------------------------------------
intro h₁ h₂
simp only [activ]
simp only [activ] at h₂
convert ← h₂ using 6
rename_i i
let m := (preds net n).get! i
have h₃ : m ∈ (preds net n) := get!_mem (preds net n) i
exact h₁ m h₃
/-══════════════════════════════════════════════════════════════════
Properties of Propagation
══════════════════════════════════════════════════════════════════-/
-- [Not included in paper, since it's essentially just the
-- base case of 'propagate']
--------------------------------------------------------------------
lemma prop_layer_zero (net : BFNN) : ∀ (S : Set ℕ) (n : ℕ),
layer net n = 0
→ n ∈ propagate net S
→ n ∈ S := by
--------------------------------------------------------------------
intro (S : Set ℕ) (n : ℕ)
(hL : layer net n = 0)
(h₁ : n ∈ propagate net S)
simp only [propagate, Membership.mem, Set.Mem] at h₁
rw [hL] at h₁
simp only [propagate_acc] at h₁
exact h₁
-- [Proposition 2, part 1] in our paper.
--------------------------------------------------------------------
theorem propagate_is_extens :
∀ (S : Set ℕ),
S ⊆ propagate net S := by
--------------------------------------------------------------------
intro (S : Set ℕ)
(n : ℕ) (h₁ : n ∈ S)
simp only [Membership.mem, Set.Mem, propagate]
-- By induction on the layer of the net containing n
generalize hL : layer net n = L
induction L
-- Base Step
case zero =>
simp only [propagate_acc]
exact h₁
-- Inductive Step
case succ k _ =>
simp only [propagate_acc]
exact Or.inl h₁
-- [Proposition 2, part 1] in our paper.
-- Corollary: The same statement, but for propagate_acc
-- (this is a helper lemma for the properties that follow.)
-------------------------------------------------
lemma propagate_acc_is_extens :
∀ (S : Set ℕ),
n ∈ S → propagate_acc net S n (layer net n) := by
-------------------------------------------------
intro (S : Set ℕ)
intro (h₁ : n ∈ S)
have h₂ : n ∈ propagate net S := propagate_is_extens _ _ h₁
simp only [Membership.mem, Set.Mem, propagate] at h₂
exact h₂
-- [Proposition 2, part 2] in our paper.
--------------------------------------------------------------------
theorem propagate_is_idempotent :
∀ (S : Set ℕ),
propagate net S =
propagate net (propagate net S) := by
--------------------------------------------------------------------
intro (S : Set ℕ)
apply ext
intro (n : ℕ)
simp only [Membership.mem, Set.Mem, propagate]
-- By strong induction on the layer of the net containing n
generalize hL : layer net n = L
induction L using Nat.case_strong_induction_on generalizing n
-- Base Step
case hz =>
simp only [Membership.mem, Set.Mem, propagate_acc]
conv in (layer net n) => rw [hL]
simp only [propagate_acc]
exact ⟨fun x => x, fun x => x⟩
-- Inductive Step
case hi L IH =>
apply Iff.intro
-- Forward direction is easy, just apply extensive
case mp =>
intro h₁
rw [symm hL]
rw [symm hL] at h₁
exact @propagate_acc_is_extens net _ _ h₁
-- Backwards direction is trickier
case mpr =>
intro h₁
-- By cases; either n ∈ S or n ∉ S
-- Again; either n ∈ propagate S or n ∉ propagate S
by_cases n ∈ S
case pos =>
rw [symm hL]
exact @propagate_acc_is_extens net _ _ h
case neg =>
by_cases propagate_acc net S n (layer net n)
case pos =>
rw [symm hL]
exact h
case neg =>
-- Just some simplifications and rewriting definitions
rename_i n_not_in_S
rw [simp_propagate_acc net n_not_in_S]
have h₂ : ¬n ∈ propagate net S := h
simp [propagate] at h₂
rw [simp_propagate_acc net h₂] at h₁
-- We abbreviate the two 'simp'ed sets for convenience.
let S₁ := fun m => propagate_acc net (fun n =>
propagate_acc net S n (layer net n)) m (layer net m)
let S₂ := fun m => propagate_acc net S m (layer net m)
-- Apply IH to the predecessors
have h₂ : ∀ (m : ℕ), m ∈ preds net n → (S₁ m ↔ S₂ m) := by
simp only [Membership.mem] -- really just want to unfold the let
intro m hm
generalize hLm : layer net m = Lm
have h₃ : Lm ≤ L := by
rw [symm hLm]
apply Nat.lt_succ.mp
rw [symm hL]
exact preds_decreasing net m n hm
exact (symm (IH Lm h₃ m hLm).to_eq).to_iff
-- Apply the activ_agree lemma
simp
simp at h₁
exact activ_agree _ S₁ S₂ _ h₂ h₁
-- [Proposition 2, part 3] in our paper.
-- This is essentially Hannes' proof.
--------------------------------------------------------------------
theorem propagate_is_cumulative :
∀ (A B : Set ℕ), A ⊆ B
→ B ⊆ propagate net A
→ propagate net A = propagate net B := by
--------------------------------------------------------------------
intro (A : Set ℕ) (B : Set ℕ)
(h₁ : A ⊆ B)
(h₂ : B ⊆ propagate net A)
apply ext
intro (n : ℕ)
simp only [Membership.mem, Set.Mem, propagate]
simp only [Membership.mem, Set.Mem, propagate] at h₂
-- By induction on the layer of the net containing n
generalize hL : layer net n = L
induction L using Nat.case_strong_induction_on generalizing n
-- Base Step
case hz =>
simp only [propagate_acc]
apply Iff.intro
case mp => exact fun h₃ => h₁ h₃
case mpr =>
intro h₃
have h₄ : propagate_acc net A n (layer net n) := h₂ h₃
conv at h₄ in (layer net n) => rw [hL]
simp only [propagate_acc] at h₄
exact h₄
-- Inductive Step
case hi k IH =>
apply Iff.intro
-- Forward Direction
case mp =>
intro h₃
-- By cases; either n ∈ B or n ∉ B.
-- Similarly, either n ∈ A or n ∉ A.
by_cases n ∈ B
case pos =>
rw [symm hL]
exact @propagate_acc_is_extens net _ _ h -- TODO: replace acc variation
case neg =>
by_cases n ∈ A
case pos =>
rename_i n_not_in_B
exact absurd (h₁ h) n_not_in_B
case neg =>
-- Just some simplifications and rewriting definitions
rename_i n_not_in_B
rw [simp_propagate_acc net h] at h₃
rw [simp_propagate_acc net n_not_in_B]
-- Just try to prove it and turn it into an 'activ_agree'
-- lemma later!
-- Go into 'prev_activ' and substitute using our IH.
-- Then try to prove what's left.
simp
simp at h₃
convert h₃ using 4
rename_i i
generalize hm : List.get! (net.graph.predecessors n) i = m
generalize hLm : layer net m = Lm
-- Apply the inductive hypothesis!
have h₃ : m ∈ preds net n := by
rw [symm hm]
simp [preds]
exact get!_mem (net.graph.predecessors n) i
have h₄ : Lm ≤ k := by
rw [symm hLm]
apply Nat.lt_succ.mp
rw [symm hL]
exact preds_decreasing net m n h₃
exact (symm (IH Lm h₄ m hLm).to_eq).to_iff
-- Backwards Direction (should be very similar)
case mpr =>
intro h₃
-- By cases; either n ∈ A or n ∉ A
by_cases n ∈ A
case pos =>
rw [symm hL]
exact @propagate_acc_is_extens net _ _ h -- TODO: replace acc variation
case neg =>
by_cases n ∈ B
case pos =>
rename_i n_not_in_A
rw [symm hL]
exact h₂ h
case neg =>
-- Just some simplifications and rewriting definitions
rename_i n_not_in_A
rw [simp_propagate_acc net h] at h₃
rw [simp_propagate_acc net n_not_in_A]
-- Just try to prove it and turn it into an 'activ_agree'
-- lemma later!
-- Go into 'prev_activ' and substitute using our IH.
-- Then try to prove what's left.
simp
simp at h₃
convert h₃ using 4
rename_i i
generalize hm : List.get! (net.graph.predecessors n) i = m
generalize hLm : layer net m = Lm
-- Apply the inductive hypothesis!
have h₃ : m ∈ preds net n := by
rw [symm hm]
simp [preds]
exact get!_mem (net.graph.predecessors n) i
have h₄ : Lm ≤ k := by
rw [symm hLm]
apply Nat.lt_succ.mp
rw [symm hL]
exact preds_decreasing net m n h₃
exact IH Lm h₄ m hLm
/-══════════════════════════════════════════════════════════════════
Conditional Graph Reachability
══════════════════════════════════════════════════════════════════-/
-- reachable(A, B) is the set of all nodes reachable from B
-- using a path where all nodes are inside A (i.e. there is a
-- focusedPath from B to n).
--
-- I don't know what the complete axioms are for this conditional
-- knowledge. But this isn't the main focus here. I'll just
-- prove a few sound things to give an idea about what it's like.
-- [Not included in paper]
-- A focused path is a path where every node is contained
-- within the set S.
inductive focusedPath (g : Graph ℕ ℚ) (S : Set ℕ) : ℕ → ℕ → Prop where
| trivial {u : ℕ} :
u ∈ S → focusedPath g S u u
| from_path {u v w : ℕ} :
focusedPath g S u v → g.hasEdge v w → w ∈ S → focusedPath g S u w
-- [Not included in paper]
-- focusedPath is transitive
--------------------------------------------------------------------
theorem focusedPath_trans {u v w : ℕ} (g : Graph ℕ ℚ) (A : Set ℕ) :
focusedPath g A u v → focusedPath g A v w → focusedPath g A u w := by
--------------------------------------------------------------------
intro (h₁ : focusedPath g A u v)
intro (h₂ : focusedPath g A v w)
induction h₂
case trivial _ => exact h₁
case from_path x y _ edge_xy hy path_ux =>
exact focusedPath.from_path path_ux edge_xy hy
-- [Not included in paper]
-- focusedPath is contained in A
--------------------------------------------------------------------
theorem focusedPath_subset {u v : ℕ} (g : Graph ℕ ℚ) (A : Set ℕ) :
focusedPath g A u v → u ∈ A := by
--------------------------------------------------------------------
intro h₁
induction h₁
case trivial hA => exact hA
case from_path _ _ _ _ _ hA => exact hA
-- [Definition 5] in our paper
-- This is the set of all nodes reachable from B using
-- paths where *every* node in the path is within A
-- (including the initial and final nodes)
def reachable (net : BFNN) (A B : Set ℕ) : Set ℕ :=
fun (n : ℕ) =>
∃ (m : ℕ), m ∈ B ∧ focusedPath net.graph A m n
-- [Proposition 3, part 1] in our paper
-- Argument: If there is a path from B to n, but n is in
-- layer 0 -- there are *no* incoming nodes -- then the path
-- must be of length 0. So n must be that n ∈ B with
-- a path to n, i.e. n ∈ B.
--------------------------------------------------------------------
lemma reach_layer_zero (net : BFNN) : ∀ (A B : Set ℕ) (n : ℕ),
layer net n = 0
→ n ∈ reachable net A B
→ n ∈ B := by
--------------------------------------------------------------------
intro (A : Set ℕ)
(B : Set ℕ)
(n : ℕ) (hL : layer net n = 0)
(h₁ : n ∈ reachable net A B)
match h₁ with
| ⟨m, h₂⟩ =>
-- By induction on the length of the path from B to n.
-- path length = 0 => m ∈ B means n ∈ B
-- path length ≥ 0 => this case should be impossible,
-- because at layer 0 n has *no predecessors*!
induction h₂.2
case trivial _ => exact h₂.1
case from_path x y _ edge_xy _ _ =>
-- Contradiction; y's layer is 0, but there is an edge from x to y!
-- (layer net x < layer net y, but that means layer net x < 0)
have h₃ : layer net x < layer net y :=
preds_decreasing net x y ((edge_from_preds _ _ _).mpr edge_xy)
exact absurd hL (Nat.not_eq_zero_of_lt h₃)
-- [Proposition 3, part 2] in our paper
-- If A ∩ B is empty, then there are no nodes reachable
-- from B within A.
-- (This does *not* follow from [reach_is_extens]!)
--------------------------------------------------------------------
theorem reach_empty_of_inter_empty (net : BFNN) : ∀ (A B : Set ℕ),
(A ∩ B) = ∅ → reachable net A B = ∅ := by
--------------------------------------------------------------------
intro A B
rw [← Set.not_nonempty_iff_eq_empty]
rw [← Set.not_nonempty_iff_eq_empty]
-- Start a proof by contraposition, and simplify
contrapose
intro h₁
rw [Classical.not_not]
rw [Classical.not_not] at h₁
-- Since Reach(A, B) is nonempty, we have n ∈ Reach(A, B).
-- We argue that the m ∈ B that reaches n must be m ∈ A ∩ B.
match h₁ with
| ⟨n, hn⟩ =>
match hn with
| ⟨m, hm⟩ =>
-- m ∈ B is easy; we show inductively that m ∈ A as well.
induction hm.2
case trivial hmA => exact ⟨m, ⟨hmA, hm.1⟩⟩
case from_path x y path_mx _ _ IH =>
exact IH ⟨m, ⟨hm.1, path_mx⟩⟩ ⟨hm.1, path_mx⟩
-- [Proposition 3, part 3] in our paper
--------------------------------------------------------------------
theorem reach_subset (net : BFNN) : ∀ (A B : Set ℕ),
reachable net A B ⊆ A := by
--------------------------------------------------------------------
intro (A : Set ℕ)
(B : Set ℕ)
(n : ℕ) (h₁ : n ∈ reachable net A B)
simp only [Membership.mem, Set.Mem] at h₁
match h₁ with
| ⟨m, hm⟩ =>
induction hm.2
case trivial hbase => exact hbase
case from_path _ y _ _ hy _ => exact hy
-- [Proposition 3, part 4] in our paper
-- This is like propag_is_extens, except we also have to ensure
-- that n ∈ A.
--------------------------------------------------------------------
theorem reach_is_extens (net : BFNN) : ∀ (A B : Set ℕ),
(A ∩ B) ⊆ reachable net A B := by
--------------------------------------------------------------------
intro (A : Set ℕ)
(B : Set ℕ)
(n : ℕ) (h₁ : n ∈ A ∩ B)
have (h₂ : focusedPath net.graph A n n) :=
focusedPath.trivial h₁.1
exact ⟨n, ⟨h₁.2, h₂⟩⟩
-- [Proposition 3, part 5] in our paper
--------------------------------------------------------------------
theorem reach_is_idempotent (net : BFNN) : ∀ (A B : Set ℕ),
reachable net A B = reachable net A (reachable net A B) := by
--------------------------------------------------------------------
intro (A : Set ℕ)
(B : Set ℕ)
exact Set.ext (fun (n : ℕ) =>
-- ⊆ direction; easy, just apply reach_subset and reach_is_extens
⟨(fun (h₁ : n ∈ reachable net A B) =>
have h₂ : n ∈ A := reach_subset _ _ _ h₁
reach_is_extens _ _ _ ⟨h₂, h₁⟩),
-- ⊇ direction
(fun (h₁ : n ∈ reachable net A (reachable net A B)) =>
match h₁ with
| ⟨x, h₂⟩ =>
match h₂.1 with
| ⟨m, h₃⟩ =>
⟨m, ⟨h₃.1, focusedPath_trans _ A h₃.2 h₂.2⟩⟩)⟩)
-- [Proposition 3, part 6] in our paper
--------------------------------------------------------------------
theorem reach_is_monotone (net : BFNN) : ∀ (S A B : Set ℕ),
A ⊆ B → reachable net S A ⊆ reachable net S B := by
--------------------------------------------------------------------
intro (S : Set ℕ)
(A : Set ℕ)
(B : Set ℕ)
(h₁ : A ⊆ B)
(n : ℕ) (h₂ : n ∈ reachable net S A)
exact match h₂ with
| ⟨m, hm⟩ => ⟨m, ⟨h₁ hm.1, hm.2⟩⟩
-- [Proposition 3, part 7] in our paper
-- Reach is closed under union
-- (This is really a consequence of monotonicity)
--------------------------------------------------------------------
theorem reach_union (net : BFNN) : ∀ (S A B : Set ℕ),
reachable net S (A ∪ B) = (reachable net S A) ∪ (reachable net S B) := by
--------------------------------------------------------------------
intro S A B
apply ext
intro n
apply Iff.intro
-- Backwards direction
-- (easy; A, B ⊆ A ∪ B, so we just apply monotonicity)
case mpr =>
intro h₁
cases h₁
case inl h₂ => exact reach_is_monotone _ _ _ _ (subset_union_left _ _) h₂
case inr h₂ => exact reach_is_monotone _ _ _ _ (subset_union_right _ _) h₂
-- Forward direction
case mp =>
intro h₁
-- We have a path from m ∈ A ∪ B to n;
-- from here we go by cases; either m ∈ A or m ∈ B.
-- In either case, the path from m ⟶ n gives us n ∈ Reach(_).
match h₁ with
| ⟨m, hm⟩ =>
cases hm.1
case inl h₂ => exact Or.inl ⟨m, ⟨h₂, hm.2⟩⟩
case inr h₂ => exact Or.inr ⟨m, ⟨h₂, hm.2⟩⟩
/-══════════════════════════════════════════════════════════════════
Naive (Unstable) Hebbian Update