From 3bc7baaa7df70589fc9b4170848656fce6f4f44b Mon Sep 17 00:00:00 2001 From: Baber Date: Mon, 2 Dec 2024 10:16:39 +0000 Subject: [PATCH 1/4] add ai2d --- lm_eval/tasks/ai2d/ai2d.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 lm_eval/tasks/ai2d/ai2d.yaml diff --git a/lm_eval/tasks/ai2d/ai2d.yaml b/lm_eval/tasks/ai2d/ai2d.yaml new file mode 100644 index 0000000000..b7711a9f4b --- /dev/null +++ b/lm_eval/tasks/ai2d/ai2d.yaml @@ -0,0 +1,20 @@ +dataset_path: lmms-lab/ai2d +test_split: test +output_type: generate_until +doc_to_image: !function utils.doc_to_image +doc_to_text: "{{question | capitalize}} +{{% for option in options %}} +{{loop.index | chr(64)}}. {{option | capitalize}} +{{% endfor %}}" +doc_to_target: "{{options[answer]}}" +generation_kwargs: + until: [] + temperature: 0.0 + do_sample: false + max_gen_toks: 512 +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 From 5ed56f7ce32f97210ba5cb091044504d5f5b5df8 Mon Sep 17 00:00:00 2001 From: Baber Date: Mon, 2 Dec 2024 10:19:46 +0000 Subject: [PATCH 2/4] nit --- lm_eval/api/instance.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lm_eval/api/instance.py b/lm_eval/api/instance.py index d3c6afa064..fb57b1b6ae 100644 --- a/lm_eval/api/instance.py +++ b/lm_eval/api/instance.py @@ -1,6 +1,8 @@ from dataclasses import dataclass, field from typing import Literal, Optional, Tuple +from lm_eval.api.types import GenerateUntilRequest, LogLiklehoodRequest + OutputType = Literal[ "loglikelihood", "loglikelihood_rolling", "generate_until", "multiple_choice" @@ -34,5 +36,7 @@ def args(self): Returns (string,) where `string` is the string to calculate loglikelihood over """ return ( - self.arguments if isinstance(self.arguments, tuple) else (self.arguments,) + GenerateUntilRequest(*self.arguments) + if self.request_type != "generate_until" + else LogLiklehoodRequest(*self.arguments) ) From c0c7a72f24b6c2e40bfaa5e3e8601f21d01c2528 Mon Sep 17 00:00:00 2001 From: Baber Date: Mon, 2 Dec 2024 10:20:20 +0000 Subject: [PATCH 3/4] nit --- lm_eval/api/instance.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/lm_eval/api/instance.py b/lm_eval/api/instance.py index fb57b1b6ae..d3c6afa064 100644 --- a/lm_eval/api/instance.py +++ b/lm_eval/api/instance.py @@ -1,8 +1,6 @@ from dataclasses import dataclass, field from typing import Literal, Optional, Tuple -from lm_eval.api.types import GenerateUntilRequest, LogLiklehoodRequest - OutputType = Literal[ "loglikelihood", "loglikelihood_rolling", "generate_until", "multiple_choice" @@ -36,7 +34,5 @@ def args(self): Returns (string,) where `string` is the string to calculate loglikelihood over """ return ( - GenerateUntilRequest(*self.arguments) - if self.request_type != "generate_until" - else LogLiklehoodRequest(*self.arguments) + self.arguments if isinstance(self.arguments, tuple) else (self.arguments,) ) From 59053d58f3133bf0665af4546ea0a57079e7c57e Mon Sep 17 00:00:00 2001 From: Baber Date: Mon, 2 Dec 2024 10:20:44 +0000 Subject: [PATCH 4/4] nit --- lm_eval/tasks/ai2d/ai2d.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lm_eval/tasks/ai2d/ai2d.yaml b/lm_eval/tasks/ai2d/ai2d.yaml index b7711a9f4b..dfbaf75e04 100644 --- a/lm_eval/tasks/ai2d/ai2d.yaml +++ b/lm_eval/tasks/ai2d/ai2d.yaml @@ -4,7 +4,7 @@ output_type: generate_until doc_to_image: !function utils.doc_to_image doc_to_text: "{{question | capitalize}} {{% for option in options %}} -{{loop.index | chr(64)}}. {{option | capitalize}} +{{loop.index | chr(64)}}. {{option | capitalize}} {{% endfor %}}" doc_to_target: "{{options[answer]}}" generation_kwargs: