From 459769193818224667d5d20f3ae8e0f18163fb6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Li=20Yudong=20=28=E6=9D=8E=E7=85=9C=E4=B8=9C=29?=
Date: Sun, 5 Nov 2023 10:42:53 +0800
Subject: [PATCH] Update dataloader.py

---
Reviewer notes (text between the "---" line and the diff is not part of the
commit message):
 * Adds a skip_data_num setting: the constructor discards that many pickled
   instances from self.dataset_reader before any batch is built.
 * skip_data_num is read with getattr(..., 0) because this patch changes only
   dataloader.py; an args object without the attribute keeps working and
   skips nothing.
 * Running out of data while skipping still raises EOFError, now with the
   dataset path and the requested count in the message.
 * Hunk indentation assumes the file's 4-space style (8 spaces inside
   __init__) - verify against the target file before applying.
 * The blob ids on the "index" line are carried over unchanged.

 tencentpretrain/utils/dataloader.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tencentpretrain/utils/dataloader.py b/tencentpretrain/utils/dataloader.py
index 5ab9084..88c9000 100755
--- a/tencentpretrain/utils/dataloader.py
+++ b/tencentpretrain/utils/dataloader.py
@@ -28,6 +28,19 @@ def __init__(self, args, dataset_path, batch_size, global_rank, world_size, loca
         self.span_masking = args.span_masking
         self.span_geo_prob = args.span_geo_prob
         self.span_max_length = args.span_max_length
+        # NOTE(review): args may not define skip_data_num; default to skipping nothing.
+        self.skip_data_num = getattr(args, "skip_data_num", 0)
+
+        # Discard the first skip_data_num pickled instances so that reading starts
+        # after them; range() is empty for values <= 0. Assumes self.dataset_reader is open.
+        try:
+            for _ in range(self.skip_data_num):
+                pickle.load(self.dataset_reader)
+        except EOFError as err:
+            raise EOFError(
+                "Reached the end of %s while skipping %d instances"
+                % (dataset_path, self.skip_data_num)
+            ) from err
 
     def _fill_buf(self):
         try: