Skip to content

Commit

Permalink
Update dataloader.py
Browse files: browse the repository at this point in the history
  • Loading branch information
ydli-ai authored Nov 5, 2023
1 parent 3ab1161 commit 4597691
Showing 1 changed file with 5 additions and 0 deletions.
5 changes: 5 additions & 0 deletions tencentpretrain/utils/dataloader.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -28,6 +28,11 @@ def __init__(self, args, dataset_path, batch_size, global_rank, world_size, loca
self.span_masking = args.span_masking
self.span_geo_prob = args.span_geo_prob
self.span_max_length = args.span_max_length
self.skip_data_num = args.skip_data_num

if self.skip_data_num > 0:
for _ in range(self.skip_data_num):
instance = pickle.load(self.dataset_reader)

def _fill_buf(self):
try:
Expand Down

0 comments on commit 4597691

Please sign in to comment.