From acfe785ea8f8b1d71ca0be61bacd45addc53ae59 Mon Sep 17 00:00:00 2001 From: Olzhas Arystanov Date: Tue, 17 Dec 2024 01:52:35 +0500 Subject: [PATCH] Fix continuation for started large files with no fully finished parts --- b2sdk/_internal/transfer/emerge/executor.py | 15 ++++++++------- changelog.d/+fix_large_file_continuation.fixed.md | 1 + test/unit/bucket/test_bucket.py | 2 +- test/unit/v0/test_bucket.py | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) create mode 100644 changelog.d/+fix_large_file_continuation.fixed.md diff --git a/b2sdk/_internal/transfer/emerge/executor.py b/b2sdk/_internal/transfer/emerge/executor.py index 21596e29..2a7ba07d 100644 --- a/b2sdk/_internal/transfer/emerge/executor.py +++ b/b2sdk/_internal/transfer/emerge/executor.py @@ -393,6 +393,7 @@ def _find_matching_unfinished_file( continue finished_parts = {} + conflict_detected = False for part in self.services.large_file.list_parts(file_.file_id): emerge_part = emerge_parts_dict.get(part.part_number) @@ -405,7 +406,7 @@ def _find_matching_unfinished_file( file_.file_id, part.part_number, ) - finished_parts = None + conflict_detected = True break # Compare part sizes @@ -413,25 +414,25 @@ def _find_matching_unfinished_file( logger.debug( 'Rejecting %s: part %s size mismatch', file_.file_id, part.part_number ) - continue # part size doesn't match - so we reupload + conflict_detected = True + break # part size doesn't match - so we reupload # Compare part hashes if emerge_part.is_hashable() and emerge_part.get_sha1() != part.content_sha1: logger.debug( 'Rejecting %s: part %s sha1 mismatch', file_.file_id, part.part_number ) - continue # part.sha1 doesn't match - so we reupload + conflict_detected = True + break # part.sha1 doesn't match - so we reupload finished_parts[part.part_number] = part - if finished_parts is None: + if conflict_detected: continue finished_parts_len = len(finished_parts) - if finished_parts and ( - best_match_file is None or finished_parts_len > best_match_parts_len - ): + if best_match_file is None or finished_parts_len > best_match_parts_len: best_match_file = file_ best_match_parts = finished_parts best_match_parts_len = finished_parts_len diff --git a/changelog.d/+fix_large_file_continuation.fixed.md b/changelog.d/+fix_large_file_continuation.fixed.md new file mode 100644 index 00000000..200b3284 --- /dev/null +++ b/changelog.d/+fix_large_file_continuation.fixed.md @@ -0,0 +1 @@ +Fix continuation for started large files with no fully finished parts. diff --git a/test/unit/bucket/test_bucket.py b/test/unit/bucket/test_bucket.py index 62d100a3..108c46ef 100644 --- a/test/unit/bucket/test_bucket.py +++ b/test/unit/bucket/test_bucket.py @@ -2006,7 +2006,7 @@ def test_upload_large_resume_no_parts(self): large_file_id = self._start_large_file('file1') progress_listener = StubProgressListener() file_info = self.bucket.upload_bytes(data, 'file1', progress_listener=progress_listener) - self.assertNotEqual(large_file_id, file_info.id_) # it's not a match if there are no parts + self.assertEqual(large_file_id, file_info.id_) self._check_file_contents('file1', data) self.assertTrue(progress_listener.is_valid()) diff --git a/test/unit/v0/test_bucket.py b/test/unit/v0/test_bucket.py index de44fc4b..b2890937 100644 --- a/test/unit/v0/test_bucket.py +++ b/test/unit/v0/test_bucket.py @@ -906,7 +906,7 @@ def test_upload_large_resume_no_parts(self): large_file_id = self._start_large_file('file1') progress_listener = StubProgressListener() file_info = self.bucket.upload_bytes(data, 'file1', progress_listener=progress_listener) - self.assertNotEqual(large_file_id, file_info.id_) # it's not a match if there are no parts + self.assertEqual(large_file_id, file_info.id_) self._check_file_contents('file1', data) self.assertTrue(progress_listener.is_valid())