Skip to content

Commit

Permalink
get rid of warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
Xixiang-Liu committed Apr 2, 2024
1 parent 991b124 commit d58a0f5
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 4 deletions.
Binary file modified example.parquet
Binary file not shown.
11 changes: 7 additions & 4 deletions parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,17 @@ def degrade_sortedness_to_target(df: pd.DataFrame) -> None:
# swap random value pairs in the block until target_sortedness is reached
def degrade_block(block: pd.DataFrame) -> None:
N = len(block)
+ index_values = block.index.tolist()
# prevent infinite loop in case the target can't be reached
for _ in range(10000):
sortedness_block = get_sortedness_block(block)
if sortedness_block <= target_sortedness:
return
- idx1 = random.randint(0, N - 1)
- idx2 = random.randint(0, N - 1)
- block.iloc[idx1], block.iloc[idx2] = block.iloc[idx2], block.iloc[idx1].copy()
+ idx1 = random.choice(index_values)
+ idx2 = random.choice(index_values)
+ tmp = block['col'][idx1].copy()
+ block.at[idx1, 'col'] = block.at[idx2, 'col']
+ block.at[idx2, 'col'] = tmp

num_rows = len(df)
num_full_blocks = num_rows // size_block
Expand All @@ -78,7 +81,7 @@ def degrade_block(block: pd.DataFrame) -> None:
target_sortedness = 0.8
size_block = 512

- df = pd.DataFrame({"col": range(1000)})
+ df = pd.DataFrame({"col": range(100)})
degrade_sortedness_to_target(df)
table = pa.Table.from_pandas(df)
pq.write_table(table, "example.parquet")
Expand Down

0 comments on commit d58a0f5

Please sign in to comment.