From d58a0f5e2899af49e922f1a00f8313b2429f9489 Mon Sep 17 00:00:00 2001 From: Xixiang-Liu Date: Mon, 1 Apr 2024 20:07:51 -0500 Subject: [PATCH] get rid of warnings --- example.parquet | Bin 2169 -> 2169 bytes parquet.py | 11 +++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/example.parquet b/example.parquet index e56baf7c7fb63875721ad14164a93c1c9b540d2e..647727c41f2eeba762fb450ddb8297a5a8c8c215 100644 GIT binary patch delta 332 zcmWN@TQWic007X;ko$9IZiw;<{rISeP~`nqDR1(&5nHGw_^^UDV=MhRA7|$Dqkhy` zObmuWY=}6o1l>xe5lNX5Mj0c?I1{8u(_o5eW|-xKIp$d)+x^Xc=Oj;oC6+0&!YU=! zSZ9L-6*k#opKW&7WsfQc9CE}l8BRIloC|7U(2mt*l1rMjxZ;`{Zn@*02OfE%%`-0y S^TsJ=j5AAy73NuGtNWY%-jqBAmRP3991E;b zVx0}LRLD`L$^$#>vd0w%9CE}lC!BJ|Ef-vdK|5BL1~r None: # swap random value pairs in the block till reach target_sortedness def degrade_block(block: pd.DataFrame) -> None: N = len(block) + index_values = block.index.tolist() # prevent infinite loop in case target can't be reach for _ in range(10000): sortedness_block = get_sortedness_block(block) if sortedness_block <= target_sortedness: return - idx1 = random.randint(0, N - 1) - idx2 = random.randint(0, N - 1) - block.iloc[idx1], block.iloc[idx2] = block.iloc[idx2], block.iloc[idx1].copy() + idx1 = random.choice(index_values) + idx2 = random.choice(index_values) + tmp = block['col'][idx1].copy() + block.at[idx1, 'col'] = block.at[idx2, 'col'] + block.at[idx2, 'col'] = tmp num_rows = len(df) num_full_blocks = num_rows // size_block @@ -78,7 +81,7 @@ def degrade_block(block: pd.DataFrame) -> None: target_sortedness = 0.8 size_block = 512 -df = pd.DataFrame({"col": range(1000)}) +df = pd.DataFrame({"col": range(100)}) degrade_sortedness_to_target(df) table = pa.Table.from_pandas(df) pq.write_table(table, "example.parquet")