generated from lasseufpa/python_template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
histogram.py
84 lines (77 loc) · 3.11 KB
/
histogram.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def bihist(ax, upatches, lpatches, orientation='vertical'):
    """Mirror one set of histogram patches so two histograms face each other.

    For a vertical orientation the patches in ``lpatches`` are flipped along
    the y direction; for a horizontal orientation the patches in ``upatches``
    are flipped along the x direction. The axes limits are recomputed
    afterwards so the mirrored patches are inside the view.

    Args:
        ax: Axes-like object providing ``relim()`` and ``autoscale_view()``.
        upatches: Iterable of patches of the first histogram.
        lpatches: Iterable of patches of the second histogram.
        orientation: Any string starting with ``'v'`` (vertical) or ``'h'``
            (horizontal).

    Raises:
        ValueError: If ``orientation`` starts with neither ``'v'`` nor ``'h'``.
    """
    # Decide up front which patches get mirrored, which size attribute a
    # rectangle-like patch stores, and which vertex column a polygon-like
    # patch uses for the same direction.
    if orientation.startswith('v'):
        targets, size_attr, vertex_col = lpatches, '_height', 1
    elif orientation.startswith('h'):
        targets, size_attr, vertex_col = upatches, '_width', 0
    else:
        raise ValueError("Unknown orientation '%s'" % orientation)

    for patch in targets:
        try:
            # matplotlib.patches.Rectangle keeps its extent in a scalar.
            setattr(patch, size_attr, getattr(patch, size_attr) * -1)
        except AttributeError:
            # matplotlib.patches.Polygon has no such scalar; negate the
            # matching coordinate of every path vertex instead.
            patch._path.vertices[:, vertex_col] *= -1

    ax.relim()
    ax.autoscale_view()
def calculate_iqr_min_max(data, whisker=1.5):
    """Return the smallest and largest values of *data* that are not IQR outliers.

    A value is treated as an outlier when it lies more than ``whisker`` times
    the interquartile range (Q3 - Q1) below the first quartile or above the
    third quartile.

    Args:
        data: Array-like of numbers (list, NumPy array, pandas Series, ...).
            NaN entries are ignored.
        whisker: Multiplier applied to the interquartile range to build the
            lower and upper fences. Defaults to 1.5.

    Returns:
        A ``(minimum, maximum)`` tuple of the values lying inside the fences,
        or ``(nan, nan)`` when *data* contains no usable value.
    """
    # Imported locally: the module binds ``np`` only inside its ``__main__``
    # guard, so a global lookup would fail when this function is imported.
    import numpy as np

    values = np.asarray(data)
    if values.size == 0:
        return float('nan'), float('nan')

    # NaN-aware quartiles: a single NaN must not turn both fences into NaN.
    q1, q3 = np.nanpercentile(values, [25, 75])
    spread = q3 - q1
    lower_fence = q1 - whisker * spread
    upper_fence = q3 + whisker * spread

    # Comparisons against NaN are False, so missing values drop out here too.
    inside = values[(values >= lower_fence) & (values <= upper_fence)]
    if inside.size == 0:  # e.g. every entry was NaN
        return float('nan'), float('nan')
    return inside.min(), inside.max()
def _count_above(values, threshold):
    """Return how many entries of *values* are strictly greater than *threshold*."""
    return int((values > threshold).sum())


def _plot_bihistogram(column, bug, not_bug, min_val, max_val,
                      orientation='vertical', save_path=None):
    """Draw a bi-histogram of one feature for the "Bug" and "Not Bug" groups.

    Args:
        column: Feature name, used in the plot title.
        bug: Values of the feature for the rows plotted as "Bug".
        not_bug: Values of the feature for the rows plotted as "Not Bug".
        min_val: Left edge of the first bin.
        max_val: Right edge of the last bin.
        orientation: Histogram orientation, forwarded to ``Axes.hist`` and
            ``bihist``.
        save_path: When given, the figure is written there as PNG instead of
            being shown interactively.
    """
    # Local imports keep matplotlib optional for the outlier-count path and
    # make this helper independent of names bound under the __main__ guard.
    import matplotlib.pyplot as plt
    import numpy as np

    bins = np.linspace(min_val, max_val, 30)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1, title=column + " Bi-histogram")
    shared = dict(bins=bins, histtype='bar', alpha=0.5,
                  orientation=orientation, density=False)
    _, _, not_bug_patches = ax.hist(not_bug, color='b', label="Not Bug", **shared)
    _, _, bug_patches = ax.hist(bug, color='r', label="Bug", **shared)
    # Mirror one of the two groups so the histograms face each other.
    bihist(ax, not_bug_patches, bug_patches, orientation=orientation)
    ax.legend()
    if save_path is None:
        plt.show()
    else:
        fig.savefig(save_path, format="png", bbox_inches="tight")
    plt.close(fig)


if __name__ == '__main__':
    # numpy is bound as a module global here; functions defined above may
    # look ``np`` up at call time.
    import numpy as np  # noqa: F401
    import pandas as pd

    FEATURES = ('rexp',)  # columns to analyse
    # The plotting code used to sit after an unconditional ``break`` and used
    # an un-imported ``plt``, so it never ran. It stays off by default.
    SHOW_PLOT = False

    df = pd.read_csv('src/features.csv')
    bug = df[df['label'] == 1].drop(columns=['label', 'commit'])
    not_bug = df[df['label'] == 0].drop(columns=['label', 'commit'])

    for column in bug.columns:
        if column not in FEATURES:
            continue

        # Common plotting range: the IQR-trimmed extremes of both groups.
        # (Use the raw column min/max instead to include the outliers.)
        bug_min, bug_max = calculate_iqr_min_max(bug[column])
        clean_min, clean_max = calculate_iqr_min_max(not_bug[column])
        min_val = min(bug_min, clean_min)
        max_val = max(bug_max, clean_max)

        # Number of samples, over both groups, above the trimmed range.
        print(_count_above(bug[column], max_val)
              + _count_above(not_bug[column], max_val))

        if SHOW_PLOT:
            # Pass save_path=f"{column}_bihistogram.png" to save the figure.
            _plot_bihistogram(column, bug[column], not_bug[column],
                              min_val, max_val, orientation='vertical')