simplify top correlation list, info about correlation p-values
Browse files
app.py
CHANGED
|
@@ -297,11 +297,17 @@ def create_scatter_plot(df, x_bench, y_bench, stderr_df=None):
|
|
| 297 |
p = np.poly1d(z)
|
| 298 |
x_line = np.linspace(x_vals.min(), x_vals.max(), 100)
|
| 299 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
fig.add_trace(go.Scatter(
|
| 301 |
x=x_line,
|
| 302 |
y=p(x_line),
|
| 303 |
mode='lines',
|
| 304 |
-
name=f'r = {corr:.3f},
|
| 305 |
line=dict(color='red', dash='dash')
|
| 306 |
))
|
| 307 |
|
|
@@ -533,9 +539,7 @@ def show_overview_dashboard(df, stderr_df):
|
|
| 533 |
with col1:
|
| 534 |
st.markdown("**🔥 Top 5 Highest Correlations**")
|
| 535 |
for i, (bench1, bench2, corr, cat1, cat2) in enumerate(pairs[:5]):
|
| 536 |
-
|
| 537 |
-
st.write(f"{i+1}. {clean_benchmark_name(bench1)} ↔ {clean_benchmark_name(bench2)}")
|
| 538 |
-
st.write(f" r = {corr:.3f} {same_cat}")
|
| 539 |
|
| 540 |
with col2:
|
| 541 |
st.markdown("**📊 Category Analysis**")
|
|
@@ -639,17 +643,27 @@ def show_scatter_explorer(df, stderr_df):
|
|
| 639 |
spearman_r, spearman_p = spearmanr(common_data[x_benchmark], common_data[y_benchmark])
|
| 640 |
kendall_r, kendall_p = kendalltau(common_data[x_benchmark], common_data[y_benchmark])
|
| 641 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 642 |
with col1:
|
| 643 |
st.metric("Pearson r", f"{pearson_r:.3f}")
|
| 644 |
-
st.caption(
|
| 645 |
|
| 646 |
with col2:
|
| 647 |
st.metric("Spearman ρ", f"{spearman_r:.3f}")
|
| 648 |
-
st.caption(
|
| 649 |
|
| 650 |
with col3:
|
| 651 |
st.metric("Kendall τ", f"{kendall_r:.3f}")
|
| 652 |
-
st.caption(
|
|
|
|
|
|
|
|
|
|
| 653 |
|
| 654 |
# Show data table
|
| 655 |
st.subheader("Data Points")
|
|
@@ -1107,11 +1121,17 @@ def show_uncertainty_analysis(df, stderr_df):
|
|
| 1107 |
p = np.poly1d(z)
|
| 1108 |
x_line = np.linspace(x_clean.min(), x_clean.max(), 100)
|
| 1109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1110 |
fig.add_trace(go.Scatter(
|
| 1111 |
x=x_line,
|
| 1112 |
y=p(x_line),
|
| 1113 |
mode='lines',
|
| 1114 |
-
name=f'r = {corr:.3f},
|
| 1115 |
line=dict(color='red', dash='dash')
|
| 1116 |
))
|
| 1117 |
|
|
|
|
| 297 |
p = np.poly1d(z)
|
| 298 |
x_line = np.linspace(x_vals.min(), x_vals.max(), 100)
|
| 299 |
|
| 300 |
+
# Format p-value appropriately
|
| 301 |
+
if p_val < 0.001:
|
| 302 |
+
p_str = f"p < 0.001"
|
| 303 |
+
else:
|
| 304 |
+
p_str = f"p = {p_val:.3f}"
|
| 305 |
+
|
| 306 |
fig.add_trace(go.Scatter(
|
| 307 |
x=x_line,
|
| 308 |
y=p(x_line),
|
| 309 |
mode='lines',
|
| 310 |
+
name=f'r = {corr:.3f}, {p_str}',
|
| 311 |
line=dict(color='red', dash='dash')
|
| 312 |
))
|
| 313 |
|
|
|
|
| 539 |
with col1:
|
| 540 |
st.markdown("**🔥 Top 5 Highest Correlations**")
|
| 541 |
for i, (bench1, bench2, corr, cat1, cat2) in enumerate(pairs[:5]):
|
| 542 |
+
st.write(f"{i+1}. {clean_benchmark_name(bench1)} ↔ {clean_benchmark_name(bench2)} r = {corr:.3f}")
|
|
|
|
|
|
|
| 543 |
|
| 544 |
with col2:
|
| 545 |
st.markdown("**📊 Category Analysis**")
|
|
|
|
| 643 |
spearman_r, spearman_p = spearmanr(common_data[x_benchmark], common_data[y_benchmark])
|
| 644 |
kendall_r, kendall_p = kendalltau(common_data[x_benchmark], common_data[y_benchmark])
|
| 645 |
|
| 646 |
+
# Format p-values appropriately
|
| 647 |
+
def format_pvalue(p):
|
| 648 |
+
if p < 0.001:
|
| 649 |
+
return "p < 0.001"
|
| 650 |
+
else:
|
| 651 |
+
return f"p = {p:.3f}"
|
| 652 |
+
|
| 653 |
with col1:
|
| 654 |
st.metric("Pearson r", f"{pearson_r:.3f}")
|
| 655 |
+
st.caption(format_pvalue(pearson_p))
|
| 656 |
|
| 657 |
with col2:
|
| 658 |
st.metric("Spearman ρ", f"{spearman_r:.3f}")
|
| 659 |
+
st.caption(format_pvalue(spearman_p))
|
| 660 |
|
| 661 |
with col3:
|
| 662 |
st.metric("Kendall τ", f"{kendall_r:.3f}")
|
| 663 |
+
st.caption(format_pvalue(kendall_p))
|
| 664 |
+
|
| 665 |
+
# Add explanation about p-values
|
| 666 |
+
st.info("ℹ️ **P-values < 0.001** indicate very strong statistical significance. This is common with benchmark correlations due to reasonable sample sizes and meaningful relationships.")
|
| 667 |
|
| 668 |
# Show data table
|
| 669 |
st.subheader("Data Points")
|
|
|
|
| 1121 |
p = np.poly1d(z)
|
| 1122 |
x_line = np.linspace(x_clean.min(), x_clean.max(), 100)
|
| 1123 |
|
| 1124 |
+
# Format p-value appropriately
|
| 1125 |
+
if p_val < 0.001:
|
| 1126 |
+
p_str = f"p < 0.001"
|
| 1127 |
+
else:
|
| 1128 |
+
p_str = f"p = {p_val:.3f}"
|
| 1129 |
+
|
| 1130 |
fig.add_trace(go.Scatter(
|
| 1131 |
x=x_line,
|
| 1132 |
y=p(x_line),
|
| 1133 |
mode='lines',
|
| 1134 |
+
name=f'r = {corr:.3f}, {p_str}',
|
| 1135 |
line=dict(color='red', dash='dash')
|
| 1136 |
))
|
| 1137 |
|