engine / refinement /old /explore_data.py
VeuReu's picture
Upload 207 files
6bcbd7c verified
raw
history blame
1.87 kB
import sqlite3
from pathlib import Path
DB_PATH = Path(__file__).resolve().parents[2] / "demo" / "data" / "audiodescriptions.db"
def main() -> None:
print(f"DB path: {DB_PATH} exists={DB_PATH.exists()}")
if not DB_PATH.exists():
return
conn = sqlite3.connect(str(DB_PATH))
conn.row_factory = sqlite3.Row
cur = conn.cursor()
# Listar tablas de usuario
cur.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'"
)
tables = [r[0] for r in cur.fetchall()]
print("\nTABLAS EN audiodescriptions.db:")
for t in tables:
print(f" - {t}")
print("\nRESUMEN POR TABLA Y COLUMNA:\n")
for table in tables:
print(f"=== Tabla: {table} ===")
# Número total de filas
cur.execute(f"SELECT COUNT(*) AS n FROM {table}")
row_count = cur.fetchone()["n"]
print(f"Filas totales: {row_count}")
# Información de columnas
cur.execute(f"PRAGMA table_info({table})")
cols = [r[1] for r in cur.fetchall()]
for col in cols:
# Distintos valores (incluyendo NULL y vacíos)
cur.execute(f"SELECT COUNT(DISTINCT {col}) AS n_distinct FROM {table}")
n_distinct = cur.fetchone()["n_distinct"]
# Missing: NULL o cadena vacía (tras TRIM)
cur.execute(
f"SELECT COUNT(*) AS n_missing FROM {table} "
f"WHERE {col} IS NULL OR (typeof({col}) = 'text' AND TRIM({col}) = '')"
)
n_missing = cur.fetchone()["n_missing"]
print(
f" - Columna '{col}': distintos={n_distinct}, "
f"missing (NULL o '')={n_missing}"
)
print("")
conn.close()
if __name__ == "__main__":
main()