# test_integration.py
import os
import sys
import json
from pathlib import Path
from io import StringIO
import contextlib

# Add the project root to Python path for imports
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

# Create a string buffer to capture output
output_buffer = StringIO()
with contextlib.redirect_stdout(output_buffer):
    # Now import modules
    from config.enums import ModelType, Domain, Language
    from config.schemas import ModelConfig, ExtractedDocument, ProcessedText
    from config.constants import document_extraction_params
    from config.model_config import MODEL_REGISTRY, get_model_config
    from config.settings import settings
    from config.threshold_config import get_threshold_for_domain

    print("=" * 70)
    print("CONFIG MODULE INTEGRATION TEST")
    print("=" * 70)

    # Test 1: Enum usage
    print(f"\nβœ“ Model Types: {[m.value for m in ModelType][:5]}...")

    # Test 2: Schema instantiation
    config = ModelConfig(
        model_id="test",
        model_type=ModelType.TRANSFORMER,
        description="Test",
        size_mb=100
    )
    print(f"βœ“ Schema instantiation: {config.model_id}")

    # Test 3: Constants usage
    print(f"βœ“ Max file size: {document_extraction_params.MAX_FILE_SIZE / 1024 / 1024:.1f} MB")

    # Test 4: Model registry
    print(f"βœ“ Available models: {list(MODEL_REGISTRY.keys())}")

    # Test 5: Settings
    print(f"βœ“ App name: {settings.APP_NAME}")
    print(f"βœ“ Environment: {settings.ENVIRONMENT}")
    print(f"βœ“ Log dir: {settings.LOG_DIR}")
    print(f"βœ“ Model cache dir: {settings.MODEL_CACHE_DIR}")

    # Test 6: Thresholds
    thresholds = get_threshold_for_domain(Domain.ACADEMIC)
    print(f"βœ“ Academic thresholds: {thresholds.ensemble_threshold}")
print("\n" + "=" * 70)
print("PROCESSORS MODULE INTEGRATION TEST")
print("=" * 70)
# Test 7: Document Extractor
try:
from processors.document_extractor import DocumentExtractor
# Create a test text file
test_text = "This is a test document for integration testing.\n" * 10
test_file = Path("test_document.txt")
# Write test file
test_file.write_text(test_text)
# Test extractor
extractor = DocumentExtractor(extract_metadata=True)
result = extractor.extract(str(test_file))
print(f"\nβœ“ Document Extractor Test:")
print(f" - Success: {result.is_success}")
print(f" - Text length: {len(result.text)} chars")
print(f" - File type: {result.file_type}")
print(f" - Method: {result.extraction_method}")
# Clean up test file
test_file.unlink()
except Exception as e:
print(f"\nβœ— Document Extractor failed: {e}")
# Test 8: Text Processor
try:
# First check if we have the needed constants
from config.constants import text_processing_params
print(f"\nβœ“ Text processing params available")
from processors.text_processor import TextProcessor
test_text = "This is a sample text for processing. It contains multiple sentences! " \
"Here is another sentence. And one more for testing."
processor = TextProcessor()
processed = processor.process(test_text)
print(f"\nβœ“ Text Processor Test:")
print(f" - Is valid: {processed.is_valid}")
print(f" - Words: {processed.word_count}")
print(f" - Sentences: {processed.sentence_count}")
print(f" - Avg sentence length: {processed.avg_sentence_length:.1f}")
print(f" - Avg word length: {processed.avg_word_length:.1f}")
except Exception as e:
print(f"\nβœ— Text Processor failed: {e}")
print(" Note: You need to add TextProcessingParams to constants.py")
# Test 9: Domain Classifier (without model)
try:
from processors.domain_classifier import DomainClassifier, get_domain_name, is_technical_domain
test_text = "This is a scientific paper about machine learning and artificial intelligence."
classifier = DomainClassifier()
print(f"\nβœ“ Domain Classifier initialized")
# Note: This will fail if models aren't loaded, but we can test the class structure
print(f" - Class structure verified")
print(f" - Domain enum available")
# Test helper functions
ai_ml_domain = Domain.AI_ML
print(f" - AI/ML domain name: {get_domain_name(ai_ml_domain)}")
print(f" - Is technical domain: {is_technical_domain(ai_ml_domain)}")
except Exception as e:
print(f"\nβœ— Domain Classifier setup failed: {e}")
# Test 10: Language Detector (heuristic mode)
try:
from processors.language_detector import LanguageDetector
# Test in English
english_text = "This is an English text for language detection testing."
# Use heuristic mode (no model dependency)
detector = LanguageDetector(use_model=False)
result = detector.detect(english_text)
print(f"\nβœ“ Language Detector Test (heuristic):")
print(f" - Primary language: {result.primary_language.value}")
print(f" - Evidence strength: {result.evidence_strength:.2f}")
print(f" - Method: {result.detection_method}")
print(f" - Script: {result.script.value}")
# Test language check
is_english = detector.is_language(english_text, Language.ENGLISH, threshold=0.5)
print(f" - Is English check: {is_english}")
except Exception as e:
print(f"\nβœ— Language Detector failed: {e}")
print("\n" + "=" * 70)
print("MODELS MODULE INTEGRATION TEST")
print("=" * 70)
# Test 11: Model Registry
try:
from models.model_registry import ModelRegistry, get_model_registry
registry = get_model_registry()
print(f"\nβœ“ Model Registry Test:")
print(f" - Singleton pattern working")
print(f" - Registry initialized")
# Test usage tracking
registry.record_model_usage("test_model", 1.5)
stats = registry.get_usage_stats("test_model")
print(f" - Usage tracking: {stats.usage_count if stats else 'N/A'}")
# Test dependency tracking
registry.add_dependency("model_b", ["model_a"])
deps = registry.get_dependencies("model_b")
print(f" - Dependency tracking: {deps}")
# Generate report
report = registry.generate_usage_report()
print(f" - Report generation: {len(report)} items")
# Test reset
registry.reset_usage_stats("test_model")
print(f" - Reset functionality working")
except Exception as e:
print(f"\nβœ— Model Registry failed: {e}")
# Test 12: Model Manager (without actual downloads)
try:
from models.model_manager import ModelManager, get_model_manager
manager = get_model_manager()
print(f"\nβœ“ Model Manager Test:")
print(f" - Singleton pattern working")
print(f" - Device: {manager.device}")
print(f" - Cache directory: {manager.cache_dir}")
# Test metadata
metadata = manager.metadata
print(f" - Metadata loaded: {len(metadata)} entries")
# Test cache
cache_size = manager.cache.size()
print(f" - Cache initialized: size {cache_size}")
# Test model info check
model_name = list(MODEL_REGISTRY.keys())[0] if MODEL_REGISTRY else "perplexity_reference_lm"
is_downloaded = manager.is_model_downloaded(model_name)
print(f" - Model check: {model_name} downloaded={is_downloaded}")
# Test memory usage
memory_info = manager.get_memory_usage()
print(f" - Memory monitoring: {len(memory_info)} metrics")
# Test model configuration access
model_config = get_model_config(model_name)
if model_config:
print(f" - Model config access: {model_config.model_id}")
except Exception as e:
print(f"\nβœ— Model Manager failed: {e}")
# Test 13: Integration between models and config
try:
print(f"\nβœ“ Config-Models Integration Test:")
# Check model config from registry
for model_name, config in MODEL_REGISTRY.items():
if config.required:
print(f" - {model_name}: {config.model_type.value}")
break
# Check settings integration
print(f" - Max cached models from settings: {settings.MAX_CACHED_MODELS}")
print(f" - Use quantization from settings: {settings.USE_QUANTIZATION}")
except Exception as e:
print(f"\nβœ— Config-Models integration failed: {e}")
# Test 14: End-to-End System Integration
try:
print(f"\n" + "=" * 70)
print("FULL SYSTEM INTEGRATION TEST")
print("=" * 70)
# Create a test scenario
sample_text = """
Machine learning is a subset of artificial intelligence.
It involves algorithms that learn patterns from data.
Deep learning uses neural networks with multiple layers.
"""
# 1. Process text
from processors.text_processor import TextProcessor
processor = TextProcessor()
processed = processor.process(sample_text)
print(f"βœ“ 1. Text Processing Complete:")
print(f" - Cleaned text: {len(processed.cleaned_text)} chars")
print(f" - Valid: {processed.is_valid}")
# 2. Detect language
from processors.language_detector import LanguageDetector
detector = LanguageDetector(use_model=False)
lang_result = detector.detect(processed.cleaned_text)
print(f"\nβœ“ 2. Language Detection Complete:")
print(f" - Language: {lang_result.primary_language.value}")
print(f" - Script: {lang_result.script.value}")
# 3. Domain classification structure
from processors.domain_classifier import get_domain_name, is_technical_domain
ai_ml_domain = Domain.AI_ML
print(f"\nβœ“ 3. Domain System Ready:")
print(f" - Domain enum: {ai_ml_domain.value}")
print(f" - Human name: {get_domain_name(ai_ml_domain)}")
print(f" - Is technical: {is_technical_domain(ai_ml_domain)}")
# 4. Model management
from models.model_manager import get_model_manager
from models.model_registry import get_model_registry
model_manager = get_model_manager()
model_registry = get_model_registry()
print(f"\nβœ“ 4. Model Management Ready:")
print(f" - Manager: {type(model_manager).__name__}")
print(f" - Registry: {type(model_registry).__name__}")
print(f" - Cache dir exists: {model_manager.cache_dir.exists()}")
# 5. Settings integration
print(f"\nβœ“ 5. Settings Integration:")
print(f" - App: {settings.APP_NAME} v{settings.APP_VERSION}")
print(f" - Environment: {settings.ENVIRONMENT}")
print(f" - Debug: {settings.DEBUG}")
print(f"\n🎯 FULL SYSTEM INTEGRATION SUCCESSFUL!")
except Exception as e:
print(f"\nβœ— Full system integration failed: {e}")
import traceback
print(traceback.format_exc())
print("\n" + "=" * 70)
print("TEST COMPLETED")
print("=" * 70)
# Get the captured output
output_text = output_buffer.getvalue()
# Print the output
print(output_text)
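
# Optional: also persist the captured report next to this script so a run can be
# inspected after the console output scrolls away (the file name is an arbitrary choice).
report_path = Path(__file__).with_name("integration_test_report.txt")
report_path.write_text(output_text, encoding="utf-8")
print(f"Report written to {report_path}")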
# Count successes and failures
success_count = sum(1 for line in output_text.split('\n') if 'βœ“' in line)
failure_count = sum(1 for line in output_text.split('\n') if 'βœ—' in line)
print(f"Successes: {success_count}")
print(f"Failures: {failure_count}")
if failure_count == 0:
    print("\nπŸŽ‰ ALL TESTS PASSED! Complete system is properly integrated.")
else:
    print(f"\n⚠️ {failure_count} tests failed. Check the issues above.")