# test_integration.py
import os
import sys
import json
from pathlib import Path
from io import StringIO
import contextlib

# Add the project root to Python path for imports
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

# Create a string buffer to capture output
output_buffer = StringIO()

with contextlib.redirect_stdout(output_buffer):
    # Now import modules
    from config.enums import ModelType, Domain, Language
    from config.schemas import ModelConfig, ExtractedDocument, ProcessedText
    from config.constants import document_extraction_params
    from config.model_config import MODEL_REGISTRY, get_model_config
    from config.settings import settings
    from config.threshold_config import get_threshold_for_domain

    print("=" * 70)
    print("CONFIG MODULE INTEGRATION TEST")
    print("=" * 70)

    # Test 1: Enum usage
    print(f"\n✓ Model Types: {[m.value for m in ModelType][:5]}...")

    # Test 2: Schema instantiation
    config = ModelConfig(
        model_id="test",
        model_type=ModelType.TRANSFORMER,
        description="Test",
        size_mb=100
    )
    print(f"✓ Schema instantiation: {config.model_id}")

    # Test 3: Constants usage
    print(f"✓ Max file size: {document_extraction_params.MAX_FILE_SIZE / 1024 / 1024:.1f} MB")

    # Test 4: Model registry
    print(f"✓ Available models: {list(MODEL_REGISTRY.keys())}")

    # Test 5: Settings
    print(f"✓ App name: {settings.APP_NAME}")
    print(f"✓ Environment: {settings.ENVIRONMENT}")
    print(f"✓ Log dir: {settings.LOG_DIR}")
    print(f"✓ Model cache dir: {settings.MODEL_CACHE_DIR}")

    # Test 6: Thresholds
    thresholds = get_threshold_for_domain(Domain.ACADEMIC)
    print(f"✓ Academic thresholds: {thresholds.ensemble_threshold}")

    print("\n" + "=" * 70)
    print("PROCESSORS MODULE INTEGRATION TEST")
    print("=" * 70)

    # Test 7: Document Extractor
    try:
        from processors.document_extractor import DocumentExtractor

        # Create a test text file
        test_text = "This is a test document for integration testing.\n" * 10
        test_file = Path("test_document.txt")

        # Write test file
        test_file.write_text(test_text)

        # Test extractor
        extractor = DocumentExtractor(extract_metadata=True)
        result = extractor.extract(str(test_file))

        print(f"\n✓ Document Extractor Test:")
        print(f" - Success: {result.is_success}")
        print(f" - Text length: {len(result.text)} chars")
        print(f" - File type: {result.file_type}")
        print(f" - Method: {result.extraction_method}")

        # Clean up test file
        test_file.unlink()
    except Exception as e:
        print(f"\n✗ Document Extractor failed: {e}")

    # Test 8: Text Processor
    try:
        # First check if we have the needed constants
        from config.constants import text_processing_params
        print(f"\n✓ Text processing params available")

        from processors.text_processor import TextProcessor

        test_text = "This is a sample text for processing. It contains multiple sentences! " \
                    "Here is another sentence. And one more for testing."

        processor = TextProcessor()
        processed = processor.process(test_text)

        print(f"\n✓ Text Processor Test:")
        print(f" - Is valid: {processed.is_valid}")
        print(f" - Words: {processed.word_count}")
        print(f" - Sentences: {processed.sentence_count}")
        print(f" - Avg sentence length: {processed.avg_sentence_length:.1f}")
        print(f" - Avg word length: {processed.avg_word_length:.1f}")
    except Exception as e:
        print(f"\n✗ Text Processor failed: {e}")
        print(" Note: You need to add TextProcessingParams to constants.py")

    # Test 9: Domain Classifier (without model)
    try:
        from processors.domain_classifier import DomainClassifier, get_domain_name, is_technical_domain

        test_text = "This is a scientific paper about machine learning and artificial intelligence."

        classifier = DomainClassifier()
        print(f"\n✓ Domain Classifier initialized")

        # Note: This will fail if models aren't loaded, but we can test the class structure
        print(f" - Class structure verified")
        print(f" - Domain enum available")

        # Test helper functions
        ai_ml_domain = Domain.AI_ML
        print(f" - AI/ML domain name: {get_domain_name(ai_ml_domain)}")
        print(f" - Is technical domain: {is_technical_domain(ai_ml_domain)}")
    except Exception as e:
        print(f"\n✗ Domain Classifier setup failed: {e}")

    # Test 10: Language Detector (heuristic mode)
    try:
        from processors.language_detector import LanguageDetector

        # Test in English
        english_text = "This is an English text for language detection testing."

        # Use heuristic mode (no model dependency)
        detector = LanguageDetector(use_model=False)
        result = detector.detect(english_text)

        print(f"\n✓ Language Detector Test (heuristic):")
        print(f" - Primary language: {result.primary_language.value}")
        print(f" - Evidence strength: {result.evidence_strength:.2f}")
        print(f" - Method: {result.detection_method}")
        print(f" - Script: {result.script.value}")

        # Test language check
        is_english = detector.is_language(english_text, Language.ENGLISH, threshold=0.5)
        print(f" - Is English check: {is_english}")
    except Exception as e:
        print(f"\n✗ Language Detector failed: {e}")

    print("\n" + "=" * 70)
    print("MODELS MODULE INTEGRATION TEST")
    print("=" * 70)

    # Test 11: Model Registry
    try:
        from models.model_registry import ModelRegistry, get_model_registry

        registry = get_model_registry()
        print(f"\n✓ Model Registry Test:")
        print(f" - Singleton pattern working")
        print(f" - Registry initialized")

        # Test usage tracking
        registry.record_model_usage("test_model", 1.5)
        stats = registry.get_usage_stats("test_model")
        print(f" - Usage tracking: {stats.usage_count if stats else 'N/A'}")

        # Test dependency tracking
        registry.add_dependency("model_b", ["model_a"])
        deps = registry.get_dependencies("model_b")
        print(f" - Dependency tracking: {deps}")

        # Generate report
        report = registry.generate_usage_report()
        print(f" - Report generation: {len(report)} items")

        # Test reset
        registry.reset_usage_stats("test_model")
        print(f" - Reset functionality working")
    except Exception as e:
        print(f"\n✗ Model Registry failed: {e}")

    # Test 12: Model Manager (without actual downloads)
    try:
        from models.model_manager import ModelManager, get_model_manager

        manager = get_model_manager()
        print(f"\n✓ Model Manager Test:")
        print(f" - Singleton pattern working")
        print(f" - Device: {manager.device}")
        print(f" - Cache directory: {manager.cache_dir}")

        # Test metadata
        metadata = manager.metadata
        print(f" - Metadata loaded: {len(metadata)} entries")

        # Test cache
        cache_size = manager.cache.size()
        print(f" - Cache initialized: size {cache_size}")

        # Test model info check
        model_name = list(MODEL_REGISTRY.keys())[0] if MODEL_REGISTRY else "perplexity_reference_lm"
        is_downloaded = manager.is_model_downloaded(model_name)
        print(f" - Model check: {model_name} downloaded={is_downloaded}")

        # Test memory usage
        memory_info = manager.get_memory_usage()
        print(f" - Memory monitoring: {len(memory_info)} metrics")

        # Test model configuration access
        model_config = get_model_config(model_name)
        if model_config:
            print(f" - Model config access: {model_config.model_id}")
    except Exception as e:
        print(f"\n✗ Model Manager failed: {e}")

    # Test 13: Integration between models and config
    try:
        print(f"\n✓ Config-Models Integration Test:")

        # Check model config from registry
        for model_name, config in MODEL_REGISTRY.items():
            if config.required:
                print(f" - {model_name}: {config.model_type.value}")
                break

        # Check settings integration
        print(f" - Max cached models from settings: {settings.MAX_CACHED_MODELS}")
        print(f" - Use quantization from settings: {settings.USE_QUANTIZATION}")
    except Exception as e:
        print(f"\n✗ Config-Models integration failed: {e}")

    # Test 14: End-to-End System Integration
    try:
        print("\n" + "=" * 70)
        print("FULL SYSTEM INTEGRATION TEST")
        print("=" * 70)

        # Create a test scenario
        sample_text = """
        Machine learning is a subset of artificial intelligence.
        It involves algorithms that learn patterns from data.
        Deep learning uses neural networks with multiple layers.
        """

        # 1. Process text
        from processors.text_processor import TextProcessor
        processor = TextProcessor()
        processed = processor.process(sample_text)
        print(f"✓ 1. Text Processing Complete:")
        print(f" - Cleaned text: {len(processed.cleaned_text)} chars")
        print(f" - Valid: {processed.is_valid}")

        # 2. Detect language
        from processors.language_detector import LanguageDetector
        detector = LanguageDetector(use_model=False)
        lang_result = detector.detect(processed.cleaned_text)
        print(f"\n✓ 2. Language Detection Complete:")
        print(f" - Language: {lang_result.primary_language.value}")
        print(f" - Script: {lang_result.script.value}")

        # 3. Domain classification structure
        from processors.domain_classifier import get_domain_name, is_technical_domain
        ai_ml_domain = Domain.AI_ML
        print(f"\n✓ 3. Domain System Ready:")
        print(f" - Domain enum: {ai_ml_domain.value}")
        print(f" - Human name: {get_domain_name(ai_ml_domain)}")
        print(f" - Is technical: {is_technical_domain(ai_ml_domain)}")

        # 4. Model management
        from models.model_manager import get_model_manager
        from models.model_registry import get_model_registry
        model_manager = get_model_manager()
        model_registry = get_model_registry()
        print(f"\n✓ 4. Model Management Ready:")
        print(f" - Manager: {type(model_manager).__name__}")
        print(f" - Registry: {type(model_registry).__name__}")
        print(f" - Cache dir exists: {model_manager.cache_dir.exists()}")

        # 5. Settings integration
        print(f"\n✓ 5. Settings Integration:")
        print(f" - App: {settings.APP_NAME} v{settings.APP_VERSION}")
        print(f" - Environment: {settings.ENVIRONMENT}")
        print(f" - Debug: {settings.DEBUG}")

        print(f"\n🎯 FULL SYSTEM INTEGRATION SUCCESSFUL!")
    except Exception as e:
        print(f"\n✗ Full system integration failed: {e}")
        import traceback
        print(traceback.format_exc())

    print("\n" + "=" * 70)
    print("TEST COMPLETED")
    print("=" * 70)

# Get the captured output
output_text = output_buffer.getvalue()

# Print the output
print(output_text)

# Count successes and failures
success_count = sum(1 for line in output_text.split('\n') if '✓' in line)
failure_count = sum(1 for line in output_text.split('\n') if '✗' in line)

print(f"Successes: {success_count}")
print(f"Failures: {failure_count}")

if failure_count == 0:
    print("\n🎉 ALL TESTS PASSED! Complete system is properly integrated.")
else:
    print(f"\n⚠️ {failure_count} tests failed. Check the issues above.")