# test_integration.py
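"""Integration smoke test for the config, processors, and models packages.

Each package is imported inside a stdout redirect and its basic API is
exercised; the captured output is then replayed and the βœ“ / βœ— check lines
are counted. The script assumes it lives one directory below the project
root and adds that root to sys.path so the packages import directly.
"""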
import sys
from pathlib import Path
from io import StringIO
import contextlib

# Add the project root to Python path for imports
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

# Create a string buffer to capture output
output_buffer = StringIO()

with contextlib.redirect_stdout(output_buffer):
    # Import project modules inside the redirect so any import-time output is captured too
    from config.enums import ModelType, Domain, Language
    from config.schemas import ModelConfig, ExtractedDocument, ProcessedText
    from config.constants import document_extraction_params
    from config.model_config import MODEL_REGISTRY, get_model_config
    from config.settings import settings
    from config.threshold_config import get_threshold_for_domain

    print("=" * 70)
    print("CONFIG MODULE INTEGRATION TEST")
    print("=" * 70)

    # Test 1: Enum usage
    print(f"\nβœ“ Model Types: {[m.value for m in ModelType][:5]}...")

    # Test 2: Schema instantiation
    config = ModelConfig(
        model_id="test",
        model_type=ModelType.TRANSFORMER,
        description="Test",
        size_mb=100
    )
    print(f"βœ“ Schema instantiation: {config.model_id}")

    # Test 3: Constants usage
    print(f"βœ“ Max file size: {document_extraction_params.MAX_FILE_SIZE / 1024 / 1024:.1f} MB")

    # Test 4: Model registry
    print(f"βœ“ Available models: {list(MODEL_REGISTRY.keys())}")

    # Test 5: Settings
    print(f"βœ“ App name: {settings.APP_NAME}")
    print(f"βœ“ Environment: {settings.ENVIRONMENT}")
    print(f"βœ“ Log dir: {settings.LOG_DIR}")
    print(f"βœ“ Model cache dir: {settings.MODEL_CACHE_DIR}")

    # Test 6: Thresholds
    thresholds = get_threshold_for_domain(Domain.ACADEMIC)
    print(f"βœ“ Academic thresholds: {thresholds.ensemble_threshold}")

    print("\n" + "=" * 70)
    print("PROCESSORS MODULE INTEGRATION TEST")
    print("=" * 70)

    # Test 7: Document Extractor
    try:
        from processors.document_extractor import DocumentExtractor

        # Create a temporary text file to extract from
        test_text = "This is a test document for integration testing.\n" * 10
        test_file = Path("test_document.txt")
        test_file.write_text(test_text)

        try:
            # Run the extractor on the temporary file
            extractor = DocumentExtractor(extract_metadata=True)
            result = extractor.extract(str(test_file))

            print("\nβœ“ Document Extractor Test:")
            print(f"  - Success: {result.is_success}")
            print(f"  - Text length: {len(result.text)} chars")
            print(f"  - File type: {result.file_type}")
            print(f"  - Method: {result.extraction_method}")
        finally:
            # Remove the temporary file even if extraction fails
            test_file.unlink(missing_ok=True)

    except Exception as e:
        print(f"\nβœ— Document Extractor failed: {e}")

    # Test 8: Text Processor
    try:
        # First check if we have the needed constants
        from config.constants import text_processing_params
        print(f"\nβœ“ Text processing params available")
        
        from processors.text_processor import TextProcessor
        
        test_text = "This is a sample text for processing. It contains multiple sentences! " \
                    "Here is another sentence. And one more for testing."
        
        processor = TextProcessor()
        processed = processor.process(test_text)
        
        print(f"\nβœ“ Text Processor Test:")
        print(f"  - Is valid: {processed.is_valid}")
        print(f"  - Words: {processed.word_count}")
        print(f"  - Sentences: {processed.sentence_count}")
        print(f"  - Avg sentence length: {processed.avg_sentence_length:.1f}")
        print(f"  - Avg word length: {processed.avg_word_length:.1f}")
        
    except Exception as e:
        print(f"\nβœ— Text Processor failed: {e}")
        print("  Note: You need to add TextProcessingParams to constants.py")

    # Test 9: Domain Classifier (without model)
    try:
        from processors.domain_classifier import DomainClassifier, get_domain_name, is_technical_domain
        
        test_text = "This is a scientific paper about machine learning and artificial intelligence."
        
        classifier = DomainClassifier()
        print(f"\nβœ“ Domain Classifier initialized")
        
        # Note: This will fail if models aren't loaded, but we can test the class structure
        print(f"  - Class structure verified")
        print(f"  - Domain enum available")
        
        # Test helper functions
        ai_ml_domain = Domain.AI_ML
        print(f"  - AI/ML domain name: {get_domain_name(ai_ml_domain)}")
        print(f"  - Is technical domain: {is_technical_domain(ai_ml_domain)}")
        
    except Exception as e:
        print(f"\nβœ— Domain Classifier setup failed: {e}")

    # Test 10: Language Detector (heuristic mode)
    try:
        from processors.language_detector import LanguageDetector
        
        # Test in English
        english_text = "This is an English text for language detection testing."
        
        # Use heuristic mode (no model dependency)
        detector = LanguageDetector(use_model=False)
        result = detector.detect(english_text)
        
        print(f"\nβœ“ Language Detector Test (heuristic):")
        print(f"  - Primary language: {result.primary_language.value}")
        print(f"  - Evidence strength: {result.evidence_strength:.2f}")
        print(f"  - Method: {result.detection_method}")
        print(f"  - Script: {result.script.value}")
        
        # Test language check
        is_english = detector.is_language(english_text, Language.ENGLISH, threshold=0.5)
        print(f"  - Is English check: {is_english}")
        
    except Exception as e:
        print(f"\nβœ— Language Detector failed: {e}")

    print("\n" + "=" * 70)
    print("MODELS MODULE INTEGRATION TEST")
    print("=" * 70)

    # Test 11: Model Registry
    try:
        from models.model_registry import ModelRegistry, get_model_registry
        
        registry = get_model_registry()
        
        print(f"\nβœ“ Model Registry Test:")
        print(f"  - Singleton pattern working")
        print(f"  - Registry initialized")
        
        # Test usage tracking
        registry.record_model_usage("test_model", 1.5)
        stats = registry.get_usage_stats("test_model")
        print(f"  - Usage tracking: {stats.usage_count if stats else 'N/A'}")
        
        # Test dependency tracking
        registry.add_dependency("model_b", ["model_a"])
        deps = registry.get_dependencies("model_b")
        print(f"  - Dependency tracking: {deps}")
        
        # Generate report
        report = registry.generate_usage_report()
        print(f"  - Report generation: {len(report)} items")
        
        # Test reset
        registry.reset_usage_stats("test_model")
        print(f"  - Reset functionality working")
        
    except Exception as e:
        print(f"\nβœ— Model Registry failed: {e}")

    # Test 12: Model Manager (without actual downloads)
    try:
        from models.model_manager import ModelManager, get_model_manager
        
        manager = get_model_manager()
        
        print(f"\nβœ“ Model Manager Test:")
        print(f"  - Singleton pattern working")
        print(f"  - Device: {manager.device}")
        print(f"  - Cache directory: {manager.cache_dir}")
        
        # Test metadata
        metadata = manager.metadata
        print(f"  - Metadata loaded: {len(metadata)} entries")
        
        # Test cache
        cache_size = manager.cache.size()
        print(f"  - Cache initialized: size {cache_size}")
        
        # Test model info check
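        # Pick the first registered model; fall back to a hard-coded default name if the registry is empty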
        model_name = list(MODEL_REGISTRY.keys())[0] if MODEL_REGISTRY else "perplexity_reference_lm"
        is_downloaded = manager.is_model_downloaded(model_name)
        print(f"  - Model check: {model_name} downloaded={is_downloaded}")
        
        # Test memory usage
        memory_info = manager.get_memory_usage()
        print(f"  - Memory monitoring: {len(memory_info)} metrics")
        
        # Test model configuration access
        model_config = get_model_config(model_name)
        if model_config:
            print(f"  - Model config access: {model_config.model_id}")
        
    except Exception as e:
        print(f"\nβœ— Model Manager failed: {e}")

    # Test 13: Integration between models and config
    try:
        print(f"\nβœ“ Config-Models Integration Test:")
        
        # Check model config from registry
        for model_name, config in MODEL_REGISTRY.items():
            if config.required:
                print(f"  - {model_name}: {config.model_type.value}")
                break
        
        # Check settings integration
        print(f"  - Max cached models from settings: {settings.MAX_CACHED_MODELS}")
        print(f"  - Use quantization from settings: {settings.USE_QUANTIZATION}")
        
    except Exception as e:
        print(f"\nβœ— Config-Models integration failed: {e}")

    # Test 14: End-to-End System Integration
    try:
        print(f"\n" + "=" * 70)
        print("FULL SYSTEM INTEGRATION TEST")
        print("=" * 70)
        
        # Create a test scenario
        sample_text = """
        Machine learning is a subset of artificial intelligence. 
        It involves algorithms that learn patterns from data.
        Deep learning uses neural networks with multiple layers.
        """
        
        # 1. Process text
        from processors.text_processor import TextProcessor
        processor = TextProcessor()
        processed = processor.process(sample_text)
        
        print(f"βœ“ 1. Text Processing Complete:")
        print(f"   - Cleaned text: {len(processed.cleaned_text)} chars")
        print(f"   - Valid: {processed.is_valid}")
        
        # 2. Detect language
        from processors.language_detector import LanguageDetector
        detector = LanguageDetector(use_model=False)
        lang_result = detector.detect(processed.cleaned_text)
        
        print(f"\nβœ“ 2. Language Detection Complete:")
        print(f"   - Language: {lang_result.primary_language.value}")
        print(f"   - Script: {lang_result.script.value}")
        
        # 3. Domain classification structure
        from processors.domain_classifier import get_domain_name, is_technical_domain
        ai_ml_domain = Domain.AI_ML
        
        print(f"\nβœ“ 3. Domain System Ready:")
        print(f"   - Domain enum: {ai_ml_domain.value}")
        print(f"   - Human name: {get_domain_name(ai_ml_domain)}")
        print(f"   - Is technical: {is_technical_domain(ai_ml_domain)}")
        
        # 4. Model management
        from models.model_manager import get_model_manager
        from models.model_registry import get_model_registry
        
        model_manager = get_model_manager()
        model_registry = get_model_registry()
        
        print(f"\nβœ“ 4. Model Management Ready:")
        print(f"   - Manager: {type(model_manager).__name__}")
        print(f"   - Registry: {type(model_registry).__name__}")
        print(f"   - Cache dir exists: {model_manager.cache_dir.exists()}")
        
        # 5. Settings integration
        print(f"\nβœ“ 5. Settings Integration:")
        print(f"   - App: {settings.APP_NAME} v{settings.APP_VERSION}")
        print(f"   - Environment: {settings.ENVIRONMENT}")
        print(f"   - Debug: {settings.DEBUG}")
        
        print(f"\n🎯 FULL SYSTEM INTEGRATION SUCCESSFUL!")
        
    except Exception as e:
        print(f"\nβœ— Full system integration failed: {e}")
        import traceback
        print(traceback.format_exc())

    print("\n" + "=" * 70)
    print("TEST COMPLETED")
    print("=" * 70)

# Get the captured output
output_text = output_buffer.getvalue()

# Print the output
print(output_text)

# Count passing and failing checks by scanning the output for βœ“ / βœ— marker lines
# (a single test may print several βœ“ lines, so this counts checks, not tests)
success_count = sum(1 for line in output_text.split('\n') if 'βœ“' in line)
failure_count = sum(1 for line in output_text.split('\n') if 'βœ—' in line)

print(f"Successful checks: {success_count}")
print(f"Failed checks: {failure_count}")

if failure_count == 0:
    print("\nπŸŽ‰ ALL CHECKS PASSED! The complete system is properly integrated.")
else:
    print(f"\n⚠️  {failure_count} checks failed. Review the failures above.")