Till Fischer
committed on
Commit
·
240324f
1
Parent(s):
cac53d2
Fix NLTK punkt setup and remove redundant downloads
Browse files
- analyze_aspects.py +6 -5
analyze_aspects.py
CHANGED
|
@@ -12,6 +12,11 @@ import nltk
|
|
| 12 |
from transformers import pipeline
|
| 13 |
from collections import defaultdict
|
| 14 |
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
def visualize_aspects(aspect_results: dict[str, list[float]], output_dir: Path, filename: str = "sentiment_aspekte.png"):
|
| 17 |
output_dir.mkdir(parents=True, exist_ok=True)
|
|
@@ -118,11 +123,7 @@ def analyze_quickwin(db_path: Path, isbn: str, device: int = -1, languages: list
|
|
| 118 |
continue
|
| 119 |
|
| 120 |
logger.info(f"Review ID {review_id} ({lang}) wird verarbeitet.")
|
| 121 |
-
|
| 122 |
-
nltk.download('punkt')
|
| 123 |
-
nltk.data.path.append("/home/user/nltk_data")
|
| 124 |
-
os.environ["NLTK_DATA"] = "/home/user/nltk_data"
|
| 125 |
-
|
| 126 |
lang_map = {'de': 'german', 'en': 'english'}
|
| 127 |
sentences = sent_tokenize(text, language=lang_map.get(lang, 'english'))
|
| 128 |
|
|
|
|
| 12 |
from transformers import pipeline
|
| 13 |
from collections import defaultdict
|
| 14 |
import matplotlib.pyplot as plt
|
| 15 |
+
import os
|
| 16 |
+
|
| 17 |
+
nltk.download('punkt', download_dir='/home/user/nltk_data')
|
| 18 |
+
nltk.data.path.append('/home/user/nltk_data')
|
| 19 |
+
os.environ['NLTK_DATA'] = '/home/user/nltk_data'
|
| 20 |
|
| 21 |
def visualize_aspects(aspect_results: dict[str, list[float]], output_dir: Path, filename: str = "sentiment_aspekte.png"):
|
| 22 |
output_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
| 123 |
continue
|
| 124 |
|
| 125 |
logger.info(f"Review ID {review_id} ({lang}) wird verarbeitet.")
|
| 126 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
lang_map = {'de': 'german', 'en': 'english'}
|
| 128 |
sentences = sent_tokenize(text, language=lang_map.get(lang, 'english'))
|
| 129 |
|