fix: revert original file extension
Browse files
- docsifer/service.py +7 -1
- requirements.txt +5 -7
docsifer/service.py
CHANGED
|
@@ -4,6 +4,7 @@ from __future__ import annotations
|
|
| 4 |
|
| 5 |
import logging
|
| 6 |
import tempfile
|
|
|
|
| 7 |
from pathlib import Path
|
| 8 |
from typing import Optional, Dict, Tuple, Any
|
| 9 |
|
|
@@ -107,7 +108,12 @@ class DocsiferService:
|
|
| 107 |
|
| 108 |
# Use a temp directory so MarkItDown sees the real file extension
|
| 109 |
with tempfile.TemporaryDirectory() as tmpdir:
|
| 110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
tmp_path.write_bytes(src.read_bytes())
|
| 112 |
|
| 113 |
# If it's HTML and cleanup is requested
|
|
|
|
| 4 |
|
| 5 |
import logging
|
| 6 |
import tempfile
|
| 7 |
+
import filetype
|
| 8 |
from pathlib import Path
|
| 9 |
from typing import Optional, Dict, Tuple, Any
|
| 10 |
|
|
|
|
| 108 |
|
| 109 |
# Use a temp directory so MarkItDown sees the real file extension
|
| 110 |
with tempfile.TemporaryDirectory() as tmpdir:
|
| 111 |
+
kind = filetype.guess(str(src))
|
| 112 |
+
if kind is None:
|
| 113 |
+
new_filename = src.name
|
| 114 |
+
else:
|
| 115 |
+
new_filename = f"{src.stem}.{kind.extension}"
|
| 116 |
+
tmp_path = Path(tmpdir) / new_filename
|
| 117 |
tmp_path.write_bytes(src.read_bytes())
|
| 118 |
|
| 119 |
# If it's HTML and cleanup is requested
|
requirements.txt
CHANGED
|
@@ -6,11 +6,9 @@ pydantic
|
|
| 6 |
cachetools
|
| 7 |
upstash_redis==1.2.0
|
| 8 |
markitdown
|
| 9 |
-
openai
|
| 10 |
-
pyquery
|
| 11 |
-
tiktoken
|
| 12 |
scuid
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
matplotlib
|
| 16 |
-
aiohttp
|
|
|
|
| 6 |
cachetools
|
| 7 |
upstash_redis==1.2.0
|
| 8 |
markitdown
|
| 9 |
+
openai==1.59.7
|
| 10 |
+
pyquery==2.0.1
|
| 11 |
+
tiktoken==0.8.0
|
| 12 |
scuid
|
| 13 |
+
aiohttp==3.11.11
|
| 14 |
+
filetype==1.2.0
|
|
|
|
|
|