Upload 3 files
Files changed:
- modules/latex2bbox_color.py  +61 -25
- modules/latex_processor.py   +71 -26
- modules/visual_matcher.py    +53 -35
modules/latex2bbox_color.py
CHANGED
@@ -1,5 +1,6 @@
 import os
 import re
+import cv2
 import json
 import shutil
 import logging
@@ -69,6 +70,33 @@ formular_template = r"""
 \end{document}
 """
 
+formular_template_zh = r"""
+\documentclass[12pt]{article}
+\usepackage[landscape]{geometry}
+\usepackage{geometry}
+\geometry{a<PaperSize>paper,scale=0.98}
+\pagestyle{empty}
+\usepackage{booktabs}
+\usepackage{amsmath}
+\usepackage{upgreek}
+\usepackage{CJK}
+\usepackage{amssymb}
+\usepackage{xcolor}
+\begin{document}
+\makeatletter
+\renewcommand*{\@textcolor}[3]{%%
+  \protect\leavevmode
+  \begingroup
+  \color#1{#2}#3%%
+  \endgroup
+}
+\makeatother
+\begin{CJK}{UTF8}{gkai}
+%s
+\end{CJK}
+\end{document}
+"""
+
 
 def run_cmd(cmd, timeout_sec=30):
     proc = subprocess.Popen(cmd, shell=True)
@@ -101,37 +129,41 @@ def crop_image(image_path, pad=8):
 
     img = Image.open(image_path).convert("RGB").crop((x_min-pad, y_min-pad, x_max+pad, y_max+pad))
     img.save(image_path)
 
 def extrac_bbox_from_color_image(image_path, color_list):
-    img = ...  # old PIL-based open, truncated in the page capture
-    W, H = img.size
-    pixels = list(img.getdata())
-
+    img = cv2.imread(image_path)
     bbox_list = []
     for target_color in color_list:
-        ...  # old per-pixel scan (about ten lines), truncated in the page capture
-        except:
+        r, g, b = target_color
+        target_rgb = np.array([b, g, r], dtype=np.uint8)
+        mask = np.all(img == target_rgb, axis=2)
+        coords = np.argwhere(mask)
+        if coords.size > 0:
+            x_min, y_min = coords[:, 1].min(), coords[:, 0].min()
+            x_max, y_max = coords[:, 1].max(), coords[:, 0].max()
+            bbox_list.append([int(x_min-1), int(y_min-1), int(x_max+1), int(y_max+1)])
+        else:
             bbox_list.append([])
-
-    img = img.convert("L")
+
+    img = Image.open(image_path).convert("RGB").convert("L")
     img_bw = img.point(lambda x: 255 if x == 255 else 0, '1')
     img_bw.convert("RGB").save(image_path)
     return bbox_list
 
+def contains_chinese(text):
+    # regex range that matches Chinese characters
+    return re.search(r'[\u4e00-\u9fff]', text) is not None
+
 def latex2bbox_color(input_arg):
     latex, basename, output_path, temp_dir, total_color_list = input_arg
+    if "tabular" in latex:
+        template = tabular_template
+    else:
+        if contains_chinese(latex):
+            template = formular_template_zh
+            latex = latex.replace(",", ", ").replace(":", ": ").replace(";", "; ")
+        else:
+            template = formular_template
     output_bbox_path = os.path.join(output_path, 'bbox', basename+'.jsonl')
     output_vis_path = os.path.join(output_path, 'vis', basename+'.png')
     output_base_path = os.path.join(output_path, 'vis', basename+'_base.png')
@@ -140,6 +172,7 @@ def latex2bbox_color(input_arg):
         return
 
     try:
+        latex = latex.replace("\n", " ")
         ret, new_latex = tokenize_latex(latex, middle_file=os.path.join(temp_dir, basename+'.txt'))
         if not(ret and new_latex):
            log = f"ERROR, Tokenize latex failed: {basename}."
@@ -164,7 +197,7 @@ def latex2bbox_color(input_arg):
             paper_size = 4
         else:
             paper_size = 5
-        final_latex = ...  # old template fill, truncated in the page capture
+        final_latex = template.replace("<PaperSize>", str(paper_size)) % rgb_latex
 
     except Exception as e:
         log = f"ERROR, Preprocess latex failed: {basename}; {e}."
@@ -198,18 +231,21 @@ def latex2bbox_color(input_arg):
     vis = Image.open(output_base_path)
     draw = ImageDraw.Draw(vis)
 
-    with open(output_bbox_path, 'w') as f:
+    with open(output_bbox_path, 'w', encoding='utf-8') as f:
         for token, box in zip(token_list, bbox_list):
             item = {
                 "bbox": box,
                 "token": token
            }
-            f.write(json.dumps(item)+'\n')
+            f.write(json.dumps(item, ensure_ascii=False)+'\n')
 
             if not box:
                 continue
             x_min, y_min, x_max, y_max = box
             draw.rectangle([x_min, y_min, x_max, y_max], fill=None, outline=(0,250,0), width=1)
+            try:
+                draw.text((x_min, y_min), token, (250,0,0))
+            except:
+                pass
 
     vis.save(output_vis_path)
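Note: the key change in extrac_bbox_from_color_image is replacing a per-pixel PIL scan with a vectorized NumPy equality mask over the cv2 image (BGR channel order). A minimal standalone sketch of that technique follows; bbox_of_color is a hypothetical helper name, not a name from the commit.

import numpy as np

def bbox_of_color(img_bgr, rgb):
    # cv2 loads images as BGR, so reverse the RGB triple before comparing.
    r, g, b = rgb
    mask = np.all(img_bgr == np.array([b, g, r], dtype=np.uint8), axis=2)
    coords = np.argwhere(mask)          # (row, col) indices of matching pixels
    if coords.size == 0:
        return []                       # color never rendered: empty bbox
    y_min, x_min = coords.min(axis=0)
    y_max, x_max = coords.max(axis=0)
    # expand by one pixel on each side, matching the diff
    return [int(x_min) - 1, int(y_min) - 1, int(x_max) + 1, int(y_max) + 1]

img = np.zeros((4, 4, 3), dtype=np.uint8)
img[1:3, 1:3] = (0, 0, 255)             # a 2x2 pure-red block, in BGR
print(bbox_of_color(img, (255, 0, 0)))  # -> [0, 0, 3, 3]

Exact color equality is reliable here because the pipeline renders each token in a unique RGB color and then binarizes the image back to black-and-white afterwards.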
modules/latex_processor.py
CHANGED
@@ -8,14 +8,14 @@ from PIL import Image
 
 
 SKIP_PATTERNS = [r'\{', r'\}', r'[\[\]]', r'\\begin\{.*?\}', r'\\end\{.*?\}', r'\^', r'\_', r'\\.*rule.*', r'\\.*line.*', r'\[[\-.0-9]+[epm][xtm]\]']
-SKIP_Tokens = ['\\', '\\\\', '\\index', '\\a', '&', '$', '\\multirow', '\\def', '\\raggedright', '\\url', '\\cr', '\\ensuremath', '\\left', '\\right',
-               '\\mathchoice', '\\scriptstyle', '\\displaystyle', '\\qquad', '\\quad', '\\,', '\\!', '~', '\\boldmath']
-PHANTOM_Tokens = ['\\fontfamily', '\\vphantom', '\\phantom', '\\rowcolor', '\\ref']
+SKIP_Tokens = ['\\', '\\\\', '\\index', '\\a', '&', '$', '\\multirow', '\\def', '\\edef', '\\raggedright', '\\url', '\\cr', '\\ensuremath', '\\left', '\\right',
+               '\\mathchoice', '\\scriptstyle', '\\displaystyle', '\\qquad', '\\quad', '\\,', '\\!', '~', '\\boldmath', '\\gdef', '\\today', '\\the']
+PHANTOM_Tokens = ['\\fontfamily', '\\vphantom', '\\phantom', '\\rowcolor', '\\ref', '\\thesubequation', '\\global', '\\theboldgroup']
 TWO_Tail_Tokens = ['\\frac', '\\binom']
 AB_Tail_Tokens = ['\\xrightarrow', '\\xleftarrow', '\\sqrt'] # special token \xxx [] {}
 TWO_Tail_Invisb_Tokens = ['\\overset', '\\underset', '\\stackrel']
 ONE_Tail_Tokens = ['\\widetilde', '\\overline', '\\hat', '\\widehat', '\\tilde', '\\Tilde', '\\dot', '\\bar', '\\vec', '\\underline', '\\underbrace', '\\check',
-                   '\\breve', '\\Bar', '\\Vec', '\\mathring', '\\ddot']
+                   '\\breve', '\\Bar', '\\Vec', '\\mathring', '\\ddot', '\\Ddot', '\\dddot', '\\ddddot']
 ONE_Tail_Invisb_Tokens = ['\\boldsymbol', '\\pmb', '\\textbf', '\\mathrm', '\\mathbf', '\\mathbb', '\\mathcal', '\\textmd', '\\texttt', '\\textnormal',
                '\\text', '\\textit', '\\textup', '\\mathop', '\\mathbin', '\\smash', '\\operatorname', '\\textrm', '\\mathfrak', '\\emph',
                '\\textsf', '\\textsc']
@@ -150,29 +150,74 @@ def normalize_latex(l, rm_trail=False):
     for bef, aft in zip(old_token, new_token):
         l = l.replace(bef, aft)
 
-    # TODO a token such as \not= should be one token
-    pattern = r'\\not [<>+=\-]'
-    old_token = re.findall(pattern, l, re.DOTALL)
-    new_token = [item.replace(" ", "") for item in old_token]
-    for bef, aft in zip(old_token, new_token):
-        l = l.replace(bef, aft)
+    # # TODO a token such as \not= should be one token
+    # pattern = r'\\not [<>+=\-]'
+    # old_token = re.findall(pattern, l, re.DOTALL)
+    # new_token = [item.replace(" ", "") for item in old_token]
+    # for bef, aft in zip(old_token, new_token):
+    #     l = l.replace(bef, aft)
+
+    # # TODO \not xx should be combined as one token
+    # pattern = r'\\not [\\=\<\>][^ ]+ '
+    # old_token = re.findall(pattern, l, re.DOTALL)
+    # new_token = [item.replace(" ", "") for item in old_token]
+    # for bef, aft in zip(old_token, new_token):
+    #     l = l.replace(bef, aft+" ")
 
     # TODO tokens such as \dots \exp \sinh, split them to parts, so the bbox match will be easier.
 
     l = " "+l+" "
-    l = ...  # twelve old rewrite lines, truncated in the page capture
+    l = re.sub(r'(?<=\s)--(?=\s)', r'- -', l)
+    l = re.sub(r'(?<=\s)---(?=\s)', r'- - -', l)
+    l = re.sub(r'(?<=\s)…(?=\s)', r'. . .', l)
+    l = re.sub(r'(?<=\s)\\ldots(?=\s)', r'. . .', l)
+    l = re.sub(r'(?<=\s)\\hdots(?=\s)', r'. . .', l)
+    l = re.sub(r'(?<=\s)\\cdots(?=\s)', r'. . .', l)
+    l = re.sub(r'(?<=\s)\\dddot(?=\s)', r'. . .', l)
+    l = re.sub(r'(?<=\s)\\dots(?=\s)', r'. . .', l)
+    l = re.sub(r'(?<=\s)\\dotsc(?=\s)', r'. . .', l)
+    l = re.sub(r'(?<=\s)\\dotsi(?=\s)', r'. . .', l)
+    l = re.sub(r'(?<=\s)\\dotsm(?=\s)', r'. . .', l)
+    l = re.sub(r'(?<=\s)\\dotso(?=\s)', r'. . .', l)
+    l = re.sub(r'(?<=\s)\\dotsb(?=\s)', r'. . .', l)
+    l = re.sub(r'(?<=\s)\\mathellipsis(?=\s)', r'. . .', l)
+    l = re.sub(r'(?<=\s)\\ex(?=\s)', r'\\mathrm { e x }', l)
+    l = re.sub(r'(?<=\s)\\ln(?=\s)', r'\\mathrm { l n }', l)
+    l = re.sub(r'(?<=\s)\\lg(?=\s)', r'\\mathrm { l g }', l)
+    l = re.sub(r'(?<=\s)\\cot(?=\s)', r'\\mathrm { c o t }', l)
+    l = re.sub(r'(?<=\s)\\mod(?=\s)', r'\\mathrm { m o d }', l)
+    l = re.sub(r'(?<=\s)\\bmod(?=\s)', r'\\mathrm { m o d }', l)
+    l = re.sub(r'(?<=\s)\\pmod(?=\s)', r'\\mathrm { m o d }', l)  # \pmod is not the same as \mod, but it is hard to handle, so it is replaced with \mod for now
+    l = re.sub(r'(?<=\s)\\min(?=\s)', r'\\mathrm { m i n }', l)
+    l = re.sub(r'(?<=\s)\\max(?=\s)', r'\\mathrm { m a x }', l)
+    l = re.sub(r'(?<=\s)\\ker(?=\s)', r'\\mathrm { k e r }', l)
+    l = re.sub(r'(?<=\s)\\hom(?=\s)', r'\\mathrm { h o m }', l)
+    l = re.sub(r'(?<=\s)\\sec(?=\s)', r'\\mathrm { s e c }', l)
+    l = re.sub(r'(?<=\s)\\scs(?=\s)', r'\\mathrm { s c s }', l)
+    l = re.sub(r'(?<=\s)\\csc(?=\s)', r'\\mathrm { c s c }', l)
+    l = re.sub(r'(?<=\s)\\deg(?=\s)', r'\\mathrm { d e g }', l)
+    l = re.sub(r'(?<=\s)\\arg(?=\s)', r'\\mathrm { a r g }', l)
+    l = re.sub(r'(?<=\s)\\log(?=\s)', r'\\mathrm { l o g }', l)
+    l = re.sub(r'(?<=\s)\\dim(?=\s)', r'\\mathrm { d i m }', l)
+    l = re.sub(r'(?<=\s)\\exp(?=\s)', r'\\mathrm { e x p }', l)
+    l = re.sub(r'(?<=\s)\\sin(?=\s)', r'\\mathrm { s i n }', l)
+    l = re.sub(r'(?<=\s)\\cos(?=\s)', r'\\mathrm { c o s }', l)
+    l = re.sub(r'(?<=\s)\\tan(?=\s)', r'\\mathrm { t a n }', l)
+    l = re.sub(r'(?<=\s)\\tanh(?=\s)', r'\\mathrm { t a n h }', l)
+    l = re.sub(r'(?<=\s)\\cosh(?=\s)', r'\\mathrm { c o s h }', l)
+    l = re.sub(r'(?<=\s)\\sinh(?=\s)', r'\\mathrm { s i n h }', l)
+    l = re.sub(r'(?<=\s)\\coth(?=\s)', r'\\mathrm { c o t h }', l)
+    l = re.sub(r'(?<=\s)\\arcsin(?=\s)', r'\\mathrm { a r c s i n }', l)
+    l = re.sub(r'(?<=\s)\\arccos(?=\s)', r'\\mathrm { a r c c o s }', l)
+    l = re.sub(r'(?<=\s)\\arctan(?=\s)', r'\\mathrm { a r c t a n }', l)
+
+    # ** a token such as \string xxx should be one token
+    pattern = r'\\string [^ ]+ '
+    old_token = re.findall(pattern, l, re.DOTALL)
+    new_token = [item.replace(" ", "") for item in old_token]
+    for bef, aft in zip(old_token, new_token):
+        l = l.replace(bef, aft+" ")
+
     # ** a token such as \big( should be one token
     pattern = r'\\[Bb]ig[g]?[glrm]? [(){}|\[\]] '
     old_token = re.findall(pattern, l, re.DOTALL)
@@ -235,12 +280,12 @@ def normalize_latex(l, rm_trail=False):
     for bef, aft in zip(old_token, new_token):
        l = l.replace(bef, "{ "+aft[1:-1]+" }")
 
-    # ** \...  # old comment truncated in the page capture
-    pattern = r'\\...  # old pattern truncated in the page capture
+    # ** \rule{1pt}{2pt} lines should be combined as one token and not rendered
+    pattern = r'\\rule {[ .0-9a-z]+} {[ .0-9a-z]+}'
     old_token = re.findall(pattern, l, re.DOTALL)
     new_token = [item.replace(" ", "") for item in old_token]
     for bef, aft in zip(old_token, new_token):
-        l = l.replace(bef, aft  # old replacement truncated in the page capture
+        l = l.replace(bef, aft)
 
     # ** \specialrule{1pt}{2pt}{2pt}, special lines, should be combined as one token
     pattern = r'\\specialrule {[ .0-9a-z]+} {[ .0-9a-z]+} {[ .0-9a-z]+}'
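Note: the long run of re.sub rewrites added to normalize_latex splits multi-glyph commands into one token per rendered glyph (\ldots becomes three '.' tokens, \sin becomes \mathrm { s i n }), so each glyph can be matched to its own colored bbox. The (?<=\s)...(?=\s) lookarounds only fire on commands that stand alone between spaces in the already space-tokenized string. A small standalone sketch of the effect, using three rules from the diff:

import re

l = " \\sin x + \\ldots + \\cos y "
l = re.sub(r'(?<=\s)\\ldots(?=\s)', r'. . .', l)
l = re.sub(r'(?<=\s)\\sin(?=\s)', r'\\mathrm { s i n }', l)
l = re.sub(r'(?<=\s)\\cos(?=\s)', r'\\mathrm { c o s }', l)
print(l)  # ' \mathrm { s i n } x + . . . + \mathrm { c o s } y '

The trailing lookahead is also what keeps the \sin rule from firing inside a \sinh token, since "h" rather than whitespace follows the match.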
modules/visual_matcher.py
CHANGED
@@ -42,41 +42,67 @@ def norm_coords(x, left, right):
 
 def norm_same_token(token):
     special_map = {
+        "\\dot": ".",
+        "\\Dot": ".",
         "\\cdot": ".",
+        "\\cdotp": ".",
+        "\\ldotp": ".",
         "\\mid": "|",
-        "\\...  # old entry truncated in the page capture
+        "\\rightarrow": "\\to",
         "\\top": "T",
         "\\Tilde": "\\tilde",
-        "\\cdots": "\\dots",
         "\\prime": "'",
         "\\ast": "*",
         "\\left<": "\\langle",
-        "\\right>": "\\rangle"
+        "\\right>": "\\rangle",
+        "\\lbrace": "\{",
+        "\\rbrace": "\}",
+        "\\lbrack": "[",
+        "\\rbrack": "]",
+        "\\blackslash": "/",
+        "\\slash": "/",
+        "\\leq": "\\le",
+        "\\geq": "\\ge",
+        "\\neq": "\\ne",
+        "\\Vert": "\\|",
+        "\\lVert": "\\|",
+        "\\rVert": "\\|",
+        "\\vert": "|",
+        "\\lvert": "|",
+        "\\rvert": "|",
+        "\\colon": ":",
+        "\\Ddot": "\\ddot",
+        "\\Bar": "\\bar",
+        "\\Vec": "\\vec",
+        "\\parallel": "\\|",
+        "\\dag": "\\dagger",
+        "\\ddag": "\\ddagger",
+        "\\textlangle": "<",
+        "\\textrangle": ">",
+        "\\textgreater": ">",
+        "\\textless": "<",
+        "\\textbackslash": "n",
+        "\\textunderscore": "_",
+        "\\=": "_",
+        "\\neg": "\\lnot",
+        "\\neq": "\\not=",
     }
-    if token in special_map.keys():
-        token = special_map[token]
     if token.startswith('\\left') or token.startswith('\\right'):
-        token = ...  # old \left/\right handling, truncated in the page capture
+        if "arrow" not in token and "<" not in token and ">" not in token and "harpoon" not in token:
+            token = token.replace("\\left", "").replace("\\right", "")
     if token.startswith('\\big') or token.startswith('\\Big'):
         if "\\" in token[4:]:
             token = "\\"+token[4:].split("\\")[-1]
         else:
            token = token[-1]
-    ...  # two old lines truncated in the page capture
-        return token[0:-1]
-    if token in ['\\lVert', '\\rVert', '\\Vert']:
-        return '\\|'
-    if token in ['\\lvert', '\\rvert', '\\vert']:
-        return '|'
-    if token.endswith("rightarrow"):
-        return "\\rightarrow"
-    if token.endswith("leftarrow"):
-        return "\\leftarrow"
+    if token in special_map.keys():
+        token = special_map[token]
     if token.startswith('\\wide'):
         return token.replace("wide", "")
     if token.startswith('\\var'):
-        return token.replace("...  # old replacement truncated in the page capture
+        return token.replace("var", "")
+    if token.startswith('\\string'):
+        return token.replace("\\string", "")
     return token
@@ -91,18 +117,6 @@ class HungarianMatcher:
         self.cost_position = cost_position
         self.cost_order = cost_order
         self.cost = {}
-
-    def calculate_token_cost_old(self, box_gt, box_pred):
-        token_cost = np.ones((len(box_gt), len(box_pred)))
-        for i in range(token_cost.shape[0]):
-            box1 = box_gt[i]
-            for j in range(token_cost.shape[1]):
-                box2 = box_pred[j]
-                if box1['token'] == box2['token']:
-                    token_cost[i, j] = 0
-                elif norm_same_token(box1['token']) == norm_same_token(box2['token']):
-                    token_cost[i, j] = 0.05
-        return np.array(token_cost)
 
     def calculate_token_cost(self, box_gt, box_pred):
         token2id = {}
@@ -143,7 +157,7 @@ class HungarianMatcher:
         token_cost = 1.0 - pred_token_logits[:, gt_token_array]
         norm_token_cost = 1.0 - norm_pred_token_logits[:, norm_gt_token_array]
 
-        token_cost[np.logical_and(token_cost==1, norm_token_cost==0)] = 0.
+        token_cost[np.logical_and(token_cost==1, norm_token_cost==0)] = 0.005
        return token_cost.T
@@ -155,10 +169,12 @@ class HungarianMatcher:
             box_array.append([x_min/W, y_min/H, x_max/W, y_max/H])
         return np.array(box_array)
 
-    def order2array(self, box_list):
+    def order2array(self, box_list, max_token_lens=None):
+        if not max_token_lens:
+            max_token_lens = len(box_list)
         order_array = []
         for idx, box in enumerate(box_list):
-            order_array.append([idx / len(box_list)])
+            order_array.append([idx / max_token_lens])
         return np.array(order_array)
 
     def calculate_l1_cost(self, gt_array, pred_array):
@@ -170,8 +186,10 @@ class HungarianMatcher:
         aa = time.time()
         gt_box_array = self.box2array(box_gt, gt_size)
         pred_box_array = self.box2array(box_pred, pred_size)
-        gt_order_array = self.order2array(box_gt)
-        pred_order_array = self.order2array(box_pred)
+
+        max_token_lens = max(len(box_gt), len(box_pred))
+        gt_order_array = self.order2array(box_gt, max_token_lens)
+        pred_order_array = self.order2array(box_pred, max_token_lens)
 
         token_cost = self.calculate_token_cost(box_gt, box_pred)
         position_cost = self.calculate_l1_cost(gt_box_array, pred_box_array)
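Note: order2array gains a max_token_lens argument so that the ground-truth and prediction sequences are projected onto a shared order scale before the L1 order cost is computed; the None default reproduces the old per-list normalization. A standalone sketch (not from the commit) of why the shared denominator matters, with dummy lists standing in for token boxes:

import numpy as np

def order2array(box_list, max_token_lens=None):
    if not max_token_lens:
        max_token_lens = len(box_list)
    return np.array([[idx / max_token_lens] for idx in range(len(box_list))])

gt, pred = [None] * 5, [None] * 10       # 5 GT tokens vs 10 predicted tokens
m = max(len(gt), len(pred))
print(order2array(gt)[4], order2array(pred)[4])        # [0.8] vs [0.4]
print(order2array(gt, m)[4], order2array(pred, m)[4])  # [0.4] vs [0.4]

Without the shared scale, the same index in two sequences of different lengths lands at different order coordinates, which inflates the order cost for otherwise correct token matches.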