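// NOTE(review): "Spaces: Running Running" -- this appears to be web-page status text captured together with the file, not program code; confirm and remove.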
/*** MAIN FUNCTION ***/

/**
 * Runs the whole post-processing pipeline on a parse tree.
 *
 * The tree goes, in order, through remove_morphology, remove_spaces,
 * simplify_parse_tree (restricted to a whitelist of node names) and finally
 * marking_game_format, whose result is returned.
 *
 * @param {Array} parse_tree - Parse tree to post-process.
 * @returns {string} The marking-game string built by marking_game_format.
 */
function camxes_postprocessing(parse_tree) {
  // Only these nodes survive the simplification step. "text" is listed so
  // that the outermost array also begins with a node name.
  var kept_node_names = ["text", "dot_star", "sentence", "selbri", "sumti", "prenex"];

  // Prune morphology nodes, then drop the "spaces" nodes.
  var without_morphology = remove_morphology(parse_tree);
  var without_spaces = remove_spaces(without_morphology);

  // Remove every node except those on the whitelist.
  var simplified = simplify_parse_tree(without_spaces, kept_node_names);

  // Build and return the marking-game string.
  return marking_game_format(simplified);
}
/**
 * Removes every "spaces" node from a parse tree, modifying it in place.
 *
 * @param {Array} tree - A node: an array whose first element may be its label.
 * @returns {Array|null} The same array with all nested "spaces" nodes
 *   spliced out, or null when `tree` itself is labelled "spaces".
 */
function remove_spaces(tree) {
  // Strict comparison on purpose: with `==`, an unlabelled node whose first
  // child is the array ["spaces"] would coerce to "spaces" and be dropped.
  if (tree.length > 0 && tree[0] === "spaces") return null;
  var i = 0;
  while (i < tree.length) {
    if (is_array(tree[i])) {
      var cleaned = remove_spaces(tree[i]);
      if (cleaned === null) {
        // splice() shifts the following elements one slot to the left, so
        // the next element to examine is now at index i: do not advance.
        tree.splice(i, 1);
        continue;
      }
      tree[i] = cleaned;
    }
    i++;
  }
  return tree;
}
/**
 * Simplifies a parse tree so that only nodes whose label is listed in `nl`
 * keep their own array; every other node is replaced by its children.
 *
 * Result by kind of input:
 *  - a string: returned unchanged;
 *  - an empty array: null;
 *  - an unlabelled node (first element is an array): the array of its
 *    simplified children;
 *  - a labelled node whose label is in `nl`: a one-element array holding the
 *    node (label followed by its simplified children);
 *  - any other labelled node: its simplified children with the label
 *    dropped ([] when there are none).
 *
 * Each child's result is spliced into its parent with concat(), so array
 * results are flattened by one level and a null result (empty child) is kept
 * as a single null element.
 *
 * @param {Array|string} pt - Parse tree (or leaf string) to simplify.
 * @param {Array} nl - Labels of the nodes to keep.
 * @returns {Array|string|null} See above.
 * @throws {string} "ERROR" when `pt` is neither a string nor an array.
 */
function simplify_parse_tree(pt, nl) {
  if (is_string(pt)) return pt;
  if (!is_array(pt)) throw "ERROR";
  if (pt.length === 0) return null;
  var no_label = is_array(pt[0]);
  var i = no_label ? 0 : 1;
  while (i < pt.length) {
    if (is_array(pt[i])) {
      var replacement = simplify_parse_tree(pt[i], nl);
      pt = pt.slice(0, i).concat(replacement, pt.slice(i + 1));
      // Step over exactly what was spliced in. An empty replacement adds
      // nothing, so the next sibling now sits at index i and must not be
      // skipped (the former `v == []` test could never be true). A non-array
      // replacement (null) occupies a single slot.
      i += is_array(replacement) ? replacement.length : 1;
    } else {
      i++;
    }
  }
  if (no_label) return pt;
  else if (among(pt[0], nl)) return [pt];
  else if (pt.length === 1) return [];
  else return pt.slice(1);
}
/**
 * Renders a (simplified) parse tree as a marking-game string.
 *
 * A string in first position is taken as the node name and only selects the
 * bracket pair (via bracket_from_nodename); every other string and every
 * rendered child array is joined with single spaces, and the result is
 * wrapped in the bracket pair when there is one. Elements that are neither
 * strings nor arrays are ignored.
 *
 * Pieces that render to an empty string are skipped, so they no longer leave
 * doubled separators or a space before a closing bracket.
 *
 * @param {Array|null|undefined} pt - Tree to render; null/undefined (what
 *   simplify_parse_tree yields for an empty tree) renders as "".
 * @returns {string} The formatted string.
 */
function marking_game_format(pt) {
  if (pt === null || pt === undefined) return "";
  var pieces = [];   // Non-empty rendered fragments, in order
  var brackets = ""; // Bracket pair of this node ("" when none)
  for (var i = 0; i < pt.length; i++) {
    var item = pt[i];
    if (is_string(item)) {
      if (i === 0) {
        brackets = bracket_from_nodename(item);
      } else if (item !== "") {
        pieces.push(item);
      }
    } else if (is_array(item)) {
      var rendered = marking_game_format(item);
      if (rendered !== "") pieces.push(rendered);
    }
  }
  var s = pieces.join(" ");
  if (brackets.length >= 2)
    s = brackets[0] + s + brackets[1];
  // Kept from the previous implementation: drop one trailing space (it can
  // only come from a leaf that itself ends with a space).
  if (s.length > 0 && s[s.length - 1] === ' ')
    s = s.substring(0, s.length - 1);
  return s;
}
/**
 * Gives the bracket pair used to mark a node in the marking-game output.
 *
 * @param {*} nodename - Node name to look up (compared strictly).
 * @returns {string} A two-character string (opening bracket, then closing
 *   bracket) for "prenex", "sentence", "sumti" and "selbri"; "" otherwise.
 */
function bracket_from_nodename(nodename) {
  if (nodename === "prenex") return "⟦⟧";
  if (nodename === "sentence") return "{}";
  if (nodename === "sumti") return "[]";
  if (nodename === "selbri") return "<>";
  return "";
}
// ====== MORPHOLOGY REMOVAL ====== //

/*
 * remove_morphology(parse_tree)
 *
 * This function takes a parse tree and, in place, collapses every node that
 * is_target_node() accepts (word-level nodes such as "cmevla", "gismu",
 * "lujvo", "fuhivla", as well as any selmaho node, e.g. "KOhA") into
 * [label, text], where text is the concatenation of all the leaf strings
 * found under that node. When that text is empty, only [label] is left.
 *
 * Returns the same array (or a new empty array when the input is empty).
 *
 * (This is essentially a copy of process_parse_tree.js.)
 */
function remove_morphology(pt) {
  if (pt.length < 1) return [];
  var i;
  /* Sometimes nodes have no label and have instead an array as their first
     element. */
  if (is_array(pt[0])) i = 0;
  else { // The first element is a label (node name).
    // Let's check if this node is a candidate for our pruning.
    if (is_target_node(pt)) {
      /* We join recursively all the terminal elements (letters) in this
       * node and its child nodes, and put the resulting string in the #1
       * slot of the array; afterwards we delete all the remaining elements
       * (their terminal values have been concatenated into pt[1]). */
      pt[1] = join_expr(pt);
      // If pt[1] contains an empty string, let's delete it as well:
      pt.splice((pt[1] == "") ? 1 : 2);
      return pt;
    }
    i = 1;
  }
  /* If we've reached here, then this node is not a target for pruning, so
     let's do recursion into its child nodes. Only arrays are nodes: leaf
     strings (which may happen to look like a selmaho name, e.g. "I") and
     any other non-array values are left untouched instead of being treated
     as nodes, which used to raise a TypeError. */
  while (i < pt.length) {
    if (is_array(pt[i])) remove_morphology(pt[i]);
    i++;
  }
  return pt;
}
/* Tells whether node `n` is a target for pruning: its label (n[0]) is either
 * one of the names listed below or a selmaho name according to is_selmaho. */
function is_target_node(n) {
  var label = n[0];
  var prunable_labels = ["cmevla", "gismu", "lujvo", "fuhivla",
                         "initial_spaces", "ga_clause", "gu_clause"];
  if (among(label, prunable_labels)) {
    return true;
  }
  return is_selmaho(label);
}
/* Returns the text obtained by concatenating, depth first and left to right,
 * every leaf string found in parse tree `n`. A leading element that is not an
 * array is taken to be the node name and is left out. An empty node yields
 * the empty string. */
function join_expr(n) {
  if (n.length < 1) {
    return "";
  }
  var text = "";
  // Start after the label, unless the node has none (first element is an array).
  for (var k = is_array(n[0]) ? 0 : 1; k < n.length; k++) {
    var part = n[k];
    if (is_string(part)) {
      text += part;
    } else {
      text += join_expr(part);
    }
  }
  return text;
}
/**
 * Tells whether `v` is loosely equal (`==`) to at least one element of `s`.
 *
 * @param {*} v - Value to look for.
 * @param {Array|string} s - Indexable collection with a `length`.
 * @returns {boolean} true as soon as a matching element is found, false otherwise.
 */
function among(v, s) {
  for (var k = 0; k < s.length; k++) {
    if (s[k] == v) {
      return true;
    }
  }
  return false;
}
/**
 * Tells whether `v` is a string made of an optional leading capital letter
 * from the set accepted below, a capital vowel or diphthong (AI, EI, OI, AU),
 * and any number of "h" + vowel/diphthong groups, i.e. a name shaped like a
 * selmaho such as "KOhA".
 *
 * @param {*} v - Value to test.
 * @returns {boolean} true when `v` is a string matching the pattern in full.
 */
function is_selmaho(v) {
  if (!is_string(v)) {
    return false;
  }
  var selmaho_pattern = /^[IUBCDFGJKLMNPRSTVXZ]?([AEIOUY]|(AI|EI|OI|AU))(h([AEIOUY]|(AI|EI|OI|AU)))*$/g;
  // The pattern is anchored, so a match can only be found at index 0.
  var match_index = v.search(selmaho_pattern);
  return match_index === 0;
}
/* ================== */
/* ===  Routines  === */
/* ================== */
/**
 * Rewrites the square brackets of `str` so that the bracket shape shows the
 * nesting depth: depth 0 uses "()", depth 1 "[]", depth 2 "{}", depth 3 "<>",
 * then the cycle starts again. From the second cycle on, the round brackets
 * that open a cycle carry the cycle number in superscript digits, placed
 * inside the pair (after the opening bracket, before the closing one).
 *
 * A "]" that has no matching "[" is copied unchanged and does not affect the
 * depth (it used to be replaced by the text "undefined").
 *
 * @param {string} str - Text whose "[" and "]" are to be rewritten.
 * @returns {string} The rewritten text; all other characters are untouched.
 */
function prettify_brackets(str) {
  var open_brackets = ["(", "[", "{", "<"];
  var close_brackets = [")", "]", "}", ">"];
  var brackets_number = open_brackets.length;
  // Superscript digits 0-9.
  var numset = ['\u2070', '\u00b9', '\u00b2', '\u00b3', '\u2074',
                '\u2075', '\u2076', '\u2077', '\u2078', '\u2079'];

  // Superscript cycle number for a depth that starts a new cycle (a non-zero
  // multiple of brackets_number); "" for every other depth.
  function cycle_mark(depth) {
    if (depth === 0 || depth % brackets_number !== 0) return "";
    return str_print_uint(depth / brackets_number, numset);
  }

  var out = "";
  var depth = 0; // Number of currently open "[".
  // Single pass building the output, instead of splicing `str` repeatedly.
  for (var i = 0; i < str.length; i++) {
    var ch = str[i];
    if (ch === '[') {
      out += open_brackets[depth % brackets_number] + cycle_mark(depth);
      depth++;
    } else if (ch === ']' && depth > 0) {
      depth--;
      out += cycle_mark(depth) + close_brackets[depth % brackets_number];
    } else {
      out += ch;
    }
  }
  return out;
}
/**
 * Writes the integer part of `val` in positional notation, using the
 * elements of `charset` as digits (the radix is `charset.length`).
 *
 * @param {number} val - Value to print; its fractional part is discarded.
 * @param {Array} charset - Digit characters, lowest digit first.
 * @returns {string} The digits, most significant first; "" when the integer
 *   part of `val` is below 1.
 */
function str_print_uint(val, charset) {
  // Drops the fractional part (no float allowed).
  function integer_part(x) {
    return x - (x % 1);
  }

  var base = charset.length;
  var digits = "";
  for (var rest = integer_part(val); rest >= 1; rest = integer_part(rest / base)) {
    // Lowest remaining digit goes in front of those already produced.
    digits = charset[rest % base] + digits;
  }
  return digits;
}
/**
 * Replaces `len` characters of `str`, starting at index `pos`, with `sub`.
 *
 * @param {string} str - Source string.
 * @param {number} pos - Index of the first character to replace.
 * @param {number} len - Number of characters to replace; a span running past
 *   the end of `str` is cut at the end.
 * @param {string} sub - Replacement text.
 * @returns {string} The new string, or `str` itself when `pos` is not below
 *   its length.
 */
function str_replace(str, pos, len, sub) {
  if (!(pos < str.length)) {
    return str;
  }
  // Index of the first character kept after the replaced span.
  var tail_start = pos + len;
  if (tail_start >= str.length) {
    tail_start = str.length;
  }
  var head = str.substring(0, pos);
  var tail = str.substring(tail_start);
  return head + sub + tail;
}
/**
 * Tells whether `v` is a string, going by its Object.prototype.toString tag.
 *
 * @param {*} v - Value to test.
 * @returns {boolean} true when the tag is '[object String]'.
 */
function is_string(v) {
  var type_tag = Object.prototype.toString.call(v);
  return type_tag === '[object String]';
}
/**
 * Tells whether `v` is an array, going by its Object.prototype.toString tag.
 *
 * @param {*} v - Value to test.
 * @returns {boolean} true when the tag is '[object Array]'.
 */
function is_array(v) {
  var type_tag = Object.prototype.toString.call(v);
  return type_tag === '[object Array]';
}
// When a `module` object is defined (e.g. when the file is loaded as a
// CommonJS module), publish the entry point under the name `postprocessing`.
if (typeof module !== 'undefined') {
  module.exports.postprocessing = camxes_postprocessing;
}