lojban-camxes-js / marking_game /marking_game_postproc.js
Pendrokar's picture
repo clone
4afcdfb
/*** MAIN FUNCTION ***/
function camxes_postprocessing(parse_tree) {
/* Pruning morphology nodes. */
parse_tree = remove_morphology(parse_tree);
parse_tree = remove_spaces(parse_tree);
/* Removing every nodes except for those in the following whitelist. */
var wanted_nodes = ["text", "dot_star", "sentence", "selbri", "sumti", "prenex"];
// We included "text" so the outermost array also begins with a node name.
parse_tree = simplify_parse_tree(parse_tree, wanted_nodes);
/* Building and retreiving the marking game string. */
return marking_game_format(parse_tree);
}
function remove_spaces(tree) {
if (tree.length > 0 && tree[0] == "spaces") return null;
var i = 0;
while (i < tree.length) {
if (is_array(tree[i])) {
tree[i] = remove_spaces(tree[i]);
if (tree[i] === null) tree.splice(i, 1);
}
i++;
}
return tree;
}
function simplify_parse_tree(pt, nl) {
if (is_string(pt)) return pt;
if (!is_array(pt)) throw "ERROR";
if (pt.length == 0) return null;
var no_label = is_array(pt[0]);
var i = no_label ? 0 : 1;
while (i < pt.length) {
if (is_array(pt[i])) {
var v = simplify_parse_tree(pt[i], nl);
pt = pt.slice(0, i).concat(v, pt.slice(i + 1));
if (v == []) i--;
}
i++;
}
if (no_label) return pt;
else if (among(pt[0], nl)) return [pt];
else if (pt.length == 1) return [];
else return pt.slice(1);
}
function marking_game_format(pt) {
var s = "" // Output string
var i = 0; // String index
var b = ""; // Bracket pair
while (i < pt.length) {
if (is_string(pt[i])) {
if (i == 0) {
b = bracket_from_nodename(pt[i]);
} else {
if (s != "") s += " ";
s += pt[i];
}
} else if (is_array(pt[i])) {
if (s != "") s += " ";
s += marking_game_format(pt[i]);
}
i++;
}
if (b.length >= 2)
s = b[0] + s + b[1];
if (s.length > 0 && s[s.length - 1] == ' ')
s = s.substring(0, s.length - 1);
return s;
}
function bracket_from_nodename(nodename) {
switch (nodename) {
case "prenex": return "⟦⟧";
case "sentence": return "{}";
case "sumti": return "[]";
case "selbri": return "<>";
default: return "";
}
}
// ====== MORPHOLOGY REMOVAL ====== //
/*
* remove_morphology(parse_tree)
*
* This function takes a parse tree, and joins the expressions of the following
* nodes:
* "cmevla", "gismu_2", "lujvo", "fuhivla", "spaces"
* as well as any selmaho node (e.g. "KOhA").
*
* (This is essentially a copy of process_parse_tree.js.)
*/
function remove_morphology(pt) {
if (pt.length < 1) return [];
var i;
/* Sometimes nodes have no label and have instead an array as their first
element. */
if (is_array(pt[0])) i = 0;
else { // The first element is a label (node name).
// Let's check if this node is a candidate for our pruning.
if (is_target_node(pt)) {
/* We join recursively all the terminal elements (letters) in this
* node and its child nodes, and put the resulting string in the #1
* slot of the array; afterwards we delete all the remaining elements
* (their terminal values have been concatenated into pt[1]). */
pt[1] = join_expr(pt);
// If pt[1] contains an empty string, let's delete it as well:
pt.splice((pt[1] == "") ? 1 : 2);
return pt;
}
i = 1;
}
/* If we've reached here, then this node is not a target for pruning, so let's
do recursion into its child nodes. */
while (i < pt.length) {
remove_morphology(pt[i]);
i++;
}
return pt;
}
/* Checks whether the argument node is a target for pruning. */
function is_target_node(n) {
return (among(n[0], ["cmevla", "gismu", "lujvo", "fuhivla", "initial_spaces", "ga_clause", "gu_clause"])
|| is_selmaho(n[0]));
}
/* This function returns the string resulting from the recursive concatenation of
* all the leaf elements of the parse tree argument (except node names). */
function join_expr(n) {
if (n.length < 1) return "";
var s = "";
var i = is_array(n[0]) ? 0 : 1;
while (i < n.length) {
s += is_string(n[i]) ? n[i] : join_expr(n[i]);
i++;
}
return s;
}
function among(v, s) {
var i = 0;
while (i < s.length) if (s[i++] == v) return true;
return false;
}
function is_selmaho(v) {
if (!is_string(v)) return false;
return (0 == v.search(/^[IUBCDFGJKLMNPRSTVXZ]?([AEIOUY]|(AI|EI|OI|AU))(h([AEIOUY]|(AI|EI|OI|AU)))*$/g));
}
/* ================== */
/* === Routines === */
/* ================== */
function prettify_brackets(str) {
var open_brackets = ["(", "[", "{", "<"];
var close_brackets = [")", "]", "}", ">"];
var brackets_number = 4;
// var numset = ['0','1','2','3','4','5','6','7','8','9'];
var numset = ['\u2070','\u00b9','\u00b2','\u00b3','\u2074',
'\u2075','\u2076','\u2077','\u2078','\u2079'];
var i = 0;
var floor = 0;
while (i < str.length) {
if (str[i] == '[') {
var n = floor % brackets_number;
var num = (floor && !n) ?
str_print_uint(floor / brackets_number, numset) : "";
str = str_replace(str, i, 1, open_brackets[n] + num);
floor++;
} else if (str[i] == ']') {
floor--;
var n = floor % brackets_number;
var num = (floor && !n) ?
str_print_uint(floor / brackets_number, numset) : "";
str = str_replace(str, i, 1, num + close_brackets[n]);
}
i++;
}
return str;
}
function str_print_uint(val, charset) {
// 'charset' must be a character array.
var radix = charset.length;
var str = "";
val -= val % 1; // No float allowed
while (val >= 1) {
str = charset[val % radix] + str;
val /= radix;
val -= val % 1;
}
return str;
}
function str_replace(str, pos, len, sub) {
if (pos < str.length) {
if (pos + len >= str.length) len -= pos + len - str.length;
return str.substring(0, pos) + sub + str.substring(pos + len);
} else return str;
}
function is_string(v) {
return Object.prototype.toString.call(v) === '[object String]';
}
function is_array(v) {
return Object.prototype.toString.call(v) === '[object Array]';
}
if (typeof module !== 'undefined')
module.exports.postprocessing = camxes_postprocessing;