#include "kb_parser.h"
#include "kb_term_builder.h"
#include "owl_import.h"
#include "psearch_db.h"
namespace parser {
/////////////////////////////////////////////////////////////////////////////////////////
// kb_builder::process_schema_statement
//
/////////////////////////////////////////////////////////////////////////////////////////
// Handles one statement of the schema section.
//
// The line is split on "(), =\t" and must yield exactly three tokens:
//   v[0] - first argument forwarded to the create_* call,
//   v[1] - data type keyword selecting which create_* member is called,
//   v[2] - second argument forwarded to the create_* call (not used by 'unset').
//
// Throws rdf::rdf_exception(rdf::parsing_error) when the token count is not 3
// or when the data type keyword is not recognized.
void
kb_builder::process_schema_statement(std::string const& line)
{
    string_vector_t v;
    tokenize(line, v, "(), =\t");
    if(v.size() != 3) throw rdf::rdf_exception(rdf::parsing_error, "ERROR-P1, invalid schema statement");

    if(v[1] == "resource")      create_resource(v[0], v[2]);
    else if(v[1] == "text")     create_text    (v[0], v[2]);
    else if(v[1] == "bool")     create_bool    (v[0], v[2]);
    else if(v[1] == "int")      create_int     (v[0], v[2]);
    else if(v[1] == "uint")     create_uint    (v[0], v[2]);
    else if(v[1] == "real")     create_real    (v[0], v[2]);
    else if(v[1] == "duration") create_duration(v[0], v[2]);
    else if(v[1] == "time")     create_time    (v[0], v[2]);
    else if(v[1] == "date")     create_date    (v[0], v[2]);
    else if(v[1] == "unset")    create_unset   (v[0]);
    else {
        std::string msg("ERROR-P3, unknown data type '"+v[1]+"' in schema statement");
        std::cout << msg << std::endl;
        // The list below must mirror the dispatch chain above, 'unset' included.
        std::cout << "\tKnown data types are 'resource', 'text', 'bool', 'int', 'uint', 'real', 'duration', 'time', 'date', 'unset'\n";
        throw rdf::rdf_exception(rdf::parsing_error, msg);
    }
}
/////////////////////////////////////////////////////////////////////////////////////////
// kb_builder::process_assert_triple
//
/////////////////////////////////////////////////////////////////////////////////////////
void
kb_builder::process_assert_triple(std::string const& line)
{
string_vector_t v;
tokenize(line, v, "(), =\t");
rdf::index_type s=0, p=0, o=0;
if(v[0] == "assert_triple" or v[0] == "expected_result" or v[0] == "failure_result") {
if(v[1].find("bnode:") == 0) s = get_bnode(v[1]);
else s = create_resource(v[1], v[1]);
p = create_resource(v[2], v[2]);
if(v[3].find("bnode:") == 0) o = get_bnode(v[3]);
else o = create_resource(v[3], v[3]);
}
if(v[0] == "assert_triple") {
m_meta_graph_p->insert(s, p, o);
} else if(v[0] == "expected_result") {
add_expected_triple(rdf::index_triple(s, p, o));
} else if(v[0] == "failure_result") {
add_failure_triple(rdf::index_triple(s, p, o));
} else {
std::string msg("ERROR-P3, unknown directive '"+v[0]+"' in assert triples section");
std::cout << msg << std::endl;
std::cout << "\tKnown directives are 'assert_triple', 'expected_result', 'failure_result'\n";
throw rdf::rdf_exception(rdf::parsing_error, msg);
}
};
/////////////////////////////////////////////////////////////////////////////////////////
// kb_builder::process_import_owl
//
/////////////////////////////////////////////////////////////////////////////////////////
void
kb_builder::process_import_owl(std::string const& line)
{
string_vector_t v;
tokenize(line, v, "\"(), =\t");
if(v[0] == "owl_import") {
parser::xml::import_owl_model(v[1], m_meta_graph_p, false);
} else {
std::string msg("ERROR-P3, unknown directive '"+v[0]+"' in owl_import section");
std::cout << msg << std::endl;
std::cout << "\tKnown directives is 'owl_import'\n";
throw rdf::rdf_exception(rdf::parsing_error, msg);
}
};
/////////////////////////////////////////////////////////////////////////////////////////
// kb_builder::process_psearch_db
//
/////////////////////////////////////////////////////////////////////////////////////////
void
kb_builder::process_psearch_db(std::string const& line)
{
string_vector_t v;
char * pEnd;
tokenize(line, v, "{};");
psearch::psearch_db_ptr_type db_p = m_kbase_p->get_psearch_db_ptr();
for(string_vector_t::iterator itor=v.begin(); itor!=v.end(); ++itor) boost::trim(*itor);
if(v[0] == "rule") {
rdf::index_type index=NULL;
unsigned int weight=0;
psearch::category_set_type ex_categories;
psearch::node_set_type c1_nodes;
psearch::node_set_type c2_nodes;
psearch::node_set_type negated_nodes;
for(unsigned int item=1; item<v.size(); ++item) {
string_vector_t v2;
tokenize(v[item], v2, "(), \t");
if(v2[0] == "index") {
index = create_resource(v2[1], v2[1]);
} else if(v2[0] == "ex_category") {
for(unsigned int item2=1; item2<v2.size(); ++item2) {
rdf::index_type idx = create_resource(v2[item2], v2[item2]);
ex_categories.insert(db_p->add_category(idx, idx->get_name()));
}
} else if(v2[0] == "has_c1") {
for(unsigned int item2=1; item2<v2.size(); ++item2) {
rdf::index_type idx = create_resource(v2[item2], v2[item2]);
c1_nodes.insert(db_p->add_node(idx, idx->get_name()));
}
} else if(v2[0] == "has_c2") {
for(unsigned int item2=1; item2<v2.size(); ++item2) {
rdf::index_type idx = create_resource(v2[item2], v2[item2]);
c2_nodes.insert(db_p->add_node(idx, idx->get_name()));
}
} else if(v2[0] == "has_not") {
for(unsigned int item2=1; item2<v2.size(); ++item2) {
rdf::index_type idx = create_resource(v2[item2], v2[item2]);
negated_nodes.insert(db_p->add_node(idx, idx->get_name()));
}
} else if(v2[0] == "weight") {
weight = strtol(v2[1].c_str(), &pEnd, 0);
}
}
db_p->add_rule(index, index->get_name(), weight, ex_categories, c1_nodes, c2_nodes, negated_nodes);
} else if(v[0] == "node") {
rdf::index_type index=NULL;
bool is_skip_pattern_activation=false;
for(unsigned int item=1; item<v.size(); ++item) {
string_vector_t v2;
tokenize(v[item], v2, "(), \t");
if(v2[0] == "index") {
index = create_resource(v2[1], v2[1]);
} else if(v2[0] == "skip_pattern_activation") {
is_skip_pattern_activation = v2[1] == "true";
}
}
psearch::node_index_type node_index = db_p->add_node(index, index->get_name());
db_p->set_node_skip_activation(node_index, is_skip_pattern_activation);
} else {
std::string msg("ERROR-P3, unknown directive '"+v[0]+"' in process_psearch_db");
std::cout << msg << std::endl;
std::cout << "\tKnown directives are: 'pattern'\n";
throw rdf::rdf_exception(rdf::parsing_error, msg);
}
};
/////////////////////////////////////////////////////////////////////////////////////////
// kb_builder::process_psearch_session
//
/////////////////////////////////////////////////////////////////////////////////////////
// Handles one statement of the psearch-session section.
//
// When the builder has no test data map (m_test_data_map_p is not set) the
// statement is reported and ignored. Otherwise the line is split on "{};"; the
// trimmed first piece is the key and must be "session" or start with
// "expected_nsc" / "expected_psc". The remaining pieces, trimmed, are appended
// to the entry of the test data map stored under that key.
//
// Throws rdf::rdf_exception(rdf::parsing_error) for any other key, which
// includes a statement that yields no piece at all.
void
kb_builder::process_psearch_session(std::string const& line)
{
    if(not m_test_data_map_p) {
        std::cout << "ERROR-P4, kb_builder not properly initialized for process_psearch_session section, ignore section" << std::endl;
        return;
    }

    string_vector_t v;
    tokenize(line, v, "{};");
    // An empty token list is treated as an empty (hence unknown) key instead of reading v[0].
    std::string key = v.empty() ? std::string() : boost::trim_copy(v[0]);

    bool const known_key = key == "session"
                        || key.find("expected_nsc") == 0
                        || key.find("expected_psc") == 0;
    if(!known_key) {
        std::string msg("ERROR-P3, unknown directive '"+key+"' in process_psearch_session");
        std::cout << msg << std::endl;
        std::cout << "\tKnown directives are: 'session', 'expected_nsc', 'expected_psc'\n";
        throw rdf::rdf_exception(rdf::parsing_error, msg);
    }

    string_vector_t & properties = (*m_test_data_map_p)[key];
    for(unsigned int item=1; item<v.size(); ++item) {
        properties.push_back(boost::trim_copy(v[item]));
    }
}
/////////////////////////////////////////////////////////////////////////////////////////
// kb_builder::process_graph_internal
//
/////////////////////////////////////////////////////////////////////////////////////////
// Handles one statement of a graph section and inserts the triple it describes
// into graph_p.
//
// The line is split on "()," and every piece is trimmed. The only directive
// recognized is 'triple', which must come with exactly three operands
// (subject, predicate, object). A subject or object starting with "bnode:" is
// resolved with get_bnode, everything else with create_resource.
//
// Any other directive (including a statement that yields no piece at all) is
// reported on stdout and ignored. A 'triple' with a wrong operand count throws
// rdf::rdf_exception(rdf::parsing_error).
void
kb_builder::process_graph_internal(rdf::rdf_graph_ptr_type & graph_p, std::string const& line)
{
    string_vector_t v;
    tokenize(line, v, "(),");
    for(string_vector_t::iterator itor=v.begin(); itor!=v.end(); ++itor) boost::trim(*itor);

    // An empty token list is treated as an empty (hence unknown) directive instead of reading v[0].
    std::string const directive = v.empty() ? std::string() : v[0];
    if(directive != "triple") {
        std::string msg("ERROR-P3, unknown directive '"+directive+"' in process_graph_internal (will ignore)");
        std::cout << msg << std::endl;
        std::cout << "\tKnown directives are: 'triple'\n";
        return;
    }

    if(v.size() != 4) {
        std::string msg("ERROR-P3, triple of incorrect size in process_graph_internal");
        std::cout << msg << std::endl;
        throw rdf::rdf_exception(rdf::parsing_error, msg);
    }

    rdf::index_type s=0, o=0;
    if(v[1].find("bnode:") == 0) s = get_bnode(v[1]);
    else                         s = create_resource(v[1], v[1]);
    rdf::index_type p = create_resource(v[2], v[2]);
    if(v[3].find("bnode:") == 0) o = get_bnode(v[3]);
    else                         o = create_resource(v[3], v[3]);
    graph_p->insert(s, p, o);
}
/////////////////////////////////////////////////////////////////////////////////////////
// kb_builder::process_meta_graph
//
/////////////////////////////////////////////////////////////////////////////////////////
void
kb_builder::process_meta_graph(std::string const& line)
{
rdf::rdf_graph_ptr_type meta_graph_p = m_kbase_p->get_meta_graph();
process_graph_internal(meta_graph_p, line);
};
/////////////////////////////////////////////////////////////////////////////////////////
// kb_builder::process_asserted_graph
//
/////////////////////////////////////////////////////////////////////////////////////////
void
kb_builder::process_asserted_graph(std::string const& line)
{
process_graph_internal(m_asserted_graph_p, line);
};
/////////////////////////////////////////////////////////////////////////////////////////
// kb_builder::process_knowledge_base_section
//
/////////////////////////////////////////////////////////////////////////////////////////
void
kb_builder::process_knowledge_base_section(std::string const& line)
{
string_vector_t v;
tokenize(line, v, "(), =\t");
if(v.size() != 2) throw rdf::rdf_exception(rdf::parsing_error, "ERROR-P1, invalid schema statement");
if(v[0] == "default-explain") m_kbase_p->set_has_explain_info(v[1]=="true" or v[1]=="t" ? true:false);
else if(v[0] == "default-optimization") m_kbase_p->set_optimization_flag(v[1]=="true" or v[1]=="t" ? true:false);
else if(v[0] == "default-lookup-index") m_kbase_p->set_lookup_index_flag(v[1]=="true" or v[1]=="t" ? true:false);
else if(v[0] == "max-rule-visit") m_kbase_p->set_max_rule_visit(boost::lexical_cast<unsigned int>(v[1]));
else {
std::cout << "WARNING: unknown directive in knowledge base configuration section, skipping";
}
};
/////////////////////////////////////////////////////////////////////////////////////////
// kb_builder::create_literal_as_index
//
// 'line' is a copy of the string from which to create the literal resource;
// the literal is taken as the text in double quotes and the remainder of the
// string is assigned to 'rest'.
// Works only for date, time, duration and numbers.
/////////////////////////////////////////////////////////////////////////////////////////
// Builds a literal from the leading parameter of 'line' (extracted with
// parse_param, which also fills 'rest') and returns its index.
//
// The literal kind is guessed from the text, first match wins:
//   date     - 5 to 10 characters with '-' as fifth character
//   time     - 16 to 19 characters containing '-', ' ' and ':'
//   duration - more than 2 characters containing ':'
//   real     - more than 1 character containing '.'
//   uint     - more than 1 character ending with 'u' or 'U'
//   int      - anything else
//
// Throws rdf::rdf_exception(rdf::parsing_error) when the extracted text is empty.
rdf::index_type
kb_builder::create_literal_as_index(std::string line, std::string & rest)
{
    line = parse_param(line, rest);
    unsigned int len = line.size();
    if(len == 0) throw rdf::rdf_exception(rdf::parsing_error, "ERROR-P16, invalid text for literal");

    bool const has_colon = line.find(':') != std::string::npos;

    // date
    if(len > 4 && len < 11 && line[4] == '-') {
        return create_date(line, line);
    }

    // time
    if(len > 15 && len < 20) {
        bool const has_dash  = line.find('-') != std::string::npos;
        bool const has_space = line.find(' ') != std::string::npos;
        if(has_dash && has_space && has_colon) return create_time(line, line);
    }

    // duration
    if(len > 2 && has_colon) {
        return create_duration(line, line);
    }

    // real
    if(len > 1 && line.find('.') != std::string::npos) {
        return create_real(line, line);
    }

    // unsigned int: the suffix is normalized to lower case in the name and
    // stripped from the value
    if(len > 1) {
        char & suffix = line[len-1];
        if(suffix == 'u' || suffix == 'U') {
            suffix = 'u';
            return create_uint(line, line.substr(0, len-1));
        }
    }

    // nothing else matched: plain int
    return create_int(line, line);
}
// Completes a binary expression whose first operand ('first') has already been
// parsed: reads the operator and the second operand from the front of 'str',
// then builds the term with create_binary_term.
//
// The second operand may be a parenthesized sub-expression, a variable
// (starting with '?' or '_'), a quoted literal, or a named resource/literal.
// A closing ')' found after the second operand is removed from 'str'.
//
// Throws rdf::rdf_exception(rdf::parsing_error) when the operator position
// holds a variable, a literal or an expression, or when a variable token
// contains blanks or parentheses.
template<class F>
rule::knowledge_rule::xprsn_ptr_t
kb_builder::create_binary_expression_helper(rule::knowledge_rule_ptr_type & rule_p, F const& first, std::string & str)
{
    // operator: must be a plain name
    std::string op_name = next_token(str, str, " ()");
    switch(op_name[0]) {
    case '?':
    case '_':
    case '\"':
    case '(':
        throw rdf::rdf_exception(rdf::parsing_error, "ERROR-P15, invalid rule syntax, cannot have binary expression with a variable, literal, or another expression as operator.");
    default:
        break;
    }

    // second operand
    boost::trim(str);

    if(str[0] == '(') {
        // nested expression
        str.erase(0, 1);
        rule::knowledge_rule::xprsn_ptr_t rhs_expr = create_expression(rule_p, str);
        boost::trim(str);
        if(str[0] == ')') str.erase(0, 1);
        return create_binary_term(&*rule_p, first, op_name, rhs_expr);
    }

    if(str[0] == '?' || str[0] == '_') {
        // variable
        std::string rhs_var = next_token(str, str, " ()");
        if(rhs_var.find_first_of(" \t()") != std::string::npos) {
            std::string msg("ERROR-P13, invalid rule syntax, variable '"+rhs_var+"' is not valid. Forgot '.'?");
            std::cout << msg << std::endl;
            throw rdf::rdf_exception(rdf::parsing_error, msg);
        }
        boost::trim(str);
        if(str[0] == ')') str.erase(0, 1);
        return create_binary_term(&*rule_p, first, op_name, rhs_var);
    }

    if(str[0] == '\"') {
        // quoted literal
        rdf::index_type rhs_literal = create_literal_as_index(str, str);
        boost::trim(str);
        if(str[0] == ')') str.erase(0, 1);
        return create_binary_term(&*rule_p, first, op_name, rhs_literal);
    }

    // named resource/literal
    std::string rhs_name = next_token(str, str, " ()");
    boost::trim(str);
    if(str[0] == ')') str.erase(0, 1);
    return create_binary_term(&*rule_p, first, op_name, get_index(rhs_name));
}
/////////////////////////////////////////////////////////////////////////////////////////
// kb_builder::create_expression
//
/////////////////////////////////////////////////////////////////////////////////////////
// Parses the expression found at the front of 'str' and returns the expression
// term built for it. 'str' is modified in place as parsing advances (it is
// trimmed and the consumed characters are removed).
// NOTE(review): next_token(str, str, ...) presumably returns the next token and
// writes the unparsed remainder back into 'str' - confirm against next_token.
//
// Shape of the expression is decided by its first term:
//   '('            - nested expression, possibly followed by operator + operand
//   '?' or '_'     - variable, first operand of a binary expression
//   '"'            - quoted literal, first operand of a binary expression
//   known resource - first operand of a binary expression
//   anything else  - operator of a unary expression, followed by its operand
//
// Throws rdf::rdf_exception(rdf::parsing_error) when a variable token contains
// blanks or parentheses.
rule::knowledge_rule::xprsn_ptr_t
kb_builder::create_expression(rule::knowledge_rule_ptr_type & rule_p, std::string & str)
{
boost::trim(str);
///////////////////////////////////////////////////////////
// get the first term of the expression
///////////////////////////////////////////////////////////
if(str[0] == '(') { // oh no, another term!
// drop the '(' and parse the nested expression recursively
str = str.substr(1);
rule::knowledge_rule::xprsn_ptr_t first = create_expression(rule_p, str);
boost::trim(str);
// a ')' right after the nested expression means it was merely parenthesized
if(str[0] == ')') {
str.erase(0, 1);
return first;
}
// otherwise the nested expression is the first operand of a binary expression
return create_binary_expression_helper(rule_p, first, str);
} else if(str[0]=='?' or str[0]=='_') {
// first term is a variable
std::string first = next_token(str, str, " ()");
if(first.find_first_of(" \t()")!=std::string::npos) {
std::string msg("ERROR-P13, invalid rule syntax, variable '"+first+"' is not valid. Forgot '.'?");
std::cout << msg << std::endl;
throw rdf::rdf_exception(rdf::parsing_error, msg);
}
return create_binary_expression_helper(rule_p, first, str);
} else if(str[0] == '\"') {
// first term is a quoted literal
rdf::index_type l = create_literal_as_index(str, str);
return create_binary_expression_helper(rule_p, l, str);
}
// gotta be named literal if binary term or operator if unary term
std::string first = next_token(str, str, " ()");
// check if is resource
if(is_resource(first)) {
// we have a binary term with a resource as first operand
return create_binary_expression_helper(rule_p, get_index(first), str);
} else {
// we have a unary term - get the object
boost::trim(str);
///////////////////////////////////////////////////////////
// It's a unary term
// 'first' is operator,
// finding operand of unary term
///////////////////////////////////////////////////////////
if(str[0] == '(') { // oh no, another term!
// operand is a nested expression
str = str.substr(1);
rule::knowledge_rule::xprsn_ptr_t operant = create_expression(rule_p, str);
boost::trim(str);
// remove the ')' closing the unary term, when present
if(str[0] == ')') str.erase(0, 1);
return create_unary_term(&*rule_p, first, operant);
} else if(str[0]=='?' or str[0]=='_') {
// operand is a variable
std::string operant = next_token(str, str, " ()");
if(operant.find_first_of(" \t()")!=std::string::npos) {
std::string msg("ERROR-P13, invalid rule syntax, variable '"+operant+"' is not valid. Forgot '.'?");
std::cout << msg << std::endl;
throw rdf::rdf_exception(rdf::parsing_error, msg);
}
boost::trim(str);
if(str[0] == ')') str.erase(0, 1);
return create_unary_term(&*rule_p, first, operant);
} else if(str[0] == '\"') {
// operand is a quoted literal
rdf::index_type operant = create_literal_as_index(str, str);
boost::trim(str);
if(str[0] == ')') str.erase(0, 1);
return create_unary_term(&*rule_p, first, operant);
}
// gotta be named resource/literal
std::string operant = next_token(str, str, " ()");
boost::trim(str);
if(str[0] == ')') str.erase(0, 1);
return create_unary_term(&*rule_p, first, get_index(operant));
}
};
/////////////////////////////////////////////////////////////////////////////////////////
// kb_builder::process_knowledge_rule
//
/////////////////////////////////////////////////////////////////////////////////////////
void
kb_builder::process_knowledge_rule(std::string const& line)
{
string_vector_t v;
// std::cout << "kb_builder::process_knowledge_rule('" << line << "'\n";
// get the rule attributes
std::string params, body;
split_at(line, params, body, ":");
tokenize(params, v, "[] =,");
unsigned int sz = v.size();
if(sz == 0) throw rdf::rdf_exception(rdf::parsing_error, "ERROR-P10, invalid rule syntax");
if(sz > 1 and sz%2 == 1) throw rdf::rdf_exception(rdf::parsing_error, "ERROR-P10, invalid rule syntax");
std::string k, name, val;
std::map<std::string, std::string> p;
if(sz == 1) {
name = v[0];
p.insert(std::make_pair("n", name));
} else {
for(string_vector_t::iterator itor=v.begin(); itor!=v.end(); ++itor) {
k = *itor;
if(k == "name") k = "n";
p.insert(std::make_pair(k, *(++itor)));
}
}
rule::rule_type type = rule::query_rule;
if(body.find("->") != std::string::npos) type = rule::forward_chaining_rule;
else if(body.find("<-") != std::string::npos) type = rule::backward_chaining_rule;
name = p.find("n")->second;
rule::knowledge_rule_ptr_type rule_p = m_kbase_p->create_knowledge_rule(type, name);
rule_p->set_rule_text(line);
for(std::map<std::string, std::string>::iterator itor=p.begin(); itor!=p.end(); ++itor) {
k = itor->first;
val = itor->second;
if(k=="salience" or k=="s") rule_p->set_rule_salience(boost::lexical_cast<unsigned int>(val));
if(k=="optimize" or k=="o") rule_p->set_optimization_flag(val=="true" or val=="t" ? true:false);
if(k=="explain_info" or k=="x") rule_p->keep_explain_info(val=="true" or val=="t" ? true:false);
}
// ready for the rule body
switch(type) {
case rule::query_rule: process_for_query (rule_p, body); break;
case rule::forward_chaining_rule: process_for_forward (rule_p, body); break;
case rule::backward_chaining_rule: process_for_backward(rule_p, body); break;
case rule::head_rule_rt: break;
}
};
/////////////////////////////////////////////////////////////////////////////////////////
// kb_builder::process_for_forward
//
/////////////////////////////////////////////////////////////////////////////////////////
// Processes the body of a forward chaining rule: the text is split at "->",
// the left part goes to process_body_terms and the right part to
// process_consequent_terms.
void
kb_builder::process_for_forward(rule::knowledge_rule_ptr_type & rule_p, std::string const& line)
{
    std::string antecedent_text;
    std::string consequent_text;
    split_at(line, antecedent_text, consequent_text, "->");

    process_body_terms(rule_p, antecedent_text);
    process_consequent_terms(rule_p, consequent_text);
}
/////////////////////////////////////////////////////////////////////////////////////////
// kb_builder::process_for_query
//
/////////////////////////////////////////////////////////////////////////////////////////
// Processes the body of a query rule, expected to read
//   SELECT * FROM <terms>    or    SELECT <var> <var> ... FROM <terms>
// (keywords are matched case-insensitively). Listed variables are registered
// with add_select_variable; what follows FROM goes to process_body_terms.
//
// Throws rdf::rdf_exception(rdf::parsing_error) when SELECT or FROM is
// missing, when a selected name does not start with '?' or '_', or when the
// variable list reaches the limit of 100 entries.
void
kb_builder::process_for_query(rule::knowledge_rule_ptr_type & rule_p, std::string line)
{
    // SELECT keyword
    std::string token = next_token(line, line, " ");
    if(!boost::algorithm::iequals(token, "select")) {
        throw rdf::rdf_exception(rdf::parsing_error, "ERROR-P25, invalid rule syntax - Expecting SELECT keyword.");
    }

    token = next_token(line, line, " ");
    if(token == "*") {
        // consume the from keyword
        token = next_token(line, line, " ");
        if(!boost::algorithm::iequals(token, "from")) {
            throw rdf::rdf_exception(rdf::parsing_error, "ERROR-P25, invalid rule syntax - Expecting FROM keyword.");
        }
    } else {
        // explicit variable list, terminated by the FROM keyword
        int seen = 0;
        for(;;) {
            if(boost::algorithm::iequals(token, "from")) break;
            if(++seen >= 100) break;
            if(token[0] != '?' && token[0] != '_') {
                throw rdf::rdf_exception(rdf::parsing_error, "ERROR-P25, invalid rule syntax - Variable must start with \"?\" or \"_\" - error while within SELECT clause.");
            }
            rule_p->add_select_variable(token);
            token = next_token(line, line, " ");
        }
        if(seen == 100) {
            throw rdf::rdf_exception(rdf::parsing_error, "ERROR-P25, invalid rule syntax - Error - too many variables (limit 100 exceeded) or FROM keyword missing in SELECT clause.");
        }
    }

    boost::trim(line);
    process_body_terms(rule_p, line);
}
/////////////////////////////////////////////////////////////////////////////////////////
// kb_builder::process_for_backward
//
/////////////////////////////////////////////////////////////////////////////////////////
// Processes the body of a backward chaining rule: the text is split at "<-",
// the left part is processed as the head term (head_functor) and the right
// part goes to process_body_terms.
void
kb_builder::process_for_backward(rule::knowledge_rule_ptr_type & rule_p, std::string const& line)
{
    std::string head_text;
    std::string body_text;
    split_at(line, head_text, body_text, "<-");

    // the returned term is not used further here
    rule::kterm_ptr_type head_p = process_term(rule_p, head_text, head_functor());
    process_body_terms(rule_p, body_text);
}
/////////////////////////////////////////////////////////////////////////////////////////
// kb_builder::process_body_terms
//
/////////////////////////////////////////////////////////////////////////////////////////
// Processes the body terms of a rule. The text is split on "." and each
// trimmed piece is dispatched on its first character:
//   '('        - body term (body_functor)
//   'n' or 'N' - negated body term (neg_body_functor)
//   '['        - filter, attached to the term processed just before it
//
// Throws rdf::rdf_exception(rdf::parsing_error) when a filter has no preceding
// term to attach to, or when a piece starts with any other character.
void
kb_builder::process_body_terms(rule::knowledge_rule_ptr_type & rule_p, std::string const& line)
{
    string_vector_t pieces;
    tokenize(line, pieces, ".");

    rule::kterm_ptr_type last_term_p;          // term a following filter attaches to
    rule::knowledge_rule::xprsn_ptr_t filter_p;

    for(unsigned int i=0; i<pieces.size(); ++i) {
        std::string term = boost::trim_copy(pieces[i]);
        switch(term[0]) {
        case '(':
            last_term_p = process_term(rule_p, term, body_functor());
            break;

        case 'n':
        case 'N':
            last_term_p = process_term(rule_p, term, neg_body_functor());
            break;

        case '[':
            filter_p = process_filter(rule_p, term);
            if(!last_term_p) {
                throw rdf::rdf_exception(rdf::parsing_error, "ERROR-P11, invalid rule syntax - cannot attach filter to rule term");
            }
            rule_p->set_filter(last_term_p, filter_p);
            last_term_p.reset(); // it's consumed; cannot attach another filter to it
            break;

        default:
            throw rdf::rdf_exception(rdf::parsing_error, "ERROR-P12, invalid rule syntax - body term must start with either '(' or '['");
        }
    }
}
/////////////////////////////////////////////////////////////////////////////////////////
// kb_builder::process_consequent_terms
//
/////////////////////////////////////////////////////////////////////////////////////////
// Processes the consequent terms of a forward chaining rule. The text is split
// on "." and each trimmed piece, which must start with '(', is processed with
// consequent_functor.
//
// Throws rdf::rdf_exception(rdf::parsing_error) when a piece does not start
// with '('.
void
kb_builder::process_consequent_terms(rule::knowledge_rule_ptr_type & rule_p, std::string const& line)
{
    string_vector_t v;
    tokenize(line, v, ".");

    for(string_vector_t::iterator itor=v.begin(); itor!=v.end(); ++itor) {
        std::string term = *itor;
        boost::trim(term);
        if(term[0] != '(') {
            // Filters ('[') are not accepted in the consequent part, so the message names '(' only.
            throw rdf::rdf_exception(rdf::parsing_error, "ERROR-P12, invalid rule syntax - consequent term must start with '('");
        }
        process_term(rule_p, term, consequent_functor());
    }
}
//---------------------------------------------------------------------------------------
/////////////////////////////////////////////////////////////////////////////////////////
// load_knowledge_base
//
/////////////////////////////////////////////////////////////////////////////////////////
// Loads a knowledge base from file 'fname' into a knowledge base created over
// a new rdf graph (rdf::create_rdf_graph with its default arguments).
rule::knowledge_base_ptr_type
load_knowledge_base(std::string const& fname, bool verbose)
{
    // parameter can be set here such as pool_size, triple_size, sessions_map_size
    // leaving to default for now.
    rdf::rdf_graph_ptr_type new_meta_graph_p = rdf::create_rdf_graph();
    rule::knowledge_base_ptr_type kbase_p(new rule::knowledge_base(new_meta_graph_p));

    kb_builder kb_loader(kbase_p, verbose);
    load_knowledge_base_internal(fname, kb_loader, verbose);
    return kbase_p;
}
/////////////////////////////////////////////////////////////////////////////////////////
// load_knowledge_base
//
// use meta_graph_p as the meta data graph for the knowledge base
/////////////////////////////////////////////////////////////////////////////////////////
// Loads a knowledge base from file 'fname'; the knowledge base is created over
// the caller supplied meta_graph_p.
rule::knowledge_base_ptr_type
load_knowledge_base(std::string const& fname, rdf::rdf_graph_ptr_type & meta_graph_p, bool verbose)
{
    rule::knowledge_base_ptr_type kbase_p(new rule::knowledge_base(meta_graph_p));

    kb_builder kb_loader(kbase_p, verbose);
    load_knowledge_base_internal(fname, kb_loader, verbose);
    return kbase_p;
}
/////////////////////////////////////////////////////////////////////////////////////////
// load_knowledge_base
//
// Method for loading knowledge_base with additional asserted_graph to load a
// completed knowledge session from file
/////////////////////////////////////////////////////////////////////////////////////////
// Loads a knowledge base from file 'fname'; the knowledge base is created over
// the caller supplied meta_graph_p and the builder is additionally given
// asserted_graph_p.
rule::knowledge_base_ptr_type
load_knowledge_base(std::string const& fname, rdf::rdf_graph_ptr_type & meta_graph_p, rdf::rdf_graph_ptr_type & asserted_graph_p, bool verbose)
{
    rule::knowledge_base_ptr_type kbase_p(new rule::knowledge_base(meta_graph_p));

    kb_builder kb_loader(kbase_p, asserted_graph_p, verbose);
    load_knowledge_base_internal(fname, kb_loader, verbose);
    return kbase_p;
}
/////////////////////////////////////////////////////////////////////////////////////////
// load_knowledge_base
//
// Method for loading knowledge_base with provisions for test cases.
/////////////////////////////////////////////////////////////////////////////////////////
// Loads a knowledge base from file 'fname' into a knowledge base created over
// a new rdf graph; the builder is given the addresses of expected_triples and
// failure_triples (provisions for test cases).
rule::knowledge_base_ptr_type
load_knowledge_base(std::string const& fname, triple_array_type & expected_triples, triple_array_type & failure_triples, bool verbose)
{
    rdf::rdf_graph_ptr_type new_meta_graph_p = rdf::create_rdf_graph();
    rule::knowledge_base_ptr_type kbase_p(new rule::knowledge_base(new_meta_graph_p));

    kb_builder kb_loader(kbase_p, &expected_triples, &failure_triples, verbose);
    load_knowledge_base_internal(fname, kb_loader, verbose);
    return kbase_p;
}
/////////////////////////////////////////////////////////////////////////////////////////
// load_knowledge_base
//
// Method for loading knowledge_base with provisions for test cases targeted to psearch.
/////////////////////////////////////////////////////////////////////////////////////////
// Loads a knowledge base from file 'fname' into a knowledge base created over
// a new rdf graph; the builder is given the address of test_data_map
// (provisions for psearch test cases).
rule::knowledge_base_ptr_type
load_knowledge_base(std::string const& fname, test_data_map_type & test_data_map, bool verbose)
{
    rdf::rdf_graph_ptr_type new_meta_graph_p = rdf::create_rdf_graph();
    rule::knowledge_base_ptr_type kbase_p(new rule::knowledge_base(new_meta_graph_p));

    kb_builder kb_loader(kbase_p, &test_data_map, verbose);
    load_knowledge_base_internal(fname, kb_loader, verbose);
    return kbase_p;
}
//---------------------------------------------------------------------------------------
/////////////////////////////////////////////////////////////////////////////////////////
// load_knowledge_base_from_buffer
//
/////////////////////////////////////////////////////////////////////////////////////////
// Loads a knowledge base from the text held in mem_buffer into a knowledge
// base created over a new rdf graph (rdf::create_rdf_graph with its default
// arguments).
rule::knowledge_base_ptr_type
load_knowledge_base_from_buffer(std::string const& mem_buffer, bool verbose)
{
    // parameter can be set here such as pool_size, triple_size, sessions_map_size
    // leaving to default for now.
    rdf::rdf_graph_ptr_type new_meta_graph_p = rdf::create_rdf_graph();
    rule::knowledge_base_ptr_type kbase_p(new rule::knowledge_base(new_meta_graph_p));

    kb_builder kb_loader(kbase_p, verbose);
    load_knowledge_base_from_buffer_internal(mem_buffer, kb_loader, verbose);
    return kbase_p;
}
/////////////////////////////////////////////////////////////////////////////////////////
// load_knowledge_base_from_buffer
//
// use meta_graph_p as the meta data graph for the knowledge base
/////////////////////////////////////////////////////////////////////////////////////////
// Loads a knowledge base from the text held in mem_buffer; the knowledge base
// is created over the caller supplied meta_graph_p.
rule::knowledge_base_ptr_type
load_knowledge_base_from_buffer(std::string const& mem_buffer, rdf::rdf_graph_ptr_type & meta_graph_p, bool verbose)
{
    rule::knowledge_base_ptr_type kbase_p(new rule::knowledge_base(meta_graph_p));

    kb_builder kb_loader(kbase_p, verbose);
    load_knowledge_base_from_buffer_internal(mem_buffer, kb_loader, verbose);
    return kbase_p;
}
/////////////////////////////////////////////////////////////////////////////////////////
// load_knowledge_base_from_buffer
//
// Method for loading knowledge_base with additional asserted_graph to load a
// completed knowledge session from file
/////////////////////////////////////////////////////////////////////////////////////////
// Loads a knowledge base from the text held in mem_buffer; the knowledge base
// is created over the caller supplied meta_graph_p and the builder is
// additionally given asserted_graph_p.
rule::knowledge_base_ptr_type
load_knowledge_base_from_buffer(std::string const& mem_buffer, rdf::rdf_graph_ptr_type & meta_graph_p, rdf::rdf_graph_ptr_type & asserted_graph_p, bool verbose)
{
    rule::knowledge_base_ptr_type kbase_p(new rule::knowledge_base(meta_graph_p));

    kb_builder kb_loader(kbase_p, asserted_graph_p, verbose);
    load_knowledge_base_from_buffer_internal(mem_buffer, kb_loader, verbose);
    return kbase_p;
}
//---------------------------------------------------------------------------------------
/////////////////////////////////////////////////////////////////////////////////////////
// load_knowledge_base_internal
//
// main function to load knowledge base
/////////////////////////////////////////////////////////////////////////////////////////
void
load_knowledge_base_internal(std::string const& fname, kb_builder & builder, bool verbose)
{
if(verbose) std::cout << "Reading knowledge base from file " << fname << std::endl;
if(fname.size() == 0) throw rdf::rdf_exception(rdf::parsing_error, "ERROR, invalid file name");
process_file(fname, builder);
post_load_knowledge_base_internal(fname, builder, verbose);
};
/////////////////////////////////////////////////////////////////////////////////////////
// load_knowledge_base_from_buffer_internal
//
// main function to load knowledge base
/////////////////////////////////////////////////////////////////////////////////////////
// Reads the knowledge base text held in mem_buffer through 'builder'
// (process_stream over a string stream, with an empty file name), then runs
// the post-load step.
//
// Throws rdf::rdf_exception(rdf::parsing_error) when mem_buffer is empty or
// the stream over it is not usable.
void
load_knowledge_base_from_buffer_internal(std::string const& mem_buffer, kb_builder & builder, bool verbose)
{
    if(verbose) {
        std::cout << "Reading knowledge base from buffer " << mem_buffer << std::endl;
    }
    if(mem_buffer.empty()) {
        throw rdf::rdf_exception(rdf::parsing_error, "ERROR, invalid mem_buffer");
    }

    // std::istringstream opens in input mode by default
    std::istringstream buffer_stream(mem_buffer);
    if(buffer_stream.fail()) {
        std::cout << "*** Cannot open buffer '" << mem_buffer << "' for reading!\n";
        throw rdf::rdf_exception(rdf::parsing_error, "ERROR, invalid mem_buffer");
    }

    process_stream(buffer_stream, "", builder);
    post_load_knowledge_base_internal("", builder, verbose);
}
/////////////////////////////////////////////////////////////////////////////////////////
// post_load_knowledge_base_internal
//
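// common step run after the knowledge base has been read: activates the
// knowledge rules and, when verbose, prints the rules and psearch patterns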
/////////////////////////////////////////////////////////////////////////////////////////
void
post_load_knowledge_base_internal(std::string const& fname, kb_builder & builder, bool verbose)
{
// compile the rules
rule::knowledge_base_ptr_type kbase_p = builder.get_knowledge_base_ptr();
kbase_p->activate_knowledge_rules(verbose);
if(builder.get_parsed_meta_graph()) kbase_p->get_psearch_db_ptr()->load_from_meta_graph(verbose);
if(verbose) {
rule_priority_queue_type queue;
rule::knowledge_base::knowledge_rule_ptr_const_iterator itor = kbase_p->get_knowledge_rules_begin();
rule::knowledge_base::knowledge_rule_ptr_const_iterator end = kbase_p->get_knowledge_rules_end();
int rules_count = 0;
for(; itor!=end; ++itor) {
queue.push(*itor);
rules_count += 1;
}
std::cout << std::endl << "The knowledge base contains "<< rules_count << " rules, they are (sorted by salience):" << std::endl;
std::cout << std::endl;
while(!queue.empty()) {
rule::knowledge_rule_ptr_type rule_p = queue.top();
std::cout << *rule_p << std::endl;
std::cout << std::endl;
queue.pop();
}
std::cout << std::endl << "The psearch db contains the following patterns:" << std::endl;
kbase_p->get_psearch_db_ptr()->printAllPatterns();
std::cout << std::endl;
}
};
/////////////////////////////////////////////////////////////////////////////////////////
// process_file
//
/////////////////////////////////////////////////////////////////////////////////////////
// Opens file 'fname' for reading and feeds it to process_stream.
//
// Throws rdf::rdf_exception(rdf::parsing_error) when the file cannot be opened.
void
process_file(std::string const& fname, kb_builder & builder)
{
    std::ifstream file_stream(fname.c_str());
    if(file_stream.fail()) {
        std::cout << "*** Cannot open file '" << fname << "' for reading!\n";
        throw rdf::rdf_exception(rdf::parsing_error, "ERROR, invalid file name");
    }
    process_stream(file_stream, fname, builder);
}
/////////////////////////////////////////////////////////////////////////////////////////
// process_stream
//
// Top-level dispatcher: reads logical lines from 'in' and, for each line that starts
// with a known section marker, delegates to the matching handler:
//   *include-file*           -> process_file() on the quoted file name
//   *schema-begin*           -> parse_schema_section()
//   *knowledge-base-begin*   -> parse_knowledge_base_section()
//   *knowledge-rules-begin*  -> parse_knowledge_rule_section()
//   *assert-triples-begin*   -> parse_assert_triples_section()
//   *import-owl-begin*       -> parse_import_owl_section()
//   *psearch-db-begin*       -> parse_psearch_db_section()
//   *psearch-session-begin*  -> parse_psearch_session_section()
//   *meta-graph-begin*       -> parse_meta_graph_section() (also flags the builder)
//   *asserted-graph-begin*   -> parse_asserted_graph_section()
// Lines outside any section that match no marker are ignored.
//
// fname is only used in diagnostics.
//
// An rdf::rdf_exception is reported on std::cout with the current line number and
// file name, then rethrown unchanged. Any other exception is reported and replaced
// by an rdf::rdf_exception (parsing_error).
/////////////////////////////////////////////////////////////////////////////////////////
void
process_stream(std::istream & in, std::string const& fname, kb_builder & builder)
{
    // main control
    std::string line;
    unsigned int lnum=0;
    try {
        while (read_line(in, line, lnum, "")) {
            if(line.find("*include-file*") == 0) {
                std::string name = parse_param(line);
                process_file(name, builder);
            } else if(line.find("*schema-begin*") == 0) {
                parse_schema_section(in, builder, lnum);
            } else if(line.find("*knowledge-base-begin*") == 0) {
                parse_knowledge_base_section(in, builder, lnum);
            } else if(line.find("*knowledge-rules-begin*") == 0) {
                parse_knowledge_rule_section(in, builder, lnum);
            } else if(line.find("*assert-triples-begin*") == 0) {
                parse_assert_triples_section(in, builder, lnum);
            } else if(line.find("*import-owl-begin*") == 0) {
                parse_import_owl_section(in, builder, lnum);
            } else if(line.find("*psearch-db-begin*") == 0) {
                parse_psearch_db_section(in, builder, lnum);
            } else if(line.find("*psearch-session-begin*") == 0) {
                parse_psearch_session_section(in, builder, lnum);
            } else if(line.find("*meta-graph-begin*") == 0) {
                // remember that a meta graph was supplied before parsing it
                builder.set_parsed_meta_graph(true);
                parse_meta_graph_section(in, builder, lnum);
            } else if(line.find("*asserted-graph-begin*") == 0) {
                parse_asserted_graph_section(in, builder, lnum);
            }
        }
    } catch(rdf::rdf_exception const& e) {
        std::cout << "ERROR, exception caught with message: '" << e.what();
        std::cout << "' near line " << lnum << " of file " << fname << std::endl;
        // rethrow the in-flight exception itself; 'throw e;' would throw a copy
        // whose type is exactly rdf_exception, losing any derived type
        throw;
    } catch(...) {
        std::cout << "ERROR, unknown exception caught ";
        std::cout << "near line " << lnum << " of file " << fname << std::endl;
        throw rdf::rdf_exception(rdf::parsing_error, "ERROR, unknown exception!");
    }
}
/////////////////////////////////////////////////////////////////////////////////////////
// parse_knowledge_rule_section
//
// Notifies the builder that a knowledge rule section begins, then passes each logical
// line to builder.process_knowledge_rule() until a line starting with
// "*knowledge-rules-end*" is read, at which point the builder is notified that the
// section ended. Continued physical lines are joined with a single space.
// If the input ends before the end marker, the function returns without the
// end-of-section notification.
//
// lnum is advanced by read_line as lines are consumed.
//
// An rdf::rdf_exception is logged to std::cout and rethrown unchanged; any other
// exception is logged and replaced by an rdf::rdf_exception (parsing_error).
/////////////////////////////////////////////////////////////////////////////////////////
void
parse_knowledge_rule_section(std::istream & in, kb_builder & builder, unsigned int & lnum)
{
    builder.process_knowledge_rule_section_begin();
    std::string line;
    try {
        while (read_line(in, line, lnum, " ")) {
            if(line.find("*knowledge-rules-end*") == 0) {
                builder.process_knowledge_rule_section_end();
                return;
            }
            builder.process_knowledge_rule(line);
        }
    } catch(rdf::rdf_exception const& e) {
        std::cout << "ERROR, exception caught in knowledge rule section. . ." << e << std::endl;
        // rethrow the original exception object rather than a (possibly sliced) copy
        throw;
    } catch(...) {
        std::cout << "ERROR, unknown exception caught in knowledge rule section . . .\n";
        throw rdf::rdf_exception(rdf::parsing_error, "ERROR, unknown exception caught in knowledge rule section!");
    }
}
/////////////////////////////////////////////////////////////////////////////////////////
// parse_knowledge_base_section
//
// Passes each logical line to builder.process_knowledge_base_section() until a line
// starting with "*knowledge-base-end*" is read or the input ends.
//
// lnum is advanced by read_line as lines are consumed.
//
// An rdf::rdf_exception is logged to std::cout and rethrown unchanged; any other
// exception is logged and replaced by an rdf::rdf_exception (parsing_error).
/////////////////////////////////////////////////////////////////////////////////////////
void
parse_knowledge_base_section(std::istream & in, kb_builder & builder, unsigned int & lnum)
{
    std::string line;
    try {
        while (read_line(in, line, lnum, "")) {
            if(line.find("*knowledge-base-end*") == 0) return;
            builder.process_knowledge_base_section(line);
        }
    } catch(rdf::rdf_exception const& e) {
        std::cout << "ERROR, exception caught in knowledge base section. . ." << e << std::endl;
        // rethrow the original exception object rather than a (possibly sliced) copy
        throw;
    } catch(...) {
        std::cout << "ERROR, unknown exception caught in knowledge base section . . .\n";
        throw rdf::rdf_exception(rdf::parsing_error, "ERROR, unknown exception caught in knowledge base section!");
    }
}
/////////////////////////////////////////////////////////////////////////////////////////
// parse_schema_section
//
// Passes each logical line to builder.process_schema_statement() until a line
// starting with "*schema-end*" is read or the input ends.
//
// lnum is advanced by read_line as lines are consumed.
//
// An rdf::rdf_exception is logged to std::cout and rethrown unchanged; any other
// exception is logged and replaced by an rdf::rdf_exception (parsing_error).
/////////////////////////////////////////////////////////////////////////////////////////
void
parse_schema_section(std::istream & in, kb_builder & builder, unsigned int & lnum)
{
    std::string line;
    try {
        while (read_line(in, line, lnum, "")) {
            if(line.find("*schema-end*") == 0) return;
            builder.process_schema_statement(line);
        }
    } catch(rdf::rdf_exception const& e) {
        std::cout << "ERROR, exception caught in schema section. . ." << e << std::endl;
        // rethrow the original exception object rather than a (possibly sliced) copy
        throw;
    } catch(...) {
        std::cout << "ERROR, unknown exception caught in schema section . . .\n";
        throw rdf::rdf_exception(rdf::parsing_error, "ERROR, unknown exception caught in schema section!");
    }
}
/////////////////////////////////////////////////////////////////////////////////////////
// parse_assert_triples_section
//
// Passes each logical line to builder.process_assert_triple() until a line starting
// with "*assert-triples-end*" is read or the input ends.
//
// lnum is advanced by read_line as lines are consumed.
//
// An rdf::rdf_exception is logged to std::cout and rethrown unchanged; any other
// exception is logged and replaced by an rdf::rdf_exception (parsing_error).
/////////////////////////////////////////////////////////////////////////////////////////
void
parse_assert_triples_section(std::istream & in, kb_builder & builder, unsigned int & lnum)
{
    std::string line;
    try {
        while (read_line(in, line, lnum, "")) {
            if(line.find("*assert-triples-end*") == 0) return;
            builder.process_assert_triple(line);
        }
    } catch(rdf::rdf_exception const& e) {
        std::cout << "ERROR, exception caught in assert triple section. . ." << e << std::endl;
        // rethrow the original exception object rather than a (possibly sliced) copy
        throw;
    } catch(...) {
        std::cout << "ERROR, unknown exception caught in assert triple section . . .\n";
        throw rdf::rdf_exception(rdf::parsing_error, "ERROR, unknown exception caught in assert triple section!");
    }
}
/////////////////////////////////////////////////////////////////////////////////////////
// parse_import_owl_section
//
// Passes each logical line to builder.process_import_owl() until a line starting
// with "*import-owl-end*" is read or the input ends.
//
// lnum is advanced by read_line as lines are consumed.
//
// An rdf::rdf_exception is logged to std::cout and rethrown unchanged; any other
// exception is logged and replaced by an rdf::rdf_exception (parsing_error).
/////////////////////////////////////////////////////////////////////////////////////////
void
parse_import_owl_section(std::istream & in, kb_builder & builder, unsigned int & lnum)
{
    std::string line;
    try {
        while (read_line(in, line, lnum, "")) {
            if(line.find("*import-owl-end*") == 0) return;
            builder.process_import_owl(line);
        }
    } catch(rdf::rdf_exception const& e) {
        std::cout << "ERROR, exception caught in import-owl section. . ." << e << std::endl;
        // rethrow the original exception object rather than a (possibly sliced) copy
        throw;
    } catch(...) {
        std::cout << "ERROR, unknown exception caught in import-owl section . . .\n";
        throw rdf::rdf_exception(rdf::parsing_error, "ERROR, unknown exception caught in import-owl section!");
    }
}
/////////////////////////////////////////////////////////////////////////////////////////
// parse_psearch_session_section
//
// Passes each logical line to builder.process_psearch_session() until a line starting
// with "*psearch-session-end*" is read or the input ends.
//
// lnum is advanced by read_line as lines are consumed.
//
// An rdf::rdf_exception is logged to std::cout and rethrown unchanged; any other
// exception is logged and replaced by an rdf::rdf_exception (parsing_error).
/////////////////////////////////////////////////////////////////////////////////////////
void
parse_psearch_session_section(std::istream & in, kb_builder & builder, unsigned int & lnum)
{
    std::string line;
    try {
        while (read_line(in, line, lnum, "")) {
            if(line.find("*psearch-session-end*") == 0) return;
            builder.process_psearch_session(line);
        }
    } catch(rdf::rdf_exception const& e) {
        std::cout << "ERROR, exception caught in parse_psearch_session_section. . ." << e << std::endl;
        // rethrow the original exception object rather than a (possibly sliced) copy
        throw;
    } catch(...) {
        std::cout << "ERROR, unknown exception caught in parse_psearch_session_section . . .\n";
        throw rdf::rdf_exception(rdf::parsing_error, "ERROR, unknown exception caught in parse_psearch_session_section!");
    }
}
/////////////////////////////////////////////////////////////////////////////////////////
// parse_meta_graph_section
//
// Passes each logical line to builder.process_meta_graph() until a line starting
// with "*meta-graph-end*" is read or the input ends.
//
// lnum is advanced by read_line as lines are consumed.
//
// An rdf::rdf_exception is logged to std::cout and rethrown unchanged; any other
// exception is logged and replaced by an rdf::rdf_exception (parsing_error).
/////////////////////////////////////////////////////////////////////////////////////////
void
parse_meta_graph_section(std::istream & in, kb_builder & builder, unsigned int & lnum)
{
    std::string line;
    try {
        while (read_line(in, line, lnum, "")) {
            if(line.find("*meta-graph-end*") == 0) return;
            builder.process_meta_graph(line);
        }
    } catch(rdf::rdf_exception const& e) {
        std::cout << "ERROR, exception caught in parse_meta_graph_section. . ." << e << std::endl;
        // rethrow the original exception object rather than a (possibly sliced) copy
        throw;
    } catch(...) {
        std::cout << "ERROR, unknown exception caught in parse_meta_graph_section . . .\n";
        throw rdf::rdf_exception(rdf::parsing_error, "ERROR, unknown exception caught in parse_meta_graph_section!");
    }
}
/////////////////////////////////////////////////////////////////////////////////////////
// parse_asserted_graph_section
//
// Passes each logical line to builder.process_asserted_graph() until a line starting
// with "*asserted-graph-end*" is read or the input ends.
//
// When the builder has no asserted graph, a message is written to std::cout and the
// section is still consumed up to its end marker, but its lines are discarded.
//
// lnum is advanced by read_line as lines are consumed.
//
// An rdf::rdf_exception is logged to std::cout and rethrown unchanged; any other
// exception is logged and replaced by an rdf::rdf_exception (parsing_error).
/////////////////////////////////////////////////////////////////////////////////////////
void
parse_asserted_graph_section(std::istream & in, kb_builder & builder, unsigned int & lnum)
{
    bool process_lines = true;
    if(not builder.has_asserted_graph()) {
        process_lines = false;
        std::cout << "ERROR, Builder does not have an asserted graph - triples will be ignored" << std::endl;
    }
    std::string line;
    try {
        while (read_line(in, line, lnum, "")) {
            if(line.find("*asserted-graph-end*") == 0) return;
            // keep reading even when ignoring, so the section is fully consumed
            if(process_lines) builder.process_asserted_graph(line);
        }
    } catch(rdf::rdf_exception const& e) {
        std::cout << "ERROR, exception caught in parse_asserted_graph_section. . ." << e << std::endl;
        // rethrow the original exception object rather than a (possibly sliced) copy
        throw;
    } catch(...) {
        std::cout << "ERROR, unknown exception caught in parse_asserted_graph_section . . .\n";
        throw rdf::rdf_exception(rdf::parsing_error, "ERROR, unknown exception caught in parse_asserted_graph_section!");
    }
}
/////////////////////////////////////////////////////////////////////////////////////////
// parse_psearch_db_section
//
// Passes each logical line to builder.process_psearch_db() until a line starting
// with "*psearch-db-end*" is read or the input ends.
//
// lnum is advanced by read_line as lines are consumed.
//
// An rdf::rdf_exception is logged to std::cout and rethrown unchanged; any other
// exception is logged and replaced by an rdf::rdf_exception (parsing_error).
/////////////////////////////////////////////////////////////////////////////////////////
void
parse_psearch_db_section(std::istream & in, kb_builder & builder, unsigned int & lnum)
{
    std::string line;
    try {
        while (read_line(in, line, lnum, "")) {
            if(line.find("*psearch-db-end*") == 0) return;
            builder.process_psearch_db(line);
        }
    } catch(rdf::rdf_exception const& e) {
        std::cout << "ERROR, exception caught in parse_psearch_db_section. . ." << e << std::endl;
        // rethrow the original exception object rather than a (possibly sliced) copy
        throw;
    } catch(...) {
        std::cout << "ERROR, unknown exception caught in parse_psearch_db_section . . .\n";
        throw rdf::rdf_exception(rdf::parsing_error, "ERROR, unknown exception caught in parse_psearch_db_section!");
    }
}
/////////////////////////////////////////////////////////////////////////////////////////
// read_line
//
// Reads the next logical line from 'in' into 'line'.
//   - Each physical line is trimmed; empty lines and lines starting with '#' are skipped.
//   - A line ending with '\' is continued on the following physical line(s); the pieces
//     are trimmed and joined with multi_line_delemit. Blank and '#' lines in the middle
//     of a continuation are skipped as well.
//   - After joining a continued line, every occurrence of "" whose first quote is not
//     preceded by '\' is removed, so that "abc" \ "def" joined with an empty delimiter
//     reads as the single quoted text "abcdef".
//
// lnum is incremented once for every physical line consumed, skipped lines included.
//
// Returns true when a logical line was produced, false when the input is exhausted.
// Throws rdf::rdf_exception (parsing_error) when the input ends in the middle of a
// continuation, or when the "" scan reaches its 50th occurrence.
/////////////////////////////////////////////////////////////////////////////////////////
bool
read_line(std::istream & in, std::string & line, unsigned int & lnum, std::string const& multi_line_delemit)
{
    std::string str;
    while(getline(in, line)) {
        ++lnum;
        boost::trim(line);
        if(line.length()==0 or '#' == line[0]) continue;

        if(*line.rbegin() == '\\') {
            // 'line' is never empty here: it starts non-empty and each round appends a
            // non-empty 'str', so dereferencing rbegin() is safe.
            while(*line.rbegin() == '\\') {
                if(!getline(in, str)) throw rdf::rdf_exception(rdf::parsing_error, "ERROR, end of file reached while reading line");

                // count the physical line before deciding whether to skip it, so that
                // reported line numbers stay in step with the file
                ++lnum;

                // trim first, exactly like the outer loop, so indented comments and
                // whitespace-only lines are recognised as skippable
                boost::trim(str);
                if(str.length()==0 or '#' == str[0]) continue;

                // replace the trailing '\' and glue the next piece on
                (*line.rbegin()) = ' ';
                boost::trim(line);
                line.append(multi_line_delemit).append(str);
            }

            // merge quoted pieces that ended up back to back
            std::string::size_type pos = 0;
            int count=0;
            while((pos=line.find("\"\"", pos)) != std::string::npos and ++count<50) {
                if(pos==0 or line[pos-1] != '\\') line.erase(pos, 2);
                else ++pos;   // first quote is escaped: not a real pair, step over it
            }
            if(count == 50) throw rdf::rdf_exception(rdf::parsing_error, "ERROR, while looking for \"\" (infinite loop!)");
        }
        return true;
    }
    return false;
}
/////////////////////////////////////////////////////////////////////////////////////////
// parse_param
//
// Extracts the first double-quoted argument found in 'line'. A quote preceded by a
// backslash is treated as escaped and does not close the argument.
//
// jpos will be set to the end \" of 'line', so that line[jpos]=='\"'
// consume_quotes indicates if first and last quote are consumed:
//   - false: the text is returned verbatim, delimiting quotes and escapes included;
//   - true : the delimiting quotes are dropped and each escaped quote \" becomes ".
//
// Throws rdf::rdf_exception (parsing_error) when the opening or closing quote is
// missing, or when the closing quote is not among the first 49 quotes examined after
// the opening one.
/////////////////////////////////////////////////////////////////////////////////////////
std::string
parse_param(std::string line, std::string::size_type & jpos, bool consume_quotes)
{
    // find the first "
    std::string::size_type ipos = line.find('\"');
    if(ipos == std::string::npos) throw rdf::rdf_exception(rdf::parsing_error, "ERROR, missing \" to delimit argument");

    // find the next un-escaped one
    jpos = ipos+1;
    int count=0;
    while((jpos=line.find('\"', jpos)) != std::string::npos and ++count<50) {
        if(line[jpos-1] != '\\') break;
        ++jpos;
    }
    if(jpos == std::string::npos or count == 50) throw rdf::rdf_exception(rdf::parsing_error, "ERROR, missing \" to delimit argument");

    if(!consume_quotes) return line.substr(ipos, jpos-ipos+1);

    // consume the quotes
    if(jpos == ipos+1) return "";
    std::string str = line.substr(ipos+1, jpos-ipos-1);

    // now change the escaped quotes into quotes, ie, change '\\\"' into '\"'.
    // The search resumes after each unescaped quote, so exactly one backslash is
    // removed per escaped quote and the text is scanned only once.
    std::string::size_type pos = 0;
    while((pos=str.find("\\\"", pos)) != std::string::npos) {
        str.erase(pos, 1);   // drop the backslash; str[pos] is now the quote
        ++pos;               // continue after that quote
    }
    return str;
}
/////////////////////////////////////////////////////////////////////////////////////////
// parse_param
//
// Same extraction as the position-based overload, but instead of reporting the index
// of the closing '\"', 'rest' is assigned the text following it (empty when the
// closing quote is the last character of 'line').
/////////////////////////////////////////////////////////////////////////////////////////
std::string
parse_param(std::string line, std::string & rest, bool consume_quotes)
{
    std::string::size_type closing = 0;
    std::string param = parse_param(line, closing, consume_quotes);

    // everything after the closing quote, if anything is left
    std::string::size_type const after = closing + 1;
    if(after < line.size()) rest = line.substr(after);
    else rest = "";

    return param;
}
/////////////////////////////////////////////////////////////////////////////////////////
// parse_param
//
// Convenience overload: returns the first double-quoted argument of 'line' with its
// delimiting quotes consumed; whatever follows the argument is disregarded.
/////////////////////////////////////////////////////////////////////////////////////////
std::string
parse_param(std::string line)
{
    // the closing-quote position is required by the callee but of no interest here
    std::string::size_type unused_end = 0;
    return parse_param(line, unused_end, true);
}
/////////////////////////////////////////////////////////////////////////////////////////
// tokenize
//
// tokenize a string and skip delimiters if enclosed within double quotes (like a param)
// Note: when double quotes are used, must be first after delimiters.
/////////////////////////////////////////////////////////////////////////////////////////
void
tokenize(std::string const& str, string_vector_t & tokens, std::string const& delimit)
{
std::string::size_type ipos = 0;
int count=0;
while(ipos!=std::string::npos and (ipos=str.find_first_not_of(delimit, ipos)) != std::string::npos and ++count < 50) {
// std::cout << "first not delimit is str[ipos] is '" << str[ipos] << "' at " << ipos << std::endl;
if(str[ipos] == '\"') {
// got a quoted param
std::string::size_type jpos = 0;
tokens.push_back(parse_param(str.substr(ipos), jpos));
ipos += jpos+1;
if(ipos < str.size()) {
// std::cout << "- after parse param str[ipos] is '" << str[ipos] << "' at " << ipos << std::endl;
} else {
ipos = std::string::npos;
// std::cout << "- took last token as param!\n";
}
} else {
std::string::size_type jpos = str.find_first_of(delimit, ipos);
if(jpos == std::string::npos) {
tokens.push_back(str.substr(ipos));
ipos = std::string::npos;
// std::cout << "- took last token!\n";
} else {
tokens.push_back(str.substr(ipos, jpos - ipos));
ipos = jpos;
// std::cout << "- after taking token, str[ipos] is '" << str[ipos] << "' at " << ipos << std::endl;
}
}
}
if(count == 50) throw rdf::rdf_exception(rdf::parsing_error, "ERROR, while tokenizing (infinite loop!)");
};
/////////////////////////////////////////////////////////////////////////////////////////
// split_at
//
/////////////////////////////////////////////////////////////////////////////////////////
void
split_at(std::string const& str, std::string & s1, std::string & s2, std::string const& d)
{
std::string::size_type ipos = str.find(d);
if(ipos==0 or ipos==std::string::npos) throw rdf::rdf_exception(rdf::parsing_error, "ERROR, invalid syntax");
s1 = str.substr(0, ipos);
s2 = str.substr(ipos+d.size());
};
/////////////////////////////////////////////////////////////////////////////////////////
// next_token
//
// utility function - split next token from the rest
// the double-quotes are not consumed - token will extend untill the end quote
// and token returned with the quotes.
/////////////////////////////////////////////////////////////////////////////////////////
std::string
next_token(std::string str, std::string & rest, std::string const& delimit)
{
std::string::size_type ipos = str.find_first_not_of(delimit, 0);
if(ipos == std::string::npos) throw rdf::rdf_exception(rdf::parsing_error, "ERROR, invalid syntax - cannot get next token");
if(str[ipos] == '\"') {
// got a quoted param as the token
return parse_param(str, rest, false);
}
std::string::size_type jpos = str.find_first_of(delimit, ipos);
if(jpos == std::string::npos) {
rest = "";
return str.substr(ipos);
} else {
rest = str.substr(jpos);
return str.substr(ipos, jpos - ipos);
}
};
}; /* parser namespace */