#include "psearch_db.h"
#include "psearch_query.h"
#include "psearch_session.h"
namespace psearch {
/////////////////////////////////////////////////////////////////////////////////////////
// PSResult::apply_filter
//
/////////////////////////////////////////////////////////////////////////////////////////
void
PSResult::apply_filter(query_params const& params)
{
if(params.get_nbr_categories() == 0) return;
nsc_map_type::iterator itor = m_nsc_map.begin();
nsc_map_type::iterator end = m_nsc_map.end();
while(itor != end) {
if(params.is_keep_session_nodes() and m_psession_p->has_node(itor->first)) {
++itor;
} else {
// check if filter out the node based on categories
node_index_type node_index = itor->first;
bool keep_node = false;
category_const_iterator_type citor = params.get_categories_begin();
category_const_iterator_type cend = params.get_categories_end();
for(; citor!=cend; ++citor) {
category_index_type category_index = *citor;
if(category_index->has_node(node_index)) {
keep_node = true;
break;
}
}
if(not keep_node) {
m_nsc_map.erase(itor++);
} else {
++itor;
}
}
}
};
/////////////////////////////////////////////////////////////////////////////////////////
// PSearchQuery::query
//
/////////////////////////////////////////////////////////////////////////////////////////
psresult_ptr_type
PSearchQuery::query(query_params const& params, bool verbose)
{
query_execution_state query_state;
psc_list_type & psc_list = query_state.psc_list;
if(verbose) {
printQuery();
}
psc_list.clear();
// select all relevant patterns that are related to has_nodes
if(params.select_relevant_patterns_only()) {
select_relevant_patterns(query_state);
} else {
select_all_patterns(query_state);
}
// prune patterns from has_not/has nodes and initialize the score cards
// for those nodes.
prune_patterns_from_has_not_nodes(query_state);
prune_patterns_from_has_nodes(query_state);
if(verbose) {
printf("Query Patterns selected: \n");
printf("------------------------ \n");
psc_list_type::iterator itor = psc_list.begin();
psc_list_type::iterator end = psc_list.end();
for(; itor!=end; ++itor) std::cout << (*itor)->get_pattern_index() << std::endl;
std::cout << std::endl;
}
// collect all nodes from the patterns, update the node score cards, and create the pattern score cards
collect_nodes_from_patterns(params, query_state);
return m_result_p;
}
/////////////////////////////////////////////////////////////////////////////////////////
// PSearchQuery::query
//
/////////////////////////////////////////////////////////////////////////////////////////
psresult_ptr_type
PSearchQuery::query(category_index_type category_index, bool verbose)
{
node_set_const_iterator itor = category_index->get_nodes_iterator();
while(not itor.is_end()) {
node_index_type node_index = itor.get_value_by_value();
bool is_has_node = not m_psession_p->has_negated_node(node_index);
bool is_asserted = m_psession_p->is_asserted_has_node(node_index);
nsc_ptr_type nsc_p = m_result_p->add_nsc(node_index);
nsc_p->set_has_node(is_has_node);
nsc_p->set_asserted_node(is_asserted);
nsc_p->add_weight(0);
itor.next();
}
return m_result_p;
}
/////////////////////////////////////////////////////////////////////////////////////////
// PSearchQuery::is_c1_match
//
/////////////////////////////////////////////////////////////////////////////////////////
bool
PSearchQuery::is_c1_match(pattern_index_type pattern_index)
{
node_const_iterator_type nitor = pattern_index->get_c1_nodes_begin();
node_const_iterator_type nend = pattern_index->get_c1_nodes_end();
for(; nitor!=nend; ++nitor) {
if(not m_psession_p->has_node(*nitor)) return false;
}
return true;
};
/////////////////////////////////////////////////////////////////////////////////////////
// PSearchQuery::select_relevant_patterns
//
/////////////////////////////////////////////////////////////////////////////////////////
void
PSearchQuery::select_relevant_patterns(query_execution_state & query_state)
{
typedef std::tr1::unordered_set<pattern_index_type> working_set_t;
psc_list_type & psc_list = query_state.psc_list;
working_set_t visited_patterns;
node_dual_set_const_iterator itor = m_psession_p->get_nodes_iterator();
while(not itor.is_end()) {
node_index_type node_index = itor.get_value_by_value();
if(not node_index->is_skip_pattern_activation()) {
pattern_const_iterator_type pitor = node_index->get_patterns_begin();
pattern_const_iterator_type pend = node_index->get_patterns_end();
for (; pitor!=pend; ++pitor) {
// pattern_index_type pattern_index = *pitor;
visited_patterns.insert(*pitor);
}
}
itor.next();
}
// push the patterns into the list
working_set_t::iterator pitor = visited_patterns.begin();
working_set_t::iterator pend = visited_patterns.end();
for(; pitor!=pend; ++pitor) {
pattern_index_type pattern_index = *pitor;
psc_ptr_type psc_p(new PatternScoreCard(pattern_index));
psc_list.push_back(psc_p);
bool is_c1 = is_c1_match(pattern_index);
psc_p->set_c1_match(is_c1);
psc_p->set_asserted(false);
}
};
/////////////////////////////////////////////////////////////////////////////////////////
// PSearchQuery::select_all_patterns
//
/////////////////////////////////////////////////////////////////////////////////////////
void
PSearchQuery::select_all_patterns(query_execution_state & query_state)
{
psc_list_type & psc_list = query_state.psc_list;
pattern_list_iterator itor = m_db_p->get_patterns_begin();
pattern_list_iterator end = m_db_p->get_patterns_end();
for(; itor!=end; ++itor) {
pattern_ptr_type pattern_p = *itor;
psc_ptr_type psc_p(new PatternScoreCard(&*pattern_p));
psc_list.push_back(psc_p);
psc_p->set_c1_match(false);
psc_p->set_asserted(false);
}
};
/////////////////////////////////////////////////////////////////////////////////////////
// PSearchQuery::prune_patterns_from_has_not_nodes
//
/////////////////////////////////////////////////////////////////////////////////////////
void
PSearchQuery::prune_patterns_from_has_not_nodes(query_execution_state & query_state)
{
psc_list_type & psc_list = query_state.psc_list;
node_const_iterator_type itor = m_psession_p->get_negated_nodes_begin();
node_const_iterator_type end = m_psession_p->get_negated_nodes_end();
for(; itor!=end; ++itor) {
node_index_type node_index = *itor;
nsc_ptr_type nsc_p = m_result_p->add_nsc(node_index);
nsc_p->set_has_node(false);
nsc_p->set_asserted_node(true);
nsc_p->add_weight(10);
psc_list_type::iterator pitor = psc_list.begin();
psc_list_type::iterator pend = psc_list.end();
while(pitor!=pend) {
if((*pitor)->get_pattern_index()->is_c2_node(node_index)) {
// prune out the pattern
pitor = psc_list.erase(pitor);
} else {
++pitor;
}
}
}
};
/////////////////////////////////////////////////////////////////////////////////////////
// PSearchQuery::prune_patterns_from_has_nodes
//
/////////////////////////////////////////////////////////////////////////////////////////
void
PSearchQuery::prune_patterns_from_has_nodes(query_execution_state & query_state)
{
psc_list_type & psc_list = query_state.psc_list;
node_dual_set_const_iterator itor = m_psession_p->get_nodes_iterator();
while(not itor.is_end()) {
node_index_type node_index = itor.get_value_by_value();
bool is_asserted = m_psession_p->is_asserted_has_node(node_index);
nsc_ptr_type nsc_p = m_result_p->add_nsc(node_index);
nsc_p->set_has_node(true);
nsc_p->set_asserted_node(is_asserted);
nsc_p->add_weight(10);
psc_list_type::iterator pitor = psc_list.begin();
psc_list_type::iterator pend = psc_list.end();
while(pitor!=pend) {
pattern_index_type pattern_index = (*pitor)->get_pattern_index();
if(pattern_index->is_negated_by_node(node_index)) {
// prune out the pattern
pitor = psc_list.erase(pitor);
} else {
++pitor;
}
}
itor.next();
}
};
/////////////////////////////////////////////////////////////////////////////////////////
// PSearchQuery::collect_nodes_from_patterns
//
/////////////////////////////////////////////////////////////////////////////////////////
void
PSearchQuery::collect_nodes_from_patterns(query_params const& params, query_execution_state & query_state)
{
psc_list_type & psc_list = query_state.psc_list;
unsigned int nbr_asserted_nodes = m_psession_p->get_nbr_nodes();
psc_list_type::iterator pitor = psc_list.begin();
psc_list_type::iterator pend = psc_list.end();
for(; pitor!=pend; ++pitor) {
psc_ptr_type psc_p = *pitor;
pattern_index_type pattern_index = psc_p->get_pattern_index();
// for each c2 nodes of the pattern, create/update the node score card
unsigned int nbr_match_nodes = 0;
node_const_iterator_type nitor = pattern_index->get_c2_nodes_begin();
node_const_iterator_type nend = pattern_index->get_c2_nodes_end();
for(; nitor!=nend; ++nitor) {
nsc_ptr_type nsc_p = m_result_p->get_nsc_ptr(*nitor);
if(!nsc_p) {
// create the node score card
nsc_p = m_result_p->add_nsc(*nitor);
nsc_p->add_frequency(1);
nsc_p->set_has_node(true);
nsc_p->set_asserted_node(false);
nsc_p->add_weight(pattern_index->get_weight());
} else {
// update the node score card
if(nsc_p->is_asserted_node()) nbr_match_nodes++;
nsc_p->add_frequency(1);
nsc_p->add_weight(pattern_index->get_weight());
}
}
// update the pattern score card and add to m_result_p
psc_p->set_all_session_nodes_related(nbr_match_nodes == nbr_asserted_nodes);
m_result_p->add_psc(psc_p);
}
// filter the nsc based on the params
m_result_p->apply_filter(params);
};
/////////////////////////////////////////////////////////////////////////////////////////
// PSearchQuery::printQuery
//
/////////////////////////////////////////////////////////////////////////////////////////
void
PSearchQuery::printQuery()
{
std::cout << "\nQuery's asserted selected type are ";
node_const_iterator_type itor = m_psession_p->get_nodes_begin();
node_const_iterator_type end = m_psession_p->get_nodes_end();
for(; itor!=end; ++itor) std::cout << "'" << (*itor)->get_name() << "' ";
std::cout << "\nQuery's inferred selected nodes are: ";
itor = m_psession_p->get_inferred_nodes_begin();
end = m_psession_p->get_inferred_nodes_end();
for(; itor!=end; ++itor) std::cout << "'" << (*itor)->get_name() << "' ";
std::cout << "\nQuery's negated nodes are: ";
itor = m_psession_p->get_negated_nodes_begin();
end = m_psession_p->get_negated_nodes_end();
for(; itor!=end; ++itor) std::cout << "'" << (*itor)->get_name() << "' ";
std::cout << std::endl;
std::cout << std::endl;
};
/////////////////////////////////////////////////////////////////////////////////////////
// whyNode
//
/////////////////////////////////////////////////////////////////////////////////////////
void
whyNode(std::vector<std::string> &explanation, node_index_type node_index, PSearchSession * m_psession_p, psresult_ptr_type m_result_p)
{
explanation.clear();
if(m_result_p->get_nbr_nsc()==0 and m_result_p->get_nbr_psc()==0) return;
char buf[1000], buf2[600];
bool is_asserted = false;
if(m_psession_p->has_node(node_index)) {
snprintf(buf, 200, "Node %s is asserted", node_index->get_index_name().c_str());
explanation.push_back(buf);
is_asserted = true;
}
if(m_psession_p->has_negated_node(node_index)) {
snprintf(buf, 200, "Node %s is negated", node_index->get_index_name().c_str());
explanation.push_back(buf);
return;
}
psc_map_const_iterator pitor = m_result_p->get_psc_ptr_iterator();
while(!pitor.is_end()) {
Pattern const* pattern_p = pitor.get_value_by_value()->get_pattern_index();
if(pattern_p->is_c2_node(node_index)) {
if(is_asserted) {
snprintf(buf, 200, "Node %s is related to pattern %s, weight %d",
node_index->get_index_name().c_str(),
pattern_p->get_index_name().c_str(),
pattern_p->get_weight());
explanation.push_back(std::string(buf));
} else {
buf2[0] = '\0';
node_dual_set_const_iterator itor = m_psession_p->get_nodes_iterator();
while(!itor.is_end()) {
node_index_type idx = itor.get_value_by_value();
if(pattern_p->is_c1_node(idx)) {
sprintf(&buf2[strlen(buf2)], "%s ", idx->get_index_name().c_str());
}
itor.next();
}
if(strlen(buf2) > 0) buf2[strlen(buf2)-1] = '\0';
snprintf(buf, 1000, "Node %s is related to pattern %s, relevant because of nodes [%s], weight %d",
node_index->get_index_name().c_str(),
pattern_p->get_index_name().c_str(),
buf2,
pattern_p->get_weight());
explanation.push_back(buf);
}
}
pitor.next();
}
};
/////////////////////////////////////////////////////////////////////////////////////////
// whyPattern
//
/////////////////////////////////////////////////////////////////////////////////////////
void
whyPattern(std::vector<std::string> &explanation, pattern_index_type pindex, PSearchSession * m_psession_p, psresult_ptr_type m_result_p)
{
explanation.clear();
if(m_result_p->get_nbr_nsc()==0 and m_result_p->get_nbr_psc()==0) return;
char buf[1000], buf2[600];
psc_map_const_iterator pitor = m_result_p->get_psc_ptr_iterator();
while(!pitor.is_end()) {
if(pindex == pitor.get_value_by_value()->get_pattern_index()) {
Pattern const* pattern_p = pitor.get_value_by_value()->get_pattern_index();
node_dual_set_const_iterator itor = m_psession_p->get_nodes_iterator();
while(!itor.is_end()) {
node_index_type idx = itor.get_value_by_value();
if(pattern_p->is_c1_node(idx)) {
sprintf(&buf2[strlen(buf2)], "%s ", idx->get_index_name().c_str());
}
itor.next();
}
if(strlen(buf2) > 0) buf2[strlen(buf2)-1] = '\0';
snprintf(buf, 1000, "Pattern %s is related to nodes [%s], weight %d",
pattern_p->get_index_name().c_str(),
buf2,
pattern_p->get_weight());
explanation.push_back(buf);
return;
}
pitor.next();
}
};
/////////////////////////////////////////////////////////////////////////////////////////
// isResultValid
//
// This checks if the obtained result is valid or not. This is for testing purpose.
/////////////////////////////////////////////////////////////////////////////////////////
bool
isResultValid(std::vector<std::string> &errors, PSearchSession * m_psession_p, psresult_ptr_type m_result_p)
{
errors.clear();
if(m_psession_p->get_nbr_nodes()==0) {
errors.push_back("Error: no results to validate (has_nodes or has_not_nodes is NULL)");
return false;
}
// map of expected results for node score cards and pattern score cards
nsc_map_type xnsc;
psc_map_type xpsc;
computeResults(m_psession_p->get_psearch_db(), m_psession_p, xnsc, xpsc);
// check that the results are the same
bool isok = true;
char buf[1000];
if(m_result_p->m_nsc_map.size() != xnsc.size()) {
snprintf(buf, 1000, "Error: expected to have %d node score cards, but have %d in result", xnsc.size(), m_result_p->m_nsc_map.size());
errors.push_back(buf);
isok = false;
}
if(m_result_p->m_psc_map.size() != xpsc.size()) {
snprintf(buf, 1000, "Error: expected to have %d pattern score cards, but have %d in result", xpsc.size(), m_result_p->m_psc_map.size());
errors.push_back(buf);
isok = false;
}
// check the node score cards
nsc_map_const_iterator nsc_itor = m_result_p->get_nsc_ptr_iterator();
while(!nsc_itor.is_end()) {
nsc_ptr_type nsc_p = nsc_itor.get_value_by_value();
nsc_map_type::const_iterator xnsc_itor = xnsc.find(nsc_p->get_node_index());
if(xnsc_itor == xnsc.end()) {
snprintf(buf, 1000, "Error: result contains score card for node %s which is not expected", nsc_p->get_index_name().c_str());
errors.push_back(buf);
isok = false;
} else {
nsc_ptr_type xnsc_p = xnsc_itor->second;
if(xnsc_p->get_frequency() != nsc_p->get_frequency()) {
snprintf(buf, 1000, "Error: node %s; result frequency is %d, should be %d", nsc_p->get_index_name().c_str(), nsc_p->get_frequency(), xnsc_p->get_frequency());
errors.push_back(buf);
isok = false;
}
if(xnsc_p->get_weight() != nsc_p->get_weight()) {
snprintf(buf, 1000, "Error: node %s; result weight is %d, should be %d", nsc_p->get_index_name().c_str(), nsc_p->get_weight(), xnsc_p->get_weight());
errors.push_back(buf);
isok = false;
}
if(xnsc_p->is_asserted_node() != nsc_p->is_asserted_node()) {
snprintf(buf, 1000, "Error: node %s; expected result does not match on is_asserted_node!", nsc_p->get_index_name().c_str());
errors.push_back(buf);
isok = false;
}
if(xnsc_p->is_has_node() != nsc_p->is_has_node()) {
snprintf(buf, 1000, "Error: node %s; expected result does not match on is_has_node!", nsc_p->get_index_name().c_str());
errors.push_back(buf);
isok = false;
}
xnsc.erase(nsc_p->get_node_index());
}
nsc_itor.next();
}
// check for any node score cards that was not found in result
for (nsc_map_type::iterator itor = xnsc.begin(); itor != xnsc.end(); ++itor) {
snprintf(buf, 1000, "Error: result is missing score card for node %s which is expected", itor->first->get_index_name().c_str());
errors.push_back(buf);
isok = false;
}
// check the pattern score cards
psc_map_const_iterator psc_itor = m_result_p->get_psc_ptr_iterator();
while(!psc_itor.is_end()) {
psc_ptr_type psc_p = psc_itor.get_value_by_value();
psc_map_type::const_iterator xpsc_itor = xpsc.find(psc_p->get_pattern_index());
if(xpsc_itor == xpsc.end()) {
snprintf(buf, 1000, "Error: result contains score card for pattern %s which is not expected", psc_p->get_index_name().c_str());
errors.push_back(buf);
isok = false;
} else {
psc_ptr_type xpsc_p = xpsc_itor->second;
if(xpsc_p->is_all_session_nodes_related() != psc_p->is_all_session_nodes_related()) {
snprintf(buf, 1000, "Error: pattern %s; expected result does not match on is_all_session_nodes_related!", psc_p->get_index_name().c_str());
errors.push_back(buf);
isok = false;
}
if(xpsc_p->is_asserted() != psc_p->is_asserted()) {
snprintf(buf, 1000, "Error: pattern %s; expected result does not match on is_asserted!", psc_p->get_index_name().c_str());
errors.push_back(buf);
isok = false;
}
if(xpsc_p->is_c1_match() != psc_p->is_c1_match()) {
snprintf(buf, 1000, "Error: pattern %s; expected result does not match on c1_match!", psc_p->get_index_name().c_str());
errors.push_back(buf);
isok = false;
}
xpsc.erase(psc_p->get_pattern_index());
}
psc_itor.next();
}
// check for any node score cards that was not found in result
for (psc_map_type::iterator itor = xpsc.begin(); itor != xpsc.end(); ++itor) {
snprintf(buf, 1000, "Error: result is missing score card for pattern %s which is expected", itor->first->get_index_name().c_str());
errors.push_back(buf);
isok = false;
}
return isok;
};
/////////////////////////////////////////////////////////////////////////////////////////
// computeResults
//
// Compute the result using a naive implementation.
// Used for comparing performance of implementation and to validate the result
/////////////////////////////////////////////////////////////////////////////////////////
void
computeResults(PSearchDB const* db_p, PSearchSession const* psession_p, nsc_map_type & xnsc, psc_map_type & xpsc)
{
// put in results for negated nodes since will not show up via activated patterns
node_set_const_iterator nitor = psession_p->get_negated_nodes_iterator();
while(!nitor.is_end()) {
node_index_type node_index = nitor.get_value_by_value();
nsc_ptr_type nsc_p(new NodeScoreCard(node_index));
nsc_p->set_has_node(false);
nsc_p->set_asserted_node(true);
nsc_p->add_weight(10);
xnsc.insert(std::make_pair(node_index, nsc_p));
nitor.next();
}
// put the results for asserted nodes, will update the frequency when going thru the patterns
node_dual_set_const_iterator itor = psession_p->get_nodes_iterator();
while(!itor.is_end()) {
node_index_type node_index = itor.get_value_by_value();
nsc_ptr_type nsc_p(new NodeScoreCard(node_index));
nsc_p->set_has_node(true);
nsc_p->set_asserted_node(true);
nsc_p->add_weight(10);
xnsc.insert(std::make_pair(node_index, nsc_p));
itor.next();
}
// now the patterns; the hard way - go through all of them
pattern_list_iterator pitor = db_p->get_patterns_begin();
pattern_list_iterator pend = db_p->get_patterns_end();
for(; pitor!=pend; ++pitor) {
pattern_ptr_type pattern_p = *pitor;
pattern_index_type pattern_index = &*pattern_p;
// check if pattern is negated by the has_not_nodes
bool negated = false;
nitor = psession_p->get_negated_nodes_iterator();
while(!nitor.is_end()) {
node_index_type node_index = nitor.get_value_by_value();
if(pattern_index->is_c2_node(node_index)) {
negated = true;
break;
}
nitor.next();
}
if(negated) continue;
// check if pattern is negated by the has_nodes
// and calculate nbr of session's has nodes that are c2 related to pattern
size_t nmatch=0;
itor = psession_p->get_nodes_iterator();
while(!itor.is_end()) {
node_index_type node_index = itor.get_value_by_value();
if(pattern_index->is_negated_by_node(node_index)) {
negated = true;
break;
}
if(pattern_index->is_c2_node(node_index)) nmatch++;
itor.next();
}
if(negated) continue;
// check if pattern should be activated
bool is_asserted = false;
node_index_type node_index = db_p->get_node_index(pattern_index->get_rdf_index());
if(node_index and psession_p->has_node(node_index)) is_asserted = true;
bool is_c1_match = true;
bool has_c1_match = false;
nitor = pattern_index->get_c1_nodes_iterator();
while(!nitor.is_end()) {
node_index_type node_index = nitor.get_value_by_value();
if(not psession_p->has_node(node_index)) {
is_c1_match = false;
} else {
if(not node_index->is_skip_pattern_activation()) has_c1_match = true;
}
nitor.next();
}
if(not has_c1_match) is_c1_match = false;
//* if(is_asserted or is_c1_match or (not psession_p->is_strictly_relevant_filter() and has_c1_match)) {
if(is_asserted or is_c1_match or has_c1_match) {
// keep the pattern if filter is not strictly related or if all the has nodes are related
psc_ptr_type psc_p(new PatternScoreCard(pattern_index));
psc_p->set_all_session_nodes_related(nmatch==psession_p->get_nbr_nodes());
psc_p->set_asserted(is_asserted);
psc_p->set_c1_match(is_c1_match);
xpsc.insert(std::make_pair(pattern_index, psc_p));
// now that we are keeping the pattern, let's update the node score cards
nitor = pattern_index->get_c2_nodes_iterator();
while(!nitor.is_end()) {
node_index_type node_index = nitor.get_value_by_value();
// update node score card
nsc_map_type::iterator nsc_itor = xnsc.find(node_index);
if(nsc_itor == xnsc.end()) {
nsc_ptr_type nsc_p(new NodeScoreCard(node_index));
nsc_p->add_frequency(1);
nsc_p->set_has_node(true);
nsc_p->set_asserted_node(false);
nsc_p->add_weight(pattern_index->get_weight());
xnsc.insert(std::make_pair(node_index, nsc_p));
} else {
nsc_ptr_type nsc_p = nsc_itor->second;
nsc_p->add_frequency(1);
nsc_p->add_weight(pattern_index->get_weight());
}
nitor.next();
}
}
}
};
}; /* psearch namespace */