#include "owl_import.h"
namespace parser {
namespace xml {
/////////////////////////////////////////////////////////////////////////////////////////
// rdf_handler::error
//
// Overrides of the SAX ErrorHandler interface
/////////////////////////////////////////////////////////////////////////////////////////
void
rdf_handler::error(const SAXParseException& e)
{
std::cout << "\nERROR at file " << strx(e.getSystemId())
<< ", line " << e.getLineNumber()
<< ", char " << e.getColumnNumber()
<< "\n Message: " << strx(e.getMessage()) << std::endl;
};
/////////////////////////////////////////////////////////////////////////////////////////
// rdf_handler::fatalError
//
// Overrides of the SAX ErrorHandler interface
/////////////////////////////////////////////////////////////////////////////////////////
void
rdf_handler::fatalError(const SAXParseException& e)
{
std::cout << "\nFatal Error at file " << strx(e.getSystemId())
<< ", line " << e.getLineNumber()
<< ", char " << e.getColumnNumber()
<< "\n Message: " << strx(e.getMessage()) << std::endl;
};
/////////////////////////////////////////////////////////////////////////////////////////
// rdf_handler::warning
//
// Overrides of the SAX ErrorHandler interface
/////////////////////////////////////////////////////////////////////////////////////////
void
rdf_handler::warning(const SAXParseException& e)
{
std::cout << "\nWarning at file " << strx(e.getSystemId())
<< ", line " << e.getLineNumber()
<< ", char " << e.getColumnNumber()
<< "\n Message: " << strx(e.getMessage()) << std::endl;
};
/////////////////////////////////////////////////////////////////////////////////////////
// rdf_handler::characters
//
// Overrides of the SAX DocumentHandler interface
/////////////////////////////////////////////////////////////////////////////////////////
// Handles character data found inside the element currently being parsed.
//
// The text is transcoded, limited to at most 'length' characters, and trimmed.
// Empty text, or text received while no element is on the stack, is ignored.
// Otherwise the text becomes the object of the state on top of the stack:
//   - when that state's data_type is 1, the text is parsed as an int and
//     stored as a literal named after the text itself;
//   - otherwise it is stored as a literal named "top:text_<graph size>".
//
// Throws rdf::rdf_exception(rdf::parsing_error) when data_type is 1 and the
// text is not a valid integer.
//
// NOTE(review): each call overwrites the object of the current state; if the
// parser delivers one text node in several chunks only the last one is kept -
// confirm whether that can happen for the documents being imported.
void
rdf_handler::characters(XMLCh const* const chars, unsigned int const length)
{
  // 'length' counts XMLCh units, not bytes of the transcoded string. Take the
  // transcoded string as-is and only then apply 'length' as an upper bound, so
  // that we never read past the end of a transcoded string that is shorter.
  strx transcoded(chars);
  std::string str(transcoded.localForm());
  if(str.size() > length) str.resize(length);

  boost::trim(str);
  if(str.empty()) return;
  if(m_stack.empty()) return;

  parse_event_state & current = m_stack.back();
  if(current.data_type == 1) {
    int value = 0;
    try {
      value = boost::lexical_cast<int>(str);
    } catch(boost::bad_lexical_cast const&) {
      // Report malformed numbers the same way startElement reports bad tags,
      // instead of letting them surface as an "unknown exception".
      std::string msg = "ERROR: characters: expecting an integer value but have: "+str;
      std::cout << msg << std::endl;
      throw rdf::rdf_exception(rdf::parsing_error, msg);
    }
    current.object = m_graph_p->create_literal_as_index(str, value);
  } else {
    current.object = m_graph_p->create_literal_as_index("top:text_"+boost::lexical_cast<std::string>(m_graph_p->size()), str);
  }
}
/////////////////////////////////////////////////////////////////////////////////////////
// rdf_handler::endDocument
//
// Overrides of the SAX DocumentHandler interface
/////////////////////////////////////////////////////////////////////////////////////////
// Called when the end of the document is reached.
// Currently a no-op: no action is taken at the end of the document.
void
rdf_handler::endDocument()
{
}
/////////////////////////////////////////////////////////////////////////////////////////
// rdf_handler::endElement
//
// Overrides of the SAX DocumentHandler interface
/////////////////////////////////////////////////////////////////////////////////////////
// Closes the element on top of the parse stack.
//
// Unless the closed state is a collection, or its object is rdf:Description,
// its (subject, predicate, object) triple is inserted into the graph. The
// state is then popped. If the closed state came from a class event and a
// parent state remains:
//   - a collection parent receives (parent.subject, parent.predicate,
//     closed.subject), except when the closed subject is owl:Thing;
//   - any other parent takes the closed subject as its object, if it has
//     no object yet.
// Does nothing when the stack is empty.
void
rdf_handler::endElement(XMLCh const* const name)
{
  strx tag(name);
  std::string elm_name(tag.localForm());
  if(m_verbose) {
    std::cout << "\nrdf_handler::endElement: called with " << elm_name << std::endl;
  }
  if(m_stack.empty()) return;

  // Take a copy rather than a reference: the state is still needed after
  // it has been popped off the stack.
  parse_event_state closed = m_stack.back();

  if(!closed.is_collection_parse_type && closed.object != m_rdf_description) {
    if(m_verbose) std::cout << "endElement: " << closed << std::endl;
    m_graph_p->insert(closed.subject, closed.predicate, closed.object);
  }
  m_stack.pop_back();

  if(closed.is_collection_parse_type) {
    // The collection itself is complete; its items were added as they closed.
    if(m_verbose) std::cout << "endElement: collection completed." << std::endl;
    return;
  }

  // Only class states feed something back to a remaining parent.
  if(!closed.from_class_event || m_stack.empty()) return;

  parse_event_state & parent = m_stack.back();
  if(!parent.is_collection_parse_type) {
    if(!parent.object) parent.object = closed.subject;
    return;
  }

  // The closed class is an item of the parent's collection.
  if(closed.subject != m_owl_thing) {
    if(m_verbose) std::cout << "endElement: adding to collection: "
      << rdf::index_triple(parent.subject, parent.predicate, closed.subject) << std::endl;
    m_graph_p->insert(parent.subject, parent.predicate, closed.subject);
  }
}
/////////////////////////////////////////////////////////////////////////////////////////
// rdf_handler::processingInstruction
//
// Overrides of the SAX DocumentHandler interface
/////////////////////////////////////////////////////////////////////////////////////////
// Handles a processing instruction. Nothing is imported from it; in verbose
// mode the target, and the data when present, are echoed to std::cout.
void
rdf_handler::processingInstruction(XMLCh const* const target, XMLCh const* const data)
{
  if(!is_verbose()) return;

  std::cout << "rdf_handler::processingInstruction: called with target " << strx(target);
  if(data) {
    std::cout << " and " << strx(data);
  }
  std::cout << std::endl;
}
/////////////////////////////////////////////////////////////////////////////////////////
// rdf_handler::startDocument
//
// Overrides of the SAX DocumentHandler interface
/////////////////////////////////////////////////////////////////////////////////////////
void
rdf_handler::startDocument()
{
// std::cout << "rdf_handler::startDocument: called\n";
m_stack.reserve(100);
m_rdf_type = m_graph_p->create_resource_as_index("rdf:type");
m_owl_thing = m_graph_p->create_resource_as_index("owl:Thing");
m_rdf_description = m_graph_p->create_resource_as_index("rdf:Description");
m_top_label = m_graph_p->create_resource_as_index("top:label");
// some defensive measures, keep tag names that are known
m_known_class_name.insert("rdfs:Class");
m_known_class_name.insert("owl:AllDifferent");
m_known_class_name.insert("owl:AnnotationProperty");
m_known_class_name.insert("owl:Class");
m_known_class_name.insert("owl:DataRange");
m_known_class_name.insert("owl:DatatypeProperty");
m_known_class_name.insert("owl:DeprecatedClass");
m_known_class_name.insert("owl:DeprecatedProperty");
m_known_class_name.insert("owl:FunctionalProperty");
m_known_class_name.insert("owl:InverseFunctionalProperty");
m_known_class_name.insert("owl:Nothing");
m_known_class_name.insert("owl:ObjectProperty");
m_known_class_name.insert("owl:Ontology");
m_known_class_name.insert("owl:OntologyProperty");
m_known_class_name.insert("owl:Restriction");
m_known_class_name.insert("owl:SymmetricProperty");
m_known_class_name.insert("owl:Thing");
m_known_class_name.insert("owl:TransitiveProperty");
m_known_class_name.insert("rdf:Property");
m_known_predicate_name.insert("owl:allValuesFrom");
m_known_predicate_name.insert("owl:backwardCompatibleWith");
m_known_predicate_name.insert("owl:cardinality");
m_known_predicate_name.insert("owl:complementOf");
m_known_predicate_name.insert("owl:differentFrom");
m_known_predicate_name.insert("owl:disjointWith");
m_known_predicate_name.insert("owl:distinctMembers");
m_known_predicate_name.insert("owl:equivalentClass");
m_known_predicate_name.insert("owl:equivalentProperty");
m_known_predicate_name.insert("owl:hasValue");
m_known_predicate_name.insert("owl:imports");
m_known_predicate_name.insert("owl:incompatibleWith");
m_known_predicate_name.insert("owl:intersectionOf");
m_known_predicate_name.insert("owl:inverseOf");
m_known_predicate_name.insert("owl:maxCardinality");
m_known_predicate_name.insert("owl:minCardinality");
m_known_predicate_name.insert("owl:oneOf");
m_known_predicate_name.insert("owl:onProperty");
m_known_predicate_name.insert("owl:priorVersion");
m_known_predicate_name.insert("owl:sameAs");
m_known_predicate_name.insert("owl:someValuesFrom");
m_known_predicate_name.insert("owl:unionOf");
m_known_predicate_name.insert("owl:versionInfo");
};
/////////////////////////////////////////////////////////////////////////////////////////
// rdf_handler::startElement
//
// Overrides of the SAX DocumentHandler interface
/////////////////////////////////////////////////////////////////////////////////////////
// Opens a new element and pushes the matching state on the parse stack.
//
// The first rdf:RDF element is consumed for its namespace declarations only:
// xml:base is remembered, every "<something>:<prefix>" attribute is stored as
// a reverse mapping (namespace value -> "<prefix>:"), and the short name of
// the "<xml:base>#" namespace is resolved (registered as "" when unknown).
//
// Any other element is either a class tag (when expect_class() says so) or a
// predicate tag. A class tag pushes (subject from attributes, rdf:type,
// resource of the tag); a predicate tag pushes (parent subject, resource of
// the tag, resource from attributes) together with its data type.
//
// Throws rdf::rdf_exception(rdf::parsing_error) when the tag fails
// validate_tag, or when a predicate tag arrives with an empty stack.
void
rdf_handler::startElement(XMLCh const* const name, AttributeList & attributes)
{
  strx tag(name);
  std::string elm_name(tag.localForm());

  // Gather the attributes as name -> value.
  str_map_type attrs;
  unsigned int attr_count = attributes.getLength();
  for(unsigned int i = 0; i < attr_count; ++i) {
    strx attr_name(attributes.getName(i));
    strx attr_value(attributes.getValue(i));
    attrs.insert(str_map_type::value_type(attr_name.localForm(), attr_value.localForm()));
  }

  if(!m_namespace_done && elm_name == "rdf:RDF") {
    m_namespace_done = true;

    // Build the reverse mapping: namespace value -> "<prefix>:".
    for(str_map_type::const_iterator it = attrs.begin(); it != attrs.end(); ++it) {
      std::string const attr_name = it->first;
      std::string const attr_value = it->second;

      if(attr_name == "xml:base") {
        m_model_xml_base = attr_value;
        continue;
      }
      // There is no short name for the default xml namespace.
      if(attr_name == "xmlns") continue;

      std::string::size_type const colon = attr_name.find(':');
      if(colon == std::string::npos) {
        std::cout << "ERROR (WARNING): invalid ns attribute " << attr_name << std::endl;
        continue;
      }
      // The short name always ends with ':' and is therefore never empty.
      std::string const short_name = attr_name.substr(colon+1) + ":";
      m_xml_ns_rmap.insert(str_map_type::value_type(attr_value, short_name));
    }

    // Resolve the short name used for the model's own namespace.
    std::string xmlns_base = m_model_xml_base+"#";
    str_map_type::const_iterator found = m_xml_ns_rmap.find(xmlns_base);
    if(found != m_xml_ns_rmap.end()) {
      m_xmlns = found->second;
    } else {
      m_xmlns = "";
      m_xml_ns_rmap.insert(str_map_type::value_type(xmlns_base, m_xmlns));
    }
    return;
  }

  if(expect_class()) {
    if(!validate_tag(elm_name, true)) {
      std::string msg = "ERROR-O1: startElement: invalid tag - expecting class tag but have: "+elm_name;
      std::cout << msg << std::endl;
      throw rdf::rdf_exception(rdf::parsing_error, msg);
    }
    parse_event_state class_state(get_subject(attrs), m_rdf_type, get_resource(elm_name), true, false);
    m_stack.push_back(class_state);
    if(m_verbose) std::cout << "startElement: " << class_state << std::endl;
    return;
  }

  // From here on the element must be a predicate of the enclosing class.
  if(!validate_tag(elm_name, false)) {
    std::string msg = "ERROR-O1: startElement: invalid tag - expecting predicate tag but have: "+elm_name;
    std::cout << msg << std::endl;
    throw rdf::rdf_exception(rdf::parsing_error, msg);
  }
  if(m_stack.empty()) {
    std::string msg = "ERROR-O1: startElement: invalid state - having predicate w/o parent class state!";
    std::cout << msg << std::endl;
    throw rdf::rdf_exception(rdf::parsing_error, msg);
  }

  // 'owner' is only read before the push below, which may reallocate the stack.
  parse_event_state & owner = m_stack.back();
  parse_event_state predicate_state(owner.subject, get_resource(elm_name), get_resource(attrs), false, has_collection_parse_type(attrs));
  predicate_state.data_type = get_data_type(attrs);
  m_stack.push_back(predicate_state);
  if(m_verbose) std::cout << "startElement: " << predicate_state << std::endl;
}
/////////////////////////////////////////////////////////////////////////////////////////
// process_imported_owl_model
//
// Utility function to cleanup and print info about imported model
/////////////////////////////////////////////////////////////////////////////////////////
// Post-processes an imported model so that the asserted class hierarchy is
// rooted at owl:Thing, and optionally prints the resulting graph.
//
//   1. Every subject typed owl:Class that is not a bnode and has no non-bnode
//      rdfs:subClassOf parent receives (subject, rdfs:subClassOf, owl:Thing).
//   2. Every subject typed owl:Restriction that has no rdfs:subClassOf triple
//      at all receives (subject, rdfs:subClassOf, owl:Thing) and
//      (subject, rdfs:subClassOf, owl:Class).
//   3. When 'verbose' is set, all triples and their count go to std::cout.
//
// NOTE(review): triples are inserted while an index iterator over the same
// graph is live; this presumably relies on rdf_graph iterators tolerating
// insertion - confirm against the rdf_graph implementation.
void
process_imported_owl_model(rdf::rdf_graph_ptr_type & graph_p, bool verbose)
{
  rdf::index_type rdf_type = graph_p->create_resource_as_index("rdf:type");
  rdf::index_type owl_Class = graph_p->create_resource_as_index("owl:Class");
  rdf::index_type owl_Restriction = graph_p->create_resource_as_index("owl:Restriction");
  rdf::index_type rdfs_subClassOf = graph_p->create_resource_as_index("rdfs:subClassOf");
  rdf::index_type owl_Thing = graph_p->create_resource_as_index("owl:Thing");

  // Pass 1: named classes whose parents (if any) are all bnodes.
  rdf::rdf_graph::index_iterator cursor = graph_p->find_index(rdf::all_subjects(), rdf_type, owl_Class);
  for(; !cursor.is_end(); cursor.next()) {
    rdf::index_type cls = cursor.get_triple().get_subject();
    if(cls->is_bnode()) continue;

    bool named_parent_found = false;
    rdf::rdf_graph::index_iterator parents = graph_p->find_index(cls, rdfs_subClassOf, rdf::all_objects());
    for(; !named_parent_found && !parents.is_end(); parents.next()) {
      rdf::index_type candidate = parents.get_triple().get_object();
      if(!candidate->is_bnode()) named_parent_found = true;
    }
    if(!named_parent_found) {
      graph_p->insert(cls, rdfs_subClassOf, owl_Thing);
    }
  }

  // Pass 2: restrictions without any rdfs:subClassOf statement.
  cursor = graph_p->find_index(rdf::all_subjects(), rdf_type, owl_Restriction);
  for(; !cursor.is_end(); cursor.next()) {
    rdf::index_type restriction = cursor.get_triple().get_subject();
    if(graph_p->contains(restriction, rdfs_subClassOf, rdf::all_objects())) continue;
    graph_p->insert(restriction, rdfs_subClassOf, owl_Thing);
    graph_p->insert(restriction, rdfs_subClassOf, owl_Class);
  }

  if(!verbose) return;

  // Dump every statement followed by the total.
  unsigned int count = 0;
  std::cout << "The graph contains the following statements:" << std::endl;
  for(rdf::rdf_graph::iterator all = graph_p->find(); !all.is_end(); all.next()) {
    std::cout << "\t" << all.get_triple() << std::endl;
    ++count;
  }
  std::cout << "\nThe graph contains " << count << " statements.\n";
}
/////////////////////////////////////////////////////////////////////////////////////////
// import_owl_model
//
// main function to import owl model - import to the provided graph
/////////////////////////////////////////////////////////////////////////////////////////
// Imports the owl model stored in file 'fname' into the provided graph.
//
// Initializes the XML platform, parses the file through process_file(),
// terminates the XML platform, then runs process_imported_owl_model() on the
// graph. Returns 'graph_p'.
//
// Throws rdf::rdf_exception(rdf::parsing_error) when 'fname' is empty or the
// XML platform cannot be initialized. Exceptions raised by process_file()
// propagate to the caller after the XML platform has been terminated.
rdf::rdf_graph_ptr_type
import_owl_model(std::string const& fname, rdf::rdf_graph_ptr_type & graph_p, bool verbose)
{
  if(verbose) std::cout << "Importing owl model from file " << fname << std::endl;
  if(fname.size() == 0) throw rdf::rdf_exception(rdf::parsing_error, "ERROR, invalid file name");

  // Initialize the XML4C2 system
  try {
    XMLPlatformUtils::Initialize();
  } catch (const XMLException& toCatch) {
    std::cout << "ERROR: XMLException caught while initializing library, message: "
      << strx(toCatch.getMessage()) << std::endl;
    throw rdf::rdf_exception(rdf::parsing_error, "ERROR: XMLException caught while initializing library");
  }

  // Every successful Initialize() must be matched by a Terminate(), also when
  // parsing fails; otherwise the platform stays initialized after an error.
  try {
    process_file(fname, graph_p, verbose);
  } catch(...) {
    XMLPlatformUtils::Terminate();
    throw;
  }
  XMLPlatformUtils::Terminate();

  process_imported_owl_model(graph_p, verbose);
  return graph_p;
}
/////////////////////////////////////////////////////////////////////////////////////////
// import_owl_model
//
// main function to import owl model
/////////////////////////////////////////////////////////////////////////////////////////
// Imports the owl model stored in file 'fname' into a newly created graph and
// returns that graph. The graph is created with rdf::create_rdf_graph() and
// its default arguments; the work is delegated to the three-argument overload.
rdf::rdf_graph_ptr_type
import_owl_model(std::string const& fname, bool verbose)
{
  // Tuning parameters (pool_size, triple_size, sessions_map_size) could be
  // passed here; the defaults are used for now.
  rdf::rdf_graph_ptr_type new_graph = rdf::create_rdf_graph();
  return import_owl_model(fname, new_graph, verbose);
}
/////////////////////////////////////////////////////////////////////////////////////////
// import_owl_model_membuffer
//
// main function to import owl model - importing to the provided graph from memory buffer
// instead of from a file
// Import into existing graph
/////////////////////////////////////////////////////////////////////////////////////////
// Imports the owl model held in the memory buffer 'buffer' into the provided
// graph.
//
// Initializes the XML platform, parses the buffer through process_membuffer(),
// terminates the XML platform, then runs process_imported_owl_model() on the
// graph. Returns 'graph_p'.
//
// Throws rdf::rdf_exception(rdf::parsing_error) when 'buffer' is empty or the
// XML platform cannot be initialized. Exceptions raised by process_membuffer()
// propagate to the caller after the XML platform has been terminated.
rdf::rdf_graph_ptr_type
import_owl_model_membuffer(std::string const& buffer, rdf::rdf_graph_ptr_type & graph_p, bool verbose)
{
  if(verbose) std::cout << "Importing owl model from buffer of size " << buffer.size() << std::endl;
  if(buffer.size() == 0) throw rdf::rdf_exception(rdf::parsing_error, "ERROR, invalid buffer size");

  // Initialize the XML4C2 system
  try {
    XMLPlatformUtils::Initialize();
  } catch (const XMLException& toCatch) {
    std::cout << "ERROR: XMLException caught while initializing library, message: "
      << strx(toCatch.getMessage()) << std::endl;
    throw rdf::rdf_exception(rdf::parsing_error, "ERROR: XMLException caught while initializing library");
  }

  // Every successful Initialize() must be matched by a Terminate(), also when
  // parsing fails; otherwise the platform stays initialized after an error.
  try {
    process_membuffer(buffer, graph_p, verbose);
  } catch(...) {
    XMLPlatformUtils::Terminate();
    throw;
  }
  XMLPlatformUtils::Terminate();

  process_imported_owl_model(graph_p, verbose);
  return graph_p;
}
/////////////////////////////////////////////////////////////////////////////////////////
// import_owl_model_membuffer
//
// main function to import owl model - importing to the provided graph from memory buffer
// instead of from a file
// Import into new graph
/////////////////////////////////////////////////////////////////////////////////////////
// Imports the owl model held in the memory buffer 'buffer' into a newly
// created graph and returns that graph. The graph is created with
// rdf::create_rdf_graph() and its default arguments; the work is delegated to
// the three-argument overload.
rdf::rdf_graph_ptr_type
import_owl_model_membuffer(std::string const& buffer, bool verbose)
{
  // Tuning parameters (pool_size, triple_size, sessions_map_size) could be
  // passed here; the defaults are used for now.
  rdf::rdf_graph_ptr_type new_graph = rdf::create_rdf_graph();
  return import_owl_model_membuffer(buffer, new_graph, verbose);
}
/////////////////////////////////////////////////////////////////////////////////////////
// process_file
//
/////////////////////////////////////////////////////////////////////////////////////////
// Parses the file 'fname' with a non-validating SAX parser, feeding the
// triples into 'graph_p' through an rdf_handler.
//
// Every failure is logged to std::cout and leaves as an rdf::rdf_exception:
// OutOfMemoryException, XMLException and unknown exceptions are translated to
// rdf::rdf_exception(rdf::parsing_error); an rdf::rdf_exception raised while
// parsing is logged and rethrown unchanged.
//
// NOTE(review): the parser's error count is only printed in verbose mode; a
// non-zero count does not by itself make this function fail.
void
process_file(std::string const& fname, rdf::rdf_graph_ptr_type & graph_p, bool verbose)
{
  // Create a SAX parser object and switch validation off.
  sax_parser_ptr_type parser_p = sax_parser_ptr_type(new SAXParser());
  parser_p->setValidationScheme(SAXParser::Val_Never);

  // Create the handler object and install it as the document and error
  // handler for the parser. Then parse the file and catch any exceptions
  // that propagate out.
  try {
    rdf_handler handler(graph_p, verbose);
    parser_p->setDocumentHandler(&handler);
    parser_p->setErrorHandler(&handler);
    parser_p->parse(fname.c_str());
    int errorCount = parser_p->getErrorCount();
    if(verbose) std::cout << "Error count is " << errorCount << std::endl;
  } catch (OutOfMemoryException const&) {
    std::cout << "ERROR: OutOfMemoryException caught while importing model" << std::endl;
    throw rdf::rdf_exception(rdf::parsing_error, "ERROR: OutOfMemoryException caught while importing owl model");
  } catch (XMLException const& toCatch) {
    std::cout << "ERROR: XMLException caught while importing model, message: "
      << strx(toCatch.getMessage()) << std::endl;
    throw rdf::rdf_exception(rdf::parsing_error, "ERROR: XMLException caught while importing model");
  } catch(rdf::rdf_exception const& e) {
    std::cout << "ERROR, exception caught with message: '" << e.what()
      << "' while importing from file " << fname << std::endl;
    // Rethrow the original object; 'throw e;' would copy (and slice) it.
    throw;
  } catch(...) {
    std::cout << "ERROR, unknown exception caught "
      << " while importing from file " << fname << std::endl;
    throw rdf::rdf_exception(rdf::parsing_error, "ERROR, unknown exception!");
  }
}
/////////////////////////////////////////////////////////////////////////////////////////
// process_membuffer
//
/////////////////////////////////////////////////////////////////////////////////////////
// Parses the XML document held in 'buffer' with a non-validating SAX parser,
// feeding the triples into 'graph_p' through an rdf_handler. The buffer is
// wrapped in a MemBufInputSource that does not adopt (take ownership of) it.
//
// Every failure is logged to std::cout and leaves as an rdf::rdf_exception:
// OutOfMemoryException, XMLException and unknown exceptions are translated to
// rdf::rdf_exception(rdf::parsing_error); an rdf::rdf_exception raised while
// parsing is logged and rethrown unchanged.
//
// NOTE(review): the parser's error count is only printed in verbose mode; a
// non-zero count does not by itself make this function fail.
void
process_membuffer(std::string const& buffer, rdf::rdf_graph_ptr_type & graph_p, bool verbose)
{
  // Create a SAX parser object and switch validation off.
  sax_parser_ptr_type parser_p = sax_parser_ptr_type(new SAXParser());
  parser_p->setValidationScheme(SAXParser::Val_Never);

  // Create the handler object and install it as the document and error
  // handler for the parser. Then parse the buffer and catch any exceptions
  // that propagate out.
  try {
    rdf_handler handler(graph_p, verbose);
    parser_p->setDocumentHandler(&handler);
    parser_p->setErrorHandler(&handler);

    // Create an input source reading directly from the caller's buffer.
    mem_bufinput_source_ptr_type mem_bufinput_source_p =
      mem_bufinput_source_ptr_type(new MemBufInputSource(
        reinterpret_cast<const XMLByte*>(buffer.c_str()),
        buffer.length(),
        "memory_buffer",
        false));

    parser_p->parse(*mem_bufinput_source_p);
    int errorCount = parser_p->getErrorCount();
    if(verbose) std::cout << "Error count is " << errorCount << std::endl;
  } catch (OutOfMemoryException const&) {
    std::cout << "ERROR: OutOfMemoryException caught while importing model" << std::endl;
    throw rdf::rdf_exception(rdf::parsing_error, "ERROR: OutOfMemoryException caught while importing owl model");
  } catch (XMLException const& toCatch) {
    std::cout << "ERROR: XMLException caught while importing model, message: "
      << strx(toCatch.getMessage()) << std::endl;
    throw rdf::rdf_exception(rdf::parsing_error, "ERROR: XMLException caught while importing model");
  } catch(rdf::rdf_exception const& e) {
    std::cout << "ERROR, exception caught with message: '" << e.what()
      << "' while importing from buffer of size " << buffer.size() << std::endl;
    // Rethrow the original object; 'throw e;' would copy (and slice) it.
    throw;
  } catch(...) {
    std::cout << "ERROR, unknown exception caught "
      << " while importing from buffer of size " << buffer.size() << std::endl;
    throw rdf::rdf_exception(rdf::parsing_error, "ERROR, unknown exception!");
  }
}
}; /* xml namespace */
}; /* parser namespace */