#ifndef OWL_IMPORT_H_
#define OWL_IMPORT_H_
//#include <cstring>
#include "rdf_rule_core.h"
#include "rdf_graph.h"
#include <xercesc/parsers/SAXParser.hpp>
#include <xercesc/sax/HandlerBase.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/sax/AttributeList.hpp>
#include <xercesc/util/OutOfMemoryException.hpp>
#include <xercesc/framework/MemBufInputSource.hpp>
#include <boost/algorithm/string/trim.hpp>
namespace parser {
namespace xml {
XERCES_CPP_NAMESPACE_USE
// ---------------------------------------------------------------------------
// Small helper that makes it easy (though not terribly efficient) to
// transcode XMLCh data to the local code page for display.
//
// Each instance owns the buffer returned by XMLString::transcode() and hands
// it back with XMLString::release() in its destructor. Copy construction and
// assignment duplicate the buffer, so two instances never release the same
// pointer.
// ---------------------------------------------------------------------------
class strx
{
public :
  // Transcode toTranscode and keep the resulting local-form string.
  strx(XMLCh const* const toTranscode)
  : fLocalForm(XMLString::transcode(toTranscode))
  {
  }

  // Deep copy: the new instance gets its own replica of the buffer.
  strx(strx const& other)
  : fLocalForm(XMLString::replicate(other.fLocalForm))
  {
  }

  // Deep-copy assignment. The replica is made before the old buffer is
  // released so the object is never left pointing at freed memory.
  strx& operator=(strx const& other)
  {
    if(this != &other) {
      char* replica = XMLString::replicate(other.fLocalForm);
      XMLString::release(&fLocalForm);
      fLocalForm = replica;
    }
    return *this;
  }

  // Give the transcoded buffer back to Xerces.
  ~strx()
  {
    XMLString::release(&fLocalForm);
  }

  // Transcoded text; the pointer stays owned by this object.
  const char* localForm() const
  {
    return fLocalForm;
  }

private :
  char* fLocalForm;   // buffer obtained from XMLString::transcode / replicate
};
// Stream the local-code-page form of a strx to target and return target.
inline std::ostream& operator<<(std::ostream& target, const strx& toDump)
{
  return target << toDump.localForm();
}
// Shared-ownership (std::tr1::shared_ptr) handle to a Xerces SAXParser.
typedef std::tr1::shared_ptr<SAXParser> sax_parser_ptr_type;
// Shared-ownership (std::tr1::shared_ptr) handle to a Xerces MemBufInputSource.
typedef std::tr1::shared_ptr<MemBufInputSource> mem_bufinput_source_ptr_type;
/////////////////////////////////////////////////////////////////////////////////////////
// struct parse_event_state
//
// One entry of the handler's parse stack: a (subject, predicate, object) triple of
// graph indexes together with a few flags describing the element it came from.
/////////////////////////////////////////////////////////////////////////////////////////
struct parse_event_state
{
  typedef rdf::index_type index_t;

  index_t subject;
  index_t predicate;
  index_t object;
  bool from_class_event;          // printed as "from class elm" / "from predicate elm"
  bool is_collection_parse_type;  // printed as ", is collection " when set
  int data_type;                  // starts at 0; value 1 is printed as ", the obj is int "

  // Store the triple and the two flags; data_type always starts at 0.
  parse_event_state(index_t s, index_t p, index_t o, bool from_class, bool is_collection)
  : subject(s),
    predicate(p),
    object(o),
    from_class_event(from_class),
    is_collection_parse_type(is_collection),
    data_type(0)
  {
  }
};
// Write a one-line, human readable dump of a parse_event_state to sout.
// Each element of the triple is printed through rdf::internal::to_resource,
// or as "(null)" when the index tests false; the flags follow the triple.
inline std::ostream & operator<<(std::ostream & sout, parse_event_state const& estate)
{
  sout << "--> event state: (";

  // subject
  if(!estate.subject) sout << " (null) , ";
  else                sout << rdf::internal::to_resource(estate.subject) << " , ";

  // predicate
  if(!estate.predicate) sout << " (null) , ";
  else                  sout << rdf::internal::to_resource(estate.predicate) << " , ";

  // object (closes the parenthesis)
  if(!estate.object) sout << " (null) ) ";
  else               sout << rdf::internal::to_resource(estate.object) << " ) ";

  // origin of the entry
  sout << (estate.from_class_event ? " from class elm" : " from predicate elm");

  // optional flags
  if(estate.is_collection_parse_type) sout << ", is collection ";
  if(estate.data_type == 1)           sout << ", the obj is int ";

  return sout;
}
/////////////////////////////////////////////////////////////////////////////////////////
// class rdf_handler
//
/////////////////////////////////////////////////////////////////////////////////////////
class rdf_handler: public HandlerBase
{
typedef rdf::index_type index_t;
typedef std::vector<parse_event_state> stack_type;
typedef std::map<std::string, std::string> str_map_type;
typedef std::set<std::string> str_set_type;
public:
rdf_handler(rdf::rdf_graph_ptr_type const& graph_p, bool verbose):
HandlerBase(),
m_graph_p(graph_p),
m_stack(),
m_xml_ns_rmap(),
m_known_class_name(),
m_known_predicate_name(),
m_model_xml_base("http://top/test"),
m_xmlns(""),
m_namespace_done(false),
m_rdf_type(NULL),
m_owl_thing(NULL),
m_rdf_description(NULL),
m_top_label(NULL),
m_rdf_id("rdf:ID"), m_rdf_res("rdf:resource"), m_rdf_about("rdf:about"),
m_rdf_datatype("rdf:datatype"),
m_non_neg_num("nonNegativeInteger"),
m_int_num("int"),
m_parse_type("rdf:parseType"),
m_rdf_collection("Collection"),
m_verbose(verbose)
{};
~rdf_handler(){};
inline bool is_verbose()const{return m_verbose;};
// -----------------------------------------------------------------------
// Implementations of the SAX DocumentHandler interface
// -----------------------------------------------------------------------
void endDocument();
void endElement(XMLCh const* const name);
void characters(XMLCh const* const chars, unsigned int const length);
void processingInstruction(XMLCh const* const target, XMLCh const* const data);
void startDocument();
void startElement(XMLCh const* const name, AttributeList & attributes);
// -----------------------------------------------------------------------
// Implementations of the SAX ErrorHandler interface
// -----------------------------------------------------------------------
void warning(SAXParseException const& exc);
void error(SAXParseException const& exc);
void fatalError(SAXParseException const& exc);
// -----------------------------------------------------------------------
// Utility methods
// -----------------------------------------------------------------------
bool
expect_class()
{
if(m_stack.empty()) return true;
parse_event_state & e = m_stack.back();
return !e.from_class_event;
};
std::string
short_uri(std::string const& uri)
{
std::string::size_type pos = 0;
if(uri[0] == '#') {
return m_xmlns + uri.substr(1);
}
if(uri.find("http:") == 0 and (pos=uri.find('#'))!=std::string::npos) {
std::string prefix = uri.substr(0, pos+1);
str_map_type::const_iterator itor = m_xml_ns_rmap.find(prefix);
if(itor != m_xml_ns_rmap.end()) {
return itor->second + uri.substr(pos+1);
}
}
return uri;
};
index_t
get_resource(str_map_type const& map)
{
str_map_type::const_iterator end = map.end();
str_map_type::const_iterator itor;
itor = map.find(m_rdf_id);
if(itor != end) {
std::string top_label = itor->second;
index_t index = m_graph_p->create_resource_as_index(m_xmlns + top_label);
// usually rdf::ID only show up one per resource without the '#'
// so take opportunity to add a label - use top:label rather than rdf:label in case one exist
// in the ontology if the xmlns is not empty (to avoid collision with resource name.)
if(m_xmlns.size() > 0) {
m_graph_p->insert(index, m_top_label, m_graph_p->create_literal_as_index(top_label, top_label));
}
return index;
}
itor = map.find(m_rdf_res);
if(itor != end) return m_graph_p->create_resource_as_index(short_uri(itor->second));
itor = map.find(m_rdf_about);
if(itor != end) {
std::string v = short_uri(itor->second);
boost::trim(v);
if(v == "") v = m_model_xml_base;
return m_graph_p->create_resource_as_index(v);
}
return NULL;
}
index_t
get_subject(str_map_type const& map)
{
index_t i = get_resource(map);
if(!i) return m_graph_p->create_bnode_as_index();
return i;
}
index_t
get_resource(std::string & elm_name)
{
// if(elm_name.find(':')==std::string::npos and elm_name[0]!='#') elm_name = "#"+elm_name;
return m_graph_p->create_resource_as_index(elm_name);
};
int
get_data_type(str_map_type const& map)
{
int type = 0;
str_map_type::const_iterator end = map.end();
str_map_type::const_iterator itor = map.find(m_rdf_datatype);
if(itor != end) {
if(itor->second.find(m_int_num) != std::string::npos) type = 1;
else if(itor->second.find(m_non_neg_num) != std::string::npos) type = 1;
}
return type;
};
bool
has_collection_parse_type(str_map_type const& map)
{
str_map_type::const_iterator end = map.end();
str_map_type::const_iterator itor = map.find(m_parse_type);
if(itor != end) return itor->second == m_rdf_collection;
return false;
};
bool
validate_tag(std::string elm_name, bool for_class_event)
{
// if this is a class event then we don't expect
// to have the elm_name to be a known predicate name
if(for_class_event) {
if(m_known_predicate_name.find(elm_name) != m_known_predicate_name.end()) return false;
} else {
if(m_known_class_name.find(elm_name) != m_known_class_name.end()) return false;
}
return true;
};
private:
rdf::rdf_graph_ptr_type m_graph_p;
stack_type m_stack;
str_map_type m_xml_ns_rmap;
str_set_type m_known_class_name;
str_set_type m_known_predicate_name;
std::string m_model_xml_base;
std::string m_xmlns;
bool m_namespace_done;
index_t m_rdf_type;
index_t m_owl_thing;
index_t m_rdf_description;
index_t m_top_label;
std::string m_rdf_id;
std::string m_rdf_res;
std::string m_rdf_about;
std::string m_rdf_datatype;
std::string m_non_neg_num;
std::string m_int_num;
std::string m_parse_type;
std::string m_rdf_collection;
bool m_verbose;
};
/////////////////////////////////////////////////////////////////////////////////////////
// process_imported_owl_model
//
// Utility function to print info about imported model
//
// graph_p : graph holding the imported model (taken by non-const reference)
// verbose : presumably enables additional output - confirm in the implementation
/////////////////////////////////////////////////////////////////////////////////////////
void
process_imported_owl_model(rdf::rdf_graph_ptr_type & graph_p, bool verbose);
/////////////////////////////////////////////////////////////////////////////////////////
// import_owl_model
//
// main function to import owl model
//
// fname   : name of the file to import
// verbose : optional, defaults to false
// Returns a graph pointer - presumably a newly created graph holding the imported
// model; confirm in the implementation.
/////////////////////////////////////////////////////////////////////////////////////////
rdf::rdf_graph_ptr_type
import_owl_model(std::string const& fname, bool verbose=false);
/////////////////////////////////////////////////////////////////////////////////////////
// import_owl_model
//
// main function to import owl model - importing to the provided graph
//
// fname   : name of the file to import
// graph_p : graph to import into (taken by non-const reference)
// verbose : optional, defaults to false
// Returns a graph pointer - presumably graph_p itself; confirm in the implementation.
/////////////////////////////////////////////////////////////////////////////////////////
rdf::rdf_graph_ptr_type
import_owl_model(std::string const& fname, rdf::rdf_graph_ptr_type & graph_p, bool verbose=false);
/////////////////////////////////////////////////////////////////////////////////////////
// import_owl_model_membuffer
//
// main function to import owl model - importing from a memory buffer
// instead of from a file
// Import into new graph
//
// buffer  : in-memory text of the model to import
// verbose : optional, defaults to false
/////////////////////////////////////////////////////////////////////////////////////////
rdf::rdf_graph_ptr_type
import_owl_model_membuffer(std::string const& buffer, bool verbose=false);
/////////////////////////////////////////////////////////////////////////////////////////
// import_owl_model_membuffer
//
// main function to import owl model - importing to the provided graph from a memory
// buffer instead of from a file
// Import into existing graph
//
// buffer  : in-memory text of the model to import
// graph_p : graph to import into (taken by non-const reference)
// verbose : optional, defaults to false
/////////////////////////////////////////////////////////////////////////////////////////
rdf::rdf_graph_ptr_type
import_owl_model_membuffer(std::string const& buffer, rdf::rdf_graph_ptr_type & graph_p, bool verbose=false);
/////////////////////////////////////////////////////////////////////////////////////////
// process_file
//
// process .trd file
//
// fname   : name of the .trd file to process
// graph_p : graph handed to the processing (taken by non-const reference)
// verbose : optional, defaults to false
/////////////////////////////////////////////////////////////////////////////////////////
void
process_file(std::string const& fname, rdf::rdf_graph_ptr_type & graph_p, bool verbose=false);
/////////////////////////////////////////////////////////////////////////////////////////
// process_membuffer
//
// parse the memory buffer containing a trd file.
//
// buffer  : in-memory content of the trd file
// graph_p : graph handed to the processing (taken by non-const reference)
// verbose : optional, defaults to false
/////////////////////////////////////////////////////////////////////////////////////////
void
process_membuffer(std::string const& buffer, rdf::rdf_graph_ptr_type & graph_p, bool verbose=false);
}; /* xml namespace */
}; /* parser namespace */
#endif /*OWL_IMPORT_H_*/