If you're interested in functional programming, you might also want to check out my second blog, which I'm actively working on!

Thursday, August 23, 2012

XML Database as source for RDF Database

We've come a long way, from reading and transforming XML resources on a plain filesystem to setting up an XML database and executing sophisticated cross-collection XQueries. As we always aim to improve our role as information providers, we are on the verge of switching to one-stop-shopping. Currently we only have part of the master data stored in the XMLDB, and we are already able to:
  • generate DITA maps / topics  (PDF creation,  automated translations, ...)
  • generate publications (xhtml)
  • answer data-related questions in real time
The main components in this architectural picture are
  • WebSphere Product Center (exports product information as XML) (soon to change)
  • Apache Cocoon (main framework that does all of the above)
  • Sedna (XMLDB)

But to get one-stop-shopping, we need a more flexible way to link data from different sources (RDBMS, XMLDB, CSV, ...).

We will automate data extraction for all information resources and transform that data into RDF, so that it becomes easy to link the data and to offer a consistent way of querying it (a SPARQL endpoint).

Below is an example of an XQuery library module with which we can generate RDF from the XMLDB:
module namespace basictypes2rdf = "http://www.nxp.com/basictypes2rdf";

declare copy-namespaces preserve, inherit;

declare namespace rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#";
declare namespace skos="http://www.w3.org/2004/02/skos/core#";
declare namespace foaf="http://xmlns.com/foaf/0.1/";
declare namespace nxp="http://purl.org/nxp/schema/v1/";

import module namespace basictypes = "http://www.nxp.com/basictypes";
import module namespace string = "http://www.nxp.com/string";
import module namespace rdfutil = "http://www.nxp.com/rdfutil";
import module namespace packages2rdf = "http://www.nxp.com/packages2rdf";


(:~
 : Builds one rdf:Description element per Product element, without an
 : enclosing rdf:RDF wrapper.
 :
 : Every description carries rdf:type, skos:prefLabel, foaf:homepage and
 : nxp:typeNumber. The following properties are only emitted when the source
 : data is present:
 :   - nxp:productStatusDate  (ProductInformation/ProductStatusDate has text)
 :   - nxp:productStatus      (ProductInformation/ProductStatus has text)
 :   - nxp:mechanicalOutline  (ProductInformation/PackageID exists)
 :
 : @param $products the Product elements to convert; may be empty
 : @return one rdf:Description per input product, in input order
 :)
declare function basictypes2rdf:fromBasicTypesRaw($products as element(Product)*) as element(rdf:Description)* {
    for $product in $products
    let $btn := basictypes:getName($product)
    let $info := $product/ProductInformation
    return
    <rdf:Description rdf:about="{basictypes2rdf:getURI($product)}">
      <rdf:type rdf:resource="http://purl.org/nxp/schema/v1/BasicType"/>
      {
        (: An empty literal typed as xsd:date is ill-typed RDF, so the
           property is skipped when no status date text is available. :)
        if (some $d in $info/ProductStatusDate satisfies normalize-space(string($d)) ne "")
        then <nxp:productStatusDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">{data($info/ProductStatusDate)}</nxp:productStatusDate>
        else ()
      }
      <skos:prefLabel xml:lang="en-us">{data($info/Description)}</skos:prefLabel>
      {
        (: Without a status value the resource URI would collapse to the bare
           schema namespace, which is not a product status. :)
        if (some $s in $info/ProductStatus satisfies normalize-space(string($s)) ne "")
        then <nxp:productStatus rdf:resource="http://purl.org/nxp/schema/v1/{string:toCamelCase(lower-case(data($info/ProductStatus)))}"/>
        else ()
      }
      <foaf:homepage rdf:resource="http://www.nxp.com/pip/{$btn}"/>
      <nxp:typeNumber>{$btn}</nxp:typeNumber>
      {
        if (exists($info/PackageID))
        then <nxp:mechanicalOutline rdf:resource="{packages2rdf:getURI(basictypes:getPackage($product))}"/>
        else ()
      }
    </rdf:Description>
};

(:~
 : Converts Product elements to RDF descriptions and passes them to
 : rdfutil:wrapRDF to obtain a single rdf:RDF element.
 :
 : @param $products the Product elements to convert
 : @return the rdf:RDF element produced by rdfutil:wrapRDF
 :)
declare function basictypes2rdf:fromBasicTypes($products as element(Product)*) as element(rdf:RDF) {
    let $descriptions := basictypes2rdf:fromBasicTypesRaw($products)
    return rdfutil:wrapRDF($descriptions)
};

(:~
 : Converts the basic types selected by the given ids to an rdf:RDF element.
 : Selection is delegated to basictypes:filterBySet, applied to the result of
 : basictypes:getBasicTypes().
 :
 : @param $ids the ids to select, e.g. ("PH3330L", "PH3330CL")
 : @return the rdf:RDF element for the selected basic types
 :)
declare function basictypes2rdf:fromBasicTypeIds($ids as xs:string*) as element(rdf:RDF) {
    let $allBasicTypes := basictypes:getBasicTypes()
    let $selected := basictypes:filterBySet($allBasicTypes, $ids)
    return basictypes2rdf:fromBasicTypes($selected)
};

(:~
 : Returns the resource URI of a basic type. The URI is built by
 : rdfutil:getURI from the "basic_types" segment and the product's
 : ProductInformation/Name value.
 :
 : @param $product the Product element to identify
 : @return the URI returned by rdfutil:getURI
 :)
declare function basictypes2rdf:getURI($product as element(Product)) as xs:anyURI {
    let $name := data($product/ProductInformation/Name)
    return rdfutil:getURI("basic_types", $name)
};

(:
    Usages:
    basictypes2rdf:fromBasicTypes(basictypes:getBasicType("PH3330L"))
    basictypes2rdf:fromBasicTypes(basictypes:getBasicTypes()[ProductInformation/PIPType = 0])
    basictypes2rdf:fromBasicTypeIds(("PH3330L","PH3330CL"))
:)
The following expression will produce the output below:
import module namespace basictypes2rdf = "http://www.nxp.com/basictypes2rdf";
basictypes2rdf:fromBasicTypeIds("PH3330L")

<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:nxp="http://purl.org/nxp/schema/v1/">
  <rdf:Description rdf:about="http://data.nxp.com/id/basic_types/ph3330l">
    <rdf:type rdf:resource="http://purl.org/nxp/schema/v1/BasicType"/>
    <nxp:productStatusDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2011-10-28</nxp:productStatusDate>
    <skos:prefLabel xml:lang="en-us">N-channel TrenchMOS logic level FET</skos:prefLabel>
    <nxp:productStatus rdf:resource="http://purl.org/nxp/schema/v1/endOfLife"/>
    <foaf:homepage rdf:resource="http://www.nxp.com/pip/PH3330L"/>
    <nxp:typeNumber>PH3330L</nxp:typeNumber>
    <nxp:mechanicalOutline rdf:resource="http://data.nxp.com/id/package_outline_versions/sot669"/>
  </rdf:Description>
</rdf:RDF>
And you should validate the output just to make sure.

No comments:

Post a Comment