If you're interested in functional programming, you might also want to check out my second blog, which I'm actively working on!

Thursday, September 13, 2012

Using properties from within XQuery modules

Today I ran into an interesting issue. Sedna XMLDB does not support declaring external variables. But sometimes we need to have access to properties, even from a stored xquery module. We could hardcode values but some values are environment specific so no luck there.

So I was scratching my head today and decided to try my luck on the Sedna mailing list. Charles Foster was kind enough to share some ideas, and one idea was storing the properties in the XMLDB itself.

So I quickly hacked together a prototype properties XQuery support library which offers similar functionality to the Cocoon-Spring-Configurator. We can specify generic properties on the highest level and also specify environment-specific properties. The nice thing is we can actually always pass the environment (prod / test / dev), and if it can't find the property for that environment it will default to searching for a generic property by that name.
<?xml version="1.0" encoding="UTF-8"?>
<properties id="test-suite">
    <!-- default properties -->
    <property id="string1">this is text</property>
    <property id="boolean_false">false</property>
    <property id="boolean_true">true</property>
    <property id="int">5</property>
    <property id="double">3.56</property>
    <property id="decimal">6.23</property>
    <property id="float">002002.270</property>
    <property id="time">12:20:46.275+01:00</property>
    <property id="dateTime">2002-12-07T12:20:46.275+01:00</property>
    <property id="date">2002-12-07</property>
    <property id="duration">P30Y243D</property>
    <property id="anyURI">http://www.google.com</property>
    <environment id="prod">
        <property id="base_uri">http://nww.prod.spider.nxp.com</property>
        <property id="port">8513</property>
    </environment>
</properties>

Properties XQuery library:
module namespace properties = "http://www.nxp.com/properties";

(:
    Returns every <properties> root element found in the "properties"
    collection, i.e. one element per stored properties document.
:)
declare function properties:getPropertyFiles() as element(properties)* {
    for $propertyFile in collection("properties")/properties
    return $propertyFile
};

(:
    Returns the <properties> element whose @id equals $id, or the empty
    sequence when no such properties file exists.

    properties:getPropertyFile("test-suite")
:)
declare function properties:getPropertyFile($id as xs:string) as element(properties)? {
    for $propertyFile in properties:getPropertyFiles()
    where $propertyFile/@id = $id
    return $propertyFile
};

(:
    Looks up a generic (environment-independent) property.
    Delegates to the two-argument variant with an empty environment.

    properties:getProperty("test-suite.string1")
:)
declare function properties:getProperty($expr as xs:string) {
    let $noEnvironment as xs:string? := ()
    return properties:getProperty($expr, $noEnvironment)
};

(:
    Resolves a property addressed as "<fileId>.<propertyId>".

    $expr : the properties file id and the property id, separated by a dot.
            Only the FIRST dot is treated as the separator, so property ids
            may themselves contain dots.
    $env  : optional environment id (e.g. "prod"). When given and the
            environment defines the property, that value wins; otherwise the
            generic property declared directly under <properties> is used.

    Returns the atomized value(s) of the matching property element(s).
    Raises the error 'doesNotExist' (namespace http://www.nxp.com/error) when
    neither an environment-specific nor a generic property can be found.

    properties:getProperty("test-suite.base_uri", "test")
:)
declare function properties:getProperty($expr as xs:string, $env as xs:string?) {
    let $fileId := substring-before($expr, ".")
    let $propertyId := substring-after($expr, ".")
    (: Resolve the properties file once instead of once per lookup. :)
    let $file := properties:getPropertyFile($fileId)
    let $specific :=
        if (exists($env))
        then $file/environment[@id=$env]/property[@id=$propertyId]
        else ()
    let $property :=
        if (exists($specific))
        then $specific
        else $file/property[@id=$propertyId]
    return  if (exists($property)) then data($property)
            else fn:error(fn:QName('http://www.nxp.com/error', 'properties:doesNotExist'), concat('Property ', $expr, ' does not exist'))
};

(:
    Smoke test for the properties library.
    The first three lookups pass the "prod" environment; the remaining ones
    use the single-argument variant, which only consults generic properties.
    In the sample properties file "string1" is not declared under the prod
    environment, so that lookup exercises the fallback to the generic property.
    The minutes-from-dateTime and years-from-duration calls feed property
    values straight into typed date/duration functions.
:)
import module namespace properties = "http://www.nxp.com/properties";

<test-suite>
  <test>test-suite.base_uri      = {properties:getProperty("test-suite.base_uri", "prod")}</test>
  <test>test-suite.port          = {properties:getProperty("test-suite.port", "prod")}</test>  
  <test>test-suite.string1       = {properties:getProperty("test-suite.string1", "prod")}</test>
  <test>test-suite.boolean_false = {properties:getProperty("test-suite.boolean_false")}</test>
  <test>test-suite.boolean_true  = {properties:getProperty("test-suite.boolean_true")}</test>
  <test>test-suite.int           = {properties:getProperty("test-suite.int")}</test>
  <test>test-suite.double        = {properties:getProperty("test-suite.double")}</test>
  <test>test-suite.decimal       = {properties:getProperty("test-suite.decimal")}</test>
  <test>test-suite.float         = {properties:getProperty("test-suite.float")}</test> 
  <test>test-suite.time          = {properties:getProperty("test-suite.time")}</test>
  <test>test-suite.date          = {properties:getProperty("test-suite.date")}</test> 
  <test>test-suite.dateTime      = {properties:getProperty("test-suite.dateTime")}</test>
  <test>minutes from test-suite.dateTime = {minutes-from-dateTime(properties:getProperty("test-suite.dateTime"))}</test>
  <test>test-suite.anyURI        = {properties:getProperty("test-suite.anyURI")}</test>
  <test>test-suite.duration      = {properties:getProperty("test-suite.duration")}</test>
  <test>year from test-suite.duration      = {years-from-duration(properties:getProperty("test-suite.duration"))}</test>
</test-suite>
Output from test-suite
<test-suite>
  <test>test-suite.base_uri      = http://nww.prod.spider.nxp.com</test>
  <test>test-suite.port          = 8513</test>
  <test>test-suite.string1       = this is text</test>
  <test>test-suite.boolean_false = false</test>
  <test>test-suite.boolean_true  = true</test>
  <test>test-suite.int           = 5</test>
  <test>test-suite.double        = 3.56</test>
  <test>test-suite.decimal       = 6.23</test>
  <test>test-suite.float         = 002002.270</test>
  <test>test-suite.time          = 12:20:46.275+01:00</test>
  <test>test-suite.date          = 2002-12-07</test>
  <test>test-suite.dateTime      = 2002-12-07T12:20:46.275+01:00</test>
  <test>minutes from test-suite.dateTime = 20</test>
  <test>test-suite.anyURI        = http://www.google.com</test>
  <test>test-suite.duration      = P30Y243D</test>
  <test>year from test-suite.duration      = 30</test>
</test-suite>

Now let's try and see what happens if we access a non existing property.
(:
    Negative test: "nonexisting" is declared neither under the prod
    environment nor as a generic property, so the lookup is expected to
    raise the library's "does not exist" error instead of returning a value.
:)
import module namespace properties = "http://www.nxp.com/properties";

<test-suite>
  <test>should result in exception = {properties:getProperty("test-suite.nonexisting", "prod")}</test>
</test-suite>

2012/09/14 09:40:17 database query/update failed (SEDNA Message: ERROR doesNotExist
    Property test-suite.nonexisting does not exist
)

Now we only need to make sure that the correct environment is passed. As all our environments use a different database we only need to store a specific constants library in each database and we're good to go.
(:
    Per-database constants library. Each environment's database stores its
    own copy of this module, so queries can pick up the environment id
    without hardcoding it.
:)
module namespace constants = "http://www.nxp.com/constants";

(: Environment id handed to properties:getProperty as its $env argument. :)
declare variable $constants:ENVIRONMENT as xs:string := "test";

So now we can rewrite our little test-suite to use this constant
(:
    Same lookup as before, but the environment now comes from the
    constants module stored in the database instead of a string literal.
:)
import module namespace properties = "http://www.nxp.com/properties";
import module namespace constants = "http://www.nxp.com/constants";

<test-suite>
  <test>test-suite.boolean_false = {properties:getProperty("test-suite.boolean_false", $constants:ENVIRONMENT)}</test>
</test-suite>

Wednesday, September 12, 2012

How to easily deal with timestamp based URLs

Let me briefly describe the problem. You have to fetch data from some website which exposes the data through timestamped URLs. For the use case below, the report is generated weekly on Monday.

An example {server}:{port}/exports/classificationreport_20120924.csv

So you can't exactly hardcode the URL as a property but will need to generate it dynamically whenever a request is made to that resource. I've already used joda-time in the past so I decided to use it again.
/**
 * Builds the URL of the classification report export.
 *
 * The file name carries a {@code yyyyMMdd} stamp; the stamp is taken from the
 * current date/time after its day-of-week field has been set to Monday.
 *
 * @return the exports base URL followed by
 *         {@code classificationreport_<yyyyMMdd>.csv}
 */
public String getClassificationReportURL() {
   final DateTimeFormatter stampFormat = DateTimeFormat.forPattern("yyyyMMdd");
   final MutableDateTime reportDay = new MutableDateTime();
   // Reports are stamped with a Monday date, so move "now" onto Monday.
   reportDay.setDayOfWeek(DateTimeConstants.MONDAY);

   final StringBuilder url = new StringBuilder();
   url.append(getExportsBaseURL());
   url.append("classificationreport_");
   url.append(stampFormat.print(reportDay));
   url.append(".csv");
   return url.toString();
}

Monday, August 27, 2012

Still using XSLT1.0? Time to start using Saxon.


Folder structure:
   - input
        - jsonxml-1.xml
        - jsonxml-2.xml
        - jsonxml-3.xml
   - xslt
        - jsonxmltransformer.xslt
   - output (empty)

Below are some basic usage instructions. For more details, check out the official documentation. You can download the saxon.jar from the official Saxon home page or from this Maven repository.
java -jar Saxon-HE-9.4.jar [options] [params]

-s:filename    -- Identifies the source file or directory
-o:filename    -- Send output to named file. In the absence of this option, the results go to standard output.
                  If the source argument identifies a directory, this option is mandatory and must also identify a directory; 
                  on completion it will contain one output file for each file in the source directory
-threads:N     -- Used only when the -s option specifies a directory. Controls the number of threads used to process the files in the directory
-xsl:filename  -- Specifies the file containing the principal stylesheet module

Now let's see how easy it is to transform a single file jsonxml-1.xml and save the result to transformed-result1.xml
java -jar Saxon-HE-9.4.jar -s:C:/tmp/easytransform/input/jsonxml-1.xml -o:C:/tmp/easytransform/output/transformed-result1.xml -xsl:C:/tmp/easytransform/xslt/jsonxmltransformer.xslt
That was easy enough. But suppose we want to transform a complete directory of source files?
java -jar Saxon-HE-9.4.jar -s:C:/tmp/easytransform/input -o:C:/tmp/easytransform/output -xsl:C:/tmp/easytransform/xslt/jsonxmltransformer.xslt
This will by convention save the transformed results using the same filenames as the input files to the specified output directory.

Thursday, August 23, 2012

XML Database as source for RDF Database

We've come a long way reading and transforming XML resources from a plain filesystem to setting up an XML database and executing sophisticated cross collection xqueries. As we aim to always improve our role as information providers we are on the verge of switching to one-stop-shopping. Currently we only have a part of the masterdata stored in the XMLDB. And we already are able to
  • generate DITA maps / topics  (PDF creation,  automated translations, ...)
  • generate publications (xhtml)
  • answer data-related questions in real time
The main components in this architectural picture are
  • Websphere product centre  (exports productinformation as XML) (soon to change)
  • Apache Cocoon (main framework that does all of the above)
  • Sedna (XMLDB)

But to get one-stop-shopping we need a more flexible way to link data from different sources (RDBMS, XMLDB, CSV, ...)

We will automate data extraction for all information resources and transform that data into RDF, so it becomes easy to link the data and offer a consistent way of querying it (SPARQL endpoint).

Below an example of an XQuery library from which we can generate RDF from the XMLDB.
module namespace basictypes2rdf = "http://www.nxp.com/basictypes2rdf";

declare copy-namespaces preserve, inherit;

declare namespace rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#";
declare namespace skos="http://www.w3.org/2004/02/skos/core#";
declare namespace foaf="http://xmlns.com/foaf/0.1/";
declare namespace nxp="http://purl.org/nxp/schema/v1/";

import module namespace basictypes = "http://www.nxp.com/basictypes";
import module namespace string = "http://www.nxp.com/string";
import module namespace rdfutil = "http://www.nxp.com/rdfutil";
import module namespace packages2rdf = "http://www.nxp.com/packages2rdf";


(:
    Maps each Product element to one rdf:Description (not wrapped in rdf:RDF).

    Every description carries the BasicType rdf:type, the product status date,
    an English prefLabel taken from the product description, a product status
    resource whose local name is the camel-cased, lower-cased ProductStatus,
    the homepage and the type number. A mechanicalOutline link is emitted only
    when the product has a ProductInformation/PackageID.
:)
declare function basictypes2rdf:fromBasicTypesRaw($products as element(Product)*) as element(rdf:Description)* {
    for $product in $products
    let $info := $product/ProductInformation
    let $typeNumber := basictypes:getName($product)
    let $subject := basictypes2rdf:getURI($product)
    let $statusName := string:toCamelCase(lower-case(data($info/ProductStatus)))
    return
    <rdf:Description rdf:about="{$subject}">
      <rdf:type rdf:resource="http://purl.org/nxp/schema/v1/BasicType"/>
      <nxp:productStatusDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">{data($info/ProductStatusDate)}</nxp:productStatusDate>
      <skos:prefLabel xml:lang="en-us">{data($info/Description)}</skos:prefLabel>
      <nxp:productStatus rdf:resource="http://purl.org/nxp/schema/v1/{$statusName}"/>
      <foaf:homepage rdf:resource="http://www.nxp.com/pip/{$typeNumber}"/>
      <nxp:typeNumber>{$typeNumber}</nxp:typeNumber>
      {
        if (empty($info/PackageID))
        then ()
        else <nxp:mechanicalOutline rdf:resource="{packages2rdf:getURI(basictypes:getPackage($product))}"/>
      }
    </rdf:Description>
};

(:
    Converts the given products to rdf:Description elements and hands them to
    rdfutil:wrapRDF, returning the resulting rdf:RDF element.
:)
declare function basictypes2rdf:fromBasicTypes($products as element(Product)*) as element(rdf:RDF) {
    let $descriptions := basictypes2rdf:fromBasicTypesRaw($products)
    return rdfutil:wrapRDF($descriptions)
};

(:
    Generates RDF for the basic types selected by $ids.
    The selection is delegated to basictypes:filterBySet, applied to the
    result of basictypes:getBasicTypes().

    basictypes2rdf:fromBasicTypeIds(("PH3330L","PH3330CL"))
:)
declare function basictypes2rdf:fromBasicTypeIds($ids as xs:string*) as element(rdf:RDF) {
    let $allBasicTypes := basictypes:getBasicTypes()
    let $selected := basictypes:filterBySet($allBasicTypes, $ids)
    return basictypes2rdf:fromBasicTypes($selected)
};

(:
    Returns the resource URI for a product: rdfutil:getURI is called with the
    "basic_types" category and the product's ProductInformation/Name.
:)
declare function basictypes2rdf:getURI($product as element(Product)) as xs:anyURI {
    let $name := data($product/ProductInformation/Name)
    return rdfutil:getURI("basic_types", $name)
};

(:
    Usages:
    basictypes2rdf:fromBasicTypes(basictypes:getBasicType("PH3330L"))
    basictypes2rdf:fromBasicTypes(basictypes:getBasicTypes()[ProductInformation/PIPType = 0])
    basictypes2rdf:fromBasicTypeIds(("PH3330L","PH3330CL"))
:)
Following expression will produce the output below:
import module namespace basictypes2rdf = "http://www.nxp.com/basictypes2rdf";
basictypes2rdf:fromBasicTypeIds("PH3330L")

<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:nxp="http://purl.org/nxp/schema/v1/">
  <rdf:Description rdf:about="http://data.nxp.com/id/basic_types/ph3330l">
    <rdf:type rdf:resource="http://purl.org/nxp/schema/v1/BasicType"/>
    <nxp:productStatusDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2011-10-28</nxp:productStatusDate>
    <skos:prefLabel xml:lang="en-us">N-channel TrenchMOS logic level FET</skos:prefLabel>
    <nxp:productStatus rdf:resource="http://purl.org/nxp/schema/v1/endOfLife"/>
    <foaf:homepage rdf:resource="http://www.nxp.com/pip/PH3330L"/>
    <nxp:typeNumber>PH3330L</nxp:typeNumber>
    <nxp:mechanicalOutline rdf:resource="http://data.nxp.com/id/package_outline_versions/sot669"/>
  </rdf:Description>
</rdf:RDF>
And you should validate the output just to make sure.

Using SPARQL describe queries

This post will show how to use describe queries from the stardog CLI but the query is not database specific. You can actually export the triples in different formats:
  • NTRIPLES
  • RDFXML
  • TURTLE
  • TRIG
  • TRIX
  • N3
  • NQUADS
Let's try out a simple describe query in 2 formats.

$ ./stardog query  -c http://localhost:5822/nxp -q "DESCRIBE <http://data.nxp.com/basicTypes/PH3330L>" -f RDFXML

<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF
  xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
  xmlns:owl="http://www.w3.org/2002/07/owl#"
  xmlns:foaf="http://xmlns.com/foaf/0.1/"
  xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
  xmlns:skos="http://www.w3.org/2004/02/skos/core#"
  xmlns:nxp="http://purl.org/nxp/schema/v1/"
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">

<rdf:Description rdf:about="http://data.nxp.com/basicTypes/PH3330L">
  <rdf:type rdf:resource="http://purl.org/nxp/schema/v1/BasicType"/>
  <nxp:productStatusDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2011-10-28</nxp:productStatusDate>
  <skos:prefLabel xml:lang="en-us">N-channel TrenchMOS logic level FET</skos:prefLabel>
  <nxp:productStatus rdf:resource="http://purl.org/nxp/schema/v1/endOfLife"/>
  <foaf:homepage rdf:resource="http://www.nxp.com/pip/PH3330L"/>
  <nxp:typeNumber>PH3330L</nxp:typeNumber>
  <nxp:mechanicalOutline rdf:resource="http://data.nxp.com/packageOutlineVersion/SOT669"/>
</rdf:Description>

</rdf:RDF>

$ ./stardog query  -c http://localhost:5822/nxp -q "DESCRIBE <http://data.nxp.com/basicTypes/PH3330L>" -f TURTLE

@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix nxp: <http://purl.org/nxp/schema/v1/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

<http://data.nxp.com/basicTypes/PH3330L> a nxp:BasicType ;
        nxp:productStatusDate "2011-10-28"^^xsd:date ;
        skos:prefLabel "N-channel TrenchMOS logic level FET"@en-us ;
        nxp:productStatus nxp:endOfLife ;
        foaf:homepage <http://www.nxp.com/pip/PH3330L> ;
        nxp:typeNumber "PH3330L" ;
        nxp:mechanicalOutline <http://data.nxp.com/packageOutlineVersion/SOT669> .

You can also return ALL object graphs of a specific type. This will wrap all descriptions in a rdf:RDF tag.
./stardog query  -c http://localhost:5822/nxp -f RDFXML -q "
PREFIX nxp:   <http://purl.org/nxp/schema/v1/>
DESCRIBE ?s
WHERE {
  ?s a nxp:BasicType.
}
" 

<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF
 xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
 xmlns:owl="http://www.w3.org/2002/07/owl#"
 xmlns:foaf="http://xmlns.com/foaf/0.1/"
 xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
 xmlns:skos="http://www.w3.org/2004/02/skos/core#"
 xmlns:nxp="http://purl.org/nxp/schema/v1/"
 xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">

<rdf:Description rdf:about="http://data.nxp.com/basicTypes/74AUP1G57GW">
 <rdf:type rdf:resource="http://purl.org/nxp/schema/v1/BasicType"/>
 <nxp:productStatusDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2011-10-20</nxp:productStatusDate>
    ...
</rdf:Description>

<rdf:Description rdf:about="http://data.nxp.com/basicTypes/74AUP1G58GW">
 <rdf:type rdf:resource="http://purl.org/nxp/schema/v1/BasicType"/>
 <nxp:productStatusDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2011-10-14</nxp:productStatusDate>
    ...
</rdf:Description>

</rdf:RDF>

Tuesday, August 21, 2012

Stardog command line interface

There are two CLI's available:
  • stardog-admin: administrative client (uses SNARL Protocol only)
  • stardog: a user's client (uses HTTP or SNARL)

$ ./stardog help
Stardog 1.0.4 command line client

Type 'help <cmd>' or '<cmd> -h/--help' to print the usage information for a specific command

Type stardog [subcommand] [args]'

Available commands:
        add
        consistency
        explain inference
        explain plan
        export
        icv convert
        icv validate
        namespace add
        namespace list
        namespace remove
        passwd
        query
        remove
        search
        status

For more information on this library, visit the home page at http://stardog.com/docs/
For information on Stardog, please visit http://stardog.com

$ ./stardog-admin help
Stardog 1.0.4 command line client

Type 'help <cmd>' or '<cmd> -h/--help' to print the usage information for a specific command

Type stardog-admin [global args] [subcommand] [args]'

The global commands are --home, --disable-security, --logfile. See docs for more info.

Available commands:
        copy
        create
        drop
        icv add
        icv drop
        icv remove
        list
        metadata get
        metadata set
        migrate
        offline
        online
        optimize
        passwd
        role add
        role drop
        role grant
        role list
        role permission
        role revoke
        server start
        server stop
        user add
        user drop
        user edit
        user grant
        user list
        user permission
        user revoke

For more information on this library, visit the home page at http://stardog.com/docs/
For information on Stardog, please visit http://stardog.com

Suppose we want to get detailed info about using the 'user list' command.
$ ./stardog-admin help user list
Usage: user list [options]

Lists all users.

Valid Options:
        [--all, -A]                    : Be verbose with user info.

        [--ask-password, -P]           : Prompt for password.

        --format, -f arg               : Format for the output [TEXT, CSV, HTML]

        [--help, -h]                   : Display usage information

        [--passwd, -p arg]             : Password

        [--server arg]                 : URL of Stardog Server. If this option isn't specified, it will be read from JVM argument 'stardog.default.cli.server'. If the JVM arg isn't set, the default value 'snarl://localhost:5820' is used. If server URL
has no explicit port value, the default port value '5820' is used.

        [--username, -u arg]           : User name

Stardog Quick-start notes (Windows)

  • Laptop: Windows 7, 64 bit, intel Core i5-2520M CPU @2.5Ghz, 8GB RAM, 120 GB SSD
  • set STARDOG_HOME variable to e.g. c:/development/stardog-1.0.4 
  • copy the license file over to STARDOG_HOME
  • start the server
$ ./stardog-admin server start
   ************************************************************
   Stardog server 1.0.4 started on Tue Aug 21 11:02:34 CEST 2012.

   SNARL server running on snarl://localhost:5820/
   HTTP server running on http://localhost:5822/.
   Stardog documentation accessible at http://localhost:5822/docs
   SNARL & HTTP servers listening on all interfaces

   STARDOG_HOME=C:\development\stardog-1.0.4
   ************************************************************
  • create a database from input file
  $ ./stardog-admin create -n nxp  -t D -u admin -p admin --server snarl://localhost:5820/ c:/testdata/products.rdf
Bulk loading data to new database.
Data load complete. Loaded 340,819 triples in 00:00:04 @ 69.4K triples/sec.
Successfully created database 'nxp'.  
  • query the database:
  
$ ./stardog query -c http://localhost:5822/nxp -q "
  PREFIX nxp:   <http://purl.org/nxp/schema/v1/>
  PREFIX skos:  <http://www.w3.org/2004/02/skos/core#>
  SELECT ?typeNumber ?prefLabel
  WHERE
  {
    ?x nxp:typeNumber ?typeNumber;
       skos:prefLabel ?prefLabel .
  } 
  LIMIT 10
  " 
Executing Query:

PREFIX nxp:   <http://purl.org/nxp/schema/v1/>
PREFIX skos:  <http://www.w3.org/2004/02/skos/core#>
SELECT ?typeNumber ?prefLabel
WHERE
{
  ?x nxp:typeNumber ?typeNumber;
     skos:prefLabel ?prefLabel .
}
LIMIT 10

+------------------+---------------------------------------------------------------+
|    typeNumber    |                           prefLabel                           |
+------------------+---------------------------------------------------------------+
| "74AUP1G57GW"    | "Low-power configurable multiple function gate"@en-us         |
| "74AUP1G58GW"    | "Low-power configurable multiple function gate"@en-us         |
| "74AUP1T45GW"    | "Low-power dual supply translating transeiver; 3-state"@en-us |
| "74AUP2G32GN"    | "Dual2-inputORgate (IMPULSE)"@en-us                           |
| "74AUP2G32GS"    | "Dual2-inputORgate (IMPULSE)"@en-us                           |
| "74AVC16T245DGG" | "74AVC16T245DGG (IMPULSE)"@en-us                              |
| "74AVC16T245DGV" | "16-Bit Dual-SupplyTx/Rx w/3-State (IMPULSE)"@en-us           |
| "74AVC2T45DC"    | "74AVC2T45DC (IMPULSE)"@en-us                                 |
| "74AVC2T45DP"    | "2-bit Dual-supply translator (IMPULSE)"@en-us                |
| "74AVC2T45GD"    | "2-bit Dual-supply translator (IMPULSE)"@en-us                |
+------------------+---------------------------------------------------------------+

Query returned 10 results in 00:00:00.124

Monday, August 20, 2012

Some extra useful String functions (XQuery)

module namespace string = "http://www.nxp.com/string";

(:
  Upper-cases the first character of $string and leaves the rest untouched.
  An empty string yields an empty string.

  string:capitalize("test")  -->  "Test"
:)
declare function string:capitalize($string as xs:string) as xs:string {
    concat(upper-case(substring($string, 1, 1)), substring($string, 2))
};

(:
    Capitalizes every word of $string. Words are obtained through
    string:splitWords and are re-joined with a single space.

    string:capitalizeAll("makes you wonder")  --> "Makes You Wonder"
:)
declare function string:capitalizeAll($string as xs:string) as xs:string {
   let $capitalized :=
       for $word in string:splitWords($string)
       return string:capitalize($word)
   return string-join($capitalized, ' ')
};

(:
   Splits $string into a sequence of one-character strings.
   An empty string yields the empty sequence.

   string:split("work") --> ("w", "o", "r", "k")
:)
declare function string:split($string as xs:string) as xs:string* {
    for $position in 1 to string-length($string)
    return substring($string, $position, 1)
};

(:
   Splits $string into its whitespace-separated words.

   Whitespace is normalized first, so leading, trailing and repeated
   whitespace never produce empty-string "words"; a blank or empty input
   yields the empty sequence.

   string:splitWords("go live")       --> ("go", "live")
   string:splitWords("  go   live ")  --> ("go", "live")
:)
declare function string:splitWords($string as xs:string) as xs:string* {
    (: normalize-space trims both ends and collapses whitespace runs;
       tokenizing a zero-length string returns the empty sequence. :)
    tokenize(normalize-space($string), "\s+")
};

(:
  Joins the words of $string without separators: the first word is kept
  as-is and every following word is capitalized.

  string:toCamelCase("business segment") --> "businessSegment"
:)
declare function string:toCamelCase($string as xs:string) as xs:string {
    let $words := string:splitWords($string)
    let $head := $words[1]
    let $tail :=
        for $word in $words[position() gt 1]
        return string:capitalize($word)
    return string-join(($head, $tail), '')
};

Wednesday, July 4, 2012

Generating XQueries from XML based definitions using XSLT2.0

I'm currently working on a project where the customer wants to be able to specify, per product category, which properties have to be shown in a parametric table. The parametric header definition contains things like column names, tooltips, ordering, data binding and so on. This blog post shows the approach I currently take to dynamically generate an XQuery that produces a preview table from that header definition. It's definitely not the final result, but that is not in my scope and will be handled by another team of developers. Just as a side note: all this will be automated using Apache Cocoon.

Parametric Header definition (XML input)

Header transformer (XSLT)

Generated XQuery

Preview table image

Final table image

Tuesday, July 3, 2012

Splitting XML file into multiple files using XSLT2.0

Suppose we have 1 folder where a manifest.xml is stored and some other files (basictypes.xml and packages.xml) which are referenced by the manifest file. These files contain multiple objects of a specific type and we want to split those in separate files. There are some hurdles to overcome:
  •  As some objects are logically duplicates (same identifier) which would be written to the same URI this would result in an exception.
SystemID: C:\pelssers\demo\manifest_transformer.xsl
Engine name: Saxon-HE 9.3.0.5
Severity: fatal
Description: Cannot write more than one result document to the same URI: file:/c:/pelssers/demo/export/basictypes/PH3330L.xml
Start location: 27:0
URL: http://www.w3.org/TR/xslt20/#err-XTDE1490
  • Second difficulty is that they are not identifiable with the same xpath-expression so to use 1 single group-by declaration for this heterogeneous bunch of elements needed a bit of thinking. I had to resort to a "Generic" function that would delegate to matching templates for the specific type of element. 

 manifest.xml
<?xml version="1.0" encoding="UTF-8"?>
<manifest>
  <file href="basictypes.xml"/>
  <file href="packages.xml"/> 
</manifest>
 
basictypes.xml
<?xml version="1.0" encoding="UTF-8"?>
<basictypes>
    <basictype identifier="PH3330L">
        <description>N-channel TrenchMOS logic level FET</description>
        <magcode>R73</magcode> 
    </basictype>
    <basictype identifier="BUK3F00-50WDFE">
        <description>9675 AUTO IC (IMPULSE)</description>
        <magcode>R73</magcode>   
    </basictype>
    <basictype identifier="PH3330L">
        <description>this is a duplicate of PH3330L</description>
        <magcode>R73</magcode>         
    </basictype>
</basictypes>

packages.xml
<?xml version="1.0" encoding="UTF-8"?>
<packages>
    <package id="SOT669">
        <description>plastic single-ended surface-mounted package; 4 leads</description>
        <name>LFPAK; Power-SO8</name> 
    </package>
    <package id="SOT600-1">
        <description>plastic thin fine-pitch ball grid array package;</description>
        <name>TFBGA208</name>   
    </package>   
</packages>

In the XSLT below I first chose a grouping strategy to resolve the error of writing duplicate items to the same URI. Next I had to use an abstract function getURI for all element cases (basictype and package) which delegates the call to matching templates of @mode="getURI". I only use @mode="write" for the first element in each group and use @mode="skip" for all subsequent elements of that group. For this purpose I only log a message that I'm skipping them, but I could also have implemented that handler differently, for example by writing them to another folder. The only thing I would have to make sure of would be to include some uniquely identifying part in the URI. I could e.g. use generate-id().
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet 
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:xs="http://www.w3.org/2001/XMLSchema"
  xmlns:pelssers="http://robbypelssers.blogspot.com"
  version="2.0">
 
  <!-- Base URI that the manifest's file/@href values are appended to when loading the referenced documents. -->
  <xsl:param name="sourceFolder" select="xs:anyURI('file:///c:/pelssers/demo/')"/>
  <!-- Base URI under which the split result documents (basictypes/..., packages/...) are written. -->
  <xsl:param name="destinationFolder" select="xs:anyURI('file:///c:/pelssers/demo/export/')"/>
    
  <!--
    Returns the target URI for an element by applying templates to it in
    mode "getURI"; the matching template for the element's type builds the
    actual URI. Used both as grouping key and as result-document href.
  -->
  <xsl:function name="pelssers:getURI" as="xs:anyURI">
    <xsl:param name="element" as="element()"/> 
    <xsl:apply-templates select="$element" mode="getURI"/>  
  </xsl:function>  
    
  <!--
    Entry point (the manifest document). Loads every document referenced by
    manifest/file/@href, collects the grandchildren of each document node
    (the individual basictype / package elements) and groups them by target
    URI. The first element of each group is written; all later elements of
    the same group are only reported as duplicates.
  -->
  <xsl:template match="/">
    <xsl:variable name="documents" select="for $href in manifest/file/@href return document(xs:anyURI(concat($sourceFolder, $href)))"/>
    <xsl:variable name="elements" select="for $doc in $documents return $doc/*/*"/>
    <xsl:for-each-group select="$elements" group-by="pelssers:getURI(.)">
      <xsl:apply-templates select="current-group()[1]" mode="write"/>
      <xsl:apply-templates select="current-group()[position() gt 1]" mode="skip"/>
    </xsl:for-each-group>
  </xsl:template>
  
  <!--
    Writes one basictype / package element to its own result document.
    The element is wrapped in a new element named after its original parent
    (basictypes / packages), and the parent's attributes are copied onto that
    wrapper.
  -->
  <xsl:template match="basictype | package" mode="write">
    <xsl:variable name="uri" select="pelssers:getURI(.)"/>
    <xsl:message>Processing <xsl:value-of select="local-name()"/> to URI <xsl:value-of select="$uri"/> </xsl:message>
    <xsl:result-document method="xml" href="{$uri}">
      <xsl:element name="{../local-name()}">
        <!-- copy-of keeps the parent's attributes as attributes; applying
             templates would hit the built-in rule and emit their values as text -->
        <xsl:copy-of select="../@*"/>
        <xsl:copy-of select="."/>
      </xsl:element>
    </xsl:result-document>
  </xsl:template>
  
  <!--
    Handles the second and later elements of a group: nothing is written,
    only a warning naming the element type and the URI it would have
    collided with.
  -->
  <xsl:template match="basictype | package" mode="skip">
    <xsl:message>
      <xsl:value-of select="concat('Warning !! Skipping duplicate ', local-name(), ' with URI ', pelssers:getURI(.))"/>
    </xsl:message>
  </xsl:template>
  
  <!-- Target URI of a basictype: destinationFolder + 'basictypes/' + @identifier + '.xml'. -->
  <xsl:template match="basictype" as="xs:anyURI" mode="getURI">
    <xsl:variable name="target" select="concat($destinationFolder, 'basictypes/', @identifier, '.xml')"/>
    <xsl:sequence select="xs:anyURI($target)"/>
  </xsl:template>
  
  <!-- Target URI of a package: destinationFolder + 'packages/' + @id + '.xml'. -->
  <xsl:template match="package" as="xs:anyURI" mode="getURI">
    <xsl:variable name="target" select="concat($destinationFolder, 'packages/', @id, '.xml')"/>
    <xsl:sequence select="xs:anyURI($target)"/>
  </xsl:template>

</xsl:stylesheet>

The output of running this transformation nicely reports what's happening.
[Saxon-HE] Processing basictype to URI file:///c:/pelssers/demo/export/basictypes/PH3330L.xml
[Saxon-HE] Warning !! Skipping duplicate basictype with URI file:///c:/pelssers/demo/export/basictypes/PH3330L.xml
[Saxon-HE] Processing basictype to URI file:///c:/pelssers/demo/export/basictypes/BUK3F00-50WDFE.xml
[Saxon-HE] Processing package to URI file:///c:/pelssers/demo/export/packages/SOT669.xml
[Saxon-HE] Processing package to URI file:///c:/pelssers/demo/export/packages/SOT600-1.xml