cse 6331 © leonidas fegaras xml tools1 xml tools leonidas fegaras
TRANSCRIPT
CSE 6331 © Leonidas Fegaras XML Tools 1
XML Tools
Leonidas Fegaras
CSE 6331 © Leonidas Fegaras XML Tools 2
XML Processing
documentparser
documentvalidator
applicationXMLdocument
XMLinfoset
XMLinfoset(annotated)
Well-formedness checksReference expansion
DTD or XML schema storagesystem
CSE 6331 © Leonidas Fegaras XML Tools 3
DOM
The Document Object Model (DOM) is a platform- and language-neutral interface that allows programs and scripts to dynamically access and update the content and structure of XML documents. The following is part of the DOM interface:
/** A node of the document tree; the common supertype of elements and documents. */
public interface Node {
    /** Returns the name of this node. */
    public String getNodeName ();
    /** Returns the value of this node. */
    public String getNodeValue ();
    /** Returns the children of this node as a list. */
    public NodeList getChildNodes ();
    /** Returns the attributes of this node. */
    public NamedNodeMap getAttributes ();
}

/** An element node. */
public interface Element extends Node {
    /**
     * Returns the elements with the given tag name.
     * The result is a list (there may be many matches), not a single node.
     */
    public NodeList getElementsByTagName ( String name );
}

/** The document node. */
public interface Document extends Node {
    /** Returns the root element of the document. */
    public Element getDocumentElement ();
}

/** An indexed collection of nodes. */
public interface NodeList {
    /** Returns the number of nodes in the list. */
    public int getLength ();
    /** Returns the node at the given position. */
    public Node item ( int index );
}
CSE 6331 © Leonidas Fegaras XML Tools 4
DOM Example
import java.io.File;import javax.xml.parsers.*;import org.w3c.dom.*;
/**
 * DOM example: parses depts.xml and prints the text of every "tel" element
 * found under a "dept" element whose first child has the value "cse".
 * Only "dept" nodes that are grandchildren of the document element are examined.
 */
class Test {
    public static void main ( String args[] ) throws Exception {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = dbf.newDocumentBuilder();
        Document doc = db.parse(new File("depts.xml"));
        NodeList nodes = doc.getDocumentElement().getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++) {
            NodeList ndl = nodes.item(i).getChildNodes();
            for (int k = 0; k < ndl.getLength(); k++) {
                Node m = ndl.item(k);
                // Strings must be compared with equals(): == compares object
                // identity and is not guaranteed to hold for parsed text.
                if (!(m instanceof Element) || !"dept".equals(m.getNodeName()))
                    continue;
                // An empty <dept/> has no first child; skip it instead of failing.
                Node first = m.getFirstChild();
                if (first == null || !"cse".equals(first.getNodeValue()))
                    continue;
                NodeList ncl = ((Element) m).getElementsByTagName("tel");
                for (int j = 0; j < ncl.getLength(); j++) {
                    Node text = ncl.item(j).getFirstChild();
                    // An empty <tel/> has nothing to print.
                    if (text != null)
                        System.out.print(text.getNodeValue());
                }
            }
        }
    }
}
CSE 6331 © Leonidas Fegaras XML Tools 5
Better Programming
import java.io.File;import javax.xml.parsers.*;import org.w3c.dom.*;import java.util.Vector;
/**
 * A sequence of DOM nodes stored in a Vector, with a navigation step
 * (child) that can be chained.
 */
class Sequence extends Vector {

    /** Creates an empty sequence. */
    Sequence () {
        super();
    }

    /**
     * Creates a sequence holding the document element of the parsed file.
     * @param filename path of the XML file to parse
     * @throws Exception if the parser cannot be created or the file cannot be parsed
     */
    Sequence ( String filename ) throws Exception {
        super();
        Document parsed = DocumentBuilderFactory.newInstance()
                                                .newDocumentBuilder()
                                                .parse(new File(filename));
        add(parsed.getDocumentElement());
    }

    /**
     * Collects, in order, the children of every node in this sequence
     * whose node name equals the given tag name.
     * @param tagname the node name to select
     * @return a new sequence with the matching children
     */
    Sequence child ( String tagname ) {
        Sequence matches = new Sequence();
        for (Object member : this) {
            NodeList kids = ((Node) member).getChildNodes();
            int k = 0;
            while (k < kids.getLength()) {
                Node kid = kids.item(k);
                if (kid.getNodeName().equals(tagname)) {
                    matches.add(kid);
                }
                k++;
            }
        }
        return matches;
    }

    /** Prints the toString() form of each member on its own line. */
    void print () {
        for (Object member : this) {
            System.out.println(member.toString());
        }
    }
}
/**
 * Driver for the Sequence example: loads cs.xml, steps to the "gradstudent"
 * children, then to their "name" children, and prints the result.
 */
class DOM {
    public static void main ( String args[] ) throws Exception {
        Sequence root = new Sequence("cs.xml");
        Sequence students = root.child("gradstudent");
        Sequence names = students.child("name");
        names.print();
    }
}
CSE 6331 © Leonidas Fegaras XML Tools 6
SAX
• SAX is the Simple API for XML, which allows you to process a document as it is being read
– (in contrast to DOM, which requires the entire document to be read before
it takes any action)
• The SAX API is event based– The XML parser sends events, such as the start or the end of an element,
to an event handler, which processes the information
CSE 6331 © Leonidas Fegaras XML Tools 7
Parser Events
• Receive notification of the beginning of a document:
void startDocument ()
• Receive notification of the end of a document:
void endDocument ()
• Receive notification of the beginning of an element:
void startElement ( String namespace, String localName,
String qName, Attributes atts )
• Receive notification of the end of an element:
void endElement ( String namespace, String localName,
String qName )
• Receive notification of character data:
void characters ( char[] ch, int start, int length )
CSE 6331 © Leonidas Fegaras XML Tools 8
SAX Example: a Printer
import java.io.FileReader;import javax.xml.parsers.*;import org.xml.sax.*;import org.xml.sax.helpers.*;
/**
 * SAX handler that echoes the event stream to standard output as markup:
 * an opening tag for each start-element event, a closing tag for each
 * end-element event, and the raw character data in between.
 * Attributes are not printed and text is written without escaping.
 */
class Printer extends DefaultHandler {
    public Printer () { super(); }

    /** Nothing is printed at the start of the document. */
    public void startDocument () {}

    /** Terminates the output with a line break. */
    public void endDocument () { System.out.println(); }

    /** Prints the opening tag, using the qualified name passed as tag. */
    public void startElement ( String uri, String name, String tag, Attributes atts ) {
        System.out.print("<" + tag + ">");
    }

    /** Prints the closing tag, using the qualified name passed as tag. */
    public void endElement ( String uri, String name, String tag ) {
        System.out.print("</" + tag + ">");
    }

    /** Prints the length characters of text starting at index start. */
    public void characters ( char text[], int start, int length ) {
        System.out.print(new String(text, start, length));
    }
}
CSE 6331 © Leonidas Fegaras XML Tools 9
The Child Handler
class Child extends DefaultHandler {
DefaultHandler next; // the next handler in the pipeline
String ptag; // the tagname of the child
boolean keep; // are we keeping or skipping events?
short level; // the depth level of the current element
/**
 * Creates a pipeline stage for the given tag name.
 * The stage starts at depth 0 and is not forwarding events.
 * @param s the tag name of the child elements to select
 * @param n the next handler in the pipeline
 */
public Child ( String s, DefaultHandler n ) {
    super();
    this.ptag = s;
    this.next = n;
    this.level = 0;
    this.keep = false;
}
/** Passes the start-of-document event on to the next handler unconditionally. */
@Override
public void startDocument () throws SAXException {
    this.next.startDocument();
}
/** Passes the end-of-document event on to the next handler unconditionally. */
@Override
public void endDocument () throws SAXException {
    this.next.endDocument();
}
CSE 6331 © Leonidas Fegaras XML Tools 10
The Child Handler (cont.)
/**
 * Handles the start of an element. When the element opens at depth 1
 * (a direct child of the outermost element this handler sees), the
 * keep flag is set according to whether its qualified name equals ptag.
 * The event is forwarded to the next handler while keep is set.
 */
@Override
public void startElement ( String nm, String ln, String qn, Attributes a ) throws SAXException {
    // Depth is tested before it is incremented for the newly opened element.
    final boolean opensAtDepthOne = (level == 1);
    level++;
    if (opensAtDepthOne) {
        keep = ptag.equals(qn);
    }
    if (!keep) {
        return;
    }
    next.startElement(nm, ln, qn, a);
}
/**
 * Handles the end of an element. The event is forwarded while keep is set;
 * afterwards the depth is decremented, and when it drops back to 1 the
 * depth-1 element has just closed, so forwarding is switched off.
 */
@Override
public void endElement ( String nm, String ln, String qn ) throws SAXException {
    if (keep) {
        next.endElement(nm, ln, qn);
    }
    level--;
    if (level == 1) {
        keep = false;
    }
}
/** Forwards character data to the next handler, but only while keep is set. */
@Override
public void characters ( char[] text, int start, int length ) throws SAXException {
    if (!keep) {
        return;
    }
    next.characters(text, start, length);
}
}
CSE 6331 © Leonidas Fegaras XML Tools 11
Forming the Pipeline
/**
 * Driver for the SAX pipeline: the parser feeds a Child handler for
 * "gradstudent", which feeds a Child handler for "name", which feeds a Printer.
 */
class SAX {
    public static void main ( String args[] ) throws Exception {
        SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
        // Build the pipeline from its last stage back to its first.
        DefaultHandler printer = new Printer();
        DefaultHandler names = new Child("name", printer);
        DefaultHandler handler = new Child("gradstudent", names);
        InputSource input = new InputSource(new FileReader("cs.xml"));
        parser.parse(input, handler);
    }
}
SAX parser → Child:gradstudent → Child:name → Printer
CSE 6331 © Leonidas Fegaras XML Tools 12
Example
Input Stream
<department>
<deptname>
Computer Science
</deptname>
<gradstudent>
<name>
<lastname>
Smith
</lastname>
<firstname>
John
</firstname>
</name>
</gradstudent>
...
</department>
SAX Events
SD:
SE: department
SE: deptname
C: Computer Science
EE: deptname
SE: gradstudent
SE: name
SE: lastname
C: Smith
EE: lastname
SE: firstname
C: John
EE: firstname
EE: name
EE: gradstudent
...
EE: department
ED:
Child: gradstudent Child: name Printer
CSE 6331 © Leonidas Fegaras XML Tools 13
XSL Transformation
A stylesheet specification language for converting XML documents into various forms (XML, HTML, plain text, etc).
• Can transform each XML element into another element, add new elements into the output file, or remove elements.
• Can rearrange and sort elements, test and make decisions about which elements to display, and much more.
• Based on XPath:
<xsl:stylesheet version='1.0'
xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>
<xsl:template match="/">
<students>
<xsl:copy-of select="//student/name"/>
</students>
</xsl:template>
</xsl:stylesheet>
CSE 6331 © Leonidas Fegaras XML Tools 14
XSLT Templates
• XSL uses XPath to define parts of the source document that match one or more predefined templates.
• When a match is found, XSLT will transform the matching part of the source document into the result document.
• The parts of the source document that do not match an explicit template are processed by the default (built-in) templates, which visit their children and copy only the text content into the result document.
Form:
<xsl:template match=”XPath expression”>…
</xsl:template>
The default (implicit) templates visit all nodes and strip out all tags:
<xsl:template match="*|/">
<xsl:apply-templates/>
</xsl:template>
<xsl:template match="text()|@*">
<xsl:value-of select="."/>
</xsl:template>
CSE 6331 © Leonidas Fegaras XML Tools 15
Other XSLT Elements
<xsl:value-of select=“XPath expression“/>
select the value of an XML element and add it to the output stream of the transformation, e.g. <xsl:value-of select="//books/book/author"/>.
<xsl:copy-of select=“XPath expression“/>copy the entire XML element to the output stream of the transformation.
<xsl:apply-templates select="XPath expression"/>
apply the template rules to the elements that match the XPath expression.
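<xsl:element name="{XPath expression}"> … </xsl:element>
add an element to the output with a tag-name computed from the XPath expression (the name attribute is an attribute value template, so the expression is written inside braces).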
Example:<xsl:stylesheet version = ’1.0’
xmlns:xsl=’http://www.w3.org/1999/XSL/Transform’><xsl:template match="employee">
<b> <xsl:apply-templates select="node()"/> </b></xsl:template><xsl:template match="surname">
<i> <xsl:value-of select="."/> </i></xsl:template>
</xsl:stylesheet>
CSE 6331 © Leonidas Fegaras XML Tools 16
Copy the Entire Document
<xsl:stylesheet version='1.0'
xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>
<xsl:template match="/">
<xsl:apply-templates/>
</xsl:template>
<xsl:template match="text()">
<xsl:value-of select="."/>
</xsl:template>
<xsl:template match="*">
<xsl:element name="{name(.)}">
<xsl:apply-templates/>
</xsl:element>
</xsl:template>
</xsl:stylesheet>
CSE 6331 © Leonidas Fegaras XML Tools 17
More on XSLT
• Conflict resolution: more specific templates override more general templates. Templates are assigned default priorities, but these can be overridden using priority="n" in a template.
• Modes can be used to group together templates. No mode is an empty mode.
<xsl:template match=“…” mode=“A”>
<xsl:apply-templates mode=“B”/>
</xsl:template>
• Conditional and loop statements:<xsl:if test=“XPath predicate”> body </xsl:if>
<xsl:for-each select=“XPath”> body </xsl:for-each>
• Variables can be used to name data:
<xsl:variable name="x"> value </xsl:variable>
Variables are referenced as $x in XPath expressions (and as {$x} inside attribute value templates).
CSE 6331 © Leonidas Fegaras XML Tools 18
Using XSLT
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.w3c.dom.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import java.io.*;
/**
 * Applies the stylesheet x.xsl to the document a.xml and writes the
 * result of the transformation to standard output.
 */
class XSLT {
    public static void main ( String argv[] ) throws Exception {
        // Parse the input document into a DOM tree first.
        Document input = DocumentBuilderFactory.newInstance()
                                               .newDocumentBuilder()
                                               .parse(new File("a.xml"));
        // Then compile the stylesheet into a transformer.
        Transformer styler = TransformerFactory.newInstance()
                                               .newTransformer(new StreamSource(new File("x.xsl")));
        styler.transform(new DOMSource(input), new StreamResult(System.out));
    }
}