Document 7877727

Download Report

Transcript Document 7877727

XML Tools
Leonidas Fegaras
CSE 6331
© Leonidas Fegaras
XML Tools
1
XML Processing
XML document → document parser (well-formedness checks, reference expansion) → XML infoset → document validator (driven by a DTD or XML schema) →
CSE 6331
© Leonidas Fegaras
XML Tools
XML infoset (annotated) → application / storage system
2
DOM
The Document Object Model (DOM) is a platform- and language-neutral interface
that allows programs and scripts to dynamically access and update the content
and structure of XML documents. The following is part of the DOM interface:
/**
 * Excerpt of the DOM node abstraction; see the W3C DOM {@code Node}
 * interface for the complete contract.
 */
public interface Node {

    /** @return the name of this node (the examples compare it against tag names) */
    String getNodeName();

    /** @return the value of this node (the examples read it from text children) */
    String getNodeValue();

    /** @return the list of this node's children */
    NodeList getChildNodes();

    /** @return the attribute map of this node */
    NamedNodeMap getAttributes();
}
/**
 * Excerpt of the DOM element abstraction; see the W3C DOM {@code Element}
 * interface for the complete contract.
 */
public interface Element extends Node {

    /**
     * Looks up elements by tag name.
     *
     * @param name the tag name to search for
     * @return the matching elements as a {@code NodeList} (a list, not a
     *         single node: callers iterate it with getLength()/item())
     */
    public NodeList getElementsByTagName ( String name );
}
/**
 * Excerpt of the DOM document abstraction; see the W3C DOM {@code Document}
 * interface for the complete contract.
 */
public interface Document extends Node {

    /** @return the document element, i.e. the entry point the examples start navigating from */
    Element getDocumentElement();
}
/**
 * Excerpt of the DOM node-list abstraction: an indexed collection of nodes.
 */
public interface NodeList {

    /** @return the number of nodes in this list */
    int getLength();

    /**
     * @param index position of the wanted node
     * @return the node at {@code index}
     */
    Node item(int index);
}
CSE 6331
© Leonidas Fegaras
XML Tools
3
DOM Example
import java.io.File;
import javax.xml.parsers.*;
import org.w3c.dom.*;
/**
 * DOM example: parses {@code depts.xml} and prints the text of every
 * {@code tel} element found below the {@code dept} elements whose first
 * child carries the value {@code "cse"}.
 *
 * Only the grandchildren of the document element are inspected for
 * {@code dept} elements.
 */
class Test {

    /**
     * Runs the example.
     *
     * @param args unused
     * @throws Exception if the parser cannot be created or the file cannot be read/parsed
     */
    public static void main ( String args[] ) throws Exception {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = dbf.newDocumentBuilder();
        Document doc = db.parse(new File("depts.xml"));
        NodeList nodes = doc.getDocumentElement().getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++) {
            NodeList ndl = nodes.item(i).getChildNodes();
            for (int k = 0; k < ndl.getLength(); k++) {
                Node m = ndl.item(k);
                if (!isElementStartingWith(m, "dept", "cse")) {
                    continue;
                }
                NodeList ncl = ((Element) m).getElementsByTagName("tel");
                for (int j = 0; j < ncl.getLength(); j++) {
                    // An empty <tel/> has no child to print; skip it instead of failing.
                    Node text = ncl.item(j).getFirstChild();
                    if (text != null) {
                        System.out.print(text.getNodeValue());
                    }
                }
            }
        }
    }

    /**
     * Tells whether {@code n} is an element named {@code tag} whose first
     * child has the value {@code value}.
     *
     * Strings are compared with equals(): the values returned by the parser
     * are not guaranteed to be the same objects as the literals, so an
     * identity comparison (==) would silently reject real matches.
     */
    private static boolean isElementStartingWith ( Node n, String tag, String value ) {
        if (n.getNodeType() != Node.ELEMENT_NODE || !tag.equals(n.getNodeName())) {
            return false;
        }
        Node first = n.getFirstChild();
        return first != null && value.equals(first.getNodeValue());
    }
}
CSE 6331
© Leonidas Fegaras
XML Tools
4
Better Programming
import java.io.File;
import javax.xml.parsers.*;
import org.w3c.dom.*;
import java.util.Vector;
/**
 * A sequence of DOM nodes supporting a child-axis navigation step, so that
 * path-like queries can be written as chained calls.
 */
class Sequence extends Vector {

    /** Creates an empty sequence. */
    Sequence () {
        super();
    }

    /**
     * Creates a sequence holding the document element of an XML file.
     *
     * @param filename path of the XML file to parse
     * @throws Exception if the parser cannot be created or the file cannot be read/parsed
     */
    Sequence ( String filename ) throws Exception {
        super();
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        Document parsed = builder.parse(new File(filename));
        add((Object) parsed.getDocumentElement());
    }

    /**
     * Collects, in order, the children named {@code tagname} of every node
     * in this sequence.
     *
     * @param tagname node name the children must have
     * @return a new sequence with the matching children
     */
    Sequence child ( String tagname ) {
        Sequence matches = new Sequence();
        for (Object member : this) {
            NodeList kids = ((Node) member).getChildNodes();
            for (int pos = 0; pos < kids.getLength(); pos++) {
                Node kid = kids.item(pos);
                if (kid.getNodeName().equals(tagname)) {
                    matches.add((Object) kid);
                }
            }
        }
        return matches;
    }

    /** Prints the string form of each member on its own line. */
    void print () {
        for (Object member : this) {
            System.out.println(member.toString());
        }
    }
}
/**
 * Driver for the Sequence example: prints the {@code name} children of the
 * {@code gradstudent} children of the document element of {@code cs.xml}.
 */
class DOM {

    /**
     * @param args unused
     * @throws Exception if {@code cs.xml} cannot be read or parsed
     */
    public static void main ( String args[] ) throws Exception {
        Sequence root = new Sequence("cs.xml");
        Sequence students = root.child("gradstudent");
        Sequence names = students.child("name");
        names.print();
    }
}
CSE 6331
© Leonidas Fegaras
XML Tools
5
SAX
• SAX is the Simple API for XML that allows you to process a
document as it's being read
– in contrast to DOM, which requires the entire document to be read before
it takes any action
• The SAX API is event based
– The XML parser sends events, such as the start or the end of an element, to
an event handler, which processes the information
CSE 6331
© Leonidas Fegaras
XML Tools
6
Parser Events
• Receive notification of the beginning of a document
void startDocument ()
• Receive notification of the end of a document
void endDocument ()
• Receive notification of the beginning of an element
void startElement ( String namespace, String localName,
String qName, Attributes atts )
• Receive notification of the end of an element
void endElement ( String namespace, String localName,
String qName )
• Receive notification of character data
void characters ( char[] ch, int start, int length )
CSE 6331
© Leonidas Fegaras
XML Tools
7
SAX Example: a Printer
import java.io.FileReader;
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.xml.sax.helpers.*;
/**
 * SAX handler that echoes the events it receives to standard output as
 * tags and text. Attributes are not printed and text is written as
 * received (no escaping is applied).
 */
class Printer extends DefaultHandler {

    public Printer () { super(); }

    /** Nothing is printed at the start of the document. */
    @Override
    public void startDocument () {}

    /** Terminates the output with a line break. */
    @Override
    public void endDocument () { System.out.println(); }

    /**
     * Prints the opening tag of an element.
     *
     * @param uri  namespace of the element (ignored)
     * @param name local name of the element (ignored)
     * @param tag  qualified name, used as the printed tag
     * @param atts attributes of the element (ignored)
     */
    @Override
    public void startElement ( String uri, String name,
                               String tag, Attributes atts ) {
        System.out.print("<" + tag + ">");
    }

    /**
     * Prints the closing tag of an element.
     *
     * @param uri  namespace of the element (ignored)
     * @param name local name of the element (ignored)
     * @param tag  qualified name, used as the printed tag
     */
    @Override
    public void endElement ( String uri, String name, String tag ) {
        System.out.print("</" + tag + ">");
    }

    /**
     * Prints a run of character data.
     *
     * @param text   buffer holding the characters
     * @param start  index of the first character to print
     * @param length number of characters to print
     */
    @Override
    public void characters ( char text[], int start, int length ) {
        System.out.print(new String(text, start, length));
    }
}
CSE 6331
© Leonidas Fegaras
XML Tools
8
The Child Handler
/**
 * SAX pipeline stage implementing a child-axis step: of all the events it
 * receives, it forwards to the next handler only those belonging to
 * elements that are opened at depth 1 (directly inside the outermost
 * element this stage sees) and whose qualified name equals the wanted tag.
 * Document start/end events are always forwarded.
 */
class Child extends DefaultHandler {
    DefaultHandler next;    // the next handler in the pipeline
    String ptag;            // the tagname of the child
    boolean keep;           // are we keeping or skipping events?
    short level;            // the depth level of the current element

    /**
     * @param s tag name of the children to let through
     * @param n handler that receives the forwarded events
     */
    public Child ( String s, DefaultHandler n ) {
        super();
        next = n; ptag = s;
        keep = false; level = 0;
    }

    /** Forwards the start-of-document notification unconditionally. */
    @Override
    public void startDocument () throws SAXException {
        next.startDocument();
    }

    /** Forwards the end-of-document notification unconditionally. */
    @Override
    public void endDocument () throws SAXException {
        next.endDocument();
    }

    /**
     * Decides, for elements opened at depth 1, whether their subtree is
     * kept, and forwards the event while keeping.
     */
    @Override
    public void startElement ( String nm, String ln, String qn, Attributes a ) throws SAXException {
        // level is tested before being incremented: 1 means "child of the outermost element"
        if (level++ == 1)
            keep = ptag.equals(qn);
        if (keep)
            next.startElement(nm,ln,qn,a);
    }

    /**
     * Forwards the event while keeping, then stops keeping once the
     * depth-1 element has been closed.
     */
    @Override
    public void endElement ( String nm, String ln, String qn ) throws SAXException {
        if (keep)
            next.endElement(nm,ln,qn);
        // back at depth 1: the kept (or skipped) child has just been closed
        if (--level == 1)
            keep = false;
    }

    /** Forwards character data only while inside a kept child. */
    @Override
    public void characters ( char[] text, int start, int length ) throws SAXException {
        if (keep)
            next.characters(text,start,length);
    }
}
CSE 6331
© Leonidas Fegaras
XML Tools
10
Forming the Pipeline
/**
 * Driver for the SAX pipeline: parses {@code cs.xml} and sends its events
 * through Child("gradstudent") → Child("name") → Printer.
 */
class SAX {

    /**
     * @param args unused
     * @throws Exception if the parser cannot be created or {@code cs.xml}
     *                   cannot be read or parsed
     */
    public static void main ( String args[] ) throws Exception {
        SAXParserFactory pf = SAXParserFactory.newInstance();
        SAXParser parser = pf.newSAXParser();
        DefaultHandler handler
            = new Child("gradstudent",
                        new Child("name",
                                  new Printer()));
        FileReader input = new FileReader("cs.xml");
        try {
            parser.parse(new InputSource(input), handler);
        } finally {
            // release the file handle even when parsing fails
            input.close();
        }
    }
}
SAX parser → Child:gradstudent → Child:name → Printer
CSE 6331
© Leonidas Fegaras
XML Tools
11
Example
Input Stream
SAX Events
Child: gradstudent
Child: name
Printer
SD:
<department>
SE: department
<deptname>
SE: deptname
Computer Science
C: Computer Science
</deptname>
EE: deptname
<gradstudent>
SE: gradstudent
<name>
SE: name
<lastname>
SE: lastname
Smith
C: Smith
</lastname>
EE: lastname
<firstname>
SE: firstname
John
C: John
</firstname>
EE: firstname
</name>
EE: name
</gradstudent>
EE: gradstudent
...
...
</department>
EE: department
ED:
CSE 6331
© Leonidas Fegaras
XML Tools
12
XSL Transformation
A stylesheet specification language for converting XML
documents into various forms (XML, HTML, plain text, etc).
• Can transform each XML element into another element, add
new elements into the output file, or remove elements.
• Can rearrange and sort elements, test and make decisions about
which elements to display, and much more.
• Based on XPath:
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="/">
<students>
<xsl:copy-of select="//student/name"/>
</students>
</xsl:template>
</xsl:stylesheet>
CSE 6331
© Leonidas Fegaras
XML Tools
13
XSLT Templates
• XSL uses XPath to define parts of the source document that match one or
more predefined templates.
• When a match is found, XSLT will transform the matching part of the source
document into the result document.
• The parts of the source document that do not match a template are handled
by the default templates, so only their text content ends up in the result document.
Form:
<xsl:template match="XPath expression">
…
</xsl:template>
The default (implicit) templates visit all nodes and strip out all tags:
<xsl:template match="*|/">
<xsl:apply-templates/>
</xsl:template>
<xsl:template match="text()|@*">
<xsl:value-of select="."/>
</xsl:template>
CSE 6331
© Leonidas Fegaras
XML Tools
14
Other XSLT Elements
<xsl:value-of select="XPath expression"/>
select the value of an XML element and add it to the output stream of the
transformation, e.g. <xsl:value-of select="//books/book/author"/>.
<xsl:copy-of select="XPath expression"/>
copy the entire XML element to the output stream of the transformation.
<xsl:apply-templates select="XPath expression"/>
apply the template rules to the elements that match the XPath expression.
<xsl:element name="{XPath expression}"> … </xsl:element>
add an element to the output with a tag-name derived from the XPath.
Example:
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="employee">
<b> <xsl:apply-templates select="node()"/> </b>
</xsl:template>
<xsl:template match="surname">
<i> <xsl:value-of select="."/> </i>
</xsl:template>
</xsl:stylesheet>
CSE 6331
© Leonidas Fegaras
XML Tools
15
Copy the Entire Document
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="/">
<xsl:apply-templates/>
</xsl:template>
<xsl:template match="text()">
<xsl:value-of select="."/>
</xsl:template>
<xsl:template match="*">
<xsl:element name="{name(.)}">
<xsl:apply-templates/>
</xsl:element>
</xsl:template>
</xsl:stylesheet>
CSE 6331
© Leonidas Fegaras
XML Tools
16
More on XSLT
• Conflict resolution: more specific templates override more
general templates. Templates are assigned default priorities, but
these can be overridden using priority="n" in a template.
• Modes can be used to group together templates. No mode is an
empty mode.
<xsl:template match="…" mode="A">
<xsl:apply-templates mode="B"/>
</xsl:template>
• Conditional and loop statements:
<xsl:if test="XPath predicate"> body </xsl:if>
<xsl:for-each select="XPath"> body </xsl:for-each>
• Variables can be used to name data:
<xsl:variable name="x"> value </xsl:variable>
Variables are used as $x in XPaths (written {$x} inside attribute values).
CSE 6331
© Leonidas Fegaras
XML Tools
17
Using XSLT
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.w3c.dom.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import java.io.*;
/**
 * Applies the stylesheet {@code x.xsl} to the document {@code a.xml} and
 * writes the result of the transformation to standard output.
 */
class XSLT {

    /**
     * @param argv unused
     * @throws Exception if either file cannot be read or parsed, or the
     *                   transformation fails
     */
    public static void main ( String argv[] ) throws Exception {
        File sheetFile = new File("x.xsl");
        File inputFile = new File("a.xml");

        // Load the input document as a DOM tree first ...
        Document input = DocumentBuilderFactory.newInstance()
                                               .newDocumentBuilder()
                                               .parse(inputFile);

        // ... then compile the stylesheet and run it over the tree.
        Transformer engine = TransformerFactory.newInstance()
                                               .newTransformer(new StreamSource(sheetFile));
        engine.transform(new DOMSource(input), new StreamResult(System.out));
    }
}
CSE 6331
© Leonidas Fegaras
XML Tools
18