com.sun.xml.stream
Class XMLScanner

java.lang.Object
  extended by com.sun.xml.stream.XMLScanner
All Implemented Interfaces:
org.apache.xerces.xni.parser.XMLComponent
Direct Known Subclasses:
XMLDocumentFragmentScannerImpl, XMLDTDScannerImpl

public abstract class XMLScanner
extends java.lang.Object
implements org.apache.xerces.xni.parser.XMLComponent

This class is responsible for holding scanning methods common to scanning the XML document structure and content as well as the DTD structure and content. Both XMLDocumentScanner and XMLDTDScanner inherit from this base class.

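This component requires the following features and properties from the component manager that uses it: the features identified by VALIDATION and NOTIFY_CHAR_REFS, and the properties identified by SYMBOL_TABLE, ERROR_REPORTER and ENTITY_MANAGER (see Field Detail below).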

Version:
$Id: XMLScanner.java,v 1.1 2006/03/31 03:17:07 jeffsuttor Exp $
Author:
Andy Clark, IBM, Arnaud Le Hors, IBM, Eric Ye, IBM, K.Venugopal SUN Microsystems

Field Summary
protected  java.util.ArrayList attributeValueCache
           
protected static boolean DEBUG_ATTR_NORMALIZATION
          Debug attribute normalization.
protected static java.lang.String ENTITY_MANAGER
          Property identifier: entity manager.
protected static java.lang.String ERROR_REPORTER
          Property identifier: error reporter.
protected static java.lang.String fAmpSymbol
          Symbol: "amp".
protected static java.lang.String fAposSymbol
          Symbol: "apos".
protected  boolean fAttributeCacheInitDone
           
protected  int fAttributeCacheUsedCount
           
protected  java.lang.String fCharRefLiteral
          Literal value of the last character reference scanned.
protected static java.lang.String fEncodingSymbol
          Symbol: "encoding".
protected  int fEntityDepth
          Entity depth.
protected  XMLEntityManager fEntityManager
          Entity manager.
protected  XMLEntityReaderImpl fEntityScanner
          Entity scanner; this always works on the last entity that was opened.
protected  XMLEntityStorage fEntityStore
          Entity storage. XXX: this should be available from the EntityManager.
protected  XMLErrorReporter fErrorReporter
          Error reporter.
protected  javax.xml.stream.events.XMLEvent fEvent
          event type
protected static java.lang.String fGtSymbol
          Symbol: "gt".
protected static java.lang.String fLtSymbol
          Symbol: "lt".
protected  boolean fNotifyCharRefs
          Character references notification.
protected  PropertyManager fPropertyManager
           
protected static java.lang.String fQuotSymbol
          Symbol: "quot".
protected  boolean fReportEntity
          Report entity boundary.
protected  org.apache.xerces.util.XMLResourceIdentifierImpl fResourceIdentifier
           
protected  boolean fScanningAttribute
          Scanning attribute.
protected static java.lang.String fStandaloneSymbol
          Symbol: "standalone".
protected  int fStringBufferIndex
           
protected  org.apache.xerces.util.SymbolTable fSymbolTable
          Symbol table.
protected  boolean fValidation
          Validation.
protected static java.lang.String fVersionSymbol
          Symbol: "version".
protected static java.lang.String NOTIFY_CHAR_REFS
          Feature identifier: notify character references.
protected  java.util.ArrayList stringBufferCache
           
protected static java.lang.String SYMBOL_TABLE
          Property identifier: symbol table.
protected static java.lang.String VALIDATION
          Feature identifier: validation.
 
Constructor Summary
XMLScanner()
           
 
Method Summary
 void endEntity(java.lang.String name)
          This method notifies the end of an entity.
 boolean getFeature(java.lang.String featureId)
           
protected static boolean isInvalid(int value)
           
protected static boolean isInvalidLiteral(int value)
           
protected static boolean isValidNameChar(int value)
           
protected static boolean isValidNameStartChar(int value)
           
protected static boolean isValidNCName(int value)
           
protected  void normalizeWhitespace(org.apache.xerces.xni.XMLString value)
          Normalize whitespace in an XMLString converting all whitespace characters to space characters.
protected  void reportFatalError(java.lang.String msgId, java.lang.Object[] args)
          Convenience function used in all XML scanners.
 void reset(PropertyManager propertyManager)
           
 void reset(org.apache.xerces.xni.parser.XMLComponentManager componentManager)
          Resets the component.
protected  void scanAttributeValue(org.apache.xerces.xni.XMLString value, org.apache.xerces.xni.XMLString nonNormalizedValue, java.lang.String atName, org.apache.xerces.xni.XMLAttributes attributes, int attrIndex, boolean checkEntities)
          Scans an attribute value and normalizes whitespace converting all whitespace characters to space characters.
protected  int scanCharReferenceValue(org.apache.xerces.util.XMLStringBuffer buf, org.apache.xerces.util.XMLStringBuffer buf2)
          Scans a character reference and appends the corresponding chars to the specified buffer.
protected  void scanComment(org.apache.xerces.util.XMLStringBuffer text)
          Scans a comment.
protected  void scanExternalID(java.lang.String[] identifiers, boolean optionalSystemId)
          Scans an external ID and returns the public and system IDs.
protected  void scanPI(org.apache.xerces.util.XMLStringBuffer data)
          Scans a processing instruction.
protected  void scanPIData(java.lang.String target, org.apache.xerces.util.XMLStringBuffer data)
          Scans processing instruction data.
 java.lang.String scanPseudoAttribute(boolean scanningTextDecl, org.apache.xerces.xni.XMLString value)
          Scans a pseudo attribute.
protected  boolean scanPubidLiteral(org.apache.xerces.xni.XMLString literal)
          Scans public ID literal.
protected  boolean scanSurrogates(org.apache.xerces.util.XMLStringBuffer buf)
          Scans surrogates and appends them to the specified buffer.
protected  void scanXMLDeclOrTextDecl(boolean scanningTextDecl, java.lang.String[] pseudoAttributeValues)
          Scans an XML or text declaration.
 void setFeature(java.lang.String featureId, boolean value)
          Sets the state of a feature.
 void setProperty(java.lang.String propertyId, java.lang.Object value)
          Sets the value of a property during parsing.
protected  void setPropertyManager(PropertyManager propertyManager)
           
 void startEntity(java.lang.String name, org.apache.xerces.xni.XMLResourceIdentifier identifier, java.lang.String encoding)
          This method notifies of the start of an entity.
protected  boolean versionSupported(java.lang.String version)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 
Methods inherited from interface org.apache.xerces.xni.parser.XMLComponent
getFeatureDefault, getPropertyDefault, getRecognizedFeatures, getRecognizedProperties
 

Field Detail

VALIDATION

protected static final java.lang.String VALIDATION
Feature identifier: validation.

See Also:
Constant Field Values

NOTIFY_CHAR_REFS

protected static final java.lang.String NOTIFY_CHAR_REFS
Feature identifier: notify character references.

See Also:
Constant Field Values

SYMBOL_TABLE

protected static final java.lang.String SYMBOL_TABLE
Property identifier: symbol table.

See Also:
Constant Field Values

ERROR_REPORTER

protected static final java.lang.String ERROR_REPORTER
Property identifier: error reporter.

See Also:
Constant Field Values

ENTITY_MANAGER

protected static final java.lang.String ENTITY_MANAGER
Property identifier: entity manager.

See Also:
Constant Field Values

DEBUG_ATTR_NORMALIZATION

protected static final boolean DEBUG_ATTR_NORMALIZATION
Debug attribute normalization.

See Also:
Constant Field Values

attributeValueCache

protected java.util.ArrayList attributeValueCache

stringBufferCache

protected java.util.ArrayList stringBufferCache

fStringBufferIndex

protected int fStringBufferIndex

fAttributeCacheInitDone

protected boolean fAttributeCacheInitDone

fAttributeCacheUsedCount

protected int fAttributeCacheUsedCount

fValidation

protected boolean fValidation
Validation. This feature identifier is: http://xml.org/sax/features/validation


fNotifyCharRefs

protected boolean fNotifyCharRefs
Character references notification.


fPropertyManager

protected PropertyManager fPropertyManager

fSymbolTable

protected org.apache.xerces.util.SymbolTable fSymbolTable
Symbol table.


fErrorReporter

protected XMLErrorReporter fErrorReporter
Error reporter.


fEntityManager

protected XMLEntityManager fEntityManager
Entity manager.


fEntityStore

protected XMLEntityStorage fEntityStore
Entity storage. XXX: this should be available from the EntityManager.


fEvent

protected javax.xml.stream.events.XMLEvent fEvent
event type


fEntityScanner

protected XMLEntityReaderImpl fEntityScanner
Entity scanner; this always works on the last entity that was opened.


fEntityDepth

protected int fEntityDepth
Entity depth.


fCharRefLiteral

protected java.lang.String fCharRefLiteral
Literal value of the last character reference scanned.


fScanningAttribute

protected boolean fScanningAttribute
Scanning attribute.


fReportEntity

protected boolean fReportEntity
Report entity boundary.


fVersionSymbol

protected static final java.lang.String fVersionSymbol
Symbol: "version".


fEncodingSymbol

protected static final java.lang.String fEncodingSymbol
Symbol: "encoding".


fStandaloneSymbol

protected static final java.lang.String fStandaloneSymbol
Symbol: "standalone".


fAmpSymbol

protected static final java.lang.String fAmpSymbol
Symbol: "amp".


fLtSymbol

protected static final java.lang.String fLtSymbol
Symbol: "lt".


fGtSymbol

protected static final java.lang.String fGtSymbol
Symbol: "gt".


fQuotSymbol

protected static final java.lang.String fQuotSymbol
Symbol: "quot".


fAposSymbol

protected static final java.lang.String fAposSymbol
Symbol: "apos".


fResourceIdentifier

protected org.apache.xerces.util.XMLResourceIdentifierImpl fResourceIdentifier
Constructor Detail

XMLScanner

public XMLScanner()
Method Detail

reset

public void reset(org.apache.xerces.xni.parser.XMLComponentManager componentManager)
           throws org.apache.xerces.xni.parser.XMLConfigurationException
Description copied from interface: org.apache.xerces.xni.parser.XMLComponent
Resets the component. The component can query the component manager about any features and properties that affect the operation of the component.

Specified by:
reset in interface org.apache.xerces.xni.parser.XMLComponent
Parameters:
componentManager - The component manager.
Throws:
org.apache.xerces.xni.parser.XMLConfigurationException - Thrown if required features and properties cannot be found.

setPropertyManager

protected void setPropertyManager(PropertyManager propertyManager)

setProperty

public void setProperty(java.lang.String propertyId,
                        java.lang.Object value)
                 throws org.apache.xerces.xni.parser.XMLConfigurationException
Sets the value of a property during parsing.

Specified by:
setProperty in interface org.apache.xerces.xni.parser.XMLComponent
Parameters:
propertyId -
value -
Throws:
org.apache.xerces.xni.parser.XMLConfigurationException - Thrown for configuration error. In general, components should only throw this exception if it is really a critical error.

setFeature

public void setFeature(java.lang.String featureId,
                       boolean value)
                throws org.apache.xerces.xni.parser.XMLConfigurationException
Description copied from interface: org.apache.xerces.xni.parser.XMLComponent
Sets the state of a feature. This method is called by the component manager any time after reset when a feature changes state.

Note: Components should silently ignore features that do not affect the operation of the component.

Specified by:
setFeature in interface org.apache.xerces.xni.parser.XMLComponent
Parameters:
featureId - The feature identifier.
value - The state of the feature.
Throws:
org.apache.xerces.xni.parser.XMLConfigurationException - Thrown for configuration error. In general, components should only throw this exception if it is really a critical error.

getFeature

public boolean getFeature(java.lang.String featureId)
                   throws org.apache.xerces.xni.parser.XMLConfigurationException
Throws:
org.apache.xerces.xni.parser.XMLConfigurationException

reset

public void reset(PropertyManager propertyManager)

scanXMLDeclOrTextDecl

protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
                                     java.lang.String[] pseudoAttributeValues)
                              throws java.io.IOException,
                                     org.apache.xerces.xni.XNIException
Scans an XML or text declaration.

 [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
 [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
 [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
 [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
 [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
                 | ('"' ('yes' | 'no') '"'))

 [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
 

Parameters:
scanningTextDecl - True if a text declaration is to be scanned instead of an XML declaration.
pseudoAttributeValues - An array of size 3 to return the version, encoding and standalone pseudo attribute values (in that order). Note: This method uses fString, anything in it at the time of calling is lost.
Throws:
java.io.IOException
org.apache.xerces.xni.XNIException

scanPseudoAttribute

public java.lang.String scanPseudoAttribute(boolean scanningTextDecl,
                                            org.apache.xerces.xni.XMLString value)
                                     throws java.io.IOException,
                                            org.apache.xerces.xni.XNIException
Scans a pseudo attribute.

Parameters:
scanningTextDecl - True if scanning this pseudo-attribute for a TextDecl; false if scanning XMLDecl. This flag is needed to report the correct type of error.
value - The string to fill in with the attribute value.
Returns:
The name of the attribute. Note: This method uses fStringBuffer2; anything in it at the time of calling is lost.
Throws:
java.io.IOException
org.apache.xerces.xni.XNIException

scanPI

protected void scanPI(org.apache.xerces.util.XMLStringBuffer data)
               throws java.io.IOException,
                      org.apache.xerces.xni.XNIException
Scans a processing instruction.

 [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
 [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
 

Throws:
java.io.IOException
org.apache.xerces.xni.XNIException

scanPIData

protected void scanPIData(java.lang.String target,
                          org.apache.xerces.util.XMLStringBuffer data)
                   throws java.io.IOException,
                          org.apache.xerces.xni.XNIException
Scans processing instruction data. This is needed to handle the situation where a document starts with a processing instruction whose target name starts with "xml" (e.g. xmlfoo). This method always reads the whole data: a while loop buffers the data until the closing delimiter is encountered.

Parameters:
target - The PI target
data - The string to fill in with the data
Throws:
java.io.IOException
org.apache.xerces.xni.XNIException

scanComment

protected void scanComment(org.apache.xerces.util.XMLStringBuffer text)
                    throws java.io.IOException,
                           org.apache.xerces.xni.XNIException
Scans a comment.

 [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 

Note: Called after scanning past '<!--' Note: This method uses fString, anything in it at the time of calling is lost.

Parameters:
text - The buffer to fill in with the text.
Throws:
java.io.IOException
org.apache.xerces.xni.XNIException

scanAttributeValue

protected void scanAttributeValue(org.apache.xerces.xni.XMLString value,
                                  org.apache.xerces.xni.XMLString nonNormalizedValue,
                                  java.lang.String atName,
                                  org.apache.xerces.xni.XMLAttributes attributes,
                                  int attrIndex,
                                  boolean checkEntities)
                           throws java.io.IOException,
                                  org.apache.xerces.xni.XNIException
Scans an attribute value and normalizes whitespace converting all whitespace characters to space characters. [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"

Parameters:
value - The XMLString to fill in with the value.
nonNormalizedValue - The XMLString to fill in with the non-normalized value.
atName - The name of the attribute being parsed (for error msgs).
attributes - The attributes list for the scanned attribute.
attrIndex - The index of the attribute to use from the list.
checkEntities - true if undeclared entities should be reported as VC violation, false if undeclared entities should be reported as WFC violation. Note: This method uses fStringBuffer2, anything in it at the time of calling is lost.
Throws:
java.io.IOException
org.apache.xerces.xni.XNIException

scanExternalID

protected void scanExternalID(java.lang.String[] identifiers,
                              boolean optionalSystemId)
                       throws java.io.IOException,
                              org.apache.xerces.xni.XNIException
Scans an external ID and returns the public and system IDs.

Parameters:
identifiers - An array of size 2 to return the system id, and public id (in that order).
optionalSystemId - Specifies whether the system id is optional. Note: This method uses fString and fStringBuffer, anything in them at the time of calling is lost.
Throws:
java.io.IOException
org.apache.xerces.xni.XNIException

scanPubidLiteral

protected boolean scanPubidLiteral(org.apache.xerces.xni.XMLString literal)
                            throws java.io.IOException,
                                   org.apache.xerces.xni.XNIException
Scans public ID literal. [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] The returned string is normalized according to the following rule, from http://www.w3.org/TR/REC-xml#dt-pubid: Before a match is attempted, all strings of white space in the public identifier must be normalized to single space characters (#x20), and leading and trailing white space must be removed.

Parameters:
literal - The string to fill in with the public ID literal.
Returns:
True on success. Note: This method uses fStringBuffer, anything in it at the time of calling is lost.
Throws:
java.io.IOException
org.apache.xerces.xni.XNIException

normalizeWhitespace

protected void normalizeWhitespace(org.apache.xerces.xni.XMLString value)
Normalize whitespace in an XMLString converting all whitespace characters to space characters.


startEntity

public void startEntity(java.lang.String name,
                        org.apache.xerces.xni.XMLResourceIdentifier identifier,
                        java.lang.String encoding)
                 throws org.apache.xerces.xni.XNIException
This method notifies of the start of an entity. The document entity has the pseudo-name of "[xml]"; the DTD has the pseudo-name of "[dtd]"; parameter entity names start with '%'; and general entities are just specified by their name.

Parameters:
name - The name of the entity.
identifier - The resource identifier.
encoding - The auto-detected IANA encoding name of the entity stream. This value will be null in those situations where the entity encoding is not auto-detected (e.g. internal entities or a document entity that is parsed from a java.io.Reader).
Throws:
org.apache.xerces.xni.XNIException - Thrown by handler to signal an error.

endEntity

public void endEntity(java.lang.String name)
               throws java.io.IOException,
                      org.apache.xerces.xni.XNIException
This method notifies of the end of an entity. The document entity has the pseudo-name of "[xml]"; the DTD has the pseudo-name of "[dtd]"; parameter entity names start with '%'; and general entities are just specified by their name.

Parameters:
name - The name of the entity.
Throws:
org.apache.xerces.xni.XNIException - Thrown by handler to signal an error.
java.io.IOException

scanCharReferenceValue

protected int scanCharReferenceValue(org.apache.xerces.util.XMLStringBuffer buf,
                                     org.apache.xerces.util.XMLStringBuffer buf2)
                              throws java.io.IOException,
                                     org.apache.xerces.xni.XNIException
Scans a character reference and appends the corresponding chars to the specified buffer.

 [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
 
Note: This method uses fStringBuffer, anything in it at the time of calling is lost.

Parameters:
buf - the character buffer to append chars to
buf2 - the character buffer to append non-normalized chars to
Returns:
the character value or (-1) on conversion failure
Throws:
java.io.IOException
org.apache.xerces.xni.XNIException

isInvalid

protected static boolean isInvalid(int value)

isInvalidLiteral

protected static boolean isInvalidLiteral(int value)

isValidNameChar

protected static boolean isValidNameChar(int value)

isValidNCName

protected static boolean isValidNCName(int value)

isValidNameStartChar

protected static boolean isValidNameStartChar(int value)

versionSupported

protected boolean versionSupported(java.lang.String version)

scanSurrogates

protected boolean scanSurrogates(org.apache.xerces.util.XMLStringBuffer buf)
                          throws java.io.IOException,
                                 org.apache.xerces.xni.XNIException
Scans surrogates and appends them to the specified buffer.

Note: This assumes the current char has already been identified as a high surrogate.

Parameters:
buf - The StringBuffer to append the read surrogates to.
Returns:
True if it succeeded.
Throws:
java.io.IOException
org.apache.xerces.xni.XNIException

reportFatalError

protected void reportFatalError(java.lang.String msgId,
                                java.lang.Object[] args)
                         throws org.apache.xerces.xni.XNIException
Convenience function used in all XML scanners.

Throws:
org.apache.xerces.xni.XNIException


Copyright © 2002-2003 Apache XML Project. All Rights Reserved.