// ---------------------------------------------------------------------------
// Define the package we belong to
// ---------------------------------------------------------------------------
package com.mcp.stock.internet;
// ---------------------------------------------------------------------------
// Import the packages used by this class
// ---------------------------------------------------------------------------
import java.util.*;
import java.io.*;
import java.net.*;
// ---------------------------------------------------------------------------
// HTMLParser
// ---------------------------------------------------------------------------
public class HTMLParser
{
// -----------------------------------------------------------------------
// Define a Vector to hold tokens
// -----------------------------------------------------------------------
Vector m_vTokens = new Vector();
// -----------------------------------------------------------------------
// Empty Constructor
// -----------------------------------------------------------------------
public HTMLParser()
{
}
// -----------------------------------------------------------------------
// Constructor( strHTMLText )
//
// Send strHTMLText to parsePage
// -----------------------------------------------------------------------
public HTMLParser( String strHTMLText )
{
parsePage( strHTMLText );
}
// -----------------------------------------------------------------------
// public boolean parsePage( String strHTMLText )
// -----------------------------------------------------------------------
public Boolean parsePage( String strHTMLText )
{
// Initialize our token Vector
m_vTokens.removeAllElements();
// Set the tokens that we are going to use as delimiters:
// \r = Carriage Return
// \n = New line
// < = Less than (HTML tag prefix)
// > = Greater than (HTML tag postfix
String strTokens = "\r\n<>";
// Create a StringTokenizer object - note we are leaving in all
// delimiter tokens!
StringTokenizer st = new StringTokenizer( strHTMLText, strTokens, true );
// Loop through all tokens in our token vector
while (st.hasMoreTokens())
{
// Extract a string token
String tok = st.nextToken();
// See if we have an HTML tag prefix
if( tok.equals( "<" ) )
{
// We don't want any HTML tags, so keep reading tokens
// until we get to an HTML tag postfix
while( !tok.equals( ">" ) && st.hasMoreTokens() )
{
tok = st.nextToken();
}
}
// See if we have a valid token
if( !tok.equals( "\n" ) &&
!tok.equals( "\r" ) &&
!tok.equals( "<" ) &&
!tok.equals( ">" ) &&
tok.length() != 0 )
{
// Remove all leading and trailing whitespace
tok = tok.trim();
// Add this token to our vector of tokens
m_vTokens.add( tok );
}
}
// Success - Return true
return new Boolean( true );
}
// -----------------------------------------------------------------------
// public String getToken( int nIndex )
//
// Retrieve the String token at position nIndex.
// -----------------------------------------------------------------------
public String getToken( int nIndex ) throws ArrayIndexOutOfBoundsException
{
// Verify that the requested index is valid
if( nIndex >= m_vTokens.size() )
{
ArrayIndexOutOfBoundsException e =
new ArrayIndexOutOfBoundsException
(
"Requested Index: " +
Integer.toString( nIndex ) +
" of a vector with " +
Integer.toString( m_vTokens.size() ) +
" elements"
);
throw e;
}
// Return the requested token
return ( String )m_vTokens.elementAt( nIndex );
}
// -----------------------------------------------------------------------
// public String getToken( String strSearchString, int nOffset )
//
// Retrieve the token that is nOffset (positive or negative)
// units away from strSearchString.
// -----------------------------------------------------------------------
public String getToken( String strSearchString, int nOffset )
{
// Search for the relative token
int nIndex = findToken( strSearchString );
// See if we found the relative token
if( nIndex == -1 )
{
NoSuchElementException e =
new NoSuchElementException
(
"HTMLParser.getToken() Error: " +
strSearchString +
" does not exist in the Token list"
);
throw e;
}
// Ensure that the offset is legal
if( nIndex + nOffset >= m_vTokens.size() )
{
ArrayIndexOutOfBoundsException e =
new ArrayIndexOutOfBoundsException
(
"Requested Index: " +
Integer.toString( nIndex ) +
" of a vector with " +
Integer.toString( m_vTokens.size() ) +
" elements"
);
throw e;
}
// Return the requested token
return getToken( nIndex + nOffset );
}
// -----------------------------------------------------------------------
// public int getNumberOfTokens()
//
// Returns the number of tokens in the page
// -----------------------------------------------------------------------
public int getNumberOfTokens()
{
return m_vTokens.size();
}
// -----------------------------------------------------------------------
// public int findToken( String strToken )
//
// Returns the index of strToken in the token vector
// Returns -1 if strToken is not in the vector
// -----------------------------------------------------------------------
public int findToken( String strToken )
{
// Loop through all tokens in the vector
for( int i=0; i < m_vTokens.size(); i++ )
{
// Extract a String token from the vector
String tok = ( String )m_vTokens.elementAt( i );
if( tok.equalsIgnoreCase( strToken ) )
{
return i;
}
}
// Could not find the requested token
return -1;
}
}
####
...
// See if we found the relative token
if( nIndex == -1 )
...
// Could not find the requested token
return -1;
...
##
##
// Search for the relative token
int nIndex = findToken( strSearchString );
// See if we found the relative token
if( nIndex == -1 )
{
NoSuchElementException e =
new NoSuchElementException
(
"HTMLParser.getToken() Error: " +
strSearchString +
" does not exist in the Token list"
);
throw e;
}
##
##
public String getToken( int nIndex ) throws ArrayIndexOutOfBoundsException
{
...
ArrayIndexOutOfBoundsException e =
new ArrayIndexOutOfBoundsException
(
...
);
throw e;
...
##
##
public String getToken( int nIndex )
throws ArrayIndexOutOfBoundsException e
{
...
throw e( ... );
..
}
##
##
ArrayIndexOutOfBoundsException
m_AiOoBExIndexException =
new ArrayIndexOutOfBoundsException = ...