// --------------------------------------------------------------------------- // Define the package we belong to // --------------------------------------------------------------------------- package com.mcp.stock.internet; // --------------------------------------------------------------------------- // Import the packages used by this class // --------------------------------------------------------------------------- import java.util.*; import java.io.*; import java.net.*; // --------------------------------------------------------------------------- // HTMLParser // --------------------------------------------------------------------------- public class HTMLParser { // ----------------------------------------------------------------------- // Define a Vector to hold tokens // ----------------------------------------------------------------------- Vector m_vTokens = new Vector(); // ----------------------------------------------------------------------- // Empty Constructor // ----------------------------------------------------------------------- public HTMLParser() { } // ----------------------------------------------------------------------- // Constructor( strHTMLText ) // // Send strHTMLText to parsePage // ----------------------------------------------------------------------- public HTMLParser( String strHTMLText ) { parsePage( strHTMLText ); } // ----------------------------------------------------------------------- // public boolean parsePage( String strHTMLText ) // ----------------------------------------------------------------------- public Boolean parsePage( String strHTMLText ) { // Initialize our token Vector m_vTokens.removeAllElements(); // Set the tokens that we are going to use as delimiters: // \r = Carriage Return // \n = New line // < = Less than (HTML tag prefix) // > = Greater than (HTML tag postfix String strTokens = "\r\n<>"; // Create a StringTokenizer object - note we are leaving in all // delimiter tokens! 
StringTokenizer st = new StringTokenizer( strHTMLText, strTokens, true ); // Loop through all tokens in our token vector while (st.hasMoreTokens()) { // Extract a string token String tok = st.nextToken(); // See if we have an HTML tag prefix if( tok.equals( "<" ) ) { // We don't want any HTML tags, so keep reading tokens // until we get to an HTML tag postfix while( !tok.equals( ">" ) && st.hasMoreTokens() ) { tok = st.nextToken(); } } // See if we have a valid token if( !tok.equals( "\n" ) && !tok.equals( "\r" ) && !tok.equals( "<" ) && !tok.equals( ">" ) && tok.length() != 0 ) { // Remove all leading and trailing whitespace tok = tok.trim(); // Add this token to our vector of tokens m_vTokens.add( tok ); } } // Success - Return true return new Boolean( true ); } // ----------------------------------------------------------------------- // public String getToken( int nIndex ) // // Retrieve the String token at position nIndex. // ----------------------------------------------------------------------- public String getToken( int nIndex ) throws ArrayIndexOutOfBoundsException { // Verify that the requested index is valid if( nIndex >= m_vTokens.size() ) { ArrayIndexOutOfBoundsException e = new ArrayIndexOutOfBoundsException ( "Requested Index: " + Integer.toString( nIndex ) + " of a vector with " + Integer.toString( m_vTokens.size() ) + " elements" ); throw e; } // Return the requested token return ( String )m_vTokens.elementAt( nIndex ); } // ----------------------------------------------------------------------- // public String getToken( String strSearchString, int nOffset ) // // Retrieve the token that is nOffset (positive or negative) // units away from strSearchString. 
// ----------------------------------------------------------------------- public String getToken( String strSearchString, int nOffset ) { // Search for the relative token int nIndex = findToken( strSearchString ); // See if we found the relative token if( nIndex == -1 ) { NoSuchElementException e = new NoSuchElementException ( "HTMLParser.getToken() Error: " + strSearchString + " does not exist in the Token list" ); throw e; } // Ensure that the offset is legal if( nIndex + nOffset >= m_vTokens.size() ) { ArrayIndexOutOfBoundsException e = new ArrayIndexOutOfBoundsException ( "Requested Index: " + Integer.toString( nIndex ) + " of a vector with " + Integer.toString( m_vTokens.size() ) + " elements" ); throw e; } // Return the requested token return getToken( nIndex + nOffset ); } // ----------------------------------------------------------------------- // public int getNumberOfTokens() // // Returns the number of tokens in the page // ----------------------------------------------------------------------- public int getNumberOfTokens() { return m_vTokens.size(); } // ----------------------------------------------------------------------- // public int findToken( String strToken ) // // Returns the index of strToken in the token vector // Returns -1 if strToken is not in the vector // ----------------------------------------------------------------------- public int findToken( String strToken ) { // Loop through all tokens in the vector for( int i=0; i < m_vTokens.size(); i++ ) { // Extract a String token from the vector String tok = ( String )m_vTokens.elementAt( i ); if( tok.equalsIgnoreCase( strToken ) ) { return i; } } // Could not find the requested token return -1; } } #### ... // See if we found the relative token if( nIndex == -1 ) ... // Could not find the requested token return -1; ... 
// ####
// // Search for the relative token
// int nIndex = findToken( strSearchString );
//
// // See if we found the relative token
// if( nIndex == -1 )
// {
//     NoSuchElementException e = new NoSuchElementException
//         ( "HTMLParser.getToken() Error: " + strSearchString + " does not exist in the Token list" );
//     throw e;
// }
// ####
// public String getToken( int nIndex ) throws ArrayIndexOutOfBoundsException
// {
//     ...
//     ArrayIndexOutOfBoundsException e = new ArrayIndexOutOfBoundsException ( ... );
//     throw e;
//     ...
// ####
// public String getToken( int nIndex ) throws ArrayIndexOutOfBoundsException e
// {
//     ...
//     throw e( ... );
//     ..
// }
// ####
// ArrayIndexOutOfBoundsException m_AiOoBExIndexException = new ArrayIndexOutOfBoundsException = ...