In your code you could use lines such as the following, together with the classes below:
// Example usage: open a connection and collect the href values of the page's <a> tags.
// debug, urlstring and getHttpURLConnection(...) must be supplied by the surrounding code.
HttpURLConnection connection = null;
// Static setting on the HttpURLConnection class, not on a single connection object.
HttpURLConnection.setFollowRedirects(true);
if (debug) {
    System.out.println("Using normal connection");
}
connection = getHttpURLConnection(urlstring);
// HREFExtractor creates its own HTMLParser/parser, so no separate parser objects are needed here.
HREFExtractor hrefextractor = new HREFExtractor();
// Stays empty when no connection was obtained, so code that follows can still iterate it.
Vector links = new Vector();
if (connection == null) {
    // Do not hand a null connection to the extractor; it would be dereferenced there.
    System.out.println("Connection is null");
} else {
    System.out.println("Established connection");
    hrefextractor.parseHttpURLConnection(connection);
    links = hrefextractor.getHREFAttributes();
}
*************************
/**
 * Thin HTMLEditorKit subclass whose only purpose is to hand out the kit's parser.
 * HTMLEditorKit.getParser() has protected access, so a subclass is required
 * to make the parser object reachable from other classes.
 */
class HTMLParser extends HTMLEditorKit {

    /**
     * Re-declares the inherited accessor with public visibility.
     *
     * @return whatever HTMLEditorKit.getParser() returns
     */
    public HTMLEditorKit.Parser getParser() {
        final HTMLEditorKit.Parser inherited = super.getParser();
        return inherited;
    }
}
/**
 * Parser callback that collects the value of the href attribute of every
 * {@code <a>} start tag reported to it.
 *
 * The collected values are appended to one Vector that is created in the
 * constructor and never cleared, so links accumulate across successive parse
 * calls on the same instance.
 *
 * Relies on {@code debug} and {@code showErrorMessage(String)}, which are
 * defined outside this class.
 */
class HREFExtractor extends HTMLEditorKit.ParserCallback {

    HTMLParser htmlparser;
    /** Collected href values, stored as Strings in the order they were encountered. */
    Vector v;
    HTMLEditorKit.Parser parser;

    HREFExtractor() {
        v = new Vector();
        htmlparser = new HTMLParser();
        parser = htmlparser.getParser();
    }

    /**
     * Records the href value(s) of an {@code <a>} start tag; every other tag is ignored.
     *
     * @param tag        the tag that was opened
     * @param attributes the attributes of that tag
     * @param position   position of the tag in the document (unused here)
     */
    public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes, int position) {
        if (tag != HTML.Tag.A) {
            return;
        }
        if (debug) System.out.println("Found link element: " + tag);
        // Attribute keys are matched by their string form, case-insensitively, so the
        // lookup works whatever object type the key happens to be.
        Enumeration names = attributes.getAttributeNames();
        while (names.hasMoreElements()) {
            Object name = names.nextElement();
            if ("href".equalsIgnoreCase(name.toString())) {
                String link = attributes.getAttribute(name).toString();
                v.add(link);
                if (debug) System.out.println("adding: " + link);
            }
        }
    }

    /**
     * Parses the document behind a URL connection, feeding its tags to this callback.
     * The parser is invoked with ignoreCharSet = false. Any IOException, as well as a
     * null connection, is reported through showErrorMessage instead of being propagated.
     *
     * @param connection connection whose input stream holds the HTML document
     */
    public void parseURLConnection(URLConnection connection) {
        if (connection == null) {
            showErrorMessage("Connection is null");
            return;
        }
        try {
            parseStream(connection, false);
        } catch (IOException ioexception) {
            showErrorMessage("IOException: " + ioexception.getMessage());
        }
    }

    /**
     * Parses the document behind an HTTP connection, feeding its tags to this callback.
     * Unlike parseURLConnection, the parser is invoked with ignoreCharSet = true.
     * Failures, including a null connection, are reported through showErrorMessage
     * instead of being propagated.
     *
     * @param connection connection whose input stream holds the HTML document
     */
    public void parseHttpURLConnection(HttpURLConnection connection) {
        if (connection == null) {
            showErrorMessage("Connection is null");
            return;
        }
        try {
            // last argument set true to avoid ChangedCharSetException
            parseStream(connection, true);
        } catch (ChangedCharSetException ccse) {
            showErrorMessage(ccse.getMessage() + " CharSetSpec: " + ccse.getCharSetSpec());
        } catch (IOException ioexception) {
            showErrorMessage("IOException: " + ioexception.getMessage());
        }
    }

    /**
     * Returns the href values collected so far.
     *
     * @return the internal Vector itself (not a copy), containing Strings
     */
    public Vector getHREFAttributes() {
        return v;
    }

    /** Runs the parser over the connection's input stream and always closes the stream afterwards. */
    private void parseStream(URLConnection connection, boolean ignoreCharSet) throws IOException {
        // NOTE(review): the reader decodes with the JVM's default charset; the charset
        // declared by the server or the document is not consulted here.
        InputStreamReader isr = new InputStreamReader(connection.getInputStream());
        try {
            parser.parse(isr, this, ignoreCharSet);
        } finally {
            try {
                isr.close();
            } catch (IOException ignored) {
                // A failure while closing must not mask an exception thrown by parse().
            }
        }
    }
}