Extract paragraph from html
hi i got this piece of code from this forum, and am not being able to modify it so it returns only texts within the
tags can any one help me outimport java.io.*;
import java.net.*;
import javax.swing.text.*;
import javax.swing.text.html.*;
class GetHTMLText
{
public static void main(String[] args)
throws Exception
{
EditorKit kit = new HTMLEditorKit();
Document doc = kit.createDefaultDocument();
// The Document class does not yet handle charset's properly.
doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);
// Create a reader on the HTML content.
Reader rd = getReader(args[0]);
// Parse the HTML.
kit.read(rd, doc, 0);
// The HTML text is now stored in the document
System.out.println( doc.getText(0, doc.getLength()) );
}
// Returns a reader on the HTML data. If 'uri' begins
// with "http:", it's treated as a URL; otherwise,
// it's assumed to be a local filename.
static Reader getReader(String uri)
throws IOException
{
// Retrieve from Internet.
if (uri.startsWith("http:"))
{
URLConnection conn = new URL(uri).openConnection();
return new InputStreamReader(conn.getInputStream());
}
// Retrieve from file.
else
{
return new FileReader(uri);
}
}
}

