Find 10 bad things about this piece of Java Code! :D :D
I am having this assignment in which I need to find 10 bad things about this piece of java code. I have never used Java before,, so please help.
ps. anything goes, such as bad comments, ways to make the code more concise, bad format, or whatever! Thanks a lot in advance
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
// File: cnnCrawler.java
//
// This code looks at the CNN website and follows some links to get info on articles that I want more
// info on.
// All output is written in the working directory to: cnnCrawlerOutput.txt
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
import gnu.regexp.*;
import java.net.*;
import java.io.*;
public class cnnCrawler{
public static void main(String[] args)
{
StringBuffer basePage = new StringBuffer();
// Connect to CNN and get the document
basePage = getBasePageContents("http://www.cnn.com");
// Look at the area of interest (The "MORE FROM CNN" section)
basePage = initialIsolateBasePageContents(basePage);
// Pull all of the URLs out
basePage = getInfo(basePage, "?lt;a href=\"[^\"]*|/b> <a href=\"[^>]*|/b><a href=\"[^>]*");
basePage = getInfo(basePage, "\"/[^(\")]*");
basePage = getInfo(basePage,"\"[^&]*");
// Go to the URLs and pull out the information of interest and
// write to file.
goToURLs(basePage);
}
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
// Method: getBasePageContents
//
// This method opens a connection to the webpage we are interested in and stores
// all of the text on the page
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
public static StringBuffer getBasePageContents(String myURL){
try{
// Set base document to CNN, open connection,
// and copy the source text into a buffer
URL cnnBaseDoc = new URL(myURL);
cnnBaseDoc.openConnection();
BufferedReader cnnBaseBuffer = new BufferedReader(
new InputStreamReader(
cnnBaseDoc.openStream()));
String cnnBaseInputLine;
StringBuffer tempDocument = new StringBuffer();
while ((cnnBaseInputLine = cnnBaseBuffer.readLine()) != null){
tempDocument.append(cnnBaseInputLine);
}
cnnBaseBuffer.close();
return(tempDocument);
}
catch(MalformedURLException e) {
System.out.println("Unable to create URL object");
return(null);
}
catch(IOException e){
System.out.println("Unable to open URL");
return(null);
}
}
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
// Method: initialIsolateBasePageContents
//
// This method isolates us to store only the section we are interest in --
// the "MORE FROM CNN" section
//
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
public static StringBuffer initialIsolateBasePageContents(StringBuffer basePage){
try{
RE document = new RE(basePage);
// Define the left and right isolators
String sLeft = new String("MORE FROM CNN[//w//W]*");
RE leftCntxt = new RE(sLeft);
RE rightCntxt= new RE("><b>SPORTS");
StringBuffer sLIsolator = new StringBuffer("");
int iLIsolatorIndex = 0;
RE regLIsolator = new RE(leftCntxt);
REMatch ctxtLMatch = regLIsolator.getMatch(basePage);
sLIsolator.append(ctxtLMatch.toString());
iLIsolatorIndex = ctxtLMatch.getStartIndex();
// Find the Right Isolator
StringBuffer sRIsolator = new StringBuffer();
RE regRIsolator = new RE(rightCntxt);
int iRIsolatorIndex = 0;
REMatch ctxtRMatch = regRIsolator.getMatch(basePage);
sRIsolator.append(ctxtRMatch.toString());
iRIsolatorIndex = ctxtRMatch.getStartIndex();
basePage.delete(iRIsolatorIndex, basePage.length());
basePage.delete(0, iLIsolatorIndex);
return(basePage);
}
catch(REException e){
System.out.println("RE Exception");
return(null);
}
}
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
// Method: getInfo
//
// This method applies the specified regular expression to the string passed in
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
public static StringBuffer getInfo(StringBuffer textToSearch, String regExp){
try{
StringBuffer sIsolated = new StringBuffer("");
int iLIsolatorIndex = 0;
String sLeft = new String(regExp);
RE leftCntxt = new RE(sLeft);
RE regLIsolator = new RE(leftCntxt);
REMatchEnumeration ctxtLMatch = regLIsolator.getMatchEnumeration(textToSearch);
while (ctxtLMatch.hasMoreMatches()){
sIsolated.append(ctxtLMatch.nextMatch().toString());
sIsolated.append("\n");
}
return(sIsolated);
}
catch(REException e){
System.out.println("RE Exception");
return(null);
}
}
public static void goToURLs(StringBuffer textToSearch)
{
try{
StringBuffer interestingDoc = new StringBuffer("");
StringBuffer sInfoForFile = new StringBuffer("");
int numPage=0;
FileOutputStream fCnnOut;
PrintStream pCnnOut;
String sLeft = new String("/[^\"]*");
RE leftCntxt = new RE(sLeft);
String sIsolated = new String();
int iLIsolatorIndex = 0;
RE regLIsolator = new RE(leftCntxt);
REMatchEnumeration ctxtLMatch = regLIsolator.getMatchEnumeration(textToSearch);
fCnnOut = new FileOutputStream("cnnCrawlerOutput.txt");
pCnnOut = new PrintStream(fCnnOut);
while (ctxtLMatch.hasMoreMatches())
{
numPage++;
sIsolated = "http://www.cnn.com";
sIsolated += (ctxtLMatch.nextMatch().toString());
interestingDoc = connectToURLs(sIsolated);
sInfoForFile = getDocInfo(interestingDoc, sIsolated, numPage);
pCnnOut.println (sInfoForFile);
}
pCnnOut.close();
System.out.println("You may view the output in file: cnnCrawlerOutput.txt.");
}
catch(REException e){
System.out.println("RE Exception");
}
catch (Exception e)
{
System.out.println ("Error writing file.");
}
}
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
// Method: connectToURLs
// This method opens a URL and returns the text of the page
//**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~
public static StringBuffer connectToURLs(String urlText){
try{
URL cnnBaseDoc = new URL(urlText);
cnnBaseDoc.openConnection();
BufferedReader cnnBaseBuffer = new BufferedReader(
new InputStreamReader(
cnnBaseDoc.openStream()));
String cnnBaseInputLine;
StringBuffer tempDocument = new StringBuffer();
while ((cnnBaseInputLine = cnnBaseBuffer.readLine()) != null){
tempDocument.append(cnnBaseInputLine);
}
cnnBaseBuffer.close();
return(tempDocument);
}
catch(MalformedURLException e) {
System.out.println("Unable to create URL object");
return(null);
}
catch(IOException e){
System.out.println("Unable to open URL");
return(null);
}
}
Message was edited by:
rockingsoul

