Networking - Getting Garbled HTML Using java.net.URL multithreaded
Hi All,
basically i am trying to fetch different pages from the same domain using URL class and then storing this to file systems. but for some of them output comes garbled.
Your assistance is greatly appreciated!
code follows:
publicclass URLFetchTest{
staticint count=0;
publicvoid loadDocument(URL url, String charSet)
throws IOException{
InputStream in =null;
boolean ignoreCharSet =false;
try{
HttpURLConnection urlc = (HttpURLConnection)url.openConnection();
urlc.setRequestMethod("GET");
in = urlc.getInputStream();
Reader reader = (charSet ==null) ?new InputStreamReader(in)
:new InputStreamReader(in, charSet);
FileWriter fw =new FileWriter("a"+count+".html",true);
count++;
char buf[] =newchar[1024];
int readCount =0 ;
while((readCount=reader.read(buf,0,1024))!=-1){
fw.write(buf,0,readCount);
//buf = new char[1024];
//System.out.println(buf);
}
fw.close();
// All done
break;
}catch (ChangedCharSetException e){
break;
}
}
publicvoid loadDocument(URL url)throws IOException{
loadDocument(url,"UTF-8");
}
publicstaticvoid main(String ar[]){
/*
Properties systemSettings = System.getProperties();
systemSettings.put("http.proxyHost", "<PROXY>");
systemSettings.put("http.proxyPort", "<PORT>");
System.setProperties(systemSettings);
*/
String urls[] ={
"http://www.altavista.com/web/results?itag=ody&q=c++&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=java&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=perl&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=cobol&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=fortran&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=ada&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=basic&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=cgi&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=html&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=xml&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=vc++&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=css&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=dhtml&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=oracle&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=mssql&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=mysql&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=db2&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=sybase&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=informix&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=vbasic&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=swing&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=mfc&kgs=1&kls=0",
"http://www.altavista.com/web/results?itag=ody&q=spring&kgs=1&kls=0"
};
for(int i=0;i<urls.length;i++){
final String u=urls<i>;
new Thread(){
publicvoid run(){
try{
new URLFetchTest().loadDocument(new URL(u));
}catch(Exception e){
System.out.println(e);
}
}
}.start();
}
}
}
Thanks,
Sant

