Add to Favorites    Make Home Page 6299 Online  
 Language Categories  
 Our Services  

Home » Java Home » Java Swing Home » Different method of converting web page (html2text)

A D V E R T I S E M E N T

Search Projects & Source Codes:

Title Different method of converting web page (html2text)
Author Rockey_Mandy
Author Email pande_mandar [at] rediffmail.com
Description This project extracts the plain text of any web page, from Amazon to Google to Yahoo, or any other site.

Category Java » Java Swing
Hits 376785
Code Select and Copy the Code
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;

/**
 * Convert text/html into text/plain.
 *
 * <p>The converter is a single-pass character scanner: tags are turned into
 * simple textual markers (or dropped), a handful of character entities are
 * decoded, and runs of whitespace outside {@code <pre>} are collapsed to one
 * space. Text seen before {@code <body>} is kept in a separate buffer and is
 * only returned when the document has no {@code <body>} tag at all.
 *
 * <p>Instances hold parsing state in fields, so one instance must not be used
 * by several threads at once. The state is reset at the start of every
 * {@link #convert(String)} call, so sequential reuse is fine.
 *
 * Author: Omindra Kumar Rana
 * Email: rana_omindra@yahoo.co.in
 *
 * @version 1.0 $Date: May 10, 2005 $
 */
public class HTML2Text {
  /** True once a {@code <body>} tag has been seen in the current document. */
  boolean body_found = false;
  /** True while the scanner is between {@code <body>} and {@code </body>}. */
  boolean in_body = false;
  /** Set between {@code <center>} and {@code </center>}; not read elsewhere in this class. */
  boolean center = false;
  /** True inside {@code <pre>}; disables whitespace collapsing. */
  boolean pre = false;
  /** Target of the most recent {@code <a href=...>}, emitted when {@code </a>} is reached. */
  String href = "";

  /**
   * Converts an HTML document to plain text.
   *
   * @param source the HTML markup; {@code null} is treated as an empty document
   * @return the trimmed plain-text rendering; the body text when a
   *         {@code <body>} tag was present, otherwise all text found
   * @throws Exception if reading from the in-memory source fails
   */
  public String convert(String source) throws Exception {
    // Start every conversion from a clean slate so a previous document's
    // <body>/<pre>/<a> state cannot leak into this one.
    body_found = false;
    in_body = false;
    center = false;
    pre = false;
    href = "";

    if (source == null) {
      return "";
    }

    StringBuffer result = new StringBuffer();   // text inside <body>
    StringBuffer result2 = new StringBuffer();  // text outside <body>
    StringReader input = new StringReader(source);
    try {
      int c = input.read();
      while (c != -1) { // Convert until EOF
        String text;
        if (c == '<') { // It's a tag
          String currentTag = getTag(input); // Get the rest of the tag
          text = convertTag(currentTag);
        } else if (c == '&') {
          text = decodeEntity(getSpecial(input));
        } else if (!pre && Character.isWhitespace((char) c)) {
          // Collapse whitespace runs: emit one space unless the buffer
          // already ends in whitespace.
          StringBuffer current = in_body ? result : result2;
          boolean endsWithSpace = current.length() > 0
              && Character.isWhitespace(current.charAt(current.length() - 1));
          text = endsWithSpace ? "" : " ";
        } else {
          text = String.valueOf((char) c);
        }
        // Chosen after the tag was processed, because convertTag may have
        // just flipped in_body.
        StringBuffer target = in_body ? result : result2;
        target.append(text);
        c = input.read();
      }
    } finally {
      input.close();
    }
    StringBuffer chosen = body_found ? result : result2;
    return chosen.toString().trim();
  }

  /**
   * Reads the remainder of a tag whose opening {@code '<'} has already been
   * consumed. Nested {@code '<'} characters are balanced against {@code '>'}.
   *
   * @param r reader positioned just after the opening {@code '<'}
   * @return the tag text including the leading {@code '<'}; unterminated if
   *         EOF was reached first
   * @throws IOException if the reader fails
   */
  String getTag(Reader r) throws IOException {
    StringBuffer result = new StringBuffer();
    int level = 1;
    result.append('<');
    while (level > 0) {
      int c = r.read();
      if (c == -1) {
        break; // EOF
      }
      result.append((char) c);
      if (c == '<') {
        level++;
      } else if (c == '>') {
        level--;
      }
    }
    return result.toString();
  }

  /**
   * Reads a character-entity name whose {@code '&'} has already been consumed.
   * Accepts letters and digits, plus a leading {@code '#'} so that numeric
   * references such as {@code &#60;} are read as one unit.
   *
   * @param r reader positioned just after {@code '&'}; must support
   *          {@code mark}/{@code reset}
   * @return the entity text without the {@code '&'}, including the trailing
   *         {@code ';'} when one was present
   * @throws IOException if the reader fails or does not support marking
   */
  String getSpecial(Reader r) throws IOException {
    StringBuffer result = new StringBuffer();
    r.mark(1); // Mark the present position in the stream
    int c = r.read();
    while (c != -1
        && (Character.isLetterOrDigit((char) c) || (c == '#' && result.length() == 0))) {
      result.append((char) c);
      r.mark(1);
      c = r.read();
    }
    if (c == ';') {
      result.append(';');
    } else {
      r.reset(); // push back the character that ended the name
    }
    return result.toString();
  }

  /**
   * Maps the text returned by {@link #getSpecial(Reader)} to its replacement.
   * Unknown or unterminated entities are passed through unchanged.
   */
  private String decodeEntity(String entity) {
    if (entity.equals("lt;")) {
      return "<";
    }
    if (entity.equals("gt;")) {
      return ">";
    }
    if (entity.equals("amp;")) {
      return "&";
    }
    if (entity.equals("nbsp;")) {
      return " ";
    }
    if (entity.equals("quot;")) {
      return "\"";
    }
    if (entity.equals("copy;")) {
      return "[Copyright]";
    }
    if (entity.equals("reg;")) {
      return "[Registered]";
    }
    if (entity.equals("trade;")) {
      return "[Trademark]";
    }
    if (entity.startsWith("#") && entity.endsWith(";")) {
      String digits = entity.substring(1, entity.length() - 1);
      try {
        int code;
        if (digits.startsWith("x") || digits.startsWith("X")) {
          code = Integer.parseInt(digits.substring(1), 16);
        } else {
          code = Integer.parseInt(digits);
        }
        switch (code) {
          case 160:
            return " "; // same rendering as &nbsp;
          case 169:
            return "[Copyright]";
          case 174:
            return "[Registered]";
          case 153:
            return "[Trademark]";
          default:
            if (code > 0 && Character.isValidCodePoint(code)) {
              return new String(Character.toChars(code));
            }
        }
      } catch (NumberFormatException ignored) {
        // Not a well-formed numeric reference; fall through and emit it verbatim.
      }
    }
    return "&" + entity;
  }

  /**
   * Tests whether a raw tag is the given element.
   *
   * @param s1 the raw tag text, starting with {@code '<'}
   * @param s2 the element name to test for, e.g. {@code "br"} or {@code "/body"}
   * @return true when {@code s1} starts with {@code '<' + s2} (ignoring case)
   *         followed by {@code '>'}, {@code '/'} or whitespace
   */
  boolean isTag(String s1, String s2) {
    String prefix = "<" + s2;
    // regionMatches(ignoreCase) avoids locale-sensitive lowercasing.
    if (!s1.regionMatches(true, 0, prefix, 0, prefix.length())) {
      return false;
    }
    if (s1.length() == prefix.length()) {
      return false; // name only, no terminator (tag cut off by EOF)
    }
    char next = s1.charAt(prefix.length());
    return next == '>' || next == '/' || Character.isWhitespace(next);
  }

  /**
   * Returns the quoted value of an attribute inside a raw tag, or
   * {@code null} when the attribute is absent, unquoted or unterminated.
   * The name is matched ignoring case; single and double quotes are accepted.
   */
  private static String quotedAttribute(String tag, String name) {
    String key = name + "=";
    int limit = tag.length() - key.length();
    for (int i = 1; i < limit; i++) {
      char before = tag.charAt(i - 1);
      boolean atBoundary = Character.isWhitespace(before) || before == '"' || before == '\'';
      if (!atBoundary || !tag.regionMatches(true, i, key, 0, key.length())) {
        continue;
      }
      int open = i + key.length();
      char quote = tag.charAt(open);
      if (quote != '"' && quote != '\'') {
        continue;
      }
      int close = tag.indexOf(quote, open + 1);
      return close == -1 ? null : tag.substring(open + 1, close);
    }
    return null;
  }

  /**
   * Translates one raw tag into its plain-text replacement and updates the
   * parser state ({@code in_body}, {@code body_found}, {@code center},
   * {@code pre}, {@code href}) as a side effect.
   *
   * @param t the raw tag as returned by {@link #getTag(Reader)}
   * @return the text to emit for this tag; empty for tags with no rendering
   * @throws IOException declared for interface compatibility
   */
  String convertTag(String t) throws IOException {
    // NOTE(review): the separators below are single spaces in the published
    // listing. They may originally have been line-break escapes that were
    // lost when the listing was rendered as a web page - confirm against the
    // author's original source before changing them.
    String result = "";
    if (isTag(t, "body")) {
      in_body = true;
      body_found = true;
    } else if (isTag(t, "/body")) {
      in_body = false;
      result = " ";
    } else if (isTag(t, "center")) {
      result = " ";
      center = true;
    } else if (isTag(t, "/center")) {
      result = " ";
      center = false;
    } else if (isTag(t, "pre")) {
      result = " ";
      pre = true;
    } else if (isTag(t, "/pre")) {
      result = " ";
      pre = false;
    } else if (isTag(t, "p")) {
      result = " ";
    } else if (isTag(t, "br")) {
      result = " ";
    } else if (isTag(t, "h1") || isTag(t, "h2") || isTag(t, "h3") || isTag(t, "h4")
        || isTag(t, "h5") || isTag(t, "h6") || isTag(t, "h7")) {
      result = " ";
    } else if (isTag(t, "/h1") || isTag(t, "/h2") || isTag(t, "/h3") || isTag(t, "/h4")
        || isTag(t, "/h5") || isTag(t, "/h6") || isTag(t, "/h7")) {
      result = " ";
    } else if (isTag(t, "/dl")) {
      result = " ";
    } else if (isTag(t, "dd")) {
      result = " * ";
    } else if (isTag(t, "dt")) {
      result = " ";
    } else if (isTag(t, "li")) {
      result = " * ";
    } else if (isTag(t, "/ul")) {
      result = " ";
    } else if (isTag(t, "/ol")) {
      result = " ";
    } else if (isTag(t, "hr")) {
      result = "_________________________________________ ";
    } else if (isTag(t, "table")) {
      result = " ";
    } else if (isTag(t, "/table")) {
      result = " ";
    } else if (isTag(t, "form")) {
      result = " ";
    } else if (isTag(t, "/form")) {
      result = " ";
    } else if (isTag(t, "b")) {
      result = "*";
    } else if (isTag(t, "/b")) {
      result = "*";
    } else if (isTag(t, "i")) {
      result = "\"";
    } else if (isTag(t, "/i")) {
      result = "\"";
    } else if (isTag(t, "img")) {
      // Images are represented by their alt text, when they have one.
      String alt = quotedAttribute(t, "alt");
      if (alt != null) {
        result = alt;
      }
    } else if (isTag(t, "a")) {
      // Remember the link target; it is printed when the anchor closes.
      String target = quotedAttribute(t, "href");
      href = (target != null) ? target : "";
    } else if (isTag(t, "/a")) {
      if (href.length() > 0) {
        result = " [ " + href + " ]";
        href = "";
      }
    }
    return result;
  }

  /**
   * Command-line entry point: converts the file named by the first argument
   * (or {@code html_test_file.html} when none is given) and prints the text.
   * The file is decoded with the platform default charset.
   *
   * @param argv optional single argument: path of the HTML file
   * @throws Exception if the file cannot be read
   */
  public static void main(String argv[]) throws Exception {
    File file;
    if (argv.length > 0 && argv[0] != null) {
      file = new File(argv[0]);
    } else {
      file = new File("html_test_file.html");
    }

    byte[] buf = new byte[(int) file.length()];
    int filled = 0;
    FileInputStream fis = new FileInputStream(file);
    try {
      // A single read() may return fewer bytes than requested, so loop
      // until the buffer is full or the stream ends.
      while (filled < buf.length) {
        int n = fis.read(buf, filled, buf.length - filled);
        if (n < 0) {
          break;
        }
        filled += n;
      }
    } finally {
      fis.close();
    }

    String s = new String(buf, 0, filled);
    HTML2Text h = new HTML2Text();
    System.out.println(h.convert(s));
  }
}

Related Source Codes

Script Name Author
Sending mail Using JavaMail to Yahoo and Gmail accounts sai prasad
Simple Program in Java to Implement Multithreading Satish.K
Simple Calculator in Java Using Remote Method Invocation Satish.K
Guest Book Application Using Servlets Satish.K
String Manipulation Using Stringification Satish.K
String Manipulation Using Stringification Satish.K
Moving Ball Application Using Java Beans Satish.K
Rapid Roll game subrahmanyeswararao
student mgm arpan
Sourav Datta
Download Manager Sagar
Address Book in Java Rahul Chouhan
address book using java database connectivity(jdbc-msaccess) shekhar bansal
sun Steganography B.Rajavel
Connecting Java with MS-Access - Inserting data in Aseem

A D V E R T I S E M E N T




Google Groups Subscribe to SourceCodesWorld - Techies Talk
Email:

Free eBook - Interview Questions: Get over 1,000 Interview Questions in an eBook for free when you join JobsAssist. Just click on the button below to join JobsAssist and you will immediately receive the Free eBook with thousands of Interview Questions in an ebook when you join.

New! Click here to Add your Code!


ASP Home | C Home | C++ Home | COBOL Home | Java Home | Pascal Home
Source Codes Home Page

 Advertisements  

Google Search

Google

Source Codes World.com is a part of Vyom Network.

Vyom Network : Web Hosting | Dedicated Server | Free SMS, GRE, GMAT, MBA | Online Exams | Freshers Jobs | Software Downloads | Interview Questions | Jobs, Discussions | Placement Papers | Free eBooks | Free eBooks | Free Business Info | Interview Questions | Free Tutorials | Arabic, French, German | IAS Preparation | Jokes, Songs, Fun | Free Classifieds | Free Recipes | Free Downloads | Bangalore Info | Tech Solutions | Project Outsourcing, Web Hosting | GATE Preparation | MBA Preparation | SAP Info | Software Testing | Google Logo Maker | Freshers Jobs

Sitemap | Privacy Policy | Terms and Conditions | Important Websites
Copyright ©2003-2024 SourceCodesWorld.com, All Rights Reserved.
Page URL: http://www.sourcecodesworld.com/source/show.asp?ScriptID=976


Download Yahoo Messenger | Placement Papers | Free SMS | C Interview Questions | C++ Interview Questions | Quick2Host Review