recent | popular | log in


« earlier   
HTML Parser: How to scrap HTML content | Python Central
from html.parser import HTMLParser
import urllib.request as urllib2

class MyHTMLParser(HTMLParser):
    """Collect the tags and comments encountered while parsing HTML.

    Feed markup with ``feed()``; afterwards the names seen so far are
    available, in document order, on these attributes:

    * ``lsStartTags``    -- names of opening tags (``<p>``)
    * ``lsEndTags``      -- names of closing tags (``</p>``)
    * ``lsStartEndTags`` -- names of self-closing tags (``<br/>``)
    * ``lsComments``     -- text of comments (``<!-- ... -->``)

    Because ``handle_startendtag`` is overridden, self-closing tags are
    recorded only in ``lsStartEndTags`` and not in the start/end lists.
    """

    # Class-level defaults so the attribute names also resolve on the class
    # itself. Each instance replaces them with its own lists in __init__;
    # appending to these shared objects would leak results between parsers.
    lsStartTags = list()
    lsEndTags = list()
    lsStartEndTags = list()
    lsComments = list()

    def __init__(self, *args, **kwargs):
        """Initialise the base parser and create per-instance result lists.

        All positional and keyword arguments are forwarded unchanged to
        ``HTMLParser.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.lsStartTags = []
        self.lsEndTags = []
        self.lsStartEndTags = []
        self.lsComments = []

    # HTML Parser Methods
    def handle_starttag(self, startTag, attrs):
        """Record the name of an opening tag; ``attrs`` is ignored."""
        self.lsStartTags.append(startTag)

    def handle_endtag(self, endTag):
        """Record the name of a closing tag."""
        self.lsEndTags.append(endTag)

    def handle_startendtag(self, startendTag, attrs):
        """Record the name of a self-closing tag; ``attrs`` is ignored."""
        self.lsStartEndTags.append(startendTag)

    def handle_comment(self, data):
        """Record the text found between ``<!--`` and ``-->``."""
        self.lsComments.append(data)

# Create an instance of the overridden parser class
parser = MyHTMLParser()

# Open the NYTimes site using urllib.request (imported here as urllib2).
# The context manager closes the HTTP response once the body has been read.
with urllib2.urlopen("https://www.nytimes.com/") as html_page:
    # The body arrives as bytes; decode it with the charset the server
    # declared (falling back to UTF-8) so the parser receives real text
    # rather than the repr of a bytes object.
    charset = html_page.headers.get_content_charset() or "utf-8"
    html_text = html_page.read().decode(charset, errors="replace")

# Feeding the content
parser.feed(html_text)

# Printing the extracted values (uncomment the others as needed)
print("Start tags", parser.lsStartTags)
#print("End tags", parser.lsEndTags)
#print("Start End tags", parser.lsStartEndTags)
#print("Comments", parser.lsComments)
scraping  python  html  url  how  howto  How_to 
yesterday by catichenor

Copy this bookmark:

to read