from urllib import request
from html.parser import HTMLParser
from html.entities import name2codepoint
class meeting(object):
    """Plain record for one event collected by the HTML parser.

    Attributes:
        name: Text of the event's link.
        location: Text of the event's location element.
        time: Text of the event's ``<time>`` element.
    """

    def __init__(self, name, location, time):
        """Store the three descriptive fields of the event.

        Args:
            name: Event name.
            location: Event location.
            time: Event time.
        """
        self.name = name
        self.location = location
        # The constructor previously dropped this argument, which left the
        # attribute undefined until the parser happened to assign it.
        self.time = time

    def __str__(self):
        """Return the event as ``name | time | location``."""
        return '{} | {} | {}'.format(self.name, self.time, self.location)

    def get(self):
        """Print the one-line description of the event.

        The script at the end of the file calls ``get()`` on every collected
        meeting, so the method has to exist on this class.
        """
        print(self)
class MyHTMLParser(HTMLParser):
    """Collect events from a ``<ul class="list-recent-events menu">`` list.

    Parsing state lives in boolean flags toggled by start and end tags:

    * ``meeting1``     -- set by ``<ul class="list-recent-events menu">``,
      cleared by any ``</ul>``.
    * ``meeting2``     -- set by ``<li>``, cleared by ``</li>``.
    * ``meetname``     -- set by ``<a>``, cleared by ``</a>``.
    * ``meettime``     -- set by ``<time>``, cleared by ``</time>``.
    * ``meetlocation`` -- set by ``<span class="event-location">``, cleared
      by any ``</span>``.

    ``cacheHead`` / ``cacheAttrs`` hold the most recent start tag and its
    attributes. ``cacheMeetings`` is the record currently being filled and
    ``meetings`` is the list of all records created by this parser.
    """

    # Immutable defaults may safely live on the class.
    meeting2 = False
    meeting1 = False
    meetname = False
    meettime = False
    meetlocation = False
    cacheHead = None
    cacheAttrs = None
    cacheMeetings = None

    def __init__(self, *args, **kwargs):
        """Create a parser with its own, initially empty, ``meetings`` list.

        All arguments are forwarded unchanged to ``HTMLParser``.
        """
        super().__init__(*args, **kwargs)
        # A class-level list would be shared by every parser instance, so
        # results of one parse would leak into the next one.
        self.meetings = []

    def handle_starttag(self, tag, attrs):
        """Update the state flags for an opening tag and remember the tag.

        Args:
            tag: Name of the tag that was opened.
            attrs: List of ``(name, value)`` pairs of the tag's attributes.
        """
        if tag == 'ul':
            if ('class', 'list-recent-events menu') in attrs:
                self.meeting1 = True
        elif tag == 'li':
            self.meeting2 = True
        elif tag == 'a':
            self.meetname = True
        elif tag == 'time':
            self.meettime = True
        elif tag == 'span':
            if ('class', 'event-location') in attrs:
                self.meetlocation = True
        self.cacheHead = tag
        self.cacheAttrs = attrs

    def handle_endtag(self, tag):
        """Clear the state flag that belongs to the closing tag.

        Args:
            tag: Name of the tag that was closed.
        """
        if tag == 'ul':
            self.meeting1 = False
        elif tag == 'li':
            self.meeting2 = False
        elif tag == 'a':
            self.meetname = False
        elif tag == 'time':
            self.meettime = False
        elif tag == 'span':
            self.meetlocation = False

    def handle_startendtag(self, tag, attrs):
        """Ignore self-closing tags so they never touch the parsing state."""
        pass

    def handle_data(self, data):
        """Turn text found inside the events list into meeting fields.

        Text directly inside ``<a>`` starts a new ``meeting`` record; text
        directly inside ``<time>`` or inside the location ``<span>`` is
        written onto the most recent record. Text inside a tag nested in
        ``<time>`` is not captured, because ``cacheHead`` then names the
        nested tag.

        Args:
            data: The text content reported by the parser.
        """
        if not (self.meeting1 and self.meeting2):
            return
        if self.cacheHead == 'a' and self.meetname:
            self.cacheMeetings = meeting(data, 'none', 'none')
            self.meetings.append(self.cacheMeetings)
        current = self.cacheMeetings
        if current is None:
            # Time/location text showed up before any event link: there is
            # no record to attach it to, so skip it instead of raising.
            return
        # ``cacheHead`` keeps saying 'time' after ``</time>``; requiring the
        # flag as well stops trailing whitespace from replacing the value.
        if self.cacheHead == 'time' and self.meettime:
            current.time = data
        if self.meetlocation:
            current.location = data
# Download the python.org events page; the body is decoded as UTF-8 text.
with request.urlopen('https://www.python.org/events/python-events/') as f:
    data = str(f.read(), 'utf-8')

# Feed the markup to the parser, which gathers the events it recognises.
parser = MyHTMLParser()
parser.feed(data)

# Call get() on every meeting the parser collected.
for n in parser.meetings:
    n.get()
# 云端67395
# -*- coding: utf-8 -*-
from urllib import request
from html.parser import HTMLParser
from html.entities import name2codepoint


class meeting(object):
    """Plain record for one event collected by the HTML parser.

    Attributes:
        name: Text of the event's link.
        location: Text of the event's location element.
        time: Text of the event's ``<time>`` element.
    """

    def __init__(self, name, location, time):
        """Store the three descriptive fields of the event.

        Args:
            name: Event name.
            location: Event location.
            time: Event time.
        """
        self.name = name
        self.location = location
        # The constructor previously dropped this argument, which left the
        # attribute undefined until the parser happened to assign it.
        self.time = time

    def __str__(self):
        """Return the event as ``name | time | location``."""
        return '{} | {} | {}'.format(self.name, self.time, self.location)

    def get(self):
        """Print the one-line description of the event.

        The script at the end of the file calls ``get()`` on every collected
        meeting, so the method has to exist on this class.
        """
        print(self)
class MyHTMLParser(HTMLParser):
    """Collect events from a ``<ul class="list-recent-events menu">`` list.

    Parsing state lives in boolean flags toggled by start and end tags:

    * ``meeting1``     -- set by ``<ul class="list-recent-events menu">``,
      cleared by any ``</ul>``.
    * ``meeting2``     -- set by ``<li>``, cleared by ``</li>``.
    * ``meetname``     -- set by ``<a>``, cleared by ``</a>``.
    * ``meettime``     -- set by ``<time>``, cleared by ``</time>``.
    * ``meetlocation`` -- set by ``<span class="event-location">``, cleared
      by any ``</span>``.

    ``cacheHead`` / ``cacheAttrs`` hold the most recent start tag and its
    attributes. ``cacheMeetings`` is the record currently being filled and
    ``meetings`` is the list of all records created by this parser.
    """

    # Immutable defaults may safely live on the class.
    meeting2 = False
    meeting1 = False
    meetname = False
    meettime = False
    meetlocation = False
    cacheHead = None
    cacheAttrs = None
    cacheMeetings = None

    def __init__(self, *args, **kwargs):
        """Create a parser with its own, initially empty, ``meetings`` list.

        All arguments are forwarded unchanged to ``HTMLParser``.
        """
        super().__init__(*args, **kwargs)
        # A class-level list would be shared by every parser instance, so
        # results of one parse would leak into the next one.
        self.meetings = []

    def handle_starttag(self, tag, attrs):
        """Update the state flags for an opening tag and remember the tag.

        Args:
            tag: Name of the tag that was opened.
            attrs: List of ``(name, value)`` pairs of the tag's attributes.
        """
        if tag == 'ul':
            if ('class', 'list-recent-events menu') in attrs:
                self.meeting1 = True
        elif tag == 'li':
            self.meeting2 = True
        elif tag == 'a':
            self.meetname = True
        elif tag == 'time':
            self.meettime = True
        elif tag == 'span':
            if ('class', 'event-location') in attrs:
                self.meetlocation = True
        self.cacheHead = tag
        self.cacheAttrs = attrs

    def handle_endtag(self, tag):
        """Clear the state flag that belongs to the closing tag.

        Args:
            tag: Name of the tag that was closed.
        """
        if tag == 'ul':
            self.meeting1 = False
        elif tag == 'li':
            self.meeting2 = False
        elif tag == 'a':
            self.meetname = False
        elif tag == 'time':
            self.meettime = False
        elif tag == 'span':
            self.meetlocation = False

    def handle_startendtag(self, tag, attrs):
        """Ignore self-closing tags so they never touch the parsing state."""
        pass

    def handle_data(self, data):
        """Turn text found inside the events list into meeting fields.

        Text directly inside ``<a>`` starts a new ``meeting`` record; text
        directly inside ``<time>`` or inside the location ``<span>`` is
        written onto the most recent record. Text inside a tag nested in
        ``<time>`` is not captured, because ``cacheHead`` then names the
        nested tag.

        Args:
            data: The text content reported by the parser.
        """
        if not (self.meeting1 and self.meeting2):
            return
        if self.cacheHead == 'a' and self.meetname:
            self.cacheMeetings = meeting(data, 'none', 'none')
            self.meetings.append(self.cacheMeetings)
        current = self.cacheMeetings
        if current is None:
            # Time/location text showed up before any event link: there is
            # no record to attach it to, so skip it instead of raising.
            return
        # ``cacheHead`` keeps saying 'time' after ``</time>``; requiring the
        # flag as well stops trailing whitespace from replacing the value.
        if self.cacheHead == 'time' and self.meettime:
            current.time = data
        if self.meetlocation:
            current.location = data
# Download the python.org events page; the body is decoded as UTF-8 text.
with request.urlopen('https://www.python.org/events/python-events/') as f:
    data = str(f.read(), 'utf-8')

# Feed the markup to the parser, which gathers the events it recognises.
parser = MyHTMLParser()
parser.feed(data)

# Call get() on every meeting the parser collected.
for n in parser.meetings:
    n.get()