# Interactive walkthrough of basic Beautiful Soup navigation.
#
# Parses a small HTML page, then prints the document, its title, the first
# paragraph, its links, sibling navigation and a comment node, pausing for
# the user to press Enter between sections.
from bs4 import BeautifulSoup

# NOTE(review): the markup of this literal appears to have been stripped
# (only the visible text survived). The tags below are reconstructed from
# what the rest of the script accesses: a <title>, a first <p> with a
# "class" attribute, three <a> tags with "href", and three sibling <p>
# elements. The URLs and the class/id values on the links are placeholders
# -- confirm against the original page.
html_doc = """
<html><head><title>Uncle Scrooge's Nephews</title></head>
<body>
<p class="title"><b>Uncle Scrooge's Nephews</b></p>

<p class="story">Once upon a time Uncle Scrooge had three nephews; and their names were
<a href="http://example.com/hughie" class="nephew" id="link1">Hughie</a>,
<a href="http://example.com/dewey" class="nephew" id="link2">Dewey</a> and
<a href="http://example.com/louis" class="nephew" id="link3">Louis</a>;
and they lived at Disneyland.</p>

<p class="story">...</p>
"""

soup = BeautifulSoup(html_doc, 'html.parser')
print(soup.prettify())
input("more > ")

# --- Whole-page text and the <title> element with its ancestors ---
print("Here's the text of the page:")
print(soup.get_text())

print("Title is: '" + str(soup.title) + "'")
print("Title tag is: '" + str(soup.title.name) + "'")
print("Title value is: '" + str(soup.title.string) + "'")
print("Title parent is: '" + str(soup.title.parent.name) + "'")
print("Title grandparent is: '" + str(soup.title.parent.parent.name) + "'")
print("Title great-grandparent is: '"
      + str(soup.title.parent.parent.parent.name) + "'")
try:
    # Walking past the top of the tree yields None, whose .name lookup
    # raises AttributeError.
    print("Title great-great-grandparent is: '"
          + str(soup.title.parent.parent.parent.parent.name) + "'")
except AttributeError:
    print("The Title great-great-grandparent does not exist")
# Was print("more > "), which showed the prompt without pausing; every
# other section break uses input().
input("more > ")

# --- Attributes of the first <p> ---
print("now for some attributes")
print("attribute of first p is", soup.p.attrs)
print("and another way", soup.p["class"])

# Use a membership test: str.index() returns 0 for a match at the very
# start of the string (falsy) and raises ValueError when there is no match,
# so it cannot serve as a yes/no check.
if '[' in str(soup.p["class"]):
    print("yes, the [ ] are really in the string!")
else:
    print("the [ ] are not in the string")

# Deliberately repeat the lookup without str() to show the failure mode:
# .index() on the raw attribute value raises ValueError when '[' is absent.
try:
    if soup.p["class"].index('['):
        print("yes, the [ ] are really in the string!")
    else:
        print("the [ ] are not in the string")
except ValueError as err:
    print("If you forget the 'str', you get this exception:")
    print("\tValueError: " + str(err))
input("more > ")

# --- The first <p> element and its ancestors ---
print("First paragraph is: '" + str(soup.p) + "'")
print("Its class is: '" + str(soup.p['class']) + "'")
print("Its value is: '" + str(soup.p.string) + "'")
print("Its parent is: '" + str(soup.p.parent.name) + "'")
print("Its great-grandparent is: '"
      + str(soup.p.parent.parent.parent.name) + "'")
try:
    print("Its great-great-grandparent is: '"
          + str(soup.p.parent.parent.parent.parent.name) + "'")
except AttributeError:
    print("Its great-great-grandparent does not exist")
input("more > ")

# --- All <a> tags and their href attributes ---
# Search once and reuse the result for each listing below.
links = soup.find_all('a')
print("The list of 'a' tags is:", links)
print("Now 1 per line:")
for link in links:
    print("\t" + str(link))
print("Now the links :")
for link in links:
    print("\t" + link.get('href'))
print("And another way:")
for link in links:
    print("\t" + link['href'])
input("more > ")

# --- Sibling navigation starting from the first <p> ---
first_p = soup.p
print("p's next sibling:", first_p.next_sibling.name)
print("p's next HTML sibling:", first_p.next_sibling.next_sibling)
print("that's next HTML sibling:",
      first_p.next_sibling.next_sibling.next_sibling.next_sibling)
print("and finally:",
      first_p.next_sibling.next_sibling.next_sibling
      .next_sibling.next_sibling.next_sibling)
input("more > ")

# --- Comment nodes ---
print("Now for comments:")
# NOTE(review): this literal was an empty string, which makes soup.b None
# and the .string lookup below raise AttributeError. A <b> element holding
# a single HTML comment is reconstructed here; the comment text is a
# placeholder -- confirm against the original.
html = "<b><!--This is a comment--></b>"
soup = BeautifulSoup(html, 'html.parser')
comment = soup.b.string
comment_type = type(comment)
print("The comment is:")
print(comment)
print("and its type is", comment_type)