We will use the requests library to open a web page, and BeautifulSoup to extract parts of the page itself.
# Third-party dependencies: requests performs the HTTP call and BeautifulSoup
# parses the returned markup.
import requests
from bs4 import BeautifulSoup

# URL of the page to fetch.
strUrl = "https://www.bmoseley.com/test.html"
# Request headers; sets a custom User-Agent string for the request.
diHeaders = {"User-Agent": "Mozilla/5.0 (compatible; BeautifulSoupDemo/1.0; +http://example.com/info)"}
# Fetch the page. The timeout keeps the script from waiting forever on an
# unresponsive server; connection problems and timeouts are reported and end
# the run instead of surfacing as an unhandled traceback.
try:
    response = requests.get(strUrl, headers=diHeaders, timeout=10)
except requests.RequestException as exc:
    print("There was an error!", exc)
    # Exit with a non-zero status so callers can tell the run failed.
    raise SystemExit(1)

# Anything other than a plain 200 response is treated as a failure.
if response.status_code != 200:
    print("There was an error!")
    raise SystemExit(1)

strHtml = response.text
# print(strHtml)  # uncomment to inspect the raw markup while debugging
# Parse the markup with Python's built-in "html.parser" backend.
soup = BeautifulSoup(strHtml, "html.parser")
# Print the document title. soup.title is missing when the page has no
# <title> tag, and .string is None when the tag is empty or wraps nested
# markup; both cases fall back to the placeholder instead of printing "None".
print("Page title:")
strTitle = soup.title.string if soup.title else None
if strTitle:
    print(strTitle)
else:
    print("No title.")
# List the target of every <a> element; anchors whose href is missing or
# empty are skipped.
print("\nPage links:")
for link in soup.find_all("a"):
    strLink = link.get("href")
    if not strLink:
        continue
    print(strLink)
# Print the text of every <p> element, skipping paragraphs with no text.
print("\nParagraphs:")
for par in soup.find_all("p"):
    strP = par.get_text()
    if not strP:
        continue
    print(strP)
# Print the text of the first element matching the CSS selector "#demo";
# nothing is printed under the heading when no element matches.
print("\nDemo element:")
demo = soup.select_one("#demo")
if demo is not None:
    print(demo.get_text())
# Print the text of every element matching the CSS selector ".info".
# Looping over an empty result prints nothing, so no emptiness check is needed.
print("\nInfo class:")
liInfo = soup.select(".info")
for infoItem in liInfo:
    print(infoItem.get_text())
# Report the source and alt text of every <img> element. A missing alt
# attribute is shown as "No alt text"; a missing src is printed as None.
print("\nImages:")
for img in soup.find_all("img"):
    strSrc, strAlt = img.get("src"), img.get("alt", "No alt text")
    print("Image Src: ", strSrc)
    print("Alt Text: ", strAlt)