Day 41 - Extracting Data from Web Pages Using BeautifulSoup and requests (Part 1/3)

We will learn how to extract data from a database-driven web page using requests, BeautifulSoup, and a little bit of ingenuity. This approach works for any page whose content arrives in the HTML itself; pages that render their listings with JavaScript need a different tool, such as a headless browser.

Starter Code

from bs4 import BeautifulSoup
import requests

strUrl = "https://www.visitbakersfield.com/events"

diHeaders = {
    "User-Agent": "Mozilla/5.0 (compatible; WebCrawler/1.0; +http://example.com/info)"
}
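Some servers reject requests that arrive without a User-Agent header, which is why we send a descriptive one. Before writing any parsing code, it is worth confirming the server actually answers. A minimal sanity check, assuming the site is reachable:

response = requests.get(strUrl, headers=diHeaders, timeout=10)
print(response.status_code)  # 200 means the page came back successfully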

Finished Code

from bs4 import BeautifulSoup
import requests

strUrl = "https://www.visitbakersfield.com/events"

diHeaders = {
    "User-Agent": "Mozilla/5.0 (compatible; WebCrawler/1.0; +http://example.com/info)"
}

# fetch the page; the timeout keeps the script from hanging on a dead server
response = requests.get(strUrl, headers=diHeaders, timeout=10)

liEvents = []

# a requests Response is truthy when the request succeeded (status code < 400)
if response:

    # print(response.text)  # uncomment to inspect the raw HTML
    soup = BeautifulSoup(response.text, "html.parser")

    # each event card is an <article> tag; the second argument filters by CSS class
    liCards = soup.find_all("article", "card--listing")

    for indCard in liCards:
        # print(indCard)
        # print("\n\n")
        # print("#"*80)

        strTitle = indCard.find("h3").text.strip()
        print(strTitle)

        # the card's first <a> tag holds the link to the event detail page
        bsLink = indCard.find("a")
        strLink = bsLink["href"]
        print(strLink)

        # the address spans multiple lines; flatten it onto one
        strAddress = indCard.find("span", "card__address").text.strip()
        strAddress = strAddress.replace("\n", " -- ")
        print(f"Address: {strAddress}")

        # collect each event so liEvents holds the full result set
        liEvents.append({"title": strTitle, "link": strLink, "address": strAddress})

        print("#"*60)