In [37]:
import requests
from bs4 import BeautifulSoup

url = "https://www.scrapethissite.com/pages/simple/"

# Always pass a timeout: without one, requests waits indefinitely on a
# stalled connection and the cell never finishes.
response = requests.get(url, timeout=10)

# Let requests guess the encoding from the body rather than trusting only the
# HTTP headers, so that response.text decodes non-ASCII names correctly.
response.encoding = response.apparent_encoding

if response.status_code != 200:
    print("error")
    # Stop here instead of continuing: otherwise `html` would be undefined
    # (or stale from an earlier run) when the parsing cell executes.
    raise RuntimeError(f"GET {url} returned HTTP {response.status_code}")

html = response.text
print("success")
success
In [ ]:
# Parse the downloaded page with the html5lib parser.
soup = BeautifulSoup(
    markup=html,
    features="html5lib",
)

Transform the information on the website into a DataFrame

In [36]:
import pandas as pd 
# Accumulates one dict per country card; turned into a DataFrame further down.
data = []

def get_text(e):
    """Return the whitespace-stripped text of ``e``, or None if ``e`` is falsy.

    ``e`` is expected to expose ``get_text(strip=True)``; a missing element
    (e.g. ``None``) yields ``None`` instead of raising.
    """
    if not e:
        return None
    return e.get_text(strip=True)

# Select only <div> elements that carry BOTH classes. Passing a list to
# ``class_`` in find_all matches elements having ANY of the listed classes,
# so an unrelated ``col-md-4`` layout column would also be picked up and
# produce an all-None row.
cards = soup.select("div.col-md-4.country")

for card in cards:
    # get_text returns None for a missing element, so an incomplete card
    # yields a partially empty row rather than an AttributeError.
    name = get_text(card.find("h3", class_="country-name"))
    capital = get_text(card.find("span", class_="country-capital"))
    population = get_text(card.find("span", class_="country-population"))
    area = get_text(card.find("span", class_="country-area"))

    data.append({
        "Country": name,
        "Capital": capital,
        "Population": population,
        "Area": area
    })

# Build the frame once from the collected records.
df = pd.DataFrame(data)
df
Out[36]:
Country Capital Population Area
0 Andorra Andorra la Vella 84000 468.0
1 United Arab Emirates Abu Dhabi 4975593 82880.0
2 Afghanistan Kabul 29121286 647500.0
3 Antigua and Barbuda St. John's 86754 443.0
4 Anguilla The Valley 13254 102.0
... ... ... ... ...
245 Yemen Sanaa 23495361 527970.0
246 Mayotte Mamoudzou 159042 374.0
247 South Africa Pretoria 49000000 1219912.0
248 Zambia Lusaka 13460305 752614.0
249 Zimbabwe Harare 11651858 390580.0

250 rows × 4 columns