import requests
from bs4 import BeautifulSoup
# Download the page to scrape. On success the decoded body is left in `html`.
url = "https://www.scrapethissite.com/pages/simple/"

# Without a timeout, requests can wait indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Decode the body with the encoding detected from its content rather than
# the one derived from the response headers.
response.encoding = response.apparent_encoding

if response.status_code == 200:
    html = response.text
    print("success")
else:
    print("error")
    # `html` is only assigned on success; stop here with the HTTP status
    # instead of failing later with an unrelated NameError at parse time.
    raise RuntimeError(f"GET {url} returned HTTP {response.status_code}")
success
# Build the parse tree for the downloaded page using the html5lib parser.
soup = BeautifulSoup(markup=html, features="html5lib")
import pandas as pd

# Collects one dict per scraped country; turned into a DataFrame later.
data = []
def get_text(e):
    """Return the whitespace-stripped text of ``e``, or ``None`` if ``e`` is falsy.

    ``e`` is expected to expose ``get_text(strip=True)``; a missing element
    (``None``) yields ``None`` instead of raising ``AttributeError``.
    """
    if not e:
        return None
    return e.get_text(strip=True)
# A list passed to class_ matches a div carrying ANY of the listed classes,
# so ["col-md-4", "country"] would also pick up a layout-only "col-md-4"
# column and emit an all-None row for it. Match on the specific "country"
# class instead (assumes every country card carries it - the original
# selector already relied on that class name).
cards = soup.find_all("div", class_="country")

for card in cards:
    # Each lookup may return None when the element is absent; get_text
    # turns that into a None cell instead of raising.
    name = get_text(card.find("h3", class_="country-name"))
    capital = get_text(card.find("span", class_="country-capital"))
    population = get_text(card.find("span", class_="country-population"))
    area = get_text(card.find("span", class_="country-area"))
    data.append({
        "Country": name,
        "Capital": capital,
        "Population": population,
        "Area": area,
    })

# Naming the columns keeps the schema stable even when no cards were found
# (an empty list would otherwise give a DataFrame with no columns).
# Values are the scraped strings; no numeric conversion is done here.
df = pd.DataFrame(data, columns=["Country", "Capital", "Population", "Area"])
# Bare expression: displays the frame when run as the last line of a notebook cell.
df
| | Country | Capital | Population | Area |
|---|---|---|---|---|
| 0 | Andorra | Andorra la Vella | 84000 | 468.0 |
| 1 | United Arab Emirates | Abu Dhabi | 4975593 | 82880.0 |
| 2 | Afghanistan | Kabul | 29121286 | 647500.0 |
| 3 | Antigua and Barbuda | St. John's | 86754 | 443.0 |
| 4 | Anguilla | The Valley | 13254 | 102.0 |
| ... | ... | ... | ... | ... |
| 245 | Yemen | Sanaa | 23495361 | 527970.0 |
| 246 | Mayotte | Mamoudzou | 159042 | 374.0 |
| 247 | South Africa | Pretoria | 49000000 | 1219912.0 |
| 248 | Zambia | Lusaka | 13460305 | 752614.0 |
| 249 | Zimbabwe | Harare | 11651858 | 390580.0 |
250 rows × 4 columns