3주차 과제 비교

Created Diff never expires
9 removals
Lines
Total
Removed
Words
Total
Removed
To continue using this feature, upgrade to
Diffchecker logo
Diffchecker Pro
15 lines
23 additions
Lines
Total
Added
Words
Total
Added
To continue using this feature, upgrade to
Diffchecker logo
Diffchecker Pro
29 lines
## 웹 크롤링에 필요한 세팅: requests와 bs4 패키지
import requests
import requests
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup

headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
data = requests.get('https://www.genie.co.kr/chart/top200?ditc=M&rtm=N&ymd=20210701',headers=headers)
data = requests.get('https://www.genie.co.kr/chart/top200?ditc=M&rtm=N&ymd=20210701',headers=headers)

soup = BeautifulSoup(data.text, 'html.parser')
soup = BeautifulSoup(data.text, 'html.parser')


trs = soup.select('#body-content > div.newest-list > div > table > tbody > tr')
## 지니뮤직의 1~50위 곡의 순위/곡명/가수를 스크래핑해보자
# 순위
#body-content > div.newest-list > div > table > tbody > tr:nth-child(1) > td.number
# 곡명
#body-content > div.newest-list > div > table > tbody > tr:nth-child(1) > td.info > a.title.ellipsis
# 가수
#body-content > div.newest-list > div > table > tbody > tr:nth-child(1) > td.info > a.artist.ellipsis
# 공통부분:
#body-content > div.newest-list > div > table > tbody > tr


for tr in trs:

title = tr.select_one('td.info > a.title.ellipsis').text.strip()
# 최종 정리:
rank = tr.select_one('td.number').text[0:2].strip()
musics = list(soup.select("#body-content > div.newest-list > div > table > tbody > tr"))
artist = tr.select_one('td.info > a.artist.ellipsis').text
for music in musics:
rank = music.select_one("td.number").text[:2].replace('\n', ' ')
title = music.select_one("td.info > a.title.ellipsis").text.strip()
artist = music.select_one("td.info > a.artist.ellipsis").text.strip()

print(rank, title, artist)
print(rank, title, artist)