-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtask1.py
More file actions
32 lines (30 loc) · 1.26 KB
/
task1.py
File metadata and controls
32 lines (30 loc) · 1.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import requests, bs4, json, os
def scrape_top_list():
    """Return the movies of the IMDb list, using ``movies.json`` as a cache.

    If ``movies.json`` exists in the current working directory, its parsed
    contents are returned as-is and no network request is made.  Otherwise
    the compact view of the IMDb list is downloaded, every ``<li>`` inside
    the ``ipc-metadata-list`` element is parsed, and the result is written
    to ``movies.json`` before being returned.

    Returns:
        A list of dicts with the keys ``name`` (str), ``position`` (int),
        ``url`` (the title link's ``href`` exactly as found in the page),
        ``rating`` (float) and ``year`` (int).  When served from the cache,
        whatever ``movies.json`` contains is returned unchanged.

    Raises:
        requests.RequestException: if the download fails, times out, or
            the server answers with an HTTP error status.
        ValueError: if the list container is missing from the page, or a
            position / rating / year cannot be converted to a number.
        AttributeError: if a list item lacks one of the expected elements.
    """
    cache_path = "movies.json"
    if os.path.exists(cache_path):
        # A context manager closes the handle even if json.load raises.
        with open(cache_path, "r", encoding="utf-8") as cache_file:
            return json.load(cache_file)

    list_url = "https://www.imdb.com/list/ls063315922/?view=compact"
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    # Without a timeout a stalled connection would block forever.
    response = requests.get(list_url, headers=headers, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()

    soup = bs4.BeautifulSoup(response.content, "html.parser")
    container = soup.find("ul", class_="ipc-metadata-list")
    if container is None:
        raise ValueError(
            "could not find <ul class='ipc-metadata-list'> in the IMDb page"
        )

    movies = [_parse_movie_item(item) for item in container.find_all("li")]

    with open(cache_path, "w", encoding="utf-8") as cache_file:
        json.dump(movies, cache_file, indent=4)
    return movies


def _parse_movie_item(item):
    """Convert one ``<li>`` element of the list into a movie dict."""
    link = item.find("a", class_="ipc-title-link-wrapper")
    # The link text looks like "<position>. <title>".  Split on the FIRST
    # period only, so titles that contain periods themselves stay intact.
    position_text, _, title_text = link.text.partition(".")
    rating_text = item.find("span", class_="ipc-rating-star--rating").text
    # NOTE(review): this class string contains generated-looking parts and
    # may stop matching when the page markup changes - confirm periodically.
    year_text = item.find(
        "span", class_="sc-a55f6282-6 iMumIM cli-title-metadata-item"
    ).text
    return {
        "name": title_text.strip(),
        "position": int(position_text),
        "url": link.get("href"),
        "rating": float(rating_text),
        "year": int(year_text),
    }
# movies = scrape_top_list()
# print(movies)