python-web-scraping/task1.py at main · priyapatel2006/python-web-scraping · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import requests, bs4, json, os

def scrape_top_list():
    """Return the movies of an IMDb list, scraping once and caching on disk.

    If ``movies.json`` exists in the current working directory, its parsed
    contents are returned and no network request is made. Otherwise the
    compact view of the IMDb list is downloaded and parsed, and the result
    is written to ``movies.json`` before being returned.

    Returns:
        list[dict]: On a fresh scrape, one dict per list entry with the keys
        ``name`` (str), ``position`` (int), ``url`` (str, the title link's
        ``href``), ``rating`` (float) and ``year`` (int). On a cache hit,
        whatever JSON value ``movies.json`` holds.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        AttributeError: If the page does not contain the expected elements.
    """
    cache_path = "movies.json"
    if os.path.exists(cache_path):
        # Context manager so the handle is closed even if parsing fails.
        with open(cache_path, "r", encoding="utf-8") as cache_file:
            return json.load(cache_file)

    list_url = "https://www.imdb.com/list/ls063315922/?view=compact"
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    # A timeout keeps a stalled connection from blocking forever.
    response = requests.get(list_url, headers=headers, timeout=30)
    # Fail with a clear HTTP error instead of an AttributeError further down
    # when the server returns an error page.
    response.raise_for_status()

    soup = bs4.BeautifulSoup(response.content, "html.parser")
    entries = soup.find("ul", class_="ipc-metadata-list").find_all("li")

    movies = []
    for entry in entries:
        link = entry.find("a", class_="ipc-title-link-wrapper")
        # The link text is read as "<position>. <title>". Split on the first
        # period only, so titles that contain periods themselves
        # (e.g. "Mr. Nobody") are kept whole.
        position_text, _, title_text = link.text.partition(".")
        rating_text = entry.find("span", class_="ipc-rating-star--rating").text
        # NOTE(review): this selector includes generated-looking class names
        # ("sc-a55f6282-6 iMumIM") and will presumably break when the site is
        # redeployed -- confirm against the live markup.
        year_text = entry.find(
            "span", class_="sc-a55f6282-6 iMumIM cli-title-metadata-item"
        ).text
        movies.append(
            {
                "name": title_text.strip(),
                "position": int(position_text),
                "url": link.get("href"),
                "rating": float(rating_text),
                "year": int(year_text),
            }
        )

    # Write inside a context manager so the data is flushed and the file is
    # closed before returning.
    with open(cache_path, "w", encoding="utf-8") as cache_file:
        json.dump(movies, cache_file, indent=4)
    return movies

# movies  = scrape_top_list()
# print(movies)