-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcodes.py
More file actions
103 lines (84 loc) · 3.22 KB
/
codes.py
File metadata and controls
103 lines (84 loc) · 3.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# -*- coding: utf-8 -*-
"""Scrape four Jordanian news sites for articles mentioning "Orange"
(the Arabic word أورنج, URL-encoded in each query string) and store each
article's title, publish date, and source website in the MySQL table
``orange.news_orange``.

Originally generated by Colaboratory:
https://colab.research.google.com/drive/1MSMSqqAUcK-eksT6rx-bObsXcxpK3uP-
"""
from contextlib import closing
from urllib.request import urlopen

from bs4 import BeautifulSoup
import requests
import mysql.connector

# Search-results pages; the query parameter is the URL-encoded Arabic
# word for "Orange" (%D8%A3%D9%88%D8%B1%D9%86%D8%AC).
ALMADENAH_URL = "http://www.almadenahnews.com/search?q=%D8%A3%D9%88%D8%B1%D9%86%D8%AC"
ADDUSTOUR_URL = "https://www.addustour.com/search.php?search=%D8%A3%D9%88%D8%B1%D9%86%D8%AC"
JORDANZAD_URL = "https://www.jordanzad.com/index.php?page=tag&hashtag=%D8%A3%D9%88%D8%B1%D9%86%D8%AC"
SARAHANEWS_URL = "http://www.sarahanews.net/?s=%D8%A3%D9%88%D8%B1%D9%86%D8%AC"


def _fetch_soup(url, parser="html.parser"):
    """Download *url* with urllib and parse it, closing the connection.

    The original script leaked every ``urlopen`` response; ``closing``
    guarantees the socket is released.
    """
    with closing(urlopen(url)) as resp:
        return BeautifulSoup(resp.read(), parser)


def _clean(text):
    """Strip embedded newlines and surrounding whitespace from scraped text."""
    return text.replace('\n', '').strip()


# Collected as (title, date, website) tuples, in the same order the
# original script inserted them: almadenah, addustour, jordanzad, sarahanews.
rows = []

# --- almadenahnews.com -------------------------------------------------
soup = _fetch_soup(ALMADENAH_URL)
cards = soup.find_all("div", {"class": "search_cart"})
# NOTE(review): the fixed index pairs (title at 0/3, date at 2/5) mirror
# the original script's hard-coded slots — confirm the page layout still
# groups each result into three consecutive "search_cart" divs.
for title_idx, date_idx in ((0, 2), (3, 5)):
    rows.append((_clean(cards[title_idx].a.text),
                 _clean(cards[date_idx].text),
                 "almadenah"))

# --- addustour.com (fetched with requests, parsed with lxml) -----------
resp = requests.get(ADDUSTOUR_URL)
soup1 = BeautifulSoup(resp.content, "lxml")
for item in soup1.find_all("li", {"class": "search"}):
    rows.append((item.find_all("h3")[0].text.strip(),
                 item.find_all("div", {"class": "date"})[0].text.strip(),
                 "addustour"))

# --- jordanzad.com -----------------------------------------------------
soup2 = _fetch_soup(JORDANZAD_URL)
items = soup2.find_all("li")
dates = soup2.find_all("span", {"class": "date"})
# NOTE(review): titles are taken from li[1]..li[4] paired with
# span.date[0]..[3], exactly as the original did — verify this offset
# still matches the live markup.
for offset in range(4):
    rows.append((_clean(items[offset + 1].a.text),
                 dates[offset].text.strip(),
                 "jordanzad"))

# --- sarahanews.net ----------------------------------------------------
soup3 = _fetch_soup(SARAHANEWS_URL)
posts = soup3.find_all("div", {"class": "jeg_postblock_content"})
for post in posts[:3]:
    rows.append((_clean(post.a.text),
                 _clean(post.div.text),
                 "sarahanews"))

# --- persist to MySQL --------------------------------------------------
# SECURITY(review): credentials are hard-coded; move them to environment
# variables or a config file before running anywhere non-local.
sql = "INSERT INTO news_orange (News_Title, Publish_Date, News_Website) VALUES (%s,%s,%s)"
mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    password="123456",
    database="orange",
)
try:
    # executemany replaces the ten hand-written execute() calls; the
    # cursor and connection are now closed even if an insert fails.
    with closing(mydb.cursor()) as mycursor:
        mycursor.executemany(sql, rows)
    mydb.commit()
finally:
    mydb.close()