-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsql_extract.py
More file actions
60 lines (52 loc) · 1.76 KB
/
sql_extract.py
File metadata and controls
60 lines (52 loc) · 1.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import requests
import mysql.connector
from bs4 import BeautifulSoup
from urllib.parse import urljoin
def extract_sql_data(host, user, password, database, query):
    """Run *query* against a MySQL database and return all fetched rows.

    Parameters:
        host, user, password, database: MySQL connection parameters.
        query: SQL statement to execute. NOTE(review): the query is
            executed verbatim with no parameterization — callers must not
            pass untrusted input here (SQL injection risk).

    Returns:
        list: rows from ``cursor.fetchall()``, or ``[]`` if any
        ``mysql.connector.Error`` occurred (the error is printed, not raised).
    """
    data = []
    conn = None
    cursor = None
    try:
        conn = mysql.connector.connect(
            host=host,
            user=user,
            password=password,
            database=database
        )
        cursor = conn.cursor()
        cursor.execute(query)
        data = cursor.fetchall()
    except mysql.connector.Error as err:
        # Best-effort: report the failure and fall through to return [].
        print(f"Error: {err}")
    finally:
        # BUG FIX: the original called conn.is_connected() unconditionally,
        # so a failed connect() left conn as None and the finally clause
        # itself raised AttributeError (and `cursor` was unbound there).
        if cursor is not None:
            cursor.close()
        if conn is not None and conn.is_connected():
            conn.close()
    return data
def scrape_website(base_url):
    """Fetch *base_url* and every page it links to via ``<a href>`` tags.

    Parameters:
        base_url: URL of the page whose links will be followed. Relative
            hrefs are resolved against it with ``urljoin``.

    Returns:
        dict: maps each link's raw href string to the linked page's text.
        Pages that fail (non-200 status or network error) are skipped so
        one bad link cannot abort the whole crawl.
    """
    data = {}
    try:
        # BUG FIX: the original had no timeout, so a stalled server hung
        # the script indefinitely.
        response = requests.get(base_url, timeout=10)
    except requests.RequestException as err:
        print(f"Failed to retrieve data. Error: {err}")
        return data
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        links = soup.find_all('a', href=True)
        for link in links:
            page_url = urljoin(base_url, link['href'])
            try:
                page_response = requests.get(page_url, timeout=10)
            except requests.RequestException as err:
                # BUG FIX: a single unreachable link previously raised and
                # aborted the entire crawl; skip it and continue instead.
                print(f"Skipping {page_url}: {err}")
                continue
            if page_response.status_code == 200:
                data[link['href']] = page_response.text
    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
    return data
# --- Script entry: prompt for inputs, scrape the site, run the query,
# and dump both result sets to text files. ---
website_url = input("Input web URL: ")
host = input("Input Host: ")
user = input("Input DB User: ")
password = input("Input DB Password: ")
database = input("Input DB Name: ")
query = input("Input Query eg. SELECT * FROM table: ")

website_data = scrape_website(website_url)
sql_data = extract_sql_data(host, user, password, database, query)

# BUG FIX: write with an explicit UTF-8 encoding — scraped pages routinely
# contain characters outside the platform's default codec, which made the
# original open(...) calls raise UnicodeEncodeError on e.g. Windows (cp1252).
with open('website_data.txt', 'w', encoding='utf-8') as file:
    for key, value in website_data.items():
        file.write(f"URL: {key}\nContent: {value}\n\n")

with open('sql_data.txt', 'w', encoding='utf-8') as file:
    for row in sql_data:
        file.write(f"{row}\n")