-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_me.py
More file actions
98 lines (88 loc) · 3.97 KB
/
run_me.py
File metadata and controls
98 lines (88 loc) · 3.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import getpass
from bs4 import BeautifulSoup
import requests
import os
import re
from mymodules.extract_profiles import Extract
import json
import sys
class Linkedin_Scraper():
    """Drive a headless PhantomJS session to scrape one LinkedIn profile.

    The scraper logs in, loads a profile page, saves a screenshot of it,
    hands the parsed HTML to ``Extract`` and stores the collected sections in
    ``complete_detail_of_user`` so that ``create_json`` can write them out.
    """

    # BeautifulSoup tree of the most recently loaded profile page.
    source_code = None
    # Selenium driver; None before it is opened and after it is shut down.
    browser = None
    # Identifier derived from the profile URL; used to name the output files.
    user_id = None
    # Default PhantomJS executable, relative to the working directory.  Built
    # with os.path.join so the separator is correct on every platform.
    phantom_path = os.path.join('phantom_js', 'bin', 'phantomjs')
    # Read-only empty default.  It is never mutated: __init__ and
    # extract_user_data always bind a fresh dict on the instance, so results
    # are not shared between scraper objects.
    complete_detail_of_user = {}
    # URL the login check treats as the sign of rejected credentials.
    failed_url = 'https://www.linkedin.com/uas/login-submit'
    # Output folders, relative to the working directory.
    screenshot_dir = 'screen_shots'
    json_dir = 'json'

    def __init__(self, phantom_path=None):
        """Start PhantomJS and load the LinkedIn home page.

        Args:
            phantom_path: Optional path to the PhantomJS executable.  When
                omitted, the class-level ``phantom_path`` default is used.
        """
        if phantom_path is not None:
            self.phantom_path = phantom_path
        self.complete_detail_of_user = {}
        print('Opening browser in background, Wait for few seconds... ')
        self.browser = webdriver.PhantomJS(self.phantom_path)  # or add to your PATH
        try:
            self.browser.set_window_size(1024, 768)
            self.browser.get('https://linkedin.com/')
        except Exception:
            # Do not leave an orphaned PhantomJS process behind when the
            # first page load fails.
            self.browser.quit()
            self.browser = None
            raise
        # Fixed pause before the login form is used.
        time.sleep(5)

    def close_phantom(self):
        """Shut down the browser session.

        Safe to call more than once: when no browser is open the call does
        nothing.
        """
        if self.browser is None:
            return
        # quit() also ends the driver process; close() only closes the
        # current window and can leave phantomjs running in the background.
        self.browser.quit()
        self.browser = None
        print('Successfully Ended.')

    def get_authenticated(self, email, password):
        """Log in, then prompt for a profile URL and scrape it.

        Args:
            email: Value typed into the ``session_key`` field of the form.
            password: Value typed into the ``session_password`` field.

        Raises:
            SystemExit: If the browser is on ``failed_url`` after the login
                form has been submitted.  The browser is shut down first.
        """
        username_input = self.browser.find_element_by_name("session_key")
        password_input = self.browser.find_element_by_name("session_password")
        login_attempt = self.browser.find_element_by_name("submit")
        username_input.send_keys(email)
        password_input.send_keys(password)
        login_attempt.submit()
        print('Wait, Logging In...')
        time.sleep(3)
        current = self.browser.current_url
        # Prefix match, so a query string appended to the failure URL does
        # not make a rejected login look like a success.
        if current.startswith(self.failed_url):
            print('Failed Loging.')
            # Release the browser before exiting; sys.exit() alone would
            # leave the PhantomJS process running.
            self.browser.quit()
            self.browser = None
            sys.exit("Re-run program, and provide correct credentials. Thank You.!")
        print('Successfully Loged In.')
        url = input('Enter users url, to be scraped : ')
        self.extract_user_data(user_url=url)

    @staticmethod
    def _user_id_from_url(user_url):
        """Return a file-name-safe identifier taken from a profile URL.

        The fifth ``/``-separated piece of the URL is used (for
        ``https://host/in/name`` that is ``name``).  When the URL is too
        short for that, the last non-empty piece is used instead.  Every
        character other than letters, digits, ``_``, ``.`` and ``-`` is
        replaced by ``_`` so the result can be used in a file name.

        Raises:
            IndexError: If the URL contains no usable piece at all (the
                exception type is kept from the original index lookup).
        """
        segments = user_url.split("/")
        if len(segments) > 4 and segments[4]:
            raw_id = segments[4]
        else:
            non_empty = [part for part in segments if part]
            if not non_empty:
                raise IndexError(
                    'Cannot derive a user id from url: %r' % (user_url,))
            raw_id = non_empty[-1]
        return re.sub(r"[^\w.\-]", "_", raw_id)

    def extract_user_data(self, user_url):
        """Load a profile page, screenshot it and collect its sections.

        Results are stored in ``self.complete_detail_of_user``; the parsed
        page is kept in ``self.source_code`` and the derived identifier in
        ``self.user_id``.

        Args:
            user_url: URL of the profile page to scrape.
        """
        self.browser.get(user_url)
        self.user_id = self._user_id_from_url(user_url)

        # The output folder may not exist yet; save_screenshot() reports a
        # failed write only through its return value.
        os.makedirs(self.screenshot_dir, exist_ok=True)
        screenshot_path = os.path.join(self.screenshot_dir, self.user_id + '.png')
        screenshot_saved = self.browser.save_screenshot(screenshot_path)

        self.source_code = BeautifulSoup(self.browser.page_source, "lxml")
        if screenshot_saved:
            print('Screen Shot Taken.')
        else:
            print('Screen Shot could not be saved.')

        extractor = Extract(self.source_code)
        # Fresh dict per scrape, bound on the instance, so data from another
        # scraper object or an earlier profile can never leak in.
        details = self.complete_detail_of_user = {}
        details['Basic Information'] = extractor.basic_data_extraction()

        skills_section = self.source_code.find('div', {"id": "profile-skills"})
        education_section = self.source_code.find('div', {"id": "background-education"})
        # NOTE(review): this section is located by class while the two above
        # are located by id -- confirm against the page markup that "class"
        # is intended here.
        experience_section = self.source_code.find('div', {"class": "background-experience"})

        if skills_section is not None:
            details['Skills'] = extractor.profile_skills()
        else:
            details['Skills'] = 'Skills Not Specified by user.'

        if experience_section is not None:
            details['Experience'] = extractor.get_experience()
        else:
            details['Experience'] = 'Experience Not Specified by user.'

        details['A Profile Summary '] = extractor.get_summary()

        if education_section is not None:
            details['Education Detail'] = extractor.get_education()
        else:
            details['Education Detail'] = 'Education Detail, is Not Specified by user.'

    def create_json(self):
        """Print the collected profile data and write it to a JSON file.

        The file is written to ``<json_dir>/<user_id>.json``; the folder is
        created when it does not exist.

        Raises:
            TypeError: If no profile has been scraped yet, i.e.
                ``self.user_id`` is still ``None``.
        """
        pretty_complete_user_data = json.dumps(
            self.complete_detail_of_user, sort_keys=True, indent=6)
        print(pretty_complete_user_data)
        json_path = os.path.join(self.json_dir, self.user_id + '.json')
        os.makedirs(self.json_dir, exist_ok=True)
        with open(json_path, 'w') as fp:
            json.dump(self.complete_detail_of_user, fp, sort_keys=True, indent=4)
        print('\n\n> Json file of the whole profile, is stored in json folder.\n> Screen Shot of profile is stored in Screen Shot folder.')
def main():
    """Run one interactive scrape: open the browser, log in, scrape, save.

    Prompts for the LinkedIn e-mail and password on the console.  The
    browser is closed on every exit path, including a failed login or an
    error raised while scraping or writing the JSON file.
    """
    scraper = Linkedin_Scraper()
    try:
        email = input('Enter Your Linkedin Email : ')
        password = getpass.getpass('Enter Your Linkedin Password(Carefully) : ')
        scraper.get_authenticated(email=email, password=password)
        scraper.create_json()
    finally:
        # Always release the browser, even when an exception (or the
        # SystemExit raised on a failed login) is propagating.
        scraper.close_phantom()
# Start the interactive scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()