-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsetup.py
More file actions
152 lines (118 loc) · 4.53 KB
/
setup.py
File metadata and controls
152 lines (118 loc) · 4.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/env python3
"""
Target Scrapy Scraper Setup Script
==================================
Quick setup and validation script for the Target scraping project.
This script helps ensure your environment is properly configured.
Usage:
python setup.py
Requirements:
- Python 3.7+
- ScrapeOps API key (free from https://scrapeops.io/app/register/main)
"""
import os
import sys
import subprocess
import importlib.util
def check_python_version():
    """Report the running interpreter version and whether it is supported.

    Prints a one-line status message to stdout.

    Returns:
        bool: True when ``sys.version_info`` is at least (3, 7), else False.
    """
    minimum_version = (3, 7)

    if sys.version_info >= minimum_version:
        major, minor, micro = sys.version_info[:3]
        print(f"✅ Python {major}.{minor}.{micro}")
        return True

    # Too old: show the full interpreter version string for diagnosis.
    print("❌ Python 3.7+ required. Current version:", sys.version)
    return False
def check_requirements():
    """Check which of the project's required packages can be imported.

    Each package is looked up with ``importlib.util.find_spec`` (nothing is
    actually imported) and a status line is printed for it.

    Returns:
        list: Display names of the packages that were not found, in the
        order they were checked. Empty when everything is installed.
    """
    # (importable module name, human-readable display name)
    wanted = (
        ('scrapy', 'Scrapy'),
        ('scrapeops_scrapy', 'ScrapeOps Scrapy'),
        ('scrapeops_scrapy_proxy_sdk', 'ScrapeOps Proxy SDK'),
    )

    absent = []
    for module_name, label in wanted:
        if importlib.util.find_spec(module_name) is not None:
            print(f"✅ {label} installed")
            continue
        absent.append(label)
        print(f"❌ {label} not installed")
    return absent
def check_settings():
    """Check whether the ScrapeOps API key placeholder has been replaced.

    Reads ``target_scraper/settings.py`` (relative to the current working
    directory) and looks for the literal placeholder
    ``YOUR_SCRAPEOPS_API_KEY_HERE``. A status message is printed either way.

    Returns:
        bool: True when the settings file was read and no longer contains
        the placeholder; False when the file is missing, unreadable, or
        still contains the placeholder.
    """
    settings_path = os.path.join('target_scraper', 'settings.py')

    try:
        # Python source files are UTF-8 by default; do not rely on the
        # platform locale encoding, which may fail to decode the file.
        with open(settings_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (FileNotFoundError, NotADirectoryError):
        print("❌ Settings file not found")
        return False
    except (OSError, UnicodeDecodeError) as exc:
        # e.g. the path is a directory, permission denied, or bad bytes.
        print(f"❌ Could not read settings file: {exc}")
        return False

    if 'YOUR_SCRAPEOPS_API_KEY_HERE' in content:
        print("⚠️ ScrapeOps API key not configured")
        print(" Please update target_scraper/settings.py with your API key")
        print(" Get free key: https://scrapeops.io/app/register/main")
        return False

    print("✅ ScrapeOps API key configured")
    return True
def install_requirements():
    """Install the packages listed in ``requirements.txt`` with pip.

    pip is run as ``<current interpreter> -m pip`` so the packages land in
    the environment this script is running in.

    Returns:
        bool: True when pip exits successfully, False when it exits with a
        non-zero status.
    """
    print("\n📦 Installing requirements...")
    pip_command = [sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt']

    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError:
        print("❌ Failed to install requirements")
        return False

    print("✅ Requirements installed successfully")
    return True
def test_spiders():
    """Check that ``scrapy list`` runs and report the spiders it prints.

    Runs ``<current interpreter> -m scrapy list`` in the current working
    directory, capturing its output, and prints one line per spider name.

    Returns:
        bool: True when the command exits with status 0 (even if it lists
        no spiders); False when it exits non-zero or cannot be started.
    """
    print("\n🕷️ Testing spiders...")
    try:
        result = subprocess.run(
            [sys.executable, '-m', 'scrapy', 'list'],
            capture_output=True,
            text=True,
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # The process could not be launched or its output not decoded.
        print(f"❌ Error testing spiders: {e}")
        return False

    if result.returncode != 0:
        print("❌ Failed to list spiders")
        # Surface the last stderr line so the cause is not silently lost.
        error_lines = result.stderr.strip().splitlines()
        if error_lines:
            print(f" {error_lines[-1]}")
        return False

    # splitlines() plus a blank filter: empty output must yield zero
    # spiders, not a single empty name as ''.split('\n') would.
    spiders = [line.strip() for line in result.stdout.splitlines() if line.strip()]
    if not spiders:
        print("⚠️ No spiders found")
        return True

    print(f"✅ Found {len(spiders)} spiders:")
    for spider in spiders:
        print(f" - {spider}")
    return True
def main():
    """Run every setup check in order and print a summary.

    Steps: verify the Python version, check for required packages (offering
    to install them from ``requirements.txt`` when any are missing), check
    the settings file for the API key placeholder, and try to list spiders.

    Exits with status 1 when the Python version is too old, when missing
    packages are not installed (declined or install failed). Otherwise
    returns normally after printing either the quick-start commands or the
    remaining steps.
    """
    print("🎯 Target Scrapy Scraper Setup")
    print("=" * 40)

    # Check Python version
    if not check_python_version():
        sys.exit(1)

    print("\n📋 Checking requirements...")
    missing_packages = check_requirements()
    if missing_packages:
        print(f"\n⚠️ Missing packages: {', '.join(missing_packages)}")
        try:
            response = input("Install missing packages? (y/n): ").strip().lower()
        except EOFError:
            # Non-interactive stdin (CI, piped input): treat as "no".
            print()
            response = ''
        if response not in ('y', 'yes'):
            print("❌ Setup incomplete - missing packages")
            sys.exit(1)
        if not install_requirements():
            sys.exit(1)

    # Check settings
    print("\n⚙️ Checking configuration...")
    settings_ok = check_settings()

    # Test spiders; keep the result so the summary does not claim success
    # when listing failed.
    spiders_ok = test_spiders()

    print("\n" + "=" * 40)
    if settings_ok and spiders_ok:
        print("🎉 Setup complete! Ready to scrape.")
        print("\nQuick start commands:")
        print(" scrapy crawl target_search -a search_query='laptop' -s CLOSESPIDER_ITEMCOUNT=5")
        print(" scrapy crawl target_categories -s CLOSESPIDER_ITEMCOUNT=3")
        print(" scrapy crawl target_deals -s CLOSESPIDER_ITEMCOUNT=10")
        return

    if not settings_ok:
        print("⚠️ Setup complete but needs configuration:")
        print(" 1. Get API key: https://scrapeops.io/app/register/main")
        print(" 2. Update SCRAPEOPS_API_KEY in target_scraper/settings.py")
        print(" 3. Run: python setup.py")

    if not spiders_ok:
        print("⚠️ Spiders could not be listed - see the spider test output above")
# Run the setup checks only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()