-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathxpathExample.py
More file actions
26 lines (23 loc) · 1.17 KB
/
xpathExample.py
File metadata and controls
26 lines (23 loc) · 1.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# -*- coding: utf-8 -*-
# author:Gary
# 第一步,导入需要用到的库
import requests
from lxml import etree
res = requests.get('https://www.gary666.com/learn') # 获取网页内容
res.encoding = res.apparent_encoding # 设置编码,防止由于编码问题导致文字错乱
# 利用 etree.HTML 把字符串解析成 HTML 文件
# res.text就是获取把网页内容转为字符串
html = etree.HTML(res.text)
# 获取某个标签的内容
# 绝对路径定位:
title = html.xpath('/html/body/div[2]/div[1]/div[1]/h3/a') # 通过调用xpath这个方法就可以获取到对应路径的内容,返回一个列表,需要通过循环输出
for i in title:
print(i.text) # .text输出文本内容
# 获取某个标签的属性的值
title1 = html.xpath('/html/body/div[2]/div[1]/div[1]/h3/a/text()')[0] # 直接通过text()方法获取文本
print(title1)
title_href = html.xpath('/html/body/div[2]/div[1]/div[1]/h3/a/@href')[0] # 通过/@属性名即可获取对应的属性值
print(title_href)
# 获取指定标签对应属性值的内容
title2 = html_data = html.xpath('/html/body/div[2]/div[1]/div[1]/h3/a[@href="/detail?dbname=study&num=38"]/text()')[0]
print(title2)