2 回答

TA貢獻1982條經驗 獲得超2個贊
您可以使用 find_all() 遍歷所有標籤,然後根據標籤名稱進行判斷,再使用 next_element 獲取標籤之後的文字值。
from bs4 import BeautifulSoup

html = '''<b>foo:</b> bar
<br />
<b>baz:</b>
<font color="green">YES</font> spam
<br />
<b>eggs:</b> ham
<br />'''


def _text_after(tag):
    """Return the stripped text node found two ``next_element`` steps on.

    For a tag holding a single text child (``<b>foo:</b> bar``) the first
    ``next_element`` is the tag's own text and the second is the text that
    trails the closing tag.

    Returns:
        The stripped string, or ``None`` when that position is not a text
        node (another tag, or the end of the document).
    """
    first = tag.next_element
    follower = first.next_element if first is not None else None
    # Text nodes are str subclasses; anything else (a Tag, or None at the end
    # of the document) cannot be stripped, so report "no text" instead.
    if isinstance(follower, str):
        return follower.strip()
    return None


soup = BeautifulSoup(html, 'lxml')

# Walk every tag in document order and print the values attached to the
# <b> labels: either the bare text after the label, or the <font> content
# followed by its own trailing text.
for item in soup.find_all():
    if item.name == 'font':
        print(item.text.strip())
        trailing = _text_after(item)
        if trailing is not None:
            print(trailing)
    elif item.name == 'b':
        trailing = _text_after(item)
        # A label followed only by whitespace has its value in a later tag.
        if trailing:
            print(trailing)
輸出:
bar
YES
spam
ham

TA貢獻1829條經驗 獲得超9個贊
我試了一下。希望它有效
# get the html here
# NOTE(review): `content` is not defined in this snippet; it is presumably the
# HTML text to parse and must be supplied before this line runs.
soup = BeautifulSoup(content, 'html.parser')

# For each <b> label, print the label text, then every sibling that follows
# it up to (but not including) the next <b>, then a "new" separator.
for b in soup.find_all('b'):
    print(b.get_text())
    next_b = b.find_next('b')
    for sibling in b.next_siblings:
        # Identity check: tag equality compares markup, so two distinct but
        # identical-looking <b> tags would otherwise be confused.
        if sibling is next_b:
            break
        if isinstance(sibling, str):
            # Text nodes are str subclasses and print as-is.
            print(sibling)
        else:
            print(sibling.get_text())
    print("new")
添加回答
舉報