通过 bs4 和 re 格式化 HTML 代码,并且可以控制递归的深度。
所有属性(attrs)都放在同一行中,避免排版杂乱。
import re
from bs4 import BeautifulSoup, Tag, NavigableString
# Matches any run of whitespace so it can be collapsed into a single space.
sub_place_pattern = re.compile(r'\s+')

# When true, a tag that contains no child tags is kept on one line.
KEEP_LAST_TAG_ONE_LINE = True

# Indentation unit emitted once per nesting level (two spaces).
INDENT_SPACE = 2 * " "
# Characters that must be escaped inside a double-quoted attribute value.
# The parser hands back entity-decoded values, so they are re-encoded here.
_ATTR_ESCAPES = str.maketrans({
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
})


def flatten_attributes(tag: Tag) -> str:
    """Build the opening tag of *tag* with every attribute on one line.

    Each attribute value has its whitespace runs collapsed to a single
    space and its leading/trailing whitespace removed.  Values are always
    emitted inside double quotes, so ``&``, ``<``, ``>`` and ``"`` are
    escaped to keep the generated markup well-formed.

    Args:
        tag: The element whose ``name`` and ``attrs`` are rendered.

    Returns:
        The opening tag, e.g. ``<div id="main" class="a b">``, or just
        ``<div>`` when the element has no attributes.
    """
    attrs = []
    for attr, val in tag.attrs.items():
        # Multi-valued attributes (such as ``class``) are stored as lists.
        if isinstance(val, list):
            val = ' '.join(str(item) for item in val)
        else:
            val = str(val)
        val = sub_place_pattern.sub(' ', val).strip()
        attrs.append(f'{attr}="{val.translate(_ATTR_ESCAPES)}"')

    # Avoid the stray space of "<div >" when there is nothing to list.
    if not attrs:
        return f"<{tag.name}>"
    return f"<{tag.name} {' '.join(attrs)}>"
def format_tag(tag: Tag, current_depth: int, max_depth: int, indent: str = INDENT_SPACE) -> str:
    """Render *tag* as indented HTML, expanding children down to *max_depth*.

    Rules applied, in order:

    * A non-``Tag`` node is returned as one whitespace-collapsed line.
    * An element without contents is rendered by BeautifulSoup itself, so
      void elements never receive a separate closing tag.
    * When ``KEEP_LAST_TAG_ONE_LINE`` is true, an element that contains no
      child tags is likewise rendered on a single line.
    * Once ``current_depth`` reaches ``max_depth`` the element and all of
      its contents are squeezed onto one line.
    * Otherwise every child gets its own line, indented one level deeper.

    String children are emitted through ``output_ready()`` so that
    comments and doctypes keep their delimiters and ``&``, ``<``, ``>``
    in text are re-escaped instead of being written out raw.

    Args:
        tag: The node to render.
        current_depth: Nesting level of *tag*; the caller starts at 0.
        max_depth: Level at which children stop being put on own lines.
        indent: Indentation unit repeated once per nesting level.

    Returns:
        The formatted markup.  The first line carries no indentation; the
        caller is expected to prefix it.
    """
    if not isinstance(tag, Tag):
        raw = tag.output_ready() if isinstance(tag, NavigableString) else str(tag)
        return sub_place_pattern.sub(' ', raw).strip()

    # Empty elements and (optionally) elements holding only strings are
    # delegated to BeautifulSoup's own serializer and kept on one line.
    has_child_tag = any(isinstance(child, Tag) for child in tag.contents)
    if not tag.contents or (KEEP_LAST_TAG_ONE_LINE and not has_child_tag):
        return sub_place_pattern.sub(' ', str(tag)).strip()

    start_tag = flatten_attributes(tag)
    end_tag = f"</{tag.name}>"

    if current_depth >= max_depth:
        # Depth budget exhausted: flatten the whole subtree into one line.
        inner = " ".join(
            c.output_ready().strip() if isinstance(c, NavigableString) else str(c)
            for c in tag.contents
        )
        return f"{start_tag} {sub_place_pattern.sub(' ', inner)} {end_tag}".strip()

    child_indent = indent * (current_depth + 1)
    child_lines = []
    for child in tag.contents:
        if isinstance(child, NavigableString):
            # Collapse internal newlines so one text node stays on one line.
            text = sub_place_pattern.sub(' ', child.output_ready()).strip()
            if text:
                child_lines.append(child_indent + text)
        elif isinstance(child, Tag):
            child_lines.append(
                child_indent + format_tag(child, current_depth + 1, max_depth, indent)
            )

    return "\n".join([start_tag, *child_lines, indent * current_depth + end_tag])
def format_html_single_line_blocks(html_str: str, max_depth: int = 1) -> str:
    """Format an HTML string so that each tag's attributes sit on one line.

    The markup is parsed with ``html.parser`` and every top-level node is
    rendered: elements through ``format_tag`` and non-blank top-level
    strings (doctype, comments, stray text) as one collapsed line each, so
    they are no longer dropped from the result.  Afterwards, spaces
    directly inside tag brackets and between adjacent tags are removed.

    Args:
        html_str: The HTML source to format.
        max_depth: Nesting level down to which children are placed on
            their own lines; deeper content is kept on a single line.

    Returns:
        The formatted HTML, one top-level node per block, joined by
        newlines.
    """
    soup = BeautifulSoup(html_str, "html.parser")

    blocks = []
    for child in soup.contents:
        if isinstance(child, Tag):
            blocks.append(format_tag(child, 0, max_depth))
        elif isinstance(child, NavigableString):
            # output_ready() restores doctype/comment delimiters and
            # re-escapes entities; blank separators between nodes are skipped.
            text = sub_place_pattern.sub(' ', child.output_ready()).strip()
            if text:
                blocks.append(text)
    htmls = "\n".join(blocks)

    # Remove spaces just inside the angle brackets of a tag.
    htmls = re.sub(r' +>', ">", htmls)
    htmls = re.sub(r'< +', "<", htmls)
    # Remove spaces where neighbouring tags touch.  The look-behind keeps
    # line indentation intact, since it is not preceded by a non-space.
    htmls = re.sub(r'> +', ">", htmls)
    htmls = re.sub(r'(?<=\S) +<', "<", htmls)
    return htmls
测试
# Sample document for the demo: nested elements with multi-line attribute
# values, text mixed with inline tags, self-closing img/input tags and a
# nested list.
html = '''
<section id="main" data-role="page"
class="container-fluid"
style="
background: #f0f0f0;
padding: 20px;
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
">
<header class="header-area"
style="
text-align: center;
border-bottom: 2px solid #ccc;
margin-bottom: 15px;
">
<h1 style="font-size: 32px; color: #336699;">🔥 多语言支持 & Complex 标题 💡</h1>
<p style="
font-size: 14px;
color: gray;
margin-top: -10px;
">Welcome to the <strong>HTML Formatter</strong> demo!</p>
</header>
<div class="row content-area" style="display: flex; flex-direction: row;">
<div class="col left" style="width: 50%; padding: 10px;">
<p data-id="left-paragraph" style="line-height: 1.5em; color: #444;">
This is a <em>sample</em> paragraph with <a href="https://example.com" target="_blank">a link</a>
and a <span style="color: red;">red text</span> inside.
</p>
<ul class="list-group"
style="
list-style: none;
padding-left: 0;
">
<li class="list-item">Item 1</li>
<li class="list-item">Item 2 with <strong>bold</strong> text</li>
<li class="list-item">
Item 3 with nested list:
<ul>
<li>Subitem A</li>
<li>Subitem B</li>
</ul>
</li>
</ul>
</div>
<div class="col right" style="width: 50%; padding: 10px;">
<img src="image.jpg" alt="Example Image"
style="
width: 100%;
border-radius: 8px;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
" />
<form method="POST" action="/submit" style="margin-top: 20px;">
<input type="text" name="username" placeholder="Enter username"
style="width: 100%; padding: 8px;" />
<input type="submit" value="Submit" style="margin-top: 10px;" />
</form>
</div>
</div>
<footer style="
margin-top: 40px;
text-align: center;
font-size: 12px;
color: #999;
">
© 2025 Web Formatter Inc. All rights reserved.
</footer>
</section>
'''
# NOTE(review): the printed labels say "depth = 1" and "depth = 2", but the
# calls pass max_depth=0 and max_depth=1000 respectively — confirm which
# values the labels are meant to describe.
# First run: max_depth=0, so the whole document is flattened onto one line.
print("---- depth = 1 ----")
print(format_html_single_line_blocks(html, max_depth=0))
# Second run: max_depth=1000, effectively unlimited for this sample.
print("---- depth = 2 ----")
print(format_html_single_line_blocks(html, max_depth=1000))
---- depth = 1 ----
<section id="main" data-role="page" class="container-fluid" style="background: #f0f0f0; padding: 20px; font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;"><header class="header-area" style=" text-align: center; border-bottom: 2px solid #ccc; margin-bottom: 15px; "><h1 style="font-size: 32px; color: #336699;">🔥 多语言支持 & Complex 标题 💡</h1><p style=" font-size: 14px; color: gray; margin-top: -10px; ">Welcome to the<strong>HTML Formatter</strong>demo!</p></header><div class="row content-area" style="display: flex; flex-direction: row;"><div class="col left" style="width: 50%; padding: 10px;"><p data-id="left-paragraph" style="line-height: 1.5em; color: #444;">This is a<em>sample</em>paragraph with<a href="https://example.com" target="_blank">a link</a>and a<span style="color: red;">red text</span>inside.</p><ul class="list-group" style=" list-style: none; padding-left: 0; "><li class="list-item">Item 1</li><li class="list-item">Item 2 with<strong>bold</strong>text</li><li class="list-item">Item 3 with nested list:<ul><li>Subitem A</li><li>Subitem B</li></ul></li></ul></div><div class="col right" style="width: 50%; padding: 10px;"><img alt="Example Image" src="image.jpg" style=" width: 100%; border-radius: 8px; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); "/><form action="/submit" method="POST" style="margin-top: 20px;"><input name="username" placeholder="Enter username" style="width: 100%; padding: 8px;" type="text"/><input style="margin-top: 10px;" type="submit" value="Submit"/></form></div></div><footer style=" margin-top: 40px; text-align: center; font-size: 12px; color: #999; ">© 2025 Web Formatter Inc. All rights reserved.</footer></section>
---- depth = 2 ----
<section id="main" data-role="page" class="container-fluid" style="background: #f0f0f0; padding: 20px; font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;">
<header class="header-area" style="text-align: center; border-bottom: 2px solid #ccc; margin-bottom: 15px;">
<h1 style="font-size: 32px; color: #336699;">🔥 多语言支持 & Complex 标题 💡</h1>
<p style="font-size: 14px; color: gray; margin-top: -10px;">
Welcome to the
<strong>HTML Formatter</strong>
demo!
</p>
</header>
<div class="row content-area" style="display: flex; flex-direction: row;">
<div class="col left" style="width: 50%; padding: 10px;">
<p data-id="left-paragraph" style="line-height: 1.5em; color: #444;">
This is a
<em>sample</em>
paragraph with
<a href="https://example.com" target="_blank">a link</a>
and a
<span style="color: red;">red text</span>
inside.
</p>
<ul class="list-group" style="list-style: none; padding-left: 0;">
<li class="list-item">Item 1</li>
<li class="list-item">
Item 2 with
<strong>bold</strong>
text
</li>
<li class="list-item">
Item 3 with nested list:
<ul>
<li>Subitem A</li>
<li>Subitem B</li>
</ul>
</li>
</ul>
</div>
<div class="col right" style="width: 50%; padding: 10px;">
<img alt="Example Image" src="image.jpg" style=" width: 100%; border-radius: 8px; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); "/>
<form method="POST" action="/submit" style="margin-top: 20px;">
<input name="username" placeholder="Enter username" style="width: 100%; padding: 8px;" type="text"/>
<input style="margin-top: 10px;" type="submit" value="Submit"/>
</form>
</div>
</div>
<footer style=" margin-top: 40px; text-align: center; font-size: 12px; color: #999; ">© 2025 Web Formatter Inc. All rights reserved.</footer>
</section>