The document describes a Perl script that saves entire web pages and their assets like images by parsing HTML and CSS. It handles relative links, saves external files locally, and removes scripts for security. The script is available on GitHub for others to use.
# Route each "<label>.dev.<rest>" host to its own backend listening on the
# UNIX-domain socket /tmp/<label>.dev.sock; every other host gets a 404.
location / {
    # Pass the client-supplied Host header through to the backend.
    proxy_set_header Host $http_host;

    # Declare the variable up front: ngx.var can only assign to nginx
    # variables that already exist in the configuration.
    set $upstream_socket "";

    rewrite_by_lua '
        -- Capture the leading "<label>.dev" of the host. The trailing "[.]"
        -- requires at least one more domain label after ".dev".
        local m = ngx.re.match(
            ngx.var.host, "^([^.]+[.]dev)[.]"
        )
        if not m then
            -- No match (or a regex error): there is no backend for this host.
            return ngx.exit(ngx.HTTP_NOT_FOUND)
        end
        ngx.var.upstream_socket = "/tmp/" .. m[1] .. ".sock"
    ';

    # Proxy over plain HTTP to the UNIX-domain socket selected above.
    proxy_pass http://unix:$upstream_socket;
}