diff options
| -rw-r--r-- | Makefile | 9 |
| -rwxr-xr-x | convert.py | 47 |
2 files changed, 8 insertions, 48 deletions
@@ -15,11 +15,10 @@
 CURL ?= curl
 BLOG_URL ?= https://blog.replicant.us/page
 
-CONVERT ?= guix shell \
-	python python-beautifulsoup4 \
-	python-html2text \
-	python-sh \
-	-- python3 convert.py
+GUIX_COMMIT ?= b25b94335a3ee8d68d2145da8e5ea0325ecea451
+GUIX_SHELL ?= guix time-machine --commit=$(GUIX_COMMIT) -- shell -C
+GUIX_PACKAGES ?= python python-beautifulsoup4 python-html2text python-sh
+CONVERT ?= $(GUIX_SHELL) $(GUIX_PACKAGES) -- python3 convert.py
 
 SENTINEL =
 
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # encoding: utf-8
 #
-# Copyright (C) 2020-2022 Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org>
+# Copyright (C) 2020-2024 Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org>
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -19,16 +19,6 @@
 
 from bs4 import BeautifulSoup
 from html2text import config, HTML2Text
-try:
-    # This has been removed in more recent
-    # versions of python-html2text. See commit
-    # b361467894fb277563b4547ec9d4df49f5e0c6e3
-    # (b361467 Remove support for Python ≤ 3.4)
-    # in https://github.com/Alir3z4/html2text.git
-    from html2text.utils import wrapwrite
-except:
-    pass
-
 import os
 import re
 import sh
@@ -103,20 +93,7 @@ def fix_lists(string):
 
 def convert(html_file_path):
     with open(html_file_path) as html_file:
-        try:
-            soup = BeautifulSoup(html_file, features="html5lib").article
-        except:
-            try:
-                # For some reason the lxml parser isn't found with
-                # python-beautifulsoup4 4.9.3-3.0 on Parabola. It's
-                # probably better to use an html5 parser anyway as the
-                # Replicant blog (now?) uses the html doctype and the
-                # theme seems to include an html5.js file for the IE 9
-                # browser.
-                soup = BeautifulSoup(html_file, features="lxml").article
-            except:
-                print("Cannot find html5lib or lxml parsers")
-                sys.exit(1)
+        soup = BeautifulSoup(html_file, features="html5lib").article
 
     # Format the output to be compatible with mail conventions but make sure
     # that the links are not split between two lines
@@ -138,20 +115,7 @@ def convert(html_file_path):
 
 def _get_metadata(html_file_path, func):
     with open(html_file_path) as html_file:
-        try:
-            soup = BeautifulSoup(html_file, features="html5lib")
-        except:
-            try:
-                # For some reason the lxml parser isn't found with
-                # python-beautifulsoup4 4.9.3-3.0 on Parabola. It's
-                # probably better to use an html5 parser anyway as the
-                # Replicant blog (now?) uses the html doctype and the
-                # theme seems to include an html5.js file for the IE 9
-                # browser.
-                soup = BeautifulSoup(html_file, features="lxml").article
-            except:
-                print("Cannot find html5lib or lxml parsers")
-                sys.exit(1)
+        soup = BeautifulSoup(html_file, features="html5lib")
         return func(soup)
 
 def get_metadata(html_file_path):
@@ -217,10 +181,7 @@ def main():
     text = get_metadata(html_file_path)
     text += convert(html_file_path)
 
-    try:
-        wrapwrite(text)
-    except:
-        sys.stdout.write(text)
+    sys.stdout.write(text)
 
 if __name__ == '__main__':
     main()
