diff options
Diffstat (limited to 'convert.py')
| -rwxr-xr-x | convert.py | 47 |
1 files changed, 4 insertions, 43 deletions
@@ -1,7 +1,7 @@ #!/usr/bin/env python3 # encoding: utf-8 # -# Copyright (C) 2020-2022 Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org> +# Copyright (C) 2020-2024 Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,16 +19,6 @@ from bs4 import BeautifulSoup from html2text import config, HTML2Text -try: - # This has been removed in more recent - # versions of python-html2text. See commit - # b361467894fb277563b4547ec9d4df49f5e0c6e3 - # (b361467 Remove support for Python ≤ 3.4) - # in https://github.com/Alir3z4/html2text.git - from html2text.utils import wrapwrite -except: - pass - import os import re import sh @@ -103,20 +93,7 @@ def fix_lists(string): def convert(html_file_path): with open(html_file_path) as html_file: - try: - soup = BeautifulSoup(html_file, features="html5lib").article - except: - try: - # For some reason the lxml parser isn't found with - # python-beautifulsoup4 4.9.3-3.0 on Parabola. It's - # probably better to use an html5 parser anyway as the - # Replicant blog (now?) uses the html doctype and the - # theme seems to include an html5.js file for the IE 9 - # browser. - soup = BeautifulSoup(html_file, features="lxml").article - except: - print("Cannot find html5lib or lxml parsers") - sys.exit(1) + soup = BeautifulSoup(html_file, features="html5lib").article # Format the output to be compatible with mail conventions but make sure # that the links are not split between two lines @@ -138,20 +115,7 @@ def convert(html_file_path): def _get_metadata(html_file_path, func): with open(html_file_path) as html_file: - try: - soup = BeautifulSoup(html_file, features="html5lib") - except: - try: - # For some reason the lxml parser isn't found with - # python-beautifulsoup4 4.9.3-3.0 on Parabola. It's - # probably better to use an html5 parser anyway as the - # Replicant blog (now?) uses the html doctype and the - # theme seems to include an html5.js file for the IE 9 - # browser. - soup = BeautifulSoup(html_file, features="lxml").article - except: - print("Cannot find html5lib or lxml parsers") - sys.exit(1) + soup = BeautifulSoup(html_file, features="html5lib") return func(soup) def get_metadata(html_file_path): @@ -217,10 +181,7 @@ def main(): text = get_metadata(html_file_path) text += convert(html_file_path) - try: - wrapwrite(text) - except: - sys.stdout.write(text) + sys.stdout.write(text) if __name__ == '__main__': main() |
