about summary refs log tree commit diff stats
path: root/convert.py
diff options
context:
space:
mode:
Diffstat (limited to 'convert.py')
-rwxr-xr-x convert.py 47
1 files changed, 4 insertions, 43 deletions
diff --git a/convert.py b/convert.py
index 776fe5b..a2eeb20 100755
--- a/convert.py
+++ b/convert.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# encoding: utf-8
#
-# Copyright (C) 2020-2022 Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org>
+# Copyright (C) 2020-2024 Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -19,16 +19,6 @@
from bs4 import BeautifulSoup
from html2text import config, HTML2Text
-try:
- # This has been removed in more recent
- # versions of python-html2text. See commit
- # b361467894fb277563b4547ec9d4df49f5e0c6e3
- # (b361467 Remove support for Python ≤ 3.4)
- # in https://github.com/Alir3z4/html2text.git
- from html2text.utils import wrapwrite
-except:
- pass
-
import os
import re
import sh
@@ -103,20 +93,7 @@ def fix_lists(string):
def convert(html_file_path):
with open(html_file_path) as html_file:
- try:
- soup = BeautifulSoup(html_file, features="html5lib").article
- except:
- try:
- # For some reason the lxml parser isn't found with
- # python-beautifulsoup4 4.9.3-3.0 on Parabola. It's
- # probably better to use an html5 parser anyway as the
- # Replicant blog (now?) uses the html doctype and the
- # theme seems to include an html5.js file for the IE 9
- # browser.
- soup = BeautifulSoup(html_file, features="lxml").article
- except:
- print("Cannot find html5lib or lxml parsers")
- sys.exit(1)
+ soup = BeautifulSoup(html_file, features="html5lib").article
# Format the output to be compatible with mail conventions but make sure
# that the links are not split between two lines
@@ -138,20 +115,7 @@ def convert(html_file_path):
def _get_metadata(html_file_path, func):
with open(html_file_path) as html_file:
- try:
- soup = BeautifulSoup(html_file, features="html5lib")
- except:
- try:
- # For some reason the lxml parser isn't found with
- # python-beautifulsoup4 4.9.3-3.0 on Parabola. It's
- # probably better to use an html5 parser anyway as the
- # Replicant blog (now?) uses the html doctype and the
- # theme seems to include an html5.js file for the IE 9
- # browser.
- soup = BeautifulSoup(html_file, features="lxml").article
- except:
- print("Cannot find html5lib or lxml parsers")
- sys.exit(1)
+ soup = BeautifulSoup(html_file, features="html5lib")
return func(soup)
def get_metadata(html_file_path):
@@ -217,10 +181,7 @@ def main():
text = get_metadata(html_file_path)
text += convert(html_file_path)
- try:
- wrapwrite(text)
- except:
- sys.stdout.write(text)
+ sys.stdout.write(text)
if __name__ == '__main__':
main()