convert.py: remove non-determinism.

Introducing non-deterministic code paths in the conversion is a bad idea because we want the new blog to be as close as possible to the old one. If humans verify that the old and new websites are similar, then for that verification to be effective it needs to be done on top of a deterministic process. Signed-off-by: Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org>
author: Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org> 2024-02-28 17:09:10 +0100
committer: Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org> 2025-06-15 01:30:21 +0200
commit: bbea73d1c21e11e35b252edf41c107536be8e98e (patch)
tree: 72e9ac4a2e4b87b0877a4d481177860316b60c66 /convert.py
parent: 048bb1c8ecc552fb4e91d361f108ede65cd7a9b9 (diff)
download: haunt-blog-bbea73d1c21e11e35b252edf41c107536be8e98e.tar.gz
haunt-blog-bbea73d1c21e11e35b252edf41c107536be8e98e.tar.bz2
haunt-blog-bbea73d1c21e11e35b252edf41c107536be8e98e.zip
1 files changed, 4 insertions, 43 deletions
diff --git a/convert.py b/convert.py
index 776fe5b..a2eeb20 100755
--- a/convert.py
+++ b/convert.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # encoding: utf-8
 #
-# Copyright (C) 2020-2022 Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org>
+# Copyright (C) 2020-2024 Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org>
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -19,16 +19,6 @@
 from bs4 import BeautifulSoup
 from html2text import config, HTML2Text
 
-try:
-    # This has been removed in more recent
-    # versions of python-html2text. See commit
-    # b361467894fb277563b4547ec9d4df49f5e0c6e3
-    # (b361467 Remove support for Python ≤ 3.4)
-    # in https://github.com/Alir3z4/html2text.git
-    from html2text.utils import wrapwrite
-except:
-    pass
-
 import os
 import re
 import sh
@@ -103,20 +93,7 @@ def fix_lists(string):
 
 def convert(html_file_path):
     with open(html_file_path) as html_file:
-        try:
-            soup = BeautifulSoup(html_file, features="html5lib").article
-        except:
-            try:
-                # For some reason the lxml parser isn't found with
-                # python-beautifulsoup4 4.9.3-3.0 on Parabola. It's
-                # probably better to use an html5 parser anyway as the
-                # Replicant blog (now?) uses the html doctype and the
-                # theme seems to include an html5.js file for the IE 9
-                # browser.
-                soup = BeautifulSoup(html_file, features="lxml").article
-            except:
-                print("Cannot find html5lib or lxml parsers")
-                sys.exit(1)
+        soup = BeautifulSoup(html_file, features="html5lib").article
 
     # Format the output to be compatible with mail conventions but make sure
     # that the links are not split between two lines
@@ -138,20 +115,7 @@ def convert(html_file_path):
 
 def _get_metadata(html_file_path, func):
     with open(html_file_path) as html_file:
-        try:
-            soup = BeautifulSoup(html_file, features="html5lib")
-        except:
-            try:
-                # For some reason the lxml parser isn't found with
-                # python-beautifulsoup4 4.9.3-3.0 on Parabola. It's
-                # probably better to use an html5 parser anyway as the
-                # Replicant blog (now?) uses the html doctype and the
-                # theme seems to include an html5.js file for the IE 9
-                # browser.
-                soup = BeautifulSoup(html_file, features="lxml").article
-            except:
-                print("Cannot find html5lib or lxml parsers")
-                sys.exit(1)
+        soup = BeautifulSoup(html_file, features="html5lib")
         return func(soup)
 
 def get_metadata(html_file_path):
@@ -217,10 +181,7 @@ def main():
     text = get_metadata(html_file_path)
     text += convert(html_file_path)
 
-    try:
-        wrapwrite(text)
-    except:
-        sys.stdout.write(text)
+    sys.stdout.write(text)
 
 if __name__ == '__main__':
     main()
author	Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org>	2024-02-28 17:09:10 +0100
committer	Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org>	2025-06-15 01:30:21 +0200
commit	bbea73d1c21e11e35b252edf41c107536be8e98e (patch)
tree	72e9ac4a2e4b87b0877a4d481177860316b60c66 /convert.py
parent	048bb1c8ecc552fb4e91d361f108ede65cd7a9b9 (diff)
download	haunt-blog-bbea73d1c21e11e35b252edf41c107536be8e98e.tar.gz haunt-blog-bbea73d1c21e11e35b252edf41c107536be8e98e.tar.bz2 haunt-blog-bbea73d1c21e11e35b252edf41c107536be8e98e.zip