Skip to content

Commit bf2ec20

Browse files
committed Feb 4, 2025
ebook: Check html syntax
1 parent 529e9a8 commit bf2ec20

File tree

6 files changed

+58
-17
lines changed

6 files changed

+58
-17
lines changed
 

‎.github/workflows/check-and-ebook.yml

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,20 @@ jobs:
4343
path: hash-chapters.txt
4444
key: chapter-hash-for-ebook-${{ github.ref_name }}-${{ steps.calculate-hash.outputs.hash }}
4545

46+
- name: Preparations
47+
run: ln -s python-requirements.txt requirements.txt
48+
4649
- name: Python set up
4750
uses: actions/setup-python@v5
4851
with:
4952
python-version: "3.12"
50-
# no packages besides pytest installed, so no caching needed
51-
# cache: "pip"
53+
cache: "pip"
54+
55+
- name: Python packages
56+
run: pip install -r python-requirements.txt
5257

5358
- name: Run pytest unittests
54-
run: |
55-
pip install pytest
56-
pytest
59+
run: pytest
5760

5861
- name: Check chapters for known issues
5962
run: python3 -O scripts/check_chapters.py
@@ -78,12 +81,17 @@ jobs:
7881
persist-credentials: false
7982
fetch-depth: 1 # 0 if you want to push to repo
8083

84+
- name: Preparations
85+
run: ln -s python-requirements.txt requirements.txt
86+
8187
- name: Python set up
8288
uses: actions/setup-python@v5
8389
with:
8490
python-version: "3.12"
85-
# no packages installed, so no caching
86-
# cache: "pip"
91+
cache: "pip"
92+
93+
- name: Install Python packages
94+
run: pip install -r python-requirements.txt
8795

8896
# - name: setup environment to DE lang
8997
# run: |

‎Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ ENV TZ=Europe/Berlin
99
# prevent keyboard input requests in apt install
1010
ENV DEBIAN_FRONTEND=noninteractive
1111

12+
# install packages and cleanup afterwards
1213
RUN apt-get update && apt-get dist-upgrade -y && \
13-
apt-get install -y python3 git texlive-xetex texlive-lang-greek texlive-lang-german latexmk texlive-extra-utils pandoc calibre imagemagick ghostscript && \
14+
apt-get install -y python3 python3-lxml git texlive-xetex texlive-lang-greek texlive-lang-german latexmk texlive-extra-utils pandoc calibre imagemagick ghostscript && \
1415
apt-get clean autoclean && apt-get autoremove --yes && rm -rf /var/lib/{apt,dpkg,cache,log}/
1516

1617
# set working directory

‎python-requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lxml
2+
pytest

‎scripts/ebook/step_6.py

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import sys
1111
from pathlib import Path
1212

13+
from lxml import etree # pip install lxml
14+
1315
sys.path.append(str(Path(__file__).resolve().parent.parent))
1416
from check_chapters_settings import settings
1517

@@ -19,6 +21,17 @@
1921
target_file = Path("hpmor.html")
2022

2123

24+
def check_html(cont: str) -> None:
25+
"""Check html syntax."""
26+
parser = etree.XMLParser(recover=False) # Do not auto-fix errors
27+
try:
28+
etree.fromstring(cont, parser) # noqa: S320
29+
except etree.XMLSyntaxError as e:
30+
print("HTML Error:", e)
31+
sys.exit(1)
32+
# raise
33+
34+
2235
def fix_ellipsis(s: str) -> str:
2336
"""
2437
Fix ellipsis spacing for ebooks.
@@ -41,6 +54,8 @@ def fix_ellipsis(s: str) -> str:
4154
s = re.sub(r"…(?=<em>)", "… ", s)
4255
# before opening EN-quotes: add space
4356
# s = re.sub(r"…(?=[“])", "… ", s)
57+
# NO: before opening DE-quotes: add space
58+
# s = re.sub(r"…(?=[„])", "… ", s)
4459
return s
4560

4661

@@ -49,6 +64,8 @@ def fix_ellipsis(s: str) -> str:
4964

5065
with source_file.open(encoding="utf-8", newline="\n") as fh_in:
5166
cont = fh_in.read()
67+
print("checking source html")
68+
check_html(cont)
5269

5370
# remove strange leftovers from tex -> html conversion
5471
cont = re.sub(
@@ -86,15 +103,6 @@ def fix_ellipsis(s: str) -> str:
86103
# count=1,
87104
# )
88105

89-
# remove trailing slashes to satisfy https://validator.w3.org
90-
cont = cont.replace("<br />", "<br>")
91-
cont = cont.replace("<hr />", "<hr>")
92-
cont = re.sub(
93-
r"(<meta [^>]*) />",
94-
r"\1>",
95-
cont,
96-
)
97-
98106
# fix spaces around ellipsis
99107
cont = fix_ellipsis(cont)
100108

@@ -154,5 +162,17 @@ def fix_ellipsis(s: str) -> str:
154162
css = fh_in.read()
155163
cont = cont.replace("</style>\n", css + "\n</style>\n")
156164

165+
print("checking target html")
166+
check_html(cont)
167+
168+
# remove trailing slashes to satisfy https://validator.w3.org
169+
cont = cont.replace("<br />", "<br>")
170+
cont = cont.replace("<hr />", "<hr>")
171+
cont = re.sub(
172+
r"(<meta [^>]*) />",
173+
r"\1>",
174+
cont,
175+
)
176+
157177
with target_file.open(mode="w", encoding="utf-8", newline="\n") as fh_out:
158178
fh_out.write(cont)

‎scripts/install_requirements_ebook.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
#!/bin/sh
22

3+
# ensure we are in the hpmor root dir
4+
script_dir=$(cd $(dirname $0) && pwd)
5+
cd $script_dir/..
6+
37
sudo apt-get install texlive-extra-utils pandoc calibre imagemagick ghostscript
48
# pandoc calibre : for ebook converting
59
# texlive-extra-utils : for latexpand
610
# imagemagick ghostscript : for pdf title page to image conversion
11+
12+
pip install -r python-requirements.txt

‎scripts/install_requirements_pdf.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
11
#!/bin/sh
22

3+
# ensure we are in the hpmor root dir
4+
script_dir=$(cd $(dirname $0) && pwd)
5+
cd $script_dir/..
6+
37
sudo apt-get install texlive-xetex texlive-lang-greek texlive-lang-german latexmk

0 commit comments

Comments
 (0)
Please sign in to comment.