#!/usr/bin/env python3
"""Package the ``offline-docs`` pages of a Lektor build into a target directory.

Usage: packagemanual lektor-out-directory target-directory

For every ``offline-docs/index.html`` found below the source directory the
script:

* rewrites quoted references to the ``static`` directory so they point at
  ``STATIC_PATH``;
* rewrites ``href="../page/#anchor"`` links into in-page anchors of the form
  ``href="#page_anchor"``;
* writes the result to ``<target>/<first path component>.html`` (``en.html``
  when the page sits directly below the source root).

Finally ``<source>/static`` is copied to ``<target>/static``, leaving out
every file under ``static/images`` that none of the processed pages mention.
"""
from pathlib import Path
import re
import shutil
import sys

PAGE_NAME = 'offline-docs'
STATIC_PATH = 'chrome://browser/content/manual/static'

# The dots are escaped on purpose: an unescaped ``../`` would match *any* two
# characters followed by a slash, not just a parent-directory step.
static_re = re.compile(r'"(?:\.\./)*static/([^"]+)"')
link_re = re.compile(r'href="\.\./([^"]+)"')

# Splits a link target into "<slug>[/][#]<rest>", tolerating one extra
# leading "../".
_target_re = re.compile(r'(?:\.\./)?([^/#]+)/?#?(.*)')


def clean_urls(match):
    """Turn a relative page link matched by ``link_re`` into an in-page anchor.

    ``match.group(1)`` is the link target with its first ``../`` already
    removed.  ``about/`` becomes ``href="#about"`` and ``about/#install``
    becomes ``href="#about_install"``.

    Targets without a page slug (for example ``../#top``) cannot be mapped
    to an anchor; the original attribute text is returned unchanged instead
    of raising.
    """
    parsed = _target_re.match(match.group(1))
    if parsed is None:
        return match.group(0)
    slug, rest = parsed.groups()
    anchor = f'_{rest}' if rest else ''
    return f'href="#{slug}{anchor}"'


def _list_images(source):
    """Return the source-relative paths of all files under static/images."""
    return {
        path.relative_to(source)
        for path in (source / 'static/images').glob('**/*')
        if path.is_file()
    }


def _dest_name(rel):
    """Return the output file name for a page path relative to the source.

    The first path component names the output file; a page located directly
    in ``PAGE_NAME`` at the source root is written as ``en.html``.
    """
    top = rel.parts[0]
    if top == PAGE_NAME:
        top = 'en'
    return top + '.html'


def _rewrite(contents):
    """Apply the static-path and link rewrites to one page's HTML."""
    contents = static_re.sub(f'"{STATIC_PATH}/\\1"', contents)
    return link_re.sub(clean_urls, contents)


def main(argv=None):
    """Run the packaging step.

    ``argv`` defaults to ``sys.argv``; ``argv[1]`` is the Lektor output
    directory and ``argv[2]`` the target directory.  Returns the process
    exit status: 1 after printing the usage message when arguments are
    missing, 0 otherwise.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        print(f'Usage: {argv[0]} lektor-out-directory target-directory')
        return 1

    source = Path(argv[1])
    target = Path(argv[2])
    # parents=True so a nested, not-yet-existing target directory works too.
    target.mkdir(parents=True, exist_ok=True)

    # Start by assuming every image is unused, then drop the ones that any
    # processed page mentions.
    unused_images = _list_images(source)

    for page in source.rglob(f'{PAGE_NAME}/index.html'):
        with page.open(encoding='utf8') as f:
            contents = f.read()

        # Compare with forward slashes: that is what the HTML contains,
        # whatever the platform's native path separator is.
        unused_images = {
            image for image in unused_images
            if image.as_posix() not in contents
        }

        out_path = target / _dest_name(page.relative_to(source))
        with out_path.open('w', encoding='utf8') as f:
            f.write(_rewrite(contents))

    def ignore_images(path, names):
        """copytree ``ignore`` callback: skip images no page refers to."""
        return [
            name for name in names
            if Path(path, name).relative_to(source) in unused_images
        ]

    # Replace any previous copy of the static directory wholesale.
    shutil.rmtree(target / 'static', ignore_errors=True)
    shutil.copytree(source / 'static', target / 'static',
                    ignore=ignore_images)
    return 0


if __name__ == '__main__':
    sys.exit(main())