diff --git a/README.md b/README.md index c23abff..d23196e 100755 --- a/README.md +++ b/README.md @@ -1,44 +1,62 @@ # mkht.php -mkht.php is a PHP script for building Gemini, Markdown and HTML/CSS sites from source documents in Gemini, Markdown Extra, HTML, PHP, CSS and Less. +mkht.php is a PHP script for building HTML/CSS sites from source documents in PHP, Gemini, Pandoc Markdown, HTML and CSS. + +For my personal use cases, this project includes some specific tweaks that may not be fully or correctly documented. ## Usage -Place your pages tree in `/src/*/*.(gmi|md)`. +Place your pages tree in `/*.md`. -Optional files: -* `/config.ini` -* `/style.less` -* `/logo.png` -* `/head.inc.html` -* `/footer.inc.html` +`mkht.php [-f] [site_path] [destination]` -`mkht.php ` +`-f` forces generation of every file, erasing already generated files. -`destination` is optionnal and can be: -* `onion` if you want links ending with .onion when available +If `site_path` is not set, it defaults to the current directory. + +`destination` is optional and can be: +* `onion` if you want links ending with .onion when available (function `clearnetOrOnion`) ## Input -Pages in `/src` can use Gemini (if using `gmi` extension), Markdown, HTML and PHP. +Source pages must end in `.md` and can use Markdown, HTML and PHP. + +The following files have special meaning: + +`/config.ini` +: some default settings can be changed by this file + +`/style.css` +: additional CSS + +`/head.inc.html` +: added just before `</head>` + +`/header.inc.php` +: added just after `<body>` + +`/end.inc.html` +: added just before `` Files starting with a dot or not ending in `.gmi`, `.md` or `.html` are ignored. -Files containing `draft` in their name are ignored for Atom feeds. +Files containing `draft` in their name (separated from other characters by `.`) are ignored. 
+ +Security note: PHP code embedded in source pages is executed during the build, so input files must be trusted. ## Output -* `/*/*.gmi` (if using `.gmi` extension in /src) -* `/*/*.md` -* `/*/*.html` -* `/*/*.gz` +* `/target/*.gmi` (if using `.gmi` extension in /src) +* `/target/*.md` +* `/target/*.html` +* `/target/*.html.gz` Note that format translation is only done in the following order: Gemini > Markdown > HTML, which means that the last of these formats you will use will be the first that will be readable by hypertext browsers. (PHP is always executed first.) -## Data persistence +## Metadata persistence -IDs are attributed to titles according to their content, therefor modifying a title breaks links to page sections. +IDs are attributed to titles according to their content, therefore modifying a title breaks links to page sections. ### For atom feeds @@ -51,12 +69,6 @@ IDs are attributed to titles according to their content, therefor modifying a ti * gzip * pandoc -## Internal libraries used - -| Name | Description | Repository | -| --------------- | ---------------------------- | ----------------------------------------- | -| less.php | Less compiler in PHP | https://github.com/wikimedia/less.php | - ## License [AGPLv3+](LICENSE) diff --git a/mkht.php b/mkht.php index 48c2cd7..b218913 100755 --- a/mkht.php +++ b/mkht.php @@ -44,67 +44,54 @@ if (!isset($config['id'])) { file_put_contents(SITE . '/config.ini', 'id = "' . $config['id'] . '"' . LF, FILE_APPEND); } -if ($config['announce-css']) - copy(ROOT . '/style.css', SITE . '/mkht-php.css'); - // Determine whether links need to use Onion or DNS function clearnetOrOnion($clearnet_url, $onion_url) { return (DESTINATION === 'onion') ? $onion_url : $clearnet_url; } -$files = new RecursiveIteratorIterator(new RecursiveDirectoryIterator(SITE . 
'/src', RecursiveDirectoryIterator::SKIP_DOTS)); +$feed = ''; -foreach($files as $file) { - $info = new SplFileInfo($file->getPathName()); - if ($info->getType() !== 'file' OR !in_array($info->getExtension(), ['gmi', 'md', 'html'], true) OR str_starts_with($info->getPathname(), '.')) +$nodes = new RecursiveIteratorIterator(new RecursiveDirectoryIterator(SITE, RecursiveDirectoryIterator::SKIP_DOTS)); + +foreach($nodes as $node) { + $node_info = new SplFileInfo($node->getPathName()); + $src = $node_info->getPathname(); + if (str_starts_with($src, SITE . '/target')) + continue; + $target = str_replace(SITE, SITE . '/target', $src); + $path_parts = pathinfo($target); + if (strstr($src, '/.') !== false) // Skip hidden nodes + continue; + if ($node_info->getType() !== 'file') + continue; + if (!file_exists($path_parts['dirname'])) // Create parent directory if needed + mkdir($path_parts['dirname'], 0755, true); + copy($src, $target); + if ($node_info->getExtension() !== 'md') continue; - $files_dates[$info->getPathname()] = $info->getMTime(); -} -asort($files_dates); + $files_dates[$src] = $node_info->getMTime(); -ob_start(); -?> - - - <?= $config['title'] ?? '' ?> - urn:publicid: -' . LF; -?> - - - - - $last_mod) { - $content = file_get_contents($src_page); - - preg_match('/^# ?(?.*)$/Dm', $content, $matches); - $title = $matches['title'] ?? NULL; - - $path_parts = pathinfo(str_replace('/src/', '/', $src_page)); + if (in_array('draft', explode('.', $path_parts['basename']), true)) + continue; $base_filepath = $path_parts['dirname'] . '/' . $path_parts['filename']; - if (!file_exists($base_filepath . '.html') OR (filemtime($src_page) > filemtime($base_filepath . '.html')) OR $opt['force']) { - echo 'Compiling ' . $src_page . ' ' . date("Y-m-d H:i:s", $last_mod) . 
LF; + $content = file_get_contents($src); - // Create parent directory if needed - if (!file_exists($path_parts['dirname'])) - mkdir($path_parts['dirname'], 0755, true); + preg_match('/^# (?<title>.*)$/Dm', $content, $matches); + $title = $matches['title'] ?? NULL; + + if (!file_exists($base_filepath . '.html') OR (filemtime($src) > filemtime($base_filepath . '.html')) OR $opt['force']) { + echo 'Compiling ' . $src . ' ' . date("Y-m-d H:i:s", $node_info->getMTime()) . LF; // Execute PHP code ob_start(); eval('?>' . $content); $content = ob_get_clean(); - file_put_contents($base_filepath . '.' . $path_parts['extension'], $content); // Convert Gemtext to Markdown - if ($path_parts['extension'] === 'gmi') { + if ($path_parts['extension'] === 'gmi' OR $path_parts['extension'] === 'md') { $content = preg_replace_callback( '/^=>\h*(?<addr>\S+)(:?\h+(?<title>\V+))?$/m', function ($matches) { @@ -114,11 +101,10 @@ foreach ($files_dates as $src_page => $last_mod) { }, $content, ); - file_put_contents($base_filepath . '.md', $content); } // Compile Markdown to HTML - $process = proc_open('pandoc --fail-if-warnings -f markdown_phpextra-citations-native_divs-native_spans+abbreviations+hard_line_breaks+lists_without_preceding_blankline -t html --wrap none', [ + $process = proc_open('pandoc --fail-if-warnings --section-divs -f markdown-citations-native_divs-native_spans+abbreviations+hard_line_breaks+lists_without_preceding_blankline+multiline_tables+fenced_divs+bracketed_spans+markdown_attribute -t html --wrap none', [ 0 => ['pipe', 'r'], 1 => ['pipe', 'w'], ], $pipes); @@ -134,7 +120,7 @@ foreach ($files_dates as $src_page => $last_mod) { // .md > .html for local links $content = preg_replace('/ href="([^:"]+)\.md"/', ' href="$1.html"', $content); - $relative_root_path = str_repeat('../', substr_count(str_replace(SITE, '', $path_parts['dirname']), '/')); + $relative_root_path = str_repeat('../', substr_count(str_replace(SITE . 
'/target', '', $path_parts['dirname']), '/')); ob_start(); @@ -179,25 +165,13 @@ foreach ($files_dates as $src_page => $last_mod) { if (file_exists(SITE . '/head.inc.html')) echo file_get_contents(SITE . '/head.inc.html'); - ?> +?> </head> <body> - <?php - if ($config['header']) { - ?> - <header> - <a href="./<?= $relative_root_path ?>"> - <?php - if (file_exists(SITE . '/img/logo.webp')) - echo '<img src="img/logo.webp" ' . getimagesize(SITE . '/img/logo.webp')[3] . ' alt="' . $config['title'] . '" />'; - else - echo $config['site-title']; - ?> - </a> - </header> - <?php - } +<?php + if (file_exists(SITE . '/header.inc.php')) + eval('?>' . file_get_contents(SITE . '/header.inc.php')); if ($config['center-index'] AND $path_parts['filename'] === 'index') echo '<div class="centered">' . $content . '</div>'; @@ -241,21 +215,40 @@ foreach ($files_dates as $src_page => $last_mod) { return ' href="' . ($config['base-url'][0] ?? '') . substr($path_parts['dirname'], strlen(SITE)) . '/' . $matches['relative_url'] . '"'; }, $atom_entry_content); - if (!in_array('draft', explode('.', $path_parts['basename']), true)) { - ob_start(); + ob_start(); ?> <entry> <title><?= $title ?> - + getMTime()) ?> ' . LF; + foreach ($config['base-url'] as $base_url) + echo ' ' . LF; ?> ' . LF); + +asort($files_dates); + +ob_start(); +?> + + + <?= $config['title'] ?? '' ?> + urn:publicid: +' . LF; +?> + + + + +' . LF); + +if ($config['announce-css']) + copy(ROOT . '/style.css', SITE . '/target/mkht-php.css');