Move / to /target and /src to /

2023-09-04 23:31:16 +02:00 · 2023-09-04 23:31:16 +02:00 · c7eaceb355
commit c7eaceb355
parent 2f00e1e46e
2 changed files with 98 additions and 93 deletions
--- a/README.md
+++ b/README.md
@ -1,44 +1,62 @@
 # mkht.php

-mkht.php is a PHP script for building Gemini, Markdown and HTML/CSS sites from source documents in Gemini, Markdown Extra, HTML, PHP, CSS and Less.
+mkht.php is a PHP script for building HTML/CSS sites from source documents in PHP, Gemini, Pandoc Markdown, HTML and CSS.
+
+For my personal use cases, this project includes some specific tweaks that may not be fully or correctly documented.

 ## Usage

-Place your pages tree in `/src/*/*.(gmi|md)`.
+Place your pages tree in `/*.md`.

-Optional files:
-* `/config.ini`
-* `/style.less`
-* `/logo.png`
-* `/head.inc.html`
-* `/footer.inc.html`
+`mkht.php [-f] [site_path] [destination]`

-`mkht.php <site path> <destination>`
+`-f` forces generation of every file, erasing already generated files.

-`destination` is optionnal and can be:
-* `onion` if you want links ending with .onion when available
+If `site_path` is not set, it defaults to the current directory.
+
+`destination` is optional and can be:
+* `onion` if you want links ending with .onion when available (function `clearnetOrOnion`)

 ## Input

-Pages in `/src` can use Gemini (if using `gmi` extension), Markdown, HTML and PHP.
+Source pages must end in `.md` and can use Markdown, HTML and PHP.
+
+The following files have special meaning:
+
+`/config.ini`
+: some default settings can be changed by this file
+
+`/style.css`
+: additional CSS
+
+`/head.inc.html`
+: added just before `</head>`
+
+`/header.inc.php`
+: added just after `<body>`
+
+`/end.inc.html`
+: added just before `</body>`

 Files starting with a dot or not ending in `.gmi`, `.md` or `.html` are ignored.

-Files containing `draft` in their name are ignored for Atom feeds.
+Files containing `draft` in their name (separated from other characters by `.`) are ignored.
+
+Security note: as PHP code is executed, input files need to be trusted.

 ## Output

-* `/*/*.gmi` (if using `.gmi` extension in /src)
-* `/*/*.md`
-* `/*/*.html`
-* `/*/*.gz`
+* `/target/*.gmi` (if using `.gmi` extension in /src)
+* `/target/*.md`
+* `/target/*.html`
+* `/target/*.html.gz`

 Note that format translation is only done in the following order:
 Gemini > Markdown > HTML, which means that the last of these formats you will use will be the first that will be readable by hypertext browsers. (PHP is always executed first.)

-## Data persistence
+## Metadata persistence

-IDs are attributed to titles according to their content, therefor modifying a title breaks links to page sections.
+IDs are attributed to titles according to their content, therefore modifying a title breaks links to page sections.

 ### For atom feeds

@ -51,12 +69,6 @@ IDs are attributed to titles according to their content, therefor modifying a ti
 * gzip
 * pandoc

-## Internal libraries used
-
-| Name            | Description                  | Repository                                |
-| --------------- | ---------------------------- | ----------------------------------------- |
-| less.php        | Less compiler in PHP         | https://github.com/wikimedia/less.php     |
-
 ## License

 [AGPLv3+](LICENSE)
--- a/mkht.php
+++ b/mkht.php
@ -44,67 +44,54 @@ if (!isset($config['id'])) {
 	file_put_contents(SITE . '/config.ini', 'id = "' . $config['id'] . '"' . LF, FILE_APPEND);
 }

-if ($config['announce-css'])
-	copy(ROOT . '/style.css', SITE . '/mkht-php.css');
-
 // Determine whether links need to use Onion or DNS
 function clearnetOrOnion($clearnet_url, $onion_url) {
 	return (DESTINATION === 'onion') ? $onion_url : $clearnet_url;
 }

-$files = new RecursiveIteratorIterator(new RecursiveDirectoryIterator(SITE . '/src', RecursiveDirectoryIterator::SKIP_DOTS));
+$feed = '';

-foreach($files as $file) {
-	$info = new SplFileInfo($file->getPathName());
-	if ($info->getType() !== 'file' OR !in_array($info->getExtension(), ['gmi', 'md', 'html'], true) OR str_starts_with($info->getPathname(), '.'))
+$nodes = new RecursiveIteratorIterator(new RecursiveDirectoryIterator(SITE, RecursiveDirectoryIterator::SKIP_DOTS));
+
+foreach($nodes as $node) {
+	$node_info = new SplFileInfo($node->getPathName());
+	$src = $node_info->getPathname();
+	if (str_starts_with($src, SITE . '/target'))
+		continue;
+	$target = str_replace(SITE, SITE . '/target', $src);
+	$path_parts = pathinfo($target);
+	if (strstr($src, '/.') !== false) // Skip hidden nodes
+		continue;
+	if ($node_info->getType() !== 'file')
+		continue;
+	if (!file_exists($path_parts['dirname'])) // Create parent directory if needed
+		mkdir($path_parts['dirname'], 0755, true);
+	copy($src, $target);
+	if ($node_info->getExtension() !== 'md')
 		continue;
-	$files_dates[$info->getPathname()] = $info->getMTime();
-}

-asort($files_dates);
+	$files_dates[$src] = $node_info->getMTime();

-ob_start();
-?>
-<?xml version="1.0" encoding="UTF-8"?>
-<feed xmlns="http://www.w3.org/2005/Atom">
-	<title><?= $config['title'] ?? '' ?></title>
-	<id>urn:publicid:<?= $config['id'] ?></id>
-<?php
-foreach ($config['base-url'] as $url)
-	echo '	<link rel="self" type="application/atom+xml" href="' . $url . '/feed.atom"></link>' . LF;
-?>
-	<updated><?= date('c', $files_dates[array_key_last($files_dates)]) ?></updated>
-	<author>
-		<name><?= $config['author'] ?? '' ?></name>
-	</author>
-<?php
-$feed = ob_get_clean();
-
-foreach ($files_dates as $src_page => $last_mod) {
-	$content = file_get_contents($src_page);
-
-	preg_match('/^# ?(?<title>.*)$/Dm', $content, $matches);
-	$title = $matches['title'] ?? NULL;
-
-	$path_parts = pathinfo(str_replace('/src/', '/', $src_page));
+	if (in_array('draft', explode('.', $path_parts['basename']), true))
+		continue;

 	$base_filepath = $path_parts['dirname'] . '/' . $path_parts['filename'];

-	if (!file_exists($base_filepath . '.html') OR (filemtime($src_page) > filemtime($base_filepath . '.html')) OR $opt['force']) {
-		echo 'Compiling ' . $src_page . ' ' . date("Y-m-d H:i:s", $last_mod) . LF;
+	$content = file_get_contents($src);

-		// Create parent directory if needed
-		if (!file_exists($path_parts['dirname']))
-			mkdir($path_parts['dirname'], 0755, true);
+	preg_match('/^# (?<title>.*)$/Dm', $content, $matches);
+	$title = $matches['title'] ?? NULL;
+
+	if (!file_exists($base_filepath . '.html') OR (filemtime($src) > filemtime($base_filepath . '.html')) OR $opt['force']) {
+		echo 'Compiling ' . $src . ' ' . date("Y-m-d H:i:s", $node_info->getMTime()) . LF;

 		// Execute PHP code
 		ob_start();
 		eval('?>' . $content);
 		$content = ob_get_clean();
-		file_put_contents($base_filepath . '.' . $path_parts['extension'], $content);

 		// Convert Gemtext to Markdown
-		if ($path_parts['extension'] === 'gmi') {
+		if ($path_parts['extension'] === 'gmi' OR $path_parts['extension'] === 'md') {
 			$content = preg_replace_callback(
 				'/^=>\h*(?<addr>\S+)(:?\h+(?<title>\V+))?$/m',
 				function ($matches) {
@ -114,11 +101,10 @@ foreach ($files_dates as $src_page => $last_mod) {
 				},
 				$content,
 			);
-			file_put_contents($base_filepath . '.md', $content);
 		}

 		// Compile Markdown to HTML
-		$process = proc_open('pandoc --fail-if-warnings -f markdown_phpextra-citations-native_divs-native_spans+abbreviations+hard_line_breaks+lists_without_preceding_blankline -t html --wrap none', [
+		$process = proc_open('pandoc --fail-if-warnings --section-divs -f markdown-citations-native_divs-native_spans+abbreviations+hard_line_breaks+lists_without_preceding_blankline+multiline_tables+fenced_divs+bracketed_spans+markdown_attribute -t html --wrap none', [
 			0 => ['pipe', 'r'],
 			1 => ['pipe', 'w'],
 		], $pipes);
@ -134,7 +120,7 @@ foreach ($files_dates as $src_page => $last_mod) {
 		// .md > .html for local links
 		$content = preg_replace('/ href="([^:"]+)\.md"/', ' href="$1.html"', $content);

-		$relative_root_path = str_repeat('../', substr_count(str_replace(SITE, '', $path_parts['dirname']), '/'));
+		$relative_root_path = str_repeat('../', substr_count(str_replace(SITE . '/target', '', $path_parts['dirname']), '/'));

 		ob_start();

@ -179,25 +165,13 @@ foreach ($files_dates as $src_page => $last_mod) {

 		if (file_exists(SITE . '/head.inc.html'))
 			echo file_get_contents(SITE . '/head.inc.html');
-	?>
+?>
 			</head>

 			<body>
-	<?php
-		if ($config['header']) {
-	?>
-				<header>
-					<a href="./<?= $relative_root_path ?>">
-	<?php
-			if (file_exists(SITE . '/img/logo.webp'))
-				echo '<img src="img/logo.webp" ' . getimagesize(SITE . '/img/logo.webp')[3] . ' alt="' . $config['title'] . '" />';
-			else
-				echo $config['site-title'];
-	?>
-					</a>
-				</header>
-	<?php
-		}
+<?php
+		if (file_exists(SITE . '/header.inc.php'))
+			eval('?>' . file_get_contents(SITE . '/header.inc.php'));

 		if ($config['center-index'] AND $path_parts['filename'] === 'index')
 			echo '<div class="centered">' . $content . '</div>';
@ -241,21 +215,40 @@ foreach ($files_dates as $src_page => $last_mod) {
 		return ' href="' . ($config['base-url'][0] ?? '') . substr($path_parts['dirname'], strlen(SITE)) . '/' . $matches['relative_url'] . '"';
 	}, $atom_entry_content);

-	if (!in_array('draft', explode('.', $path_parts['basename']), true)) {
-		ob_start();
+	ob_start();
 ?>
 	<entry>
 		<title><?= $title ?></title>
 		<id><?= $public_id ?></id>
-		<updated><?= date('c', $last_mod) ?></updated>
+		<updated><?= date('c', $node_info->getMTime()) ?></updated>
 <?php
-		foreach ($config['base-url'] as $base_url)
-			echo '		<link rel="alternate" type="text/html" href="' . $base_url . $relative_addr . '"></link>' . LF;
+	foreach ($config['base-url'] as $base_url)
+		echo '		<link rel="alternate" type="text/html" href="' . $base_url . $relative_addr . '"></link>' . LF;
 ?>
 		<content type="html"><?= htmlspecialchars($atom_entry_content) ?></content>
 	</entry>
 <?php
-		$feed .= ob_get_clean();
-	}
+	$feed .= ob_get_clean();
 }
-file_put_contents(SITE . '/feed.atom', $feed . '</feed>' . LF);
+
+asort($files_dates);
+
+ob_start();
+?>
+<?xml version="1.0" encoding="UTF-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom">
+	<title><?= $config['title'] ?? '' ?></title>
+	<id>urn:publicid:<?= $config['id'] ?></id>
+<?php
+foreach ($config['base-url'] as $url)
+	echo '	<link rel="self" type="application/atom+xml" href="' . $url . '/feed.atom"></link>' . LF;
+?>
+	<updated><?= date('c', $files_dates[array_key_last($files_dates)]) ?></updated>
+	<author>
+		<name><?= $config['author'] ?? '' ?></name>
+	</author>
+<?php
+file_put_contents(SITE . '/target/feed.atom', ob_get_clean() . $feed . '</feed>' . LF);
+
+if ($config['announce-css'])
+	copy(ROOT . '/style.css', SITE . '/target/mkht-php.css');