Move / to /target and /src to /

This commit is contained in:
Miraty 2023-09-04 23:31:16 +02:00
parent 2f00e1e46e
commit c7eaceb355
2 changed files with 98 additions and 93 deletions

View file

@ -1,44 +1,62 @@
# mkht.php
mkht.php is a PHP script for building Gemini, Markdown and HTML/CSS sites from source documents in Gemini, Markdown Extra, HTML, PHP, CSS and Less.
mkht.php is a PHP script for building HTML/CSS sites from source documents in PHP, Gemini, Pandoc Markdown, HTML and CSS.
For my personal use cases, this project includes some specific tweaks that may not be fully or correctly documented.
## Usage
Place your pages tree in `/src/*/*.(gmi|md)`.
Place your pages tree in `/*.md`.
Optional files:
* `/config.ini`
* `/style.less`
* `/logo.png`
* `/head.inc.html`
* `/footer.inc.html`
`mkht.php [-f] [site_path] [destination]`
`mkht.php <site path> <destination>`
`-f` forces generation of every file, erasing already generated files.
`destination` is optionnal and can be:
* `onion` if you want links ending with .onion when available
If `site_path` is not set, it will default to current directory.
`destination` is optional and can be:
* `onion` if you want links ending with .onion when available (function `clearnetOrOnion`)
## Input
Pages in `/src` can use Gemini (if using `gmi` extension), Markdown, HTML and PHP.
Source pages must end in `.md` and can use Markdown, HTML and PHP.
The following files have special meaning:
`/config.ini`
: some default settings can be changed by this file
`/style.css`
: additional CSS
`/head.inc.html`
: added just before `</head>`
`/header.inc.php`
: added just after `<body>`
`/end.inc.html`
: added just before `</body>`
Files starting with a dot or not ending in `.gmi`, `.md` or `.html` are ignored.
Files containing `draft` in their name are ignored for Atom feeds.
Files containing `draft` in their name (separated from other characters by `.`) are ignored.
Security note: as PHP code is executed, input files need to be trusted.
## Output
* `/*/*.gmi` (if using `.gmi` extension in /src)
* `/*/*.md`
* `/*/*.html`
* `/*/*.gz`
* `/target/*.gmi` (if using `.gmi` extension in /src)
* `/target/*.md`
* `/target/*.html`
* `/target/*.html.gz`
Note that format translation is only done in the following order:
Gemini > Markdown > HTML, which means that the last of these formats you use will be the first one readable by hypertext browsers. (PHP is always executed first.)
## Data persistence
## Metadata persistence
IDs are attributed to titles according to their content, therefor modifying a title breaks links to page sections.
IDs are attributed to titles according to their content, therefore modifying a title breaks links to page sections.
### For atom feeds
@ -51,12 +69,6 @@ IDs are attributed to titles according to their content, therefor modifying a ti
* gzip
* pandoc
## Internal libraries used
| Name | Description | Repository |
| --------------- | ---------------------------- | ----------------------------------------- |
| less.php | Less compiler in PHP | https://github.com/wikimedia/less.php |
## License
[AGPLv3+](LICENSE)

129
mkht.php
View file

@ -44,67 +44,54 @@ if (!isset($config['id'])) {
file_put_contents(SITE . '/config.ini', 'id = "' . $config['id'] . '"' . LF, FILE_APPEND);
}
if ($config['announce-css'])
copy(ROOT . '/style.css', SITE . '/mkht-php.css');
/**
 * Choose which of two URLs to link to, based on the build destination.
 *
 * Returns $onion_url when the DESTINATION constant is strictly equal to
 * 'onion', and $clearnet_url in every other case.
 */
function clearnetOrOnion($clearnet_url, $onion_url) {
	// Anything other than the exact string 'onion' selects the clearnet address
	if (DESTINATION !== 'onion')
		return $clearnet_url;
	return $onion_url;
}
$files = new RecursiveIteratorIterator(new RecursiveDirectoryIterator(SITE . '/src', RecursiveDirectoryIterator::SKIP_DOTS));
$feed = '';
foreach($files as $file) {
$info = new SplFileInfo($file->getPathName());
if ($info->getType() !== 'file' OR !in_array($info->getExtension(), ['gmi', 'md', 'html'], true) OR str_starts_with($info->getPathname(), '.'))
$nodes = new RecursiveIteratorIterator(new RecursiveDirectoryIterator(SITE, RecursiveDirectoryIterator::SKIP_DOTS));
foreach($nodes as $node) {
$node_info = new SplFileInfo($node->getPathName());
$src = $node_info->getPathname();
if (str_starts_with($src, SITE . '/target'))
continue;
$target = str_replace(SITE, SITE . '/target', $src);
$path_parts = pathinfo($target);
if (strstr($src, '/.') !== false) // Skip hidden nodes
continue;
if ($node_info->getType() !== 'file')
continue;
if (!file_exists($path_parts['dirname'])) // Create parent directory if needed
mkdir($path_parts['dirname'], 0755, true);
copy($src, $target);
if ($node_info->getExtension() !== 'md')
continue;
$files_dates[$info->getPathname()] = $info->getMTime();
}
asort($files_dates);
$files_dates[$src] = $node_info->getMTime();
ob_start();
?>
<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title><?= $config['title'] ?? '' ?></title>
<id>urn:publicid:<?= $config['id'] ?></id>
<?php
foreach ($config['base-url'] as $url)
echo ' <link rel="self" type="application/atom+xml" href="' . $url . '/feed.atom"></link>' . LF;
?>
<updated><?= date('c', $files_dates[array_key_last($files_dates)]) ?></updated>
<author>
<name><?= $config['author'] ?? '' ?></name>
</author>
<?php
$feed = ob_get_clean();
foreach ($files_dates as $src_page => $last_mod) {
$content = file_get_contents($src_page);
preg_match('/^# ?(?<title>.*)$/Dm', $content, $matches);
$title = $matches['title'] ?? NULL;
$path_parts = pathinfo(str_replace('/src/', '/', $src_page));
if (in_array('draft', explode('.', $path_parts['basename']), true))
continue;
$base_filepath = $path_parts['dirname'] . '/' . $path_parts['filename'];
if (!file_exists($base_filepath . '.html') OR (filemtime($src_page) > filemtime($base_filepath . '.html')) OR $opt['force']) {
echo 'Compiling ' . $src_page . ' ' . date("Y-m-d H:i:s", $last_mod) . LF;
$content = file_get_contents($src);
// Create parent directory if needed
if (!file_exists($path_parts['dirname']))
mkdir($path_parts['dirname'], 0755, true);
preg_match('/^# (?<title>.*)$/Dm', $content, $matches);
$title = $matches['title'] ?? NULL;
if (!file_exists($base_filepath . '.html') OR (filemtime($src) > filemtime($base_filepath . '.html')) OR $opt['force']) {
echo 'Compiling ' . $src . ' ' . date("Y-m-d H:i:s", $node_info->getMTime()) . LF;
// Execute PHP code
ob_start();
eval('?>' . $content);
$content = ob_get_clean();
file_put_contents($base_filepath . '.' . $path_parts['extension'], $content);
// Convert Gemtext to Markdown
if ($path_parts['extension'] === 'gmi') {
if ($path_parts['extension'] === 'gmi' OR $path_parts['extension'] === 'md') {
$content = preg_replace_callback(
'/^=>\h*(?<addr>\S+)(:?\h+(?<title>\V+))?$/m',
function ($matches) {
@ -114,11 +101,10 @@ foreach ($files_dates as $src_page => $last_mod) {
},
$content,
);
file_put_contents($base_filepath . '.md', $content);
}
// Compile Markdown to HTML
$process = proc_open('pandoc --fail-if-warnings -f markdown_phpextra-citations-native_divs-native_spans+abbreviations+hard_line_breaks+lists_without_preceding_blankline -t html --wrap none', [
$process = proc_open('pandoc --fail-if-warnings --section-divs -f markdown-citations-native_divs-native_spans+abbreviations+hard_line_breaks+lists_without_preceding_blankline+multiline_tables+fenced_divs+bracketed_spans+markdown_attribute -t html --wrap none', [
0 => ['pipe', 'r'],
1 => ['pipe', 'w'],
], $pipes);
@ -134,7 +120,7 @@ foreach ($files_dates as $src_page => $last_mod) {
// .md > .html for local links
$content = preg_replace('/ href="([^:"]+)\.md"/', ' href="$1.html"', $content);
$relative_root_path = str_repeat('../', substr_count(str_replace(SITE, '', $path_parts['dirname']), '/'));
$relative_root_path = str_repeat('../', substr_count(str_replace(SITE . '/target', '', $path_parts['dirname']), '/'));
ob_start();
@ -179,25 +165,13 @@ foreach ($files_dates as $src_page => $last_mod) {
if (file_exists(SITE . '/head.inc.html'))
echo file_get_contents(SITE . '/head.inc.html');
?>
?>
</head>
<body>
<?php
if ($config['header']) {
?>
<header>
<a href="./<?= $relative_root_path ?>">
<?php
if (file_exists(SITE . '/img/logo.webp'))
echo '<img src="img/logo.webp" ' . getimagesize(SITE . '/img/logo.webp')[3] . ' alt="' . $config['title'] . '" />';
else
echo $config['site-title'];
?>
</a>
</header>
<?php
}
<?php
if (file_exists(SITE . '/header.inc.php'))
eval('?>' . file_get_contents(SITE . '/header.inc.php'));
if ($config['center-index'] AND $path_parts['filename'] === 'index')
echo '<div class="centered">' . $content . '</div>';
@ -241,21 +215,40 @@ foreach ($files_dates as $src_page => $last_mod) {
return ' href="' . ($config['base-url'][0] ?? '') . substr($path_parts['dirname'], strlen(SITE)) . '/' . $matches['relative_url'] . '"';
}, $atom_entry_content);
if (!in_array('draft', explode('.', $path_parts['basename']), true)) {
ob_start();
ob_start();
?>
<entry>
<title><?= $title ?></title>
<id><?= $public_id ?></id>
<updated><?= date('c', $last_mod) ?></updated>
<updated><?= date('c', $node_info->getMTime()) ?></updated>
<?php
foreach ($config['base-url'] as $base_url)
echo ' <link rel="alternate" type="text/html" href="' . $base_url . $relative_addr . '"></link>' . LF;
foreach ($config['base-url'] as $base_url)
echo ' <link rel="alternate" type="text/html" href="' . $base_url . $relative_addr . '"></link>' . LF;
?>
<content type="html"><?= htmlspecialchars($atom_entry_content) ?></content>
</entry>
<?php
$feed .= ob_get_clean();
}
$feed .= ob_get_clean();
}
file_put_contents(SITE . '/feed.atom', $feed . '</feed>' . LF);
asort($files_dates);
ob_start();
?>
<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title><?= $config['title'] ?? '' ?></title>
<id>urn:publicid:<?= $config['id'] ?></id>
<?php
foreach ($config['base-url'] as $url)
echo ' <link rel="self" type="application/atom+xml" href="' . $url . '/feed.atom"></link>' . LF;
?>
<updated><?= date('c', $files_dates[array_key_last($files_dates)]) ?></updated>
<author>
<name><?= $config['author'] ?? '' ?></name>
</author>
<?php
file_put_contents(SITE . '/target/feed.atom', ob_get_clean() . $feed . '</feed>' . LF);
if ($config['announce-css'])
copy(ROOT . '/style.css', SITE . '/target/mkht-php.css');