From a674ec5837f4c78206d4b0eb6521b8507732694b Mon Sep 17 00:00:00 2001
From: Martijn Dekker
Date: Wed, 9 Mar 2016 05:26:20 +0100
Subject: [PATCH 1/4] rebuild_tags(): use array for more robust file handling
---
bb.sh | 33 +++++++++++++++++++--------------
1 file changed, 19 insertions(+), 14 deletions(-)
diff --git a/bb.sh b/bb.sh
index 0956a53..82a9bf2 100755
--- a/bb.sh
+++ b/bb.sh
@@ -729,8 +729,8 @@ tags_in_post() {
}
# Finds all posts referenced in a number of tags.
-# Arguments are tags
-# Prints one line with space-separated tags to stdout
+# Arguments are tags.
+# Prints file names to stdout, one per line.
posts_with_tags() {
(($# < 1)) && return
set -- "${@/#/$prefix_tags}"
@@ -748,31 +748,35 @@ posts_with_tags() {
# rebuild_tags "one_post.html another_article.html" "example-tag another-tag"
# mind the quotes!
rebuild_tags() {
- if (($# < 2)); then
+ local IFS=$'\n' # word splitting only on newline; make $* expand with newline as separator
+ if (($# < 1)); then
# will process all files and tags
- files=$(ls -t ./*.html)
+ files=( $(ls -t ./*.html) )
all_tags=yes
else
# will process only given files and tags
- files=$(printf '%s\n' $1 | sort -u)
- files=$(ls -t $files)
- tags=$2
+ for ((i=1; i<=$#; i++)); do
+ [[ ${!i} == --tags ]] && break
+ done
+ files=( $(ls -t $(sort -u <<< "${*:1:$((i-1))}")) )
+ tags=( "${@:$((i+1)):$#}" )
+ all_tags=''
fi
echo -n "Rebuilding tag pages "
n=0
if [[ -n $all_tags ]]; then
- rm ./"$prefix_tags"*.html &> /dev/null
+ rm -f ./"$prefix_tags"*.html
else
- for i in $tags; do
- rm "./$prefix_tags$i.html" &> /dev/null
+ for i in "${tags[@]}"; do
+ rm -f "./$prefix_tags$i.html"
done
fi
# First we will process all files and create temporal tag files
# with just the content of the posts
tmpfile=tmp.$RANDOM
while [[ -f $tmpfile ]]; do tmpfile=tmp.$RANDOM; done
- while IFS='' read -r i; do
- is_boilerplate_file "$i" && continue;
+ for i in "${files[@]}"; do
+ is_boilerplate_file "$i" && continue
echo -n "."
if [[ -n $cut_do ]]; then
get_html_file_content 'entry' 'entry' 'cut' <"$i" | awk "/$cut_line/ { print \"$template_read_more
\" ; next } 1"
@@ -780,11 +784,12 @@ rebuild_tags() {
get_html_file_content 'entry' 'entry' <"$i"
fi >"$tmpfile"
for tag in $(tags_in_post "$i"); do
- if [[ -n $all_tags || " $tags " == *" $tag "* ]]; then
+ # if either all tags or array tags[] contains $tag...
+ if [[ -n $all_tags || $'\n'"${tags[*]}"$'\n' == *$'\n'"$tag"$'\n'* ]]; then
cat "$tmpfile" >> "$prefix_tags$tag".tmp.html
fi
done
- done <<< "$files"
+ done
rm "$tmpfile"
# Now generate the tag files with headers, footers, etc
while IFS='' read -r i; do
From 54cc0c80600035b3ac09f2a50a60f1cfb33b3942 Mon Sep 17 00:00:00 2001
From: Martijn Dekker
Date: Tue, 6 Sep 2016 03:24:01 +0200
Subject: [PATCH 2/4] More code refactoring. Limit word splitting and disable
globbing by default.
- Globally, now do word splitting (IFS) only on newline (which also makes
"$*" expand with newline separator instead of space).
- Disable globbing (pathname expansion), to be re-enabled locally using
'set +f' where needed (typically in a subshell).
These changes help eliminate unexpected snags and security vulnerabilities
in case someone forgets to quote a variable somewhere. They should also make
the code "just work" with spaces and other special characters in file names
and tags (as long as they're not newline characters, but that can't happen
with regular use of the script as the newline is the separator). This means
that, as of this change, editing or completely emptying the convert_filename
filter should no longer pose any problems as far as bb.sh is concerned.
The changes to adapt the code to the above are mainly:
- Now that we do word splitting on newline only, we can go back to iterating
through files in a "for" loop instead of using "read" with a here-document,
which is more readable. However, to enable globbing locally, the technique
needs a small adaptation, like:
for file in $(set +f; printf '%s\n' *.html)
or
for file in $(set +f; ls -t -- *.html) # sort by date, newest first
Given IFS=$'\n' and globbing disabled globally, this technique is robust
for all special characters in file names except for newlines.
- invoke_editor() function replaces direct $EDITOR calls, because we need to
locally word-split $EDITOR on spaces in case it contains arguments.
- parse_file(): rewrite tag parsing to handle possible spaces in tags
- tags_in_post(): output line-separated instead of space-separated tags;
further adjust sed script to handle possible spaces in tags
- rebuild_tags(): this function was refactored to use an array internally.
Instead of two combined strings, it now takes HTML files and tags as
separate arguments, separated by a single "--tags" argument. This allows
for spaces and other special characters in both file names and tags. (See
also commit a674ec5, which started this but didn't finish it).
---
bb.sh | 128 +++++++++++++++++++++++++++++++++++++---------------------
1 file changed, 81 insertions(+), 47 deletions(-)
diff --git a/bb.sh b/bb.sh
index 09136c3..144f9b9 100755
--- a/bb.sh
+++ b/bb.sh
@@ -5,6 +5,18 @@
# https://github.com/carlesfe/bashblog/contributors
# Check out README.md for more details
+# Some shell settings for robustness by default. These help eliminate
+# unexpected snags and security vulnerabilities in case someone forgets to
+# quote a variable somewhere. They do require a few coding adaptations.
+
+IFS=$'\n' # Globally, we do word splitting only on newline (which also
+ # makes "$*" expand with newline separator instead of space).
+
+set -f # Disable globbing (pathname expansion). It can be re-enabled
+ # locally using 'set +f'; it's handy to do this in a subshell,
+ # for example in $(command substitution), as the globbing will
+ # be local to the subshell.
+
# Global variables
# It is recommended to perform a 'rebuild' after changing any of this in the code
@@ -252,6 +264,14 @@ get_html_file_content() {
}"
}
+# Invoke the editor specified by the $EDITOR environment variable. Use a
+# function for this as we need to locally word-split $EDITOR on spaces
+# (in case it contains arguments, like EDITOR='joe -nobackups').
+invoke_editor() {
+ local IFS=$' \t\n'
+ $EDITOR "$1"
+}
+
# Edit an existing, published .html file while keeping its original timestamp
# Please note that this function does not automatically republish anything, as
# it is usually called from 'main'.
@@ -270,7 +290,7 @@ edit() {
touch_timestamp=$(LC_ALL=C date -r "${1%%.*}.html" +'%Y%m%d%H%M')
tags_before=$(tags_in_post "${1%%.*}.html")
if [[ $2 == full ]]; then
- $EDITOR "$1"
+ invoke_editor "$1"
filename=$1
else
if [[ ${1##*.} == md ]]; then
@@ -280,7 +300,7 @@ edit() {
exit
fi
# editing markdown file
- $EDITOR "$1"
+ invoke_editor "$1"
TMPFILE=$(markdown "$1")
filename=${1%%.*}.html
else
@@ -290,7 +310,7 @@ edit() {
get_post_title "$1" > "$TMPFILE"
# Post text with plaintext tags
get_html_file_content 'text' 'text' <"$1" | sed "/^$template_tags_line_header/s|\\1|\\1|g" >> "$TMPFILE"
- $EDITOR "$TMPFILE"
+ invoke_editor "$TMPFILE"
filename=$1
fi
rm "$filename"
@@ -306,10 +326,10 @@ edit() {
chmod 644 "$filename"
echo "Posted $filename"
tags_after=$(tags_in_post "$filename")
- relevant_tags=$(echo "$tags_before $tags_after" | tr ',' ' ' | tr ' ' '\n' | sort -u | tr '\n' ' ')
- if [[ ! -z $relevant_tags ]]; then
- relevant_posts="$(posts_with_tags $relevant_tags) $filename"
- rebuild_tags "$relevant_posts" "$relevant_tags"
+ relevant_tags=$(sort -u <<< "$tags_before"$'\n'"$tags_after")
+ if [[ -n $relevant_tags ]]; then
+ relevant_posts=$(posts_with_tags $relevant_tags)$'\n'$filename
+ rebuild_tags $relevant_posts --tags $relevant_tags
fi
}
@@ -475,10 +495,11 @@ create_html_page() {
parse_file() {
# Read for the title and check that the filename is ok
title=""
- while IFS='' read -r line; do
+ while read -r line; do
if [[ -z $title ]]; then
# remove extra <p> and </p> added by markdown
- title=$(echo "$line" | sed 's/<\/*p>//g')
+ title=${line#}
+ title=${title%
}
if [[ -n $3 ]]; then
filename=$3
else
@@ -498,13 +519,14 @@ parse_file() {
content=$filename.tmp
# Parse possible tags
elif [[ $line == "$template_tags_line_header"* ]]; then
- tags=$(echo "$line" | cut -d ":" -f 2- | sed -e 's/<\/p>//g' -e 's/^ *//' -e 's/ *$//' -e 's/, /,/g')
- IFS=, read -r -a array <<< "$tags"
-
echo -n "
$template_tags_line_header " >> "$content"
- for item in "${array[@]}"; do
- echo -n "$item, "
- done | sed 's/, $/<\/p>/g' >> "$content"
+ sed "s%
%%g
+ s/^.*:[[:blank:]]*//
+ s/[[:blank:]]\$//
+ s/[[:blank:]]*,[[:blank:]]*/,/g
+ s%\([^,]*\),%\1, %g
+ s%, \([^,]*\)\$%, \1
%
+ " <<< "$line" >> "$content"
else
echo "$line" >> "$content"
fi
@@ -565,7 +587,7 @@ EOF
filename=""
while [[ $post_status != "p" && $post_status != "P" ]]; do
[[ -n $filename ]] && rm "$filename" # Delete the generated html file, if any
- $EDITOR "$TMPFILE"
+ invoke_editor "$TMPFILE"
if [[ $fmt == md ]]; then
html_from_md=$(markdown "$TMPFILE")
parse_file "$html_from_md"
@@ -607,8 +629,8 @@ EOF
echo "Posted $filename"
relevant_tags=$(tags_in_post $filename)
if [[ -n $relevant_tags ]]; then
- relevant_posts="$(posts_with_tags $relevant_tags) $filename"
- rebuild_tags "$relevant_posts" "$relevant_tags"
+ relevant_posts=$(posts_with_tags $relevant_tags)$'\n'$filename
+ rebuild_tags $relevant_posts --tags $relevant_tags
fi
}
@@ -623,7 +645,7 @@ all_posts() {
{
echo "$template_archive_title
"
prev_month=""
- while IFS='' read -r i; do
+ for i in $(set +f; ls -t ./*.html); do
is_boilerplate_file "$i" && continue
echo -n "." 1>&3
# Month headers
@@ -640,7 +662,7 @@ all_posts() {
# Date
date=$(LC_ALL=$date_locale date -r "$i" +"$date_format")
echo " $date"
- done < <(ls -t ./*.html)
+ done
echo "" 1>&3
echo ""
echo ""
@@ -663,7 +685,7 @@ all_tags() {
{
echo "$template_tags_title
"
echo ""
- for i in $prefix_tags*.html; do
+ for i in $(set +f; printf '%s\n' $prefix_tags*.html); do
[[ -f "$i" ]] || break
echo -n "." 1>&3
nposts=$(grep -c "<\!-- text begin -->" "$i")
@@ -696,7 +718,8 @@ rebuild_index() {
# Create the content file
{
n=0
- while IFS='' read -r i; do
+ for i in $(set +f; ls -t ./*.html) # sort by date, newest first
+ do
is_boilerplate_file "$i" && continue;
if ((n >= number_of_index_articles)); then break; fi
if [[ -n $cut_do ]]; then
@@ -706,7 +729,7 @@ rebuild_index() {
fi
echo -n "." 1>&3
n=$(( n + 1 ))
- done < <(ls -t ./*.html) # sort by date, newest first
+ done
feed=$blog_feed
if [[ -n $global_feedburner ]]; then feed=$global_feedburner; fi
@@ -723,9 +746,18 @@ rebuild_index() {
# Finds all tags referenced in one post.
# Accepts either filename as first argument, or post content at stdin
-# Prints one line with space-separated tags to stdout
+# Prints tags to stdout, one per line.
+# (Since we're doing global IFS word splitting on newline only,
+# something like 'for tag in $(tags_in_post $i)' will work.)
tags_in_post() {
- sed -n "/^$template_tags_line_header/{s/^
$template_tags_line_header//;s/<[^>]*>//g;s/[ ,]\+/ /g;p;}" "$1" | tr ', ' ' '
+ local newline=$'\n'
+ sed -n "/^
$template_tags_line_header/ {
+ s/^
$template_tags_line_header[[:blank:]]*//
+ s/[[:blank:]]*<[^>]*>[[:blank:]]*//g
+ s/[[:blank:]]*,[[:blank:]]*/,/g
+ s/,\+/\\$newline/g
+ p
+ }" "$1"
}
# Finds all posts referenced in a number of tags.
@@ -741,17 +773,15 @@ posts_with_tags() {
# Rebuilds tag_*.html files
# if no arguments given, rebuilds all of them
# if arguments given, they should have this format:
-# "FILE1 [FILE2 [...]]" "TAG1 [TAG2 [...]]"
+# FILE1 [FILE2 [...]] --tags TAG1 [TAG2 [...]]
# where FILEn are files with posts which should be used for rebuilding tags,
# and TAGn are names of tags which should be rebuilt.
# example:
-# rebuild_tags "one_post.html another_article.html" "example-tag another-tag"
-# mind the quotes!
+# rebuild_tags one_post.html another_article.html --tags example-tag another-tag
rebuild_tags() {
- local IFS=$'\n' # word splitting only on newline; make $* expand with newline as separator
if (($# < 1)); then
# will process all files and tags
- files=( $(ls -t ./*.html) )
+ files=( $(set +f; ls -t ./*.html) )
all_tags=yes
else
# will process only given files and tags
@@ -765,7 +795,7 @@ rebuild_tags() {
echo -n "Rebuilding tag pages "
n=0
if [[ -n $all_tags ]]; then
- rm -f ./"$prefix_tags"*.html
+ ( set +f; rm -f ./"$prefix_tags"*.html )
else
for i in "${tags[@]}"; do
rm -f "./$prefix_tags$i.html"
@@ -792,12 +822,12 @@ rebuild_tags() {
done
rm "$tmpfile"
# Now generate the tag files with headers, footers, etc
- while IFS='' read -r i; do
+ for i in $(set +f; ls -t ./"$prefix_tags"*.tmp.html 2>/dev/null); do
tagname=${i#./"$prefix_tags"}
tagname=${tagname%.tmp.html}
create_html_page "$i" "$prefix_tags$tagname.html" yes "$global_title — $template_tag_title \"$tagname\"" "$global_author"
rm "$i"
- done < <(ls -t ./"$prefix_tags"*.tmp.html 2>/dev/null)
+ done
echo
}
@@ -821,11 +851,12 @@ get_post_author() {
list_tags() {
if [[ $2 == -n ]]; then do_sort=1; else do_sort=0; fi
- ls ./$prefix_tags*.html &> /dev/null
- (($? != 0)) && echo "No posts yet. Use 'bb.sh post' to create one" && return
+ if ! (set +f; set -- $prefix_tags*.html; [[ -e $1 ]]); then
+ echo "No posts yet. Use 'bb.sh post' to create one"
+ return
+ fi
- lines=""
- for i in $prefix_tags*.html; do
+ for i in $(set +f; printf '%s\n' $prefix_tags*.html); do
[[ -f "$i" ]] || break
nposts=$(grep -c "<\!-- text begin -->" "$i")
tagname=${i#"$prefix_tags"}
@@ -844,17 +875,19 @@ list_tags() {
# Displays a list of the posts
list_posts() {
- ls ./*.html &> /dev/null
- (($? != 0)) && echo "No posts yet. Use 'bb.sh post' to create one" && return
+ if ! (set +f; set -- *.html; [[ -e $1 ]]); then
+ echo "No posts yet. Use 'bb.sh post' to create one"
+ return
+ fi
lines=""
n=1
- while IFS='' read -r i; do
+ for i in $(set +f; ls -t ./*.html); do
is_boilerplate_file "$i" && continue
line="$n # $(get_post_title "$i") # $(LC_ALL=$date_locale date -r "$i" +"$date_format")"
lines+=$line\\n
n=$(( n + 1 ))
- done < <(ls -t ./*.html)
+ done
echo -e "$lines" | column -t -s "#"
}
@@ -877,7 +910,7 @@ make_rss() {
echo ""
n=0
- while IFS='' read -r i; do
+ for i in $(set +f; ls -t ./*.html); do
is_boilerplate_file "$i" && continue
((n >= number_of_feed_articles)) && break # max 10 items
echo -n "." 1>&3
@@ -891,7 +924,7 @@ make_rss() {
echo "$(LC_ALL=C date -r "$i" +"%a, %d %b %Y %H:%M:%S %z")"
n=$(( n + 1 ))
- done < <(ls -t ./*.html)
+ done
echo ''
} 3>&1 >"$rssfile"
@@ -989,7 +1022,8 @@ create_css() {
rebuild_all_entries() {
echo -n "Rebuilding all entries "
- for i in ./*.html; do # no need to sort
+ for i in $(set +f; printf '%s\n' *.html) # no need to sort
+ do
is_boilerplate_file "$i" && continue;
contentfile=.tmp.$RANDOM
while [[ -f $contentfile ]]; do contentfile=.tmp.$RANDOM; done
@@ -1042,7 +1076,7 @@ reset() {
echo "Are you sure you want to delete all blog entries? Please write \"Yes, I am!\" "
read -r line
if [[ $line == "Yes, I am!" ]]; then
- rm .*.html ./*.html ./*.css ./*.rss &> /dev/null
+ (set +f; rm -f .*.html ./*.html ./*.css ./*.rss)
echo
echo "Deleted all posts, stylesheets and feeds."
echo "Kept your old '.backup.tar.gz' just in case, please delete it manually if needed."
@@ -1114,9 +1148,9 @@ do_main() {
fi
# Test for existing html files
- if ls ./*.html &> /dev/null; then
+ if (set +f; set -- *.html; [[ -e $1 ]]); then
# We're going to back up just in case
- tar -c -z -f ".backup.tar.gz" -- *.html &&
+ (set +f; tar -c -z -f ".backup.tar.gz" -- *.html) &&
chmod 600 ".backup.tar.gz"
elif [[ $1 == rebuild ]]; then
echo "Can't find any html files, nothing to rebuild"
From 2a29b22a7a7acb7d43e6ac6ca59f45de4100059b Mon Sep 17 00:00:00 2001
From: Martijn Dekker
Date: Tue, 6 Sep 2016 08:41:36 +0200
Subject: [PATCH 3/4] Fix renaming using 'bb.sh edit -n'. Suppress 'which'
errmsg.
edit(): The -n functionality (to rename files according to new title) was
broken. After renaming, files were accessed by the old name and not found,
or empty files were recreated under the old name, or both. Fixes:
- Move 'touch' commands for restoring time stamps to more opportune places.
- When renaming, save old file name to exclude it from $relevant_posts.
global_variables(): suppress GNU 'which' error message on setting markdown_bin.
---
bb.sh | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/bb.sh b/bb.sh
index 23bdab6..536efd2 100755
--- a/bb.sh
+++ b/bb.sh
@@ -159,7 +159,7 @@ global_variables() {
# Markdown location. Trying to autodetect by default.
# The invocation must support the signature 'markdown_bin in.md > out.html'
- markdown_bin=$(which Markdown.pl || which markdown)
+ markdown_bin=$(which Markdown.pl 2>/dev/null || which markdown 2>/dev/null)
}
# Check for the validity of some variables
@@ -296,6 +296,7 @@ edit() {
tags_before=$(tags_in_post "${1%%.*}.html")
if [[ $2 == full ]]; then
invoke_editor "$1"
+ touch -t "$touch_timestamp" "$1"
filename=$1
else
if [[ ${1##*.} == md ]]; then
@@ -306,6 +307,7 @@ edit() {
fi
# editing markdown file
invoke_editor "$1"
+ touch -t "$touch_timestamp" "$1"
TMPFILE=$(markdown "$1")
filename=${1%%.*}.html
else
@@ -320,21 +322,23 @@ edit() {
fi
rm "$filename"
if [[ $2 == keep ]]; then
+ old_filename=''
parse_file "$TMPFILE" "$edit_timestamp" "$filename"
else
+ old_filename=$filename # save old filename to exclude it from $relevant_posts
parse_file "$TMPFILE" "$edit_timestamp" # this command sets $filename as the html processed file
[[ ${1##*.} == md ]] && mv "$1" "${filename%%.*}.md" 2>/dev/null
fi
rm "$TMPFILE"
+ touch -t "$touch_timestamp" "$filename"
fi
- touch -t "$touch_timestamp" "$filename"
- touch -t "$touch_timestamp" "$1"
chmod 644 "$filename"
echo "Posted $filename"
tags_after=$(tags_in_post "$filename")
relevant_tags=$(sort -u <<< "$tags_before"$'\n'"$tags_after")
if [[ -n $relevant_tags ]]; then
relevant_posts=$(posts_with_tags $relevant_tags)$'\n'$filename
+ [[ -n $old_filename ]] && relevant_posts=$(grep -vFx "$old_filename" <<<"$relevant_posts")
rebuild_tags $relevant_posts --tags $relevant_tags
fi
}
From f50a17cdabc370798d25e1ff46e49ee8159dccf0 Mon Sep 17 00:00:00 2001
From: Martijn Dekker
Date: Wed, 7 Sep 2016 18:27:34 +0200
Subject: [PATCH 4/4] tags_in_post(): bugfix for non-GNU 'sed'
The 'sed' script in tags_in_post() used a GNU-specific feature, `\+`.
This became unnecessary anyway after previous edits, so remove it.
Also replace whitespace-comma-whitespace by newline directly instead
of doing an intermediary replace.
---
bb.sh | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/bb.sh b/bb.sh
index 536efd2..4b55df5 100755
--- a/bb.sh
+++ b/bb.sh
@@ -770,8 +770,7 @@ tags_in_post() {
sed -n "/^$template_tags_line_header/ {
s/^
$template_tags_line_header[[:blank:]]*//
s/[[:blank:]]*<[^>]*>[[:blank:]]*//g
- s/[[:blank:]]*,[[:blank:]]*/,/g
- s/,\+/\\$newline/g
+ s/[[:blank:]]*,[[:blank:]]*/\\$newline/g
p
}" "$1"
}