From a674ec5837f4c78206d4b0eb6521b8507732694b Mon Sep 17 00:00:00 2001 From: Martijn Dekker Date: Wed, 9 Mar 2016 05:26:20 +0100 Subject: [PATCH 1/4] rebuild_tags(): use array for more robust file handling --- bb.sh | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/bb.sh b/bb.sh index 0956a53..82a9bf2 100755 --- a/bb.sh +++ b/bb.sh @@ -729,8 +729,8 @@ tags_in_post() { } # Finds all posts referenced in a number of tags. -# Arguments are tags -# Prints one line with space-separated tags to stdout +# Arguments are tags. +# Prints file names to stdout, one per line. posts_with_tags() { (($# < 1)) && return set -- "${@/#/$prefix_tags}" @@ -748,31 +748,35 @@ posts_with_tags() { # rebuild_tags "one_post.html another_article.html" "example-tag another-tag" # mind the quotes! rebuild_tags() { - if (($# < 2)); then + local IFS=$'\n' # word splitting only on newline; make $* expand with newline as separator + if (($# < 1)); then # will process all files and tags - files=$(ls -t ./*.html) + files=( $(ls -t ./*.html) ) all_tags=yes else # will process only given files and tags - files=$(printf '%s\n' $1 | sort -u) - files=$(ls -t $files) - tags=$2 + for ((i=1; i<=$#; i++)); do + [[ ${!i} == --tags ]] && break + done + files=( $(ls -t $(sort -u <<< "${*:1:$((i-1))}")) ) + tags=( "${@:$((i+1)):$#}" ) + all_tags='' fi echo -n "Rebuilding tag pages " n=0 if [[ -n $all_tags ]]; then - rm ./"$prefix_tags"*.html &> /dev/null + rm -f ./"$prefix_tags"*.html else - for i in $tags; do - rm "./$prefix_tags$i.html" &> /dev/null + for i in "${tags[@]}"; do + rm -f "./$prefix_tags$i.html" done fi # First we will process all files and create temporal tag files # with just the content of the posts tmpfile=tmp.$RANDOM while [[ -f $tmpfile ]]; do tmpfile=tmp.$RANDOM; done - while IFS='' read -r i; do - is_boilerplate_file "$i" && continue; + for i in "${files[@]}"; do + is_boilerplate_file "$i" && continue echo -n "." 
if [[ -n $cut_do ]]; then get_html_file_content 'entry' 'entry' 'cut' <"$i" | awk "/$cut_line/ { print \"

$template_read_more

\" ; next } 1" @@ -780,11 +784,12 @@ rebuild_tags() { get_html_file_content 'entry' 'entry' <"$i" fi >"$tmpfile" for tag in $(tags_in_post "$i"); do - if [[ -n $all_tags || " $tags " == *" $tag "* ]]; then + # if either all tags or array tags[] contains $tag... + if [[ -n $all_tags || $'\n'"${tags[*]}"$'\n' == *$'\n'"$tag"$'\n'* ]]; then cat "$tmpfile" >> "$prefix_tags$tag".tmp.html fi done - done <<< "$files" + done rm "$tmpfile" # Now generate the tag files with headers, footers, etc while IFS='' read -r i; do From 54cc0c80600035b3ac09f2a50a60f1cfb33b3942 Mon Sep 17 00:00:00 2001 From: Martijn Dekker Date: Tue, 6 Sep 2016 03:24:01 +0200 Subject: [PATCH 2/4] More code refactoring. Limit word splitting and disable globbing by default. - Globally, now do word splitting (IFS) only on newline (which also makes "$*" expand with newline separator instead of space). - Disable globbing (pathname expansion), to be re-enabled locally using 'set +f' where needed (typically in a subshell). These changes help eliminate unexpected snags and security vulnerabilities in case someone forgets to quote a variable somewhere. They should also make the code "just work" with spaces and other special characters in file names and tags (as long as they're not newline characters, but that can't happen with regular use of the script as the newline is the separator). This means that, as of this change, editing or completely emptying the convert_filename filter should no longer pose any problems as far as bb.sh is concerned. The changes to adapt the code to the above are mainly: - Now that we do word splitting on newline only, we can go back to iterating through files in a "for" loop instead of using "read" with a here-document, which is more readable. 
However, to enable globbing locally, a technique adaptation is needed, like: for file in $(set +f; printf '%s\n' *.html) or for file in $(set +f; ls -t -- *.html) # sort by date, newest first Given IFS=$'\n' and globbing disabled globally, this technique is robust for all special characters in file names except for newlines. - invoke_editor() function replaces direct $EDITOR calls, because we need to locally word-split $EDITOR on spaces in case it contains arguments. - parse_file(): rewrite tag parsing to handle possible spaces in tags - tags_in_post(): output line-separated instead of space-separated tags; further adjust sed script to handle possible spaces in tags - rebuild_tags(): this function was refactored to use an array internally. Instead of two combined strings, it now takes HTML files and tags as separate arguments, separated by a single "--tags" argument. This allows for spaces and other special characters in both file names and tags. (See also commit a674ec5, which started this but didn't finish it). --- bb.sh | 128 +++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 81 insertions(+), 47 deletions(-) diff --git a/bb.sh b/bb.sh index 09136c3..144f9b9 100755 --- a/bb.sh +++ b/bb.sh @@ -5,6 +5,18 @@ # https://github.com/carlesfe/bashblog/contributors # Check out README.md for more details +# Some shell settings for robustness by default. These help eliminate +# unexpected snags and security vulnerabilities in case someone forgets to +# quote a variable somewhere. They do require a few coding adaptations. + +IFS=$'\n' # Globally, we do word splitting only on newline (which also + # makes "$*" expand with newline separator instead of space). + +set -f # Disable globbing (pathname expansion). It can be re-enabled + # locally using 'set +f'; it's handy to do this in a subshell, + # for example in $(command substitution), as the globbing will + # be local to the subshell. 
+ # Global variables # It is recommended to perform a 'rebuild' after changing any of this in the code @@ -252,6 +264,14 @@ get_html_file_content() { }" } +# Invoke the editor specified by the $EDITOR environment variable. Use a +# function for this as we need to locally word-split $EDITOR on spaces +# (in case it contains arguments, like EDITOR='joe -nobackups). +invoke_editor() { + local IFS=$' \t\n' + $EDITOR "$1" +} + # Edit an existing, published .html file while keeping its original timestamp # Please note that this function does not automatically republish anything, as # it is usually called from 'main'. @@ -270,7 +290,7 @@ edit() { touch_timestamp=$(LC_ALL=C date -r "${1%%.*}.html" +'%Y%m%d%H%M') tags_before=$(tags_in_post "${1%%.*}.html") if [[ $2 == full ]]; then - $EDITOR "$1" + invoke_editor "$1" filename=$1 else if [[ ${1##*.} == md ]]; then @@ -280,7 +300,7 @@ edit() { exit fi # editing markdown file - $EDITOR "$1" + invoke_editor "$1" TMPFILE=$(markdown "$1") filename=${1%%.*}.html else @@ -290,7 +310,7 @@ edit() { get_post_title "$1" > "$TMPFILE" # Post text with plaintext tags get_html_file_content 'text' 'text' <"$1" | sed "/^

$template_tags_line_header/s|\\1|\\1|g" >> "$TMPFILE" - $EDITOR "$TMPFILE" + invoke_editor "$TMPFILE" filename=$1 fi rm "$filename" @@ -306,10 +326,10 @@ edit() { chmod 644 "$filename" echo "Posted $filename" tags_after=$(tags_in_post "$filename") - relevant_tags=$(echo "$tags_before $tags_after" | tr ',' ' ' | tr ' ' '\n' | sort -u | tr '\n' ' ') - if [[ ! -z $relevant_tags ]]; then - relevant_posts="$(posts_with_tags $relevant_tags) $filename" - rebuild_tags "$relevant_posts" "$relevant_tags" + relevant_tags=$(sort -u <<< "$tags_before"$'\n'"$tags_after") + if [[ -n $relevant_tags ]]; then + relevant_posts=$(posts_with_tags $relevant_tags)$'\n'$filename + rebuild_tags $relevant_posts --tags $relevant_tags fi } @@ -475,10 +495,11 @@ create_html_page() { parse_file() { # Read for the title and check that the filename is ok title="" - while IFS='' read -r line; do + while read -r line; do if [[ -z $title ]]; then # remove extra

and

added by markdown - title=$(echo "$line" | sed 's/<\/*p>//g') + title=${line#

} + title=${title%

} if [[ -n $3 ]]; then filename=$3 else @@ -498,13 +519,14 @@ parse_file() { content=$filename.tmp # Parse possible tags elif [[ $line == "

$template_tags_line_header"* ]]; then - tags=$(echo "$line" | cut -d ":" -f 2- | sed -e 's/<\/p>//g' -e 's/^ *//' -e 's/ *$//' -e 's/, /,/g') - IFS=, read -r -a array <<< "$tags" - echo -n "

$template_tags_line_header " >> "$content" - for item in "${array[@]}"; do - echo -n "$item, " - done | sed 's/, $/<\/p>/g' >> "$content" + sed "s%

%%g + s/^.*:[[:blank:]]*// + s/[[:blank:]]\$// + s/[[:blank:]]*,[[:blank:]]*/,/g + s%\([^,]*\),%\1, %g + s%, \([^,]*\)\$%, \1

% + " <<< "$line" >> "$content" else echo "$line" >> "$content" fi @@ -565,7 +587,7 @@ EOF filename="" while [[ $post_status != "p" && $post_status != "P" ]]; do [[ -n $filename ]] && rm "$filename" # Delete the generated html file, if any - $EDITOR "$TMPFILE" + invoke_editor "$TMPFILE" if [[ $fmt == md ]]; then html_from_md=$(markdown "$TMPFILE") parse_file "$html_from_md" @@ -607,8 +629,8 @@ EOF echo "Posted $filename" relevant_tags=$(tags_in_post $filename) if [[ -n $relevant_tags ]]; then - relevant_posts="$(posts_with_tags $relevant_tags) $filename" - rebuild_tags "$relevant_posts" "$relevant_tags" + relevant_posts=$(posts_with_tags $relevant_tags)$'\n'$filename + rebuild_tags $relevant_posts --tags $relevant_tags fi } @@ -623,7 +645,7 @@ all_posts() { { echo "

$template_archive_title

" prev_month="" - while IFS='' read -r i; do + for i in $(set +f; ls -t ./*.html); do is_boilerplate_file "$i" && continue echo -n "." 1>&3 # Month headers @@ -640,7 +662,7 @@ all_posts() { # Date date=$(LC_ALL=$date_locale date -r "$i" +"$date_format") echo " $date" - done < <(ls -t ./*.html) + done echo "" 1>&3 echo "" echo "" @@ -663,7 +685,7 @@ all_tags() { { echo "

$template_tags_title

" echo "