Rewrite split script in perl

This commit is contained in:
Ben Edgington
2022-06-14 10:47:46 +01:00
parent beb7dcb5cf
commit 16a785b86f
3 changed files with 108 additions and 136 deletions

View File

@@ -1,135 +0,0 @@
#!/usr/bin/gawk -f
# Note: run this via "update.sh" rather than directly.
#
# Split the master markdown file into separate files for each page.
#
# - Creates a directory structure reflecting the path hierarchy of the
# pages.
# - Prepends Gatsby frontmatter to each file.
# - Rewrites links for image files so that the original file can use
# one path and the split files another.
#
# Rules:
# - New pages can start at markdown heading levels 1, 2, or 3.
# - To start a new page, append " <!-- /path/to/page -->" to the heading.
# - The file containing the page will end up in "./md/pages/path/to/page.md"
# - For the page to be marked "hidden", append a "*" to the path.
# - Images matching /<img src="md.*"/ will have their paths naively rewritten.
# Quote a string so that the shell treats it as one literal word: wrap it in
# single quotes and replace each embedded single quote with '\''.
function shell_quote(s) {
    gsub(/'/, "'\\\\''", s)
    return "'" s "'"
}

# Build the relative prefix that replaces the leading "md" of an image path:
# drop the first character of the page path, then turn every remaining path
# component into "..".
function image_prefix(path,    p) {
    p = substr(path, 2)
    gsub(/[^/]*/, "..", p)
    return p
}

BEGIN {
    n = 0                                   # Number of pages started so far
    filename_prefix = "md/pages"
    # Anything that appears before the first page marker is written here
    filename = filename_prefix "/error.md"
    h_path = ""
    h_part = ""
    h_chapter = ""
    h_section = ""
    h_part_no = -1                          # Number parts from 0
    h_chapter_no = 0
    h_section_no = 0
}

# Headings with an HTML comment at the end trigger a new page
/^(# |## |### ).* <!-- .* -->$/ {
    # Finish the previous page, if any, before starting a new one
    if (n > 0) {
        close (filename)
    }
    n++

    # Pull the title and the page path out of the heading line
    name = gensub(/^#+ (.*) <!-- .* -->$/, "\\1", "1")
    h_path = gensub(/^#+ .* <!-- (.*) -->$/, "\\1", "1")

    # Is this page hidden? A trailing "*" on the path marks it so.
    if (h_path ~ /\*$/) {
        h_path = substr(h_path, 1, length(h_path) - 1)
        h_hide = "true"
    } else {
        h_hide = "false"
    }

    # Make filesystem path for writing the file. The directory name is
    # shell-quoted because it is derived from the input document.
    file_path = h_path
    sub(/\/[^/]+$/, "", file_path)
    out_dir = filename_prefix file_path
    if (system("mkdir -p " shell_quote(out_dir) " 2>/dev/null") != 0) {
        print "Unable to create " out_dir > "/dev/stderr"
        exit (1)
    }
    filename = filename_prefix h_path ".md"
    print filename

    # Update the part/chapter/section titles and counters for this level
    switch ($0) {
    case /^# /:
        h_part = name
        h_chapter = ""
        h_section = ""
        h_part_no++
        h_chapter_no = 0
        idx = h_part_no
        break
    case /^## /:
        h_chapter = name
        h_section = ""
        h_chapter_no++
        h_section_no = 0
        idx = h_part_no "," h_chapter_no
        break
    case /^### /:
        h_section = name
        h_section_no++
        idx = h_part_no "," h_chapter_no "," h_section_no
        break
    default:
        print "Internal error"
        exit (1)
    }

    # Frontmatter
    print "---" > filename
    print "hide: " h_hide > filename
    print "path: " h_path > filename
    print "titles: [\"" h_part "\",\"" h_chapter "\",\"" h_section "\"]" > filename
    print "index: [" idx "]" > filename
    print "sequence: " n > filename
    print "---" > filename

    # Page headings: the enclosing part/chapter titles go inside a <div>,
    # followed by the heading of the page itself.
    if (h_section != "") {
        print "\n<div class=\"section-header\">\n" > filename
        print "# " h_part "\n" > filename
        print "## " h_chapter > filename
        print "\n</div>\n" > filename
        print "### " h_section > filename
    } else if (h_chapter != "") {
        print "\n<div class=\"chapter-header\">\n" > filename
        print "# " h_part > filename
        print "\n</div>\n" > filename
        print "## " h_chapter > filename
    } else {
        print "# " h_part > filename
    }

    next
}

# Rewrite image paths to reflect the directory hierarchy: Markdown format
/!\[.*\]\(md.+\)/ {
    sub(/\(md/, "(" image_prefix(h_path), $0)
    print > filename
    next
}

# Rewrite image paths to reflect the directory hierarchy: HTML format (deprecated)
/<img src="md.*"/ {
    sub(/src="md/, "src=\"" image_prefix(h_path), $0)
    print > filename
    next
}

# Pass through everything else as-is
{
    print > filename
}

107
bin/build/split.pl Executable file
View File

@@ -0,0 +1,107 @@
#!/usr/bin/perl
use strict;
use warnings;
use Fcntl qw(SEEK_SET SEEK_CUR SEEK_END);
use File::Path qw(make_path);
# Split the master markdown file into separate files for each page.
#
# Usage: split.pl FILE
#
# A heading at level 1, 2 or 3 that ends with " <!-- /path/to/page -->"
# starts a new page, which is written to "md/pages/path/to/page.md" with a
# frontmatter block prepended. A "*" at the end of the path marks the page
# as hidden. On all other lines the first "(md" is rewritten so that image
# paths stay valid relative to the location of the split file.

my ($file) = @ARGV;
die "Usage: $0 FILE\n" if not $file;

open my $fh, '<', $file or die "Can't open $file: $!";

my $outFilePrefix = 'md/pages';

my $sequence = 0;           # Running count of pages written
my $thisPart = '';
my $thisChapter = '';
my $thisSection = '';
my $thisPartNo = -1;        # Number parts from 0
my $thisChapterNo = 0;
my $thisSectionNo = 0;
my $imagePrefix;            # Replacement for the "md" at the start of image paths
my $idx;
my $outFile;                # Name of the page currently being written
my $ofh;                    # Handle for $outFile; undefined until the first page

while (my $line = <$fh>) {

    # Captures: heading marker, title, page path, optional "hidden" flag
    if ($line =~ /^(#{1,3} )(.*) <!-- ([^*]*)(\*?) -->$/) {

        my $level = $1;
        my $title = $2;
        my $path = $3;
        my $hide = $4 eq '*' ? 'true' : 'false';

        $sequence++;

        # Finish the previous page. The close is checked because buffered
        # write errors only surface here.
        if (defined $ofh) {
            close $ofh or die "Can't close $outFile: $!";
        }

        $outFile = $outFilePrefix . $path . '.md';
        my $outDirectory = $outFile =~ s|/[^/]+$||r;
        unless (-e $outDirectory or make_path($outDirectory)) {
            die "Unable to create $outDirectory\n";
        }

        open $ofh, '>', $outFile or die "Can't open $outFile for writing: $!";

        # One ".." per component of the page path. substr() is 0-indexed in
        # Perl, so offset 1 drops exactly the leading "/".
        ($imagePrefix = substr($path, 1)) =~ s|[^/]+|..|g;

        if ($level eq '# ') {
            $thisPart = $title;
            $thisChapter = '';
            $thisSection = '';
            $thisPartNo++;
            $thisChapterNo = 0;
            $idx = $thisPartNo;
        } elsif ($level eq '## ') {
            $thisChapter = $title;
            $thisSection = '';
            $thisChapterNo++;
            $thisSectionNo = 0;
            $idx = $thisPartNo . ',' . $thisChapterNo;
        } elsif ($level eq '### ') {
            $thisSection = $title;
            $thisSectionNo++;
            $idx = $thisPartNo . ',' . $thisChapterNo . ',' . $thisSectionNo;
        } else {
            die "Internal error: can't determine heading level.";
        }

        # Frontmatter
        print {$ofh}
            "---\n",
            "hide: $hide\n",
            "path: $path\n",
            "titles: [\"$thisPart\",\"$thisChapter\",\"$thisSection\"]\n",
            "index: [$idx]\n",
            "sequence: $sequence\n",
            "---\n";

        # Page headings: the enclosing part/chapter titles go inside a <div>,
        # followed by the heading of the page itself.
        if ($thisSection ne '') {
            print {$ofh}
                "\n<div class=\"section-header\">\n\n",
                "# $thisPart\n\n",
                "## $thisChapter\n",
                "\n</div>\n\n",
                "### $thisSection\n";
        } elsif ($thisChapter ne '') {
            print {$ofh}
                "\n<div class=\"chapter-header\">\n\n",
                "# $thisPart\n",
                "\n</div>\n\n",
                "## $thisChapter\n";
        } else {
            print {$ofh} "# $thisPart\n";
        }

    } else {

        die "Error: first line of input must be a new page marker" if not defined $ofh;

        # Rewrite any markdown image paths to reflect the directory hierarchy
        $line =~ s/\(md/($imagePrefix/;
        print {$ofh} $line;
    }
}

if (defined $ofh) {
    close $ofh or die "Can't close $outFile: $!";
}
close $fh;

View File

@@ -7,7 +7,7 @@ cd "$(dirname "$0")/../../src"
# Build the markdown pages
rm -rf md/pages/
../bin/build/split.awk book.md
../bin/build/split.pl book.md
# Build the one page annotated spec
rm -f md/annotated.md