From 9cb56dabb676391a9382731347e8d2b07b9437a5 Mon Sep 17 00:00:00 2001 From: Peter Mikkelsen Date: Sun, 7 Apr 2024 13:25:49 +0200 Subject: big cleanup --- bin/aux/addwuser.rc | 33 - bin/aux/bpst.rc | 64 -- bin/aux/gensitemaptxt.rc | 14 - bin/aux/runtsts.rc | 16 - bin/contrib/fix-rc-scripts | 27 - bin/contrib/hgweb.config | 12 - bin/contrib/hgwebdir.cgi | 47 - bin/contrib/markdown.pl | 1447 ------------------------- bin/contrib/md2html.awk | 427 -------- bin/contrib/rc-httpd/handlers/authorize | 6 - bin/contrib/rc-httpd/handlers/cgi | 46 - bin/contrib/rc-httpd/handlers/dir-index | 111 -- bin/contrib/rc-httpd/handlers/error | 43 - bin/contrib/rc-httpd/handlers/redirect | 30 - bin/contrib/rc-httpd/handlers/serve-static | 43 - bin/contrib/rc-httpd/handlers/static-or-cgi | 14 - bin/contrib/rc-httpd/handlers/static-or-index | 5 - bin/contrib/rc-httpd/lib/urldecode.awk | 39 - bin/contrib/rc-httpd/rc-httpd | 102 -- bin/contrib/rc-httpd/select-handler | 20 - bin/contrib/tcp80 | 7 - bin/contrib/urldecode.awk | 39 - bin/contrib/urlencode.awk | 126 --- bin/contrib/webserver.rc | 30 - bin/md2html.awk | 427 ++++++++ bin/werc.rc | 5 - 26 files changed, 427 insertions(+), 2753 deletions(-) delete mode 100755 bin/aux/addwuser.rc delete mode 100755 bin/aux/bpst.rc delete mode 100755 bin/aux/gensitemaptxt.rc delete mode 100755 bin/aux/runtsts.rc delete mode 100755 bin/contrib/fix-rc-scripts delete mode 100755 bin/contrib/hgweb.config delete mode 100755 bin/contrib/hgwebdir.cgi delete mode 100755 bin/contrib/markdown.pl delete mode 100755 bin/contrib/md2html.awk delete mode 100755 bin/contrib/rc-httpd/handlers/authorize delete mode 100755 bin/contrib/rc-httpd/handlers/cgi delete mode 100755 bin/contrib/rc-httpd/handlers/dir-index delete mode 100755 bin/contrib/rc-httpd/handlers/error delete mode 100755 bin/contrib/rc-httpd/handlers/redirect delete mode 100755 bin/contrib/rc-httpd/handlers/serve-static delete mode 100755 bin/contrib/rc-httpd/handlers/static-or-cgi delete mode 100755 bin/contrib/rc-httpd/handlers/static-or-index delete mode 100755 bin/contrib/rc-httpd/lib/urldecode.awk delete mode 100755 bin/contrib/rc-httpd/rc-httpd delete mode 100755 bin/contrib/rc-httpd/select-handler delete mode 100755 bin/contrib/tcp80 delete mode 100755 bin/contrib/urldecode.awk delete mode 100755 bin/contrib/urlencode.awk delete mode 100755 bin/contrib/webserver.rc create mode 100755 bin/md2html.awk (limited to 'bin') diff --git a/bin/aux/addwuser.rc b/bin/aux/addwuser.rc deleted file mode 100755 index 9364d39..0000000 --- a/bin/aux/addwuser.rc +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/rc - -if(! ~ $#werc_root 0) - cd $werc_root - -fn usage { - if(! ~ $#* 0) - echo $0: $* >[1=2] - echo 'Usage:' $0 'user_name user_password [groups ...]' >[1=2] - exit usage -} - -if(! test -d etc/users/) - usage 'Run for root of werc installation or set $werc_root' - -user_name=$1 -shift -user_pass=$1 -shift -user_groups=$* - -if(~ $"user_name '' || ~ $"user_pass '') - usage - -mkdir etc/users/$user_name -echo $user_pass > etc/users/$user_name/password - -if(! ~ $#user_groups 0) - for(g in $user_groups) { - mkdir -p etc/users/$g - echo $user_name >> etc/users/$g/members - } - diff --git a/bin/aux/bpst.rc b/bin/aux/bpst.rc deleted file mode 100755 index e60d034..0000000 --- a/bin/aux/bpst.rc +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/rc - -path=( $PLAN9/bin $path ) -base=. - -if(~ $#user 0) - user=`{whoami} - -file=(); title=(); -bloguser=$user -while(! ~ $#* 0) { - switch($1) { - case -u - base=/gsoc/www/people/$user/blog/ - case -b - shift - base=$1 - case -f - shift - file=$1 - } - shift -} - -if(~ $"EDITOR '') - EDITOR=vi - -if(~ $#file 0 || ! test -f $file) { - file=/tmp/blogtmp.$pid - rm $file >[2]/dev/null - touch $file -} - -$EDITOR $file -ispell $file -rm $file.bak >[2]/dev/null - -fn mkbpost { - umask 002 # Let group write - bptext=$1 - if(! ~ $#2 0) - bpid=`{echo -n '-'^$"bpid | sed 's/'$forbidden_uri_chars'+/_/g; 1q'} - d=`{/bin/date +%F|sed 's,-,/,g'} - - ddir=$blagh_root^$d^'/' - n=`{ls $ddir >[2]/dev/null |wc -l} - - mkdir -p $ddir/$"n^$"bpid/ - { - # TODO: Enable metadata - #echo '* Posted:' `{date} - #if(! ~ $#logged_user 0) - # echo '* Author: '$logged_user - cat $bptext - }> $ddir/$"n^$"bpid/index.md -} - -forbidden_uri_chars='[^a-zA-Z0-9_+\-\/\.]' -blagh_root=$base - -if(test -s $file) - mkbpost $file -if not - echo Empty file! diff --git a/bin/aux/gensitemaptxt.rc b/bin/aux/gensitemaptxt.rc deleted file mode 100755 index a1b349d..0000000 --- a/bin/aux/gensitemaptxt.rc +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/rc -# DEPRECATED: sitemap.tpl now generates and updates a sitemap.txt when requested, and is also more smart than this simplistic script. - -for(d in sites/*/) { -echo $d -9 du -a $d | awk '/\.(md|html)$/ { print $2 }; {}' | 9 sed -e 's/\.(md|html)$//' -e 's,/index$,/,' -e 's,^sites/,http://,' > $d/sitemap.txt - -if(! test -f $d/robots.txt) { - echo generating missing robots.txt for $d - echo $d|sed 's,sites/,Sitemap: http://,; s/$/sitemap.txt/;' > $d/robots.txt - cat $d/robots.txt -} - -} diff --git a/bin/aux/runtsts.rc b/bin/aux/runtsts.rc deleted file mode 100755 index b5b1df7..0000000 --- a/bin/aux/runtsts.rc +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/rc - -tstdom='http://test.cat-v.org' - -cd sites/tst.cat-v.org - -tstfiles=`{du -a |awk '/\.tst$/ { print $2 }; {} ' | sed 's/^\.//; s/\.tst$//'} - -for(f in $tstfiles) { - ifs=' -' { tsts=`{cat ./$f.tst} } - - for(t in $tsts) { - echo tst $t - } -} diff --git a/bin/contrib/fix-rc-scripts b/bin/contrib/fix-rc-scripts deleted file mode 100755 index beb21c5..0000000 --- a/bin/contrib/fix-rc-scripts +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/local/plan9/bin/rc - -# Fix rc shell scripts to find rc without launching env every time. -# Invoke with rc and plan9 versions of grep and ed in $PATH - -# If your system lacks which (e.g. some gnu/linux) -# substitute the full path to rc in this line: -rc=/usr/local/plan9/bin/rc -firstline='#!'$"rc - -if(~ $#* 0) files = * -if not files = $* - -myname = `{basename $0} - -for(file in $files) { - if(test -d $file) $0 $file/* - if not if(~ $file *$myname) {} - if not if(sed 1q $file | grep '^#!/.*[/ ]rc$' > /dev/null) { - { - echo 1c - echo $firstline - echo . - echo wq - } | ed $file > /dev/null - } -} diff --git a/bin/contrib/hgweb.config b/bin/contrib/hgweb.config deleted file mode 100755 index fba802b..0000000 --- a/bin/contrib/hgweb.config +++ /dev/null @@ -1,12 +0,0 @@ -[web] -style = gitweb -allow_archive = bz2 - -#[paths] -#w9 = /gsoc/hg/w9/ - -[collections] -#allow_archive = bz2 zip -/gsoc/hg = /gsoc/hg/ -#/var/hg = /var/hg/ - diff --git a/bin/contrib/hgwebdir.cgi b/bin/contrib/hgwebdir.cgi deleted file mode 100755 index 5fe4b16..0000000 --- a/bin/contrib/hgwebdir.cgi +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env python -# -# An example CGI script to export multiple hgweb repos, edit as necessary - -# send python tracebacks to the browser if an error occurs: -import cgitb -cgitb.enable() - -# adjust python path if not a system-wide install: -#import sys -#sys.path.insert(0, "/path/to/python/lib") - -# If you'd like to serve pages with UTF-8 instead of your default -# locale charset, you can do so by uncommenting the following lines. -# Note that this will cause your .hgrc files to be interpreted in -# UTF-8 and all your repo files to be displayed using UTF-8. -# -#import os -#os.environ["HGENCODING"] = "UTF-8" - -from mercurial.hgweb.hgwebdir_mod import hgwebdir -from mercurial.hgweb.request import wsgiapplication -import mercurial.hgweb.wsgicgi as wsgicgi - -# The config file looks like this. You can have paths to individual -# repos, collections of repos in a directory tree, or both. -# -# [paths] -# virtual/path = /real/path -# virtual/path = /real/path -# -# [collections] -# /prefix/to/strip/off = /root/of/tree/full/of/repos -# -# collections example: say directory tree /foo contains repos /foo/bar, -# /foo/quux/baz. Give this config section: -# [collections] -# /foo = /foo -# Then repos will list as bar and quux/baz. -# -# Alternatively you can pass a list of ('virtual/path', '/real/path') tuples -# or use a dictionary with entries like 'virtual/path': '/real/path' - -def make_web_app(): - return hgwebdir("hgweb.config") - -wsgicgi.launch(wsgiapplication(make_web_app)) diff --git a/bin/contrib/markdown.pl b/bin/contrib/markdown.pl deleted file mode 100755 index 3758a87..0000000 --- a/bin/contrib/markdown.pl +++ /dev/null @@ -1,1447 +0,0 @@ -#!/usr/bin/env perl -# -# Markdown -- A text-to-HTML conversion tool for web writers -# -# Copyright (c) 2004 John Gruber -# -# -package Markdown; -require 5.006_000; -use strict; -use warnings; - -use Digest::MD5 qw(md5_hex); -use vars qw($VERSION); -$VERSION = '1.0.1'; -# Tue 14 Dec 2004 - -## Disabled; causes problems under Perl 5.6.1: -# use utf8; -# binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html - - -# -# Global default settings: -# -my $g_empty_element_suffix = " />"; # Change to ">" for HTML output -my $g_tab_width = 4; - - -# -# Globals: -# - -# Regex to match balanced [brackets]. See Friedl's -# "Mastering Regular Expressions", 2nd Ed., pp. 328-331. -my $g_nested_brackets; -$g_nested_brackets = qr{ - (?> # Atomic matching - [^\[\]]+ # Anything other than brackets - | - \[ - (??{ $g_nested_brackets }) # Recursive set of nested brackets - \] - )* -}x; - - -# Table of hash values for escaped characters: -my %g_escape_table; -foreach my $char (split //, '\\`*_{}[]()>#+-.!') { - $g_escape_table{$char} = md5_hex($char); -} - - -# Global hashes, used by various utility routines -my %g_urls; -my %g_titles; -my %g_html_blocks; - -# Used to track when we're inside an ordered or unordered list -# (see _ProcessListItems() for details): -my $g_list_level = 0; - - -#### Blosxom plug-in interface ########################################## - -# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine -# which posts Markdown should process, using a "meta-markup: markdown" -# header. If it's set to 0 (the default), Markdown will process all -# entries. -my $g_blosxom_use_meta = 0; - -sub start { 1; } -sub story { - my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; - - if ( (! $g_blosxom_use_meta) or - (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) - ){ - $$body_ref = Markdown($$body_ref); - } - 1; -} - - -#### Movable Type plug-in interface ##################################### -eval {require MT}; # Test to see if we're running in MT. -unless ($@) { - require MT; - import MT; - require MT::Template::Context; - import MT::Template::Context; - - eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. - unless ($@) { - require MT::Plugin; - import MT::Plugin; - my $plugin = new MT::Plugin({ - name => "Markdown", - description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)", - doc_link => 'http://daringfireball.net/projects/markdown/' - }); - MT->add_plugin( $plugin ); - } - - MT::Template::Context->add_container_tag(MarkdownOptions => sub { - my $ctx = shift; - my $args = shift; - my $builder = $ctx->stash('builder'); - my $tokens = $ctx->stash('tokens'); - - if (defined ($args->{'output'}) ) { - $ctx->stash('markdown_output', lc $args->{'output'}); - } - - defined (my $str = $builder->build($ctx, $tokens) ) - or return $ctx->error($builder->errstr); - $str; # return value - }); - - MT->add_text_filter('markdown' => { - label => 'Markdown', - docs => 'http://daringfireball.net/projects/markdown/', - on_format => sub { - my $text = shift; - my $ctx = shift; - my $raw = 0; - if (defined $ctx) { - my $output = $ctx->stash('markdown_output'); - if (defined $output && $output =~ m/^html/i) { - $g_empty_element_suffix = ">"; - $ctx->stash('markdown_output', ''); - } - elsif (defined $output && $output eq 'raw') { - $raw = 1; - $ctx->stash('markdown_output', ''); - } - else { - $raw = 0; - $g_empty_element_suffix = " />"; - } - } - $text = $raw ? $text : Markdown($text); - $text; - }, - }); - - # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: - my $smartypants; - - { - no warnings "once"; - $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; - } - - if ($smartypants) { - MT->add_text_filter('markdown_with_smartypants' => { - label => 'Markdown With SmartyPants', - docs => 'http://daringfireball.net/projects/markdown/', - on_format => sub { - my $text = shift; - my $ctx = shift; - if (defined $ctx) { - my $output = $ctx->stash('markdown_output'); - if (defined $output && $output eq 'html') { - $g_empty_element_suffix = ">"; - } - else { - $g_empty_element_suffix = " />"; - } - } - $text = Markdown($text); - $text = $smartypants->($text, '1'); - }, - }); - } -} -else { -#### BBEdit/command-line text filter interface ########################## -# Needs to be hidden from MT (and Blosxom when running in static mode). - - # We're only using $blosxom::version once; tell Perl not to warn us: - no warnings 'once'; - unless ( defined($blosxom::version) ) { - use warnings; - - #### Check for command-line switches: ################# - my %cli_opts; - use Getopt::Long; - Getopt::Long::Configure('pass_through'); - GetOptions(\%cli_opts, - 'version', - 'shortversion', - 'html4tags', - ); - if ($cli_opts{'version'}) { # Version info - print "\nThis is Markdown, version $VERSION.\n"; - print "Copyright 2004 John Gruber\n"; - print "http://daringfireball.net/projects/markdown/\n\n"; - exit 0; - } - if ($cli_opts{'shortversion'}) { # Just the version number string. - print $VERSION; - exit 0; - } - if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML - $g_empty_element_suffix = ">"; - } - - - #### Process incoming text: ########################### - my $text; - { - local $/; # Slurp the whole file - $text = <>; - } - print Markdown($text); - } -} - - - -sub Markdown { -# -# Main function. The order in which other subs are called here is -# essential. Link and image substitutions need to happen before -# _EscapeSpecialChars(), so that any *'s or _'s in the -# and tags get encoded. -# - my $text = shift; - - # Clear the global hashes. If we don't clear these, you get conflicts - # from other articles when generating a page which contains more than - # one article (e.g. an index page that shows the N most recent - # articles): - %g_urls = (); - %g_titles = (); - %g_html_blocks = (); - - - # Standardize line endings: - $text =~ s{\r\n}{\n}g; # DOS to Unix - $text =~ s{\r}{\n}g; # Mac to Unix - - # Make sure $text ends with a couple of newlines: - $text .= "\n\n"; - - # Convert all tabs to spaces. - $text = _Detab($text); - - # Strip any lines consisting only of spaces and tabs. - # This makes subsequent regexen easier to write, because we can - # match consecutive blank lines with /\n+/ instead of something - # contorted like /[ \t]*\n+/ . - $text =~ s/^[ \t]+$//mg; - - # Turn block-level HTML blocks into hash entries - $text = _HashHTMLBlocks($text); - - # Strip link definitions, store in hashes. - $text = _StripLinkDefinitions($text); - - $text = _RunBlockGamut($text); - - $text = _UnescapeSpecialChars($text); - - return $text . "\n"; -} - - -sub _StripLinkDefinitions { -# -# Strips link definitions from text, stores the URLs and titles in -# hash references. -# - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Link defs are in the form: ^[id]: url "optional title" - while ($text =~ s{ - ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1 - [ \t]* - \n? # maybe *one* newline - [ \t]* - ? # url = $2 - [ \t]* - \n? # maybe one newline - [ \t]* - (?: - (?<=\s) # lookbehind for whitespace - ["(] - (.+?) # title = $3 - [")] - [ \t]* - )? # title is optional - (?:\n+|\Z) - } - {}mx) { - $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive - if ($3) { - $g_titles{lc $1} = $3; - $g_titles{lc $1} =~ s/"/"/g; - } - } - - return $text; -} - - -sub _HashHTMLBlocks { - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Hashify HTML blocks: - # We only want to do this for block-level HTML tags, such as headers, - # lists, and tables. That's because we still want to wrap

s around - # "paragraphs" that are wrapped in non-block-level tags, such as anchors, - # phrase emphasis, and spans. The list of tags we're looking for is - # hard-coded: - my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/; - my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/; - - # First, look for nested blocks, e.g.: - #

- #
- # tags for inner block must be indented. - #
- #
- # - # The outermost tags must start at the left margin for this to match, and - # the inner nested divs must be indented. - # We need to do this before the next, more liberal match, because the next - # match will start at the first `
` and stop at the first `
`. - $text =~ s{ - ( # save in $1 - ^ # start of line (with /m) - <($block_tags_a) # start tag = $2 - \b # word break - (.*\n)*? # any number of lines, minimally matching - # the matching end tag - [ \t]* # trailing spaces/tabs - (?=\n+|\Z) # followed by a newline or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egmx; - - - # - # Now match more liberally, simply from `\n` to `\n` - # - $text =~ s{ - ( # save in $1 - ^ # start of line (with /m) - <($block_tags_b) # start tag = $2 - \b # word break - (.*\n)*? # any number of lines, minimally matching - .* # the matching end tag - [ \t]* # trailing spaces/tabs - (?=\n+|\Z) # followed by a newline or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egmx; - # Special case just for
. It was easier to make a special case than - # to make the other regex more complicated. - $text =~ s{ - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,$less_than_tab} - <(hr) # start tag = $2 - \b # word break - ([^<>])*? # - /?> # the matching end tag - [ \t]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egx; - - # Special case for standalone HTML comments: - $text =~ s{ - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,$less_than_tab} - (?s: - - ) - [ \t]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egx; - - - return $text; -} - - -sub _RunBlockGamut { -# -# These are all the transformations that form block-level -# tags like paragraphs, headers, and list items. -# - my $text = shift; - - $text = _DoHeaders($text); - - # Do Horizontal Rules: - $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n tags around block-level tags. - $text = _HashHTMLBlocks($text); - - $text = _FormParagraphs($text); - - return $text; -} - - -sub _RunSpanGamut { -# -# These are all the transformations that occur *within* block-level -# tags like paragraphs, headers, and list items. -# - my $text = shift; - - $text = _DoCodeSpans($text); - - $text = _EscapeSpecialChars($text); - - # Process anchor and image tags. Images must come first, - # because ![foo][f] looks like an anchor. - $text = _DoImages($text); - $text = _DoAnchors($text); - - # Make links out of things like `` - # Must come after _DoAnchors(), because you can use < and > - # delimiters in inline links like [this](). - $text = _DoAutoLinks($text); - - $text = _EncodeAmpsAndAngles($text); - - $text = _DoItalicsAndBold($text); - - # Do hard breaks: - $text =~ s/ {2,}\n/ or tags. -# my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!; - - foreach my $cur_token (@$tokens) { - if ($cur_token->[0] eq "tag") { - # Within tags, encode * and _ so they don't conflict - # with their use in Markdown for italics and strong. - # We're replacing each such character with its - # corresponding MD5 checksum value; this is likely - # overkill, but it should prevent us from colliding - # with the escape values by accident. - $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx; - $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx; - $text .= $cur_token->[1]; - } else { - my $t = $cur_token->[1]; - $t = _EncodeBackslashEscapes($t); - $text .= $t; - } - } - return $text; -} - - -sub _DoAnchors { -# -# Turn Markdown link shortcuts into XHTML
tags. -# - my $text = shift; - - # - # First, handle reference-style links: [link text] [id] - # - $text =~ s{ - ( # wrap whole match in $1 - \[ - ($g_nested_brackets) # link text = $2 - \] - - [ ]? # one optional space - (?:\n[ ]*)? # one optional newline followed by spaces - - \[ - (.*?) # id = $3 - \] - ) - }{ - my $result; - my $whole_match = $1; - my $link_text = $2; - my $link_id = lc $3; - - if ($link_id eq "") { - $link_id = lc $link_text; # for shortcut links like [this][]. - } - - if (defined $g_urls{$link_id}) { - my $url = $g_urls{$link_id}; - $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid - $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. - $result = "? # href = $3 - [ \t]* - ( # $4 - (['"]) # quote char = $5 - (.*?) # Title = $6 - \5 # matching quote - )? # title is optional - \) - ) - }{ - my $result; - my $whole_match = $1; - my $link_text = $2; - my $url = $3; - my $title = $6; - - $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid - $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. - $result = " tags. -# - my $text = shift; - - # - # First, handle reference-style labeled images: ![alt text][id] - # - $text =~ s{ - ( # wrap whole match in $1 - !\[ - (.*?) # alt text = $2 - \] - - [ ]? # one optional space - (?:\n[ ]*)? # one optional newline followed by spaces - - \[ - (.*?) # id = $3 - \] - - ) - }{ - my $result; - my $whole_match = $1; - my $alt_text = $2; - my $link_id = lc $3; - - if ($link_id eq "") { - $link_id = lc $alt_text; # for shortcut links like ![this][]. - } - - $alt_text =~ s/"/"/g; - if (defined $g_urls{$link_id}) { - my $url = $g_urls{$link_id}; - $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid - $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. - $result = "\"$alt_text\"";? # src url = $3 - [ \t]* - ( # $4 - (['"]) # quote char = $5 - (.*?) # title = $6 - \5 # matching quote - [ \t]* - )? # title is optional - \) - ) - }{ - my $result; - my $whole_match = $1; - my $alt_text = $2; - my $url = $3; - my $title = ''; - if (defined($6)) { - $title = $6; - } - - $alt_text =~ s/"/"/g; - $title =~ s/"/"/g; - $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid - $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. - $result = "\"$alt_text\"";" . _RunSpanGamut($1) . "\n\n"; - }egmx; - - $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{ - "

" . _RunSpanGamut($1) . "

\n\n"; - }egmx; - - - # atx-style headers: - # # Header 1 - # ## Header 2 - # ## Header 2 with closing hashes ## - # ... - # ###### Header 6 - # - $text =~ s{ - ^(\#{1,6}) # $1 = string of #'s - [ \t]* - (.+?) # $2 = Header text - [ \t]* - \#* # optional closing #'s (not counted) - \n+ - }{ - my $h_level = length($1); - "" . _RunSpanGamut($2) . "\n\n"; - }egmx; - - return $text; -} - - -sub _DoLists { -# -# Form HTML ordered (numbered) and unordered (bulleted) lists. -# - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Re-usable patterns to match list item bullets and number markers: - my $marker_ul = qr/[*+-]/; - my $marker_ol = qr/\d+[.]/; - my $marker_any = qr/(?:$marker_ul|$marker_ol)/; - - # Re-usable pattern to match any entirel ul or ol list: - my $whole_list = qr{ - ( # $1 = whole list - ( # $2 - [ ]{0,$less_than_tab} - (${marker_any}) # $3 = first list item marker - [ \t]+ - ) - (?s:.+?) - ( # $4 - \z - | - \n{2,} - (?=\S) - (?! # Negative lookahead for another list item marker - [ \t]* - ${marker_any}[ \t]+ - ) - ) - ) - }mx; - - # We use a different prefix before nested lists than top-level lists. - # See extended comment in _ProcessListItems(). - # - # Note: There's a bit of duplication here. My original implementation - # created a scalar regex pattern as the conditional result of the test on - # $g_list_level, and then only ran the $text =~ s{...}{...}egmx - # substitution once, using the scalar as the pattern. This worked, - # everywhere except when running under MT on my hosting account at Pair - # Networks. There, this caused all rebuilds to be killed by the reaper (or - # perhaps they crashed, but that seems incredibly unlikely given that the - # same script on the same server ran fine *except* under MT. I've spent - # more time trying to figure out why this is happening than I'd like to - # admit. My only guess, backed up by the fact that this workaround works, - # is that Perl optimizes the substition when it can figure out that the - # pattern will never change, and when this optimization isn't on, we run - # afoul of the reaper. Thus, the slightly redundant code to that uses two - # static s/// patterns rather than one conditional pattern. - - if ($g_list_level) { - $text =~ s{ - ^ - $whole_list - }{ - my $list = $1; - my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; - # Turn double returns into triple returns, so that we can make a - # paragraph for the last item in a list, if necessary: - $list =~ s/\n{2,}/\n\n\n/g; - my $result = _ProcessListItems($list, $marker_any); - $result = "<$list_type>\n" . $result . "\n"; - $result; - }egmx; - } - else { - $text =~ s{ - (?:(?<=\n\n)|\A\n?) - $whole_list - }{ - my $list = $1; - my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; - # Turn double returns into triple returns, so that we can make a - # paragraph for the last item in a list, if necessary: - $list =~ s/\n{2,}/\n\n\n/g; - my $result = _ProcessListItems($list, $marker_any); - $result = "<$list_type>\n" . $result . "\n"; - $result; - }egmx; - } - - - return $text; -} - - -sub _ProcessListItems { -# -# Process the contents of a single ordered or unordered list, splitting it -# into individual list items. -# - - my $list_str = shift; - my $marker_any = shift; - - - # The $g_list_level global keeps track of when we're inside a list. - # Each time we enter a list, we increment it; when we leave a list, - # we decrement. If it's zero, we're not in a list anymore. - # - # We do this because when we're not inside a list, we want to treat - # something like this: - # - # I recommend upgrading to version - # 8. Oops, now this line is treated - # as a sub-list. - # - # As a single paragraph, despite the fact that the second line starts - # with a digit-period-space sequence. - # - # Whereas when we're inside a list (or sub-list), that line will be - # treated as the start of a sub-list. What a kludge, huh? This is - # an aspect of Markdown's syntax that's hard to parse perfectly - # without resorting to mind-reading. Perhaps the solution is to - # change the syntax rules such that sub-lists must start with a - # starting cardinal number; e.g. "1." or "a.". - - $g_list_level++; - - # trim trailing blank lines: - $list_str =~ s/\n{2,}\z/\n/; - - - $list_str =~ s{ - (\n)? # leading line = $1 - (^[ \t]*) # leading whitespace = $2 - ($marker_any) [ \t]+ # list marker = $3 - ((?s:.+?) # list item text = $4 - (\n{1,2})) - (?= \n* (\z | \2 ($marker_any) [ \t]+)) - }{ - my $item = $4; - my $leading_line = $1; - my $leading_space = $2; - - if ($leading_line or ($item =~ m/\n{2,}/)) { - $item = _RunBlockGamut(_Outdent($item)); - } - else { - # Recursion for sub-lists: - $item = _DoLists(_Outdent($item)); - chomp $item; - $item = _RunSpanGamut($item); - } - - "
  • " . $item . "
  • \n"; - }egmx; - - $g_list_level--; - return $list_str; -} - - - -sub _DoCodeBlocks { -# -# Process Markdown `
    ` blocks.
    -#	
    -
    -	my $text = shift;
    -
    -	$text =~ s{
    -			(?:\n\n|\A)
    -			(	            # $1 = the code block -- one or more lines, starting with a space/tab
    -			  (?:
    -			    (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
    -			    .*\n+
    -			  )+
    -			)
    -			((?=^[ ]{0,$g_tab_width}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
    -		}{
    -			my $codeblock = $1;
    -			my $result; # return value
    -
    -			$codeblock = _EncodeCode(_Outdent($codeblock));
    -			$codeblock = _Detab($codeblock);
    -			$codeblock =~ s/\A\n+//; # trim leading newlines
    -			$codeblock =~ s/\s+\z//; # trim trailing whitespace
    -
    -			$result = "\n\n
    " . $codeblock . "\n
    \n\n"; - - $result; - }egmx; - - return $text; -} - - -sub _DoCodeSpans { -# -# * Backtick quotes are used for spans. -# -# * You can use multiple backticks as the delimiters if you want to -# include literal backticks in the code span. So, this input: -# -# Just type ``foo `bar` baz`` at the prompt. -# -# Will translate to: -# -#

    Just type foo `bar` baz at the prompt.

    -# -# There's no arbitrary limit to the number of backticks you -# can use as delimters. If you need three consecutive backticks -# in your code, use four for delimiters, etc. -# -# * You can use spaces to get literal backticks at the edges: -# -# ... type `` `bar` `` ... -# -# Turns to: -# -# ... type `bar` ... -# - - my $text = shift; - - $text =~ s@ - (`+) # $1 = Opening run of ` - (.+?) # $2 = The code block - (?$c
    "; - @egsx; - - return $text; -} - - -sub _EncodeCode { -# -# Encode/escape certain characters inside Markdown code runs. -# The point is that in code, these characters are literals, -# and lose their special Markdown meanings. -# - local $_ = shift; - - # Encode all ampersands; HTML entities are not - # entities within a Markdown code span. - s/&/&/g; - - # Encode $'s, but only if we're running under Blosxom. - # (Blosxom interpolates Perl variables in article bodies.) - { - no warnings 'once'; - if (defined($blosxom::version)) { - s/\$/$/g; - } - } - - - # Do the angle bracket song and dance: - s! < !<!gx; - s! > !>!gx; - - # Now, escape characters that are magic in Markdown: - s! \* !$g_escape_table{'*'}!gx; - s! _ !$g_escape_table{'_'}!gx; - s! { !$g_escape_table{'{'}!gx; - s! } !$g_escape_table{'}'}!gx; - s! \[ !$g_escape_table{'['}!gx; - s! \] !$g_escape_table{']'}!gx; - s! \\ !$g_escape_table{'\\'}!gx; - - return $_; -} - - -sub _DoItalicsAndBold { - my $text = shift; - - # must go first: - $text =~ s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 } - {$2}gsx; - - $text =~ s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 } - {$2}gsx; - - return $text; -} - - -sub _DoBlockQuotes { - my $text = shift; - - $text =~ s{ - ( # Wrap whole match in $1 - ( - ^[ \t]*>[ \t]? # '>' at the start of a line - .+\n # rest of the first line - (.+\n)* # subsequent consecutive lines - \n* # blanks - )+ - ) - }{ - my $bq = $1; - $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting - $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines - $bq = _RunBlockGamut($bq); # recurse - - $bq =~ s/^/ /g; - # These leading spaces screw with
     content, so we need to fix that:
    -			$bq =~ s{
    -					(\s*
    .+?
    ) - }{ - my $pre = $1; - $pre =~ s/^ //mg; - $pre; - }egsx; - - "
    \n$bq\n
    \n\n"; - }egmx; - - - return $text; -} - - -sub _FormParagraphs { -# -# Params: -# $text - string to process with html

    tags -# - my $text = shift; - - # Strip leading and trailing lines: - $text =~ s/\A\n+//; - $text =~ s/\n+\z//; - - my @grafs = split(/\n{2,}/, $text); - - # - # Wrap

    tags. - # - foreach (@grafs) { - unless (defined( $g_html_blocks{$_} )) { - $_ = _RunSpanGamut($_); - s/^([ \t]*)/

    /; - $_ .= "

    "; - } - } - - # - # Unhashify HTML blocks - # - foreach (@grafs) { - if (defined( $g_html_blocks{$_} )) { - $_ = $g_html_blocks{$_}; - } - } - - return join "\n\n", @grafs; -} - - -sub _EncodeAmpsAndAngles { -# Smart processing for ampersands and angle brackets that need to be encoded. - - my $text = shift; - - # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: - # http://bumppo.net/projects/amputator/ - $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g; - - # Encode naked <'s - $text =~ s{<(?![a-z/?\$!])}{<}gi; - - return $text; -} - - -sub _EncodeBackslashEscapes { -# -# Parameter: String. -# Returns: The string, with after processing the following backslash -# escape sequences. -# - local $_ = shift; - - s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first. - s! \\` !$g_escape_table{'`'}!gx; - s! \\\* !$g_escape_table{'*'}!gx; - s! \\_ !$g_escape_table{'_'}!gx; - s! \\\{ !$g_escape_table{'{'}!gx; - s! \\\} !$g_escape_table{'}'}!gx; - s! \\\[ !$g_escape_table{'['}!gx; - s! \\\] !$g_escape_table{']'}!gx; - s! \\\( !$g_escape_table{'('}!gx; - s! \\\) !$g_escape_table{')'}!gx; - s! \\> !$g_escape_table{'>'}!gx; - s! \\\# !$g_escape_table{'#'}!gx; - s! \\\+ !$g_escape_table{'+'}!gx; - s! \\\- !$g_escape_table{'-'}!gx; - s! \\\. !$g_escape_table{'.'}!gx; - s{ \\! }{$g_escape_table{'!'}}gx; - - return $_; -} - - -sub _DoAutoLinks { - my $text = shift; - - $text =~ s{<((https?|ftp):[^'">\s]+)>}{
    $1}gi; - - # Email addresses: - $text =~ s{ - < - (?:mailto:)? - ( - [-.\w]+ - \@ - [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ - ) - > - }{ - _EncodeEmailAddress( _UnescapeSpecialChars($1) ); - }egix; - - return $text; -} - - -sub _EncodeEmailAddress { -# -# Input: an email address, e.g. "foo@example.com" -# -# Output: the email address as a mailto link, with each character -# of the address encoded as either a decimal or hex entity, in -# the hopes of foiling most address harvesting spam bots. E.g.: -# -# foo -# @example.com -# -# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk -# mailing list: -# - - my $addr = shift; - - srand; - my @encode = ( - sub { '&#' . ord(shift) . ';' }, - sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' }, - sub { shift }, - ); - - $addr = "mailto:" . $addr; - - $addr =~ s{(.)}{ - my $char = $1; - if ( $char eq '@' ) { - # this *must* be encoded. I insist. - $char = $encode[int rand 1]->($char); - } elsif ( $char ne ':' ) { - # leave ':' alone (to spot mailto: later) - my $r = rand; - # roughly 10% raw, 45% hex, 45% dec - $char = ( - $r > .9 ? $encode[2]->($char) : - $r < .45 ? $encode[1]->($char) : - $encode[0]->($char) - ); - } - $char; - }gex; - - $addr = qq{$addr}; - $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part - - return $addr; -} - - -sub _UnescapeSpecialChars { -# -# Swap back in all the special characters we've hidden. -# - my $text = shift; - - while( my($char, $hash) = each(%g_escape_table) ) { - $text =~ s/$hash/$char/g; - } - return $text; -} - - -sub _TokenizeHTML { -# -# Parameter: String containing HTML markup. -# Returns: Reference to an array of the tokens comprising the input -# string. Each token is either a tag (possibly with nested, -# tags contained therein, such as , or a -# run of text between tags. Each element of the array is a -# two-element array; the first is either 'tag' or 'text'; -# the second is the actual value. -# -# -# Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin. -# -# - - my $str = shift; - my $pos = 0; - my $len = length $str; - my @tokens; - - my $depth = 6; - my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth); - my $match = qr/(?s: ) | # comment - (?s: <\? .*? \?> ) | # processing instruction - $nested_tags/ix; # nested tags - - while ($str =~ m/($match)/g) { - my $whole_tag = $1; - my $sec_start = pos $str; - my $tag_start = $sec_start - length $whole_tag; - if ($pos < $tag_start) { - push @tokens, ['text', substr($str, $pos, $tag_start - $pos)]; - } - push @tokens, ['tag', $whole_tag]; - $pos = pos $str; - } - push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len; - \@tokens; -} - - -sub _Outdent { -# -# Remove one level of line-leading tabs or spaces -# - my $text = shift; - - $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm; - return $text; -} - - -sub _Detab { -# -# Cribbed from a post by Bart Lateur: -# -# - my $text = shift; - - $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge; - return $text; -} - - -1; - -__END__ - - -=pod - -=head1 NAME - -B - - -=head1 SYNOPSIS - -B [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ] - [ I ... ] - - -=head1 DESCRIPTION - -Markdown is a text-to-HTML filter; it translates an easy-to-read / -easy-to-write structured text format into HTML. Markdown's text format -is most similar to that of plain text email, and supports features such -as headers, *emphasis*, code blocks, blockquotes, and links. - -Markdown's syntax is designed not as a generic markup language, but -specifically to serve as a front-end to (X)HTML. You can use span-level -HTML tags anywhere in a Markdown document, and you can use block level -HTML tags (like
    and as well). - -For more information about Markdown's syntax, see: - - http://daringfireball.net/projects/markdown/ - - -=head1 OPTIONS - -Use "--" to end switch parsing. For example, to open a file named "-z", use: - - Markdown.pl -- -z - -=over 4 - - -=item B<--html4tags> - -Use HTML 4 style for empty element tags, e.g.: - -
    - -instead of Markdown's default XHTML style tags, e.g.: - -
    - - -=item B<-v>, B<--version> - -Display Markdown's version number and copyright information. - - -=item B<-s>, B<--shortversion> - -Display the short-form version number. - - -=back - - - -=head1 BUGS - -To file bug reports or feature requests (other than topics listed in the -Caveats section above) please send email to: - - support@daringfireball.net - -Please include with your report: (1) the example input; (2) the output -you expected; (3) the output Markdown actually produced. - - -=head1 VERSION HISTORY - -See the readme file for detailed release notes for this version. - -1.0.1 - 14 Dec 2004 - -1.0 - 28 Aug 2004 - - -=head1 AUTHOR - - John Gruber - http://daringfireball.net - - PHP port and other contributions by Michel Fortin - http://michelf.com - - -=head1 COPYRIGHT AND LICENSE - -Copyright (c) 2003-2004 John Gruber - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name "Markdown" nor the names of its contributors may - be used to endorse or promote products derived from this software - without specific prior written permission. - -This software is provided by the copyright holders and contributors "as -is" and any express or implied warranties, including, but not limited -to, the implied warranties of merchantability and fitness for a -particular purpose are disclaimed. In no event shall the copyright owner -or contributors be liable for any direct, indirect, incidental, special, -exemplary, or consequential damages (including, but not limited to, -procurement of substitute goods or services; loss of use, data, or -profits; or business interruption) however caused and on any theory of -liability, whether in contract, strict liability, or tort (including -negligence or otherwise) arising in any way out of the use of this -software, even if advised of the possibility of such damage. - -=cut diff --git a/bin/contrib/md2html.awk b/bin/contrib/md2html.awk deleted file mode 100755 index 81d1241..0000000 --- a/bin/contrib/md2html.awk +++ /dev/null @@ -1,427 +0,0 @@ -#!/bin/awk -f -# -# by: Jesus Galan (yiyus) 2009 -# -# Usage: md2html.awk file.md > file.html -# See: http://4l77.com/src/md2html.awk - -function eschtml(t) { - gsub("&", "\\&", t); - gsub("<", "\\<", t); - return t; -} - -function oprint(t){ - if(nr == 0) - print t; - else - otext = otext "\n" t; -} - -function subref(id){ - for(; nr > 0 && sub("<<" id, ref[id], otext); nr--); - if(nr == 0 && otext) { - print otext; - otext = ""; - } -} - -function nextil(t) { - if(!match(t, /[`<&\[*_\\-]|(\!\[)/)) - return t; - t1 = substr(t, 1, RSTART - 1); - tag = substr(t, RSTART, RLENGTH); - t2 = substr(t, RSTART + RLENGTH); - if(ilcode && tag != "`") - return eschtml(t1 tag) nextil(t2); - # Backslash escaping - if(tag == "\\"){ - if(match(t2, /^[\\`*_{}\[\]()#+\-\.!]/)){ - tag = substr(t2, 1, 1); - t2 = substr(t2, 2); - } - return t1 tag nextil(t2); - } - # Dashes - if(tag == "-"){ - if(sub(/^-/, "", t2)) - tag = "—"; - return t1 tag nextil(t2); - } - # Inline Code - if(tag == "`"){ - if(sub(/^`/, "", t2)){ - if(!match(t2, /``/)) - return t1 "”" nextil(t2); - ilcode2 = !ilcode2; - } - else if(ilcode2) - return t1 tag nextil(t2); - tag = ""; - if(ilcode){ - t1 = eschtml(t1); - tag = ""; - } - ilcode = !ilcode; - return t1 tag nextil(t2); - } - if(tag == "<"){ - # Autolinks - if(match(t2, /^[^ ]+[\.@][^ ]+>/)){ - url = eschtml(substr(t2, 1, RLENGTH - 1)); - t2 = substr(t2, RLENGTH + 1); - linktext = url; - if(match(url, /@/) && !match(url, /^mailto:/)) - url = "mailto:" url; - return t1 "" linktext "" nextil(t2); - } - # Html tags - if(match(t2, /^[A-Za-z\/!][^>]*>/)){ - tag = tag substr(t2, RSTART, RLENGTH); - t2 = substr(t2, RLENGTH + 1); - return t1 tag nextil(t2); - } - return t1 "<" nextil(t2); - } - # Html special entities - if(tag == "&"){ - if(match(t2, /^#?[A-Za-z0-9]+;/)){ - tag = tag substr(t2, RSTART, RLENGTH); - t2 = substr(t2, RLENGTH + 1); - return t1 tag nextil(t2); - } - return t1 "&" nextil(t2); - } - # Images - if(tag == "!["){ - if(!match(t2, /(\[.*\])|(\(.*\))/)) - return t1 tag nextil(t2); - match(t2, /^[^\]]*/); - alt = substr(t2, 1, RLENGTH); - t2 = substr(t2, RLENGTH + 2); - if(match(t2, /^\(/)){ - # Inline - sub(/^\(/, "", t2); - match(t2, /^[^\)]+/); - url = eschtml(substr(t2, 1, RLENGTH)); - t2 = substr(t2, RLENGTH + 2); - title = ""; - if(match(url, /[ ]+\".*\"[ ]*$/)) { - title = substr(url, RSTART, RLENGTH); - url = substr(url, 1, RSTART - 1); - match(title, /\".*\"/); - title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\""; - } - if(match(url, /^<.*>$/)) - url = substr(url, 2, RLENGTH - 2); - return t1 "\""" nextil(t2); - } - else{ - # Referenced - sub(/^ ?\[/, "", t2); - id = alt; - if(match(t2, /^[^\]]+/)) - id = substr(t2, 1, RLENGTH); - t2 = substr(t2, RLENGTH + 2); - if(ref[id]) - r = ref[id]; - else{ - r = "<<" id; - nr++; - } - return t1 "\""" nextil(t2); - } - } - # Links - if(tag == "["){ - if(!match(t2, /(\[.*\])|(\(.*\))/)) - return t1 tag nextil(t2); - match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/); - linktext = substr(t2, 1, RLENGTH); - t2 = substr(t2, RLENGTH + 2); - if(match(t2, /^\(/)){ - # Inline - match(t2, /^[^\)]+(\([^\)]+\)[^\)]*)*/); - url = substr(t2, 2, RLENGTH - 1); - pt2 = substr(t2, RLENGTH + 2); - title = ""; - if(match(url, /[ ]+\".*\"[ ]*$/)) { - title = substr(url, RSTART, RLENGTH); - url = substr(url, 1, RSTART - 1); - match(title, /\".*\"/); - title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\""; - } - if(match(url, /^<.*>$/)) - url = substr(url, 2, RLENGTH - 2); - url = eschtml(url); - return t1 "" nextil(linktext) "" nextil(pt2); - } - else{ - # Referenced - sub(/^ ?\[/, "", t2); - id = linktext; - if(match(t2, /^[^\]]+/)) - id = substr(t2, 1, RLENGTH); - t2 = substr(t2, RLENGTH + 2); - if(ref[id]) - r = ref[id]; - else{ - r = "<<" id; - nr++; - } - pt2 = t2; - return t1 "" nextil(linktext) "" nextil(pt2); - } - } - # Emphasis - if(match(tag, /[*_]/)){ - ntag = tag; - if(sub("^" tag, "", t2)){ - if(stag[ns] == tag && match(t2, "^" tag)) - t2 = tag t2; - else - ntag = tag tag - } - n = length(ntag); - tag = (n == 2) ? "strong" : "em"; - if(match(t1, / $/) && match(t2, /^ /)) - return t1 tag nextil(t2); - if(stag[ns] == ntag){ - tag = "/" tag; - ns--; - } - else - stag[++ns] = ntag; - tag = "<" tag ">"; - return t1 tag nextil(t2); - } -} - -function inline(t) { - ilcode = 0; - ilcode2 = 0; - ns = 0; - - return nextil(t); -} - -function printp(tag) { - if(!match(text, /^[ ]*$/)){ - text = inline(text); - if(tag != "") - oprint("<" tag ">" text ""); - else - oprint(text); - } - text = ""; -} - -BEGIN { - blank = 0; - code = 0; - hr = 0; - html = 0; - nl = 0; - nr = 0; - otext = ""; - text = ""; - par = "p"; -} - -# References -!code && /^ *\[[^\]]*\]:[ ]+/ { - sub(/^ *\[/, ""); - match($0, /\]/); - id = substr($0, 1, RSTART - 1); - sub(id "\\]:[ ]+", ""); - title = ""; - if(match($0, /\".*\"$/)) - title = "\" title=\"" substr($0, RSTART + 1, RLENGTH - 2); - sub(/[ ]+\".*\"$/, ""); - url = eschtml($0); - ref[id] = url title; - - subref(id); - next; -} - -# html -!html && /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\ -isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/ { - if(code) - oprint(""); - for(; !text && block[nl] == "blockquote"; nl--) - oprint(""); - match($0, /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\ - isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/); - htag = substr($0, 2, RLENGTH - 1); - if(!match($0, "(<\\/" htag ">)|((^
    $)")) - html = 1; - if(html && match($0, /^
    $/ || -(hr && />$/)) { - html = 0; - hr = 0; - oprint($0); - next; -} - -html { - oprint($0); - next; -} - -# List and quote blocks - -# Remove indentation -{ - for(nnl = 0; nnl < nl; nnl++) - if((match(block[nnl + 1], /[ou]l/) && !sub(/^( | )/, "")) || \ - (block[nnl + 1] == "blockquote" && !sub(/^> ?/, ""))) - break; -} -nnl < nl && !blank && text && ! /^ ? ? ?([*+-]|([0-9]+\.)+)( +| )/ { nnl = nl; } -# Quote blocks -{ - while(sub(/^> /, "")) - nblock[++nnl] = "blockquote"; -} -# Horizontal rules -{ hr = 0; } -(blank || (!text && !code)) && /^ ? ? ?([-*_][ ]*)([-*_][ ]*)([-*_][ ]*)+$/ { - if(code){ - oprint(""); - code = 0; - } - blank = 0; - nnl = 0; - hr = 1; -} -# List items -block[nl] ~ /[ou]l/ && /^$/ { - blank = 1; - next; -} -{ newli = 0; } -!hr && (nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?[*+-]( +| )/ { - sub(/^ ? ? ?[*+-]( +| )/, ""); - nnl++; - nblock[nnl] = "ul"; - newli = 1; -} -(nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?([0-9]+\.)+( +| )/ { - sub(/^ ? ? ?([0-9]+\.)+( +| )/, ""); - nnl++; - nblock[nnl] = "ol"; - newli = 1; -} -newli { - if(blank && nnl == nl && !par) - par = "p"; - blank = 0; - printp(par); - if(nnl == nl && block[nl] == nblock[nl]) - oprint("
  • "); -} -blank && ! /^$/ { - if(match(block[nnl], /[ou]l/) && !par) - par = "p"; - printp(par); - par = "p"; - blank = 0; -} - -# Close old blocks and open new ones -nnl != nl || nblock[nl] != block[nl] { - if(code){ - oprint(""); - code = 0; - } - printp(par); - b = (nnl > nl) ? nblock[nnl] : block[nnl]; - par = (match(b, /[ou]l/)) ? "" : "p"; -} -nnl < nl || (nnl == nl && nblock[nl] != block[nl]) { - for(; nl > nnl || (nnl == nl && pblock[nl] != block[nl]); nl--){ - if(match(block[nl], /[ou]l/)) - oprint("
  • "); - oprint(""); - } -} -nnl > nl { - for(; nl < nnl; nl++){ - block[nl + 1] = nblock[nl + 1]; - oprint("<" block[nl + 1] ">"); - if(match(block[nl + 1], /[ou]l/)) - oprint("
  • "); - } -} -hr { - oprint("
    "); - next; -} - -# Code blocks -code && /^$/ { - if(blanK) - oprint(""); - blank = 1; - next; -} -!text && sub(/^( | )/, "") { - if(blanK) - oprint(""); - blank = 0; - if(!code) - oprint("
    ");
    -	code = 1;
    -	$0 = eschtml($0);
    -	oprint($0);
    -	next;
    -}
    -code {
    -	oprint("
    "); - code = 0; -} - -# Setex-style Headers -text && /^=+$/ {printp("h1"); next;} -text && /^-+$/ {printp("h2"); next;} - -# Atx-Style headers -/^#+/ && (!newli || par=="p" || /^##/) { - for(n = 0; n < 6 && sub(/^# */, ""); n++) - sub(/#$/, ""); - par = "h" n; -} - -# Paragraph -/^$/ { - printp(par); - par = "p"; - next; -} - -# Add text -{ text = (text ? text " " : "") $0; } - -END { - if(code){ - oprint(""); - code = 0; - } - printp(par); - for(; nl > 0; nl--){ - if(match(block[nl], /[ou]l/)) - oprint("
  • "); - oprint(""); - } - gsub(/<<[^\"]*/, "", otext); - print(otext); -} diff --git a/bin/contrib/rc-httpd/handlers/authorize b/bin/contrib/rc-httpd/handlers/authorize deleted file mode 100755 index ea4db3e..0000000 --- a/bin/contrib/rc-httpd/handlers/authorize +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/rc -if(~ $REMOTE_USER ''){ - extra_headers=($extra_headers 'WWW-Authenticate: Basic realm="'$"SERVER_NAME'"') - error 401 - exit -} diff --git a/bin/contrib/rc-httpd/handlers/cgi b/bin/contrib/rc-httpd/handlers/cgi deleted file mode 100755 index 2c9a9b9..0000000 --- a/bin/contrib/rc-httpd/handlers/cgi +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/rc -fn filter_headers{ - response=(200 OK) - lines='' - done=false - while(~ $done false){ - line=`{getline} - head=`{echo $line | awk '{print tolower($1)}'} - if(~ $head status:*) - response=`{echo $line | awk '{$1="" ; print}'} - if not if(~ $line '') - done=true - if not - lines=$"lines^$"line^$cr^' -' - } - echo 'HTTP/1.1' $"response^$cr - echo -n $"lines - do_log $response(1) -} - -fn run_cgi { - path=$cgi_path exec $"cgi_bin $params || echo 'Status: 500' -} - -cgi_bin=$1 -cgi_dir=. -if(! ~ $#* 1) - cgi_dir=$*($#*) -if not if(~ $"cgi_bin /*){ - cgi_dir=`{basename -d $"cgi_bin} - cgi_dir=$"cgi_dir -} -if(! ~ $"cgi_bin */*) - cgi_bin=./$"cgi_bin -if(! builtin cd $"cgi_dir >[2]/dev/null || ! test -x $"cgi_bin){ - error 500 - exit -} - -run_cgi | { - filter_headers - emit_extra_headers - echo $cr - exec cat -} diff --git a/bin/contrib/rc-httpd/handlers/dir-index b/bin/contrib/rc-httpd/handlers/dir-index deleted file mode 100755 index 00ff8ce..0000000 --- a/bin/contrib/rc-httpd/handlers/dir-index +++ /dev/null @@ -1,111 +0,0 @@ -#!/bin/rc -PATH_INFO=`{echo $PATH_INFO | urldecode.awk} -full_path=$"FS_ROOT^$"PATH_INFO -full_path=$"full_path -if(! test -d $full_path){ - error 404 - exit -} -if(! test -r $full_path -x $full_path){ - error 503 - exit -} -do_log 200 -builtin cd $full_path -if(~ $"NOINDEXFILE ^ $"NOINDEX ''){ - ifile=index.htm* - if(! ~ $ifile(1) *'*'){ - PATH_INFO=$ifile(1) - FS_ROOT='' - exec serve-static - } -} -title=`{echo $SITE_TITLE | sed s,%s,^$"PATH_INFO^,} -title=$"title -lso=() -switch($2){ -case size - # ls has no option to sort by size - # could pipe it through sort, I suppose -case date - lso=-t -} -echo 'HTTP/1.1 200 OK'^$cr -emit_extra_headers -echo 'Content-type: text/html'^$cr -echo $cr -echo ' - -'^$title^' - - -' -echo '

    '^$title^'

    ' -if(! ~ $PATH_INFO /) - echo 'Parent directory' -echo '
    ' -ls -lQ $lso | awk ' -function urlencode(loc){ - # very minimal encoding, just enough for our static-file purposes - url=loc - gsub("%", "%25", url) # this one first! - gsub("\\$", "%24", url) - gsub("&", "%26", url) - gsub("\\+", "%2B", url) - gsub("\\?", "%3F", url) - gsub(" ", "%20", url) - gsub("\"", "%22", url) - gsub("#", "%23", url) - return url -} -function hrsize(size){ - if(size > 1073741824) return sprintf("%.1fGB", size/1073741824) - if(size > 10485760) return sprintf("%iMB", size/1048576) - if(size > 1048576) return sprintf("%.1fMB", size/1048576) - if(size > 10240) return sprintf("%iKB", size/1024) - if(size > 1024) return sprintf("%.1fKB", size/1024) - return sprintf("%iB", size) -} -/^(-|a)/ { - print "" - print "" - print "" - print "" - print "" - $1="" ; $2="" ; $3="" ; $4="" ; $5="" ; $6="" ; $7="" ; $8="" ; $9="" - sub("^ *?", "") - print "" - print "" - $0="" -} -/^d/ { - print "" - print "" - print "" - print "" - print "" - $1="" ; $2="" ; $3="" ; $4="" ; $5="" ; $6="" ; $7="" ; $8="" ; $9="" - sub("^ *?", "") - print "" - print "" -}' -echo '
    "hrsize($6)""$7""$8""$9""$0"
    "$7""$8""$9""$0"/
    - - -' diff --git a/bin/contrib/rc-httpd/handlers/error b/bin/contrib/rc-httpd/handlers/error deleted file mode 100755 index 282d870..0000000 --- a/bin/contrib/rc-httpd/handlers/error +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/rc -# DO NOT make this script callable directly from the web! -fn do_error{ - echo 'HTTP/1.1 '^$1^$cr - emit_extra_headers - echo 'Content-type: text/html'^$cr - echo $cr - echo ' - -'^$1^' - - -

    '^$1^'

    ' - echo $2 - echo '

    rc-httpd at' $SERVER_NAME '' - echo ' - - - ' -} - -fn 401{ - do_error '401 Unauthorized' \ - 'The requested path '^$"location^' requires authorization.' -} - -fn 404{ - do_error '404 Not Found' \ - 'The requested path '^$"location^' was not found on this server.' -} - -fn 500{ - do_error '500 Internal Server Error' \ - 'The server has encountered an internal misconfiguration and is unable to satisfy your request.' -} - -fn 503{ - do_error '503 Forbidden' \ - 'You do not have permission to access '^$"location^' on this server.' -} - -do_log $1 -$1 diff --git a/bin/contrib/rc-httpd/handlers/redirect b/bin/contrib/rc-httpd/handlers/redirect deleted file mode 100755 index e223091..0000000 --- a/bin/contrib/rc-httpd/handlers/redirect +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/rc -if(~ $#2 0){ - error 500 - exit -} -switch($1){ -case perm* - do_log 301 - echo 'HTTP/1.1 301 Moved Permanently'^$cr -case temp* - do_log 302 - echo 'HTTP/1.1 302 Moved Temporarily'^$cr -case seeother - do_log 303 - echo 'HTTP/1.1 303 See Other'^$cr -case * - error 500 - exit -} -echo 'Location: ' ^ $2 ^ $cr -emit_extra_headers -echo 'Content-type: text/html'^$cr -echo $cr -echo '' -if(~ $#3 0) - echo 'Browser did not accept redirect.' -if not - echo $3 -echo 'Click here' -echo '' diff --git a/bin/contrib/rc-httpd/handlers/serve-static b/bin/contrib/rc-httpd/handlers/serve-static deleted file mode 100755 index 00cc70a..0000000 --- a/bin/contrib/rc-httpd/handlers/serve-static +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/rc -full_path=`{echo $"FS_ROOT^$"PATH_INFO | urldecode.awk} -full_path=$"full_path -if(~ $full_path */) - error 503 -if(test -d $full_path){ - redirect perm $"location^'/' \ - 'URL not quite right, and browser did not accept redirect.' - exit -} -if(! test -e $full_path){ - error 404 - exit -} -if(! test -r $full_path){ - error 503 - exit -} -do_log 200 -switch($full_path){ -case *.html *.htm - type=text/html -case *.css - type=text/css -case *.txt - type='text/plain; charset=utf-8' -case *.jpg *.jpeg - type=image/jpeg -case *.gif - type=image/gif -case *.png - type=image/png -case * - type=`{file -m $full_path || file -i $full_path} # GROSS -} -max_age=3600 # 1 hour -echo 'HTTP/1.1 200 OK'^$cr -emit_extra_headers -echo 'Content-type: '^$type^'; charset=utf-8'^$cr -echo 'Content-length: '^`{ls -l $full_path | awk '{print $6}'} -echo 'Cache-control: max-age='^$max_age^$cr -echo $cr -exec cat $full_path diff --git a/bin/contrib/rc-httpd/handlers/static-or-cgi b/bin/contrib/rc-httpd/handlers/static-or-cgi deleted file mode 100755 index 4d8a2d4..0000000 --- a/bin/contrib/rc-httpd/handlers/static-or-cgi +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/rc -cgiargs=$* - -fn error{ - if(~ $1 404) - exec cgi $cgiargs - if not - $rc_httpd_dir/handlers/error $1 -} - -if(~ $location */) - exec cgi $cgiargs -if not - exec serve-static diff --git a/bin/contrib/rc-httpd/handlers/static-or-index b/bin/contrib/rc-httpd/handlers/static-or-index deleted file mode 100755 index f0904f8..0000000 --- a/bin/contrib/rc-httpd/handlers/static-or-index +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/rc -if(~ $PATH_INFO */) - exec dir-index $params -if not - exec serve-static diff --git a/bin/contrib/rc-httpd/lib/urldecode.awk b/bin/contrib/rc-httpd/lib/urldecode.awk deleted file mode 100755 index 1dadd00..0000000 --- a/bin/contrib/rc-httpd/lib/urldecode.awk +++ /dev/null @@ -1,39 +0,0 @@ -# taken from werc -BEGIN { - hextab ["0"] = 0; hextab ["8"] = 8; - hextab ["1"] = 1; hextab ["9"] = 9; - hextab ["2"] = 2; hextab ["A"] = hextab ["a"] = 10 - hextab ["3"] = 3; hextab ["B"] = hextab ["b"] = 11; - hextab ["4"] = 4; hextab ["C"] = hextab ["c"] = 12; - hextab ["5"] = 5; hextab ["D"] = hextab ["d"] = 13; - hextab ["6"] = 6; hextab ["E"] = hextab ["e"] = 14; - hextab ["7"] = 7; hextab ["F"] = hextab ["f"] = 15; -} -{ - decoded = "" - i = 1 - len = length ($0) - while ( i <= len ) { - c = substr ($0, i, 1) - if ( c == "%" ) { - if ( i+2 <= len ) { - c1 = substr ($0, i+1, 1) - c2 = substr ($0, i+2, 1) - if ( hextab [c1] == "" || hextab [c2] == "" ) { - print "WARNING: invalid hex encoding: %" c1 c2 | "cat >&2" - } else { - code = 0 + hextab [c1] * 16 + hextab [c2] + 0 - c = sprintf ("%c", code) - i = i + 2 - } - } else { - print "WARNING: invalid % encoding: " substr ($0, i, len - i) - } - } else if ( c == "+" ) { - c = " " - } - decoded = decoded c - ++i - } - printf "%s", decoded -} diff --git a/bin/contrib/rc-httpd/rc-httpd b/bin/contrib/rc-httpd/rc-httpd deleted file mode 100755 index 8e4fad9..0000000 --- a/bin/contrib/rc-httpd/rc-httpd +++ /dev/null @@ -1,102 +0,0 @@ -#!/bin/rc -rc_httpd_dir=/home/sl/www/werc/bin/contrib/rc-httpd -libdir = $rc_httpd_dir/lib -path=($PLAN9/bin $rc_httpd_dir/handlers $PATH) -cgi_path=$PLAN9/bin -SERVER_PORT=80 # default for CGI scripts, may be overridden by the Host header -extra_headers='Server: rc-httpd' -cr= - -fn do_log{ - echo `{date} :: $SERVER_NAME :: $request :: \ - $HTTP_USER_AGENT :: $1 :: $HTTP_REFERER >[1=2] -} - -fn emit_extra_headers{ - for(header in $extra_headers) - echo $"header^$cr -} - -fn getline{ read | sed 's/'^$"cr^'$//g' } - -fn terminate{ - echo `{date} connection terminated >[1=2] - exit terminate -} - -fn trim_input{ dd -bs 1 -count $CONTENT_LENGTH } - -request=`{getline} -if(~ $#request 0) - terminate -REQUEST_METHOD=$request(1) -REQUEST_URI=$request(2) -reqlines='' -HTTP_COOKIE='' -REMOTE_USER='' -done=false -chunked=no -while(~ $"done false){ - line=`{getline} - if(~ $#line 0) - done=true - reqlines=$"reqlines$"line' -' - h=`{echo $line | awk '{print tolower($1)}'} - switch($h){ - case '' - done=true - case host: - SERVER_NAME=$line(2) - case referer: - HTTP_REFERER=$line(2) - case user-agent: - HTTP_USER_AGENT=`{echo $line | sed 's;[^:]+:[ ]+;;'} - case content-length: - CONTENT_LENGTH=$line(2) - case content-type: - CONTENT_TYPE=$line(2) - case cookie: - cookie=`{echo $line | sed 's;^[^:]+:[ ]*;;'} - HTTP_COOKIE=$"HTTP_COOKIE^$"cookie^'; ' - case authorization: - REMOTE_USER=`{auth/httpauth $line(3)} - case transfer-encoding: - ~ $line(2) chunked && chunked=yes - } -} -if(~ $REQUEST_URI *://* //*){ - SERVER_NAME=`{echo $REQUEST_URI | sed ' - s;^[^:]+:;; - s;^//([^/]+).*;\1;'} - REQUEST_URI=`{echo $REQUEST_URI | sed ' - s;^[^:]+:;; - s;^//[^/]+/?;/;'} -} -QUERY_STRING=`{echo $REQUEST_URI | sed 's;[^?]*\??;;'} -params=`{echo $QUERY_STRING | sed 's;\+; ;g'} -location=`{echo $REQUEST_URI | sed 's;\?.*;;'} -location=`{echo $location | sed ' - s;[^/]+/\.\./;/;g - s;/\./;/;g - s;//+;/;g -'} -SERVER_NAME=`{echo $SERVER_NAME | sed 's;^(\[[^\]]+\]|[^:]+)\:([0-9]+)$;\1 \2;'} -if(~ $#SERVER_NAME 2){ - SERVER_PORT=$SERVER_NAME(2) - SERVER_NAME=$SERVER_NAME(1) -} -if(~ $REQUEST_METHOD (PUT POST)){ - if(! ~ $"CONTENT_LENGTH '') - trim_input | exec $rc_httpd_dir/select-handler - if not{ - if(~ $chunked yes){ - echo 'HTTP/1.1 411 Length required'^$cr - echo $cr - exit - } - exec $rc_httpd_dir/select-handler - } -} -if not - . $rc_httpd_dir/select-handler diff --git a/bin/contrib/rc-httpd/select-handler b/bin/contrib/rc-httpd/select-handler deleted file mode 100755 index ec819d4..0000000 --- a/bin/contrib/rc-httpd/select-handler +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/rc -rfork n - -# Route requests to werc. -# Change paths to match your system. - -if(~ $SERVER_NAME 9base.werc.cat-v.org) - PLAN9=/usr/local/9base -if(~ $SERVER_NAME frontbase.werc.cat-v.org) - PLAN9=/usr/local/plan9front -if(~ $SERVER_NAME plan9port.werc.cat-v.org) - PLAN9=/usr/local/plan9 - -if(~ $SERVER_NAME *){ - PATH_INFO=$location - FS_ROOT=/home/sl/www/werc/sites/$SERVER_NAME - exec static-or-cgi /home/sl/www/werc/bin/werc.rc -} -if not - error 503 diff --git a/bin/contrib/tcp80 b/bin/contrib/tcp80 deleted file mode 100755 index ae111a0..0000000 --- a/bin/contrib/tcp80 +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/rc -# For use with listen(8). -# Change paths to match your system. -# Eitdit rc-httpd/rc-httpd to match your system. -PLAN9=/usr/local/plan9 -PATH=($PATH /home/sl/www/werc/bin/contrib) -exec /home/sl/www/werc/bin/contrib/rc-httpd/rc-httpd >>[2]/var/log/rc-httpd diff --git a/bin/contrib/urldecode.awk b/bin/contrib/urldecode.awk deleted file mode 100755 index bd791e3..0000000 --- a/bin/contrib/urldecode.awk +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/awk -f -BEGIN { - hextab ["0"] = 0; hextab ["8"] = 8; - hextab ["1"] = 1; hextab ["9"] = 9; - hextab ["2"] = 2; hextab ["A"] = hextab ["a"] = 10 - hextab ["3"] = 3; hextab ["B"] = hextab ["b"] = 11; - hextab ["4"] = 4; hextab ["C"] = hextab ["c"] = 12; - hextab ["5"] = 5; hextab ["D"] = hextab ["d"] = 13; - hextab ["6"] = 6; hextab ["E"] = hextab ["e"] = 14; - hextab ["7"] = 7; hextab ["F"] = hextab ["f"] = 15; -} -{ - decoded = "" - i = 1 - len = length ($0) - while ( i <= len ) { - c = substr ($0, i, 1) - if ( c == "%" ) { - if ( i+2 <= len ) { - c1 = substr ($0, i+1, 1) - c2 = substr ($0, i+2, 1) - if ( hextab [c1] == "" || hextab [c2] == "" ) { - print "WARNING: invalid hex encoding: %" c1 c2 | "cat >&2" - } else { - code = 0 + hextab [c1] * 16 + hextab [c2] + 0 - c = sprintf ("%c", code) - i = i + 2 - } - } else { - print "WARNING: invalid % encoding: " substr ($0, i, len - i) - } - } else if ( c == "+" ) { - c = " " - } - decoded = decoded c - ++i - } - print decoded -} diff --git a/bin/contrib/urlencode.awk b/bin/contrib/urlencode.awk deleted file mode 100755 index d4d354d..0000000 --- a/bin/contrib/urlencode.awk +++ /dev/null @@ -1,126 +0,0 @@ -# Taken from http://www.shelldorado.com/scripts/cmds/urlencode -########################################################################## -# Title : urlencode - encode URL data -# Author : Heiner Steven (heiner.steven@odn.de) -# Date : 2000-03-15 -# Requires : awk -# Categories : File Conversion, WWW, CGI -# SCCS-Id. : @(#) urlencode 1.4 06/10/29 -########################################################################## -# Description -# Encode data according to -# RFC 1738: "Uniform Resource Locators (URL)" and -# RFC 1866: "Hypertext Markup Language - 2.0" (HTML) -# -# This encoding is used i.e. for the MIME type -# "application/x-www-form-urlencoded" -# -# Notes -# o The default behaviour is not to encode the line endings. This -# may not be what was intended, because the result will be -# multiple lines of output (which cannot be used in an URL or a -# HTTP "POST" request). If the desired output should be one -# line, use the "-l" option. -# -# o The "-l" option assumes, that the end-of-line is denoted by -# the character LF (ASCII 10). This is not true for Windows or -# Mac systems, where the end of a line is denoted by the two -# characters CR LF (ASCII 13 10). -# We use this for symmetry; data processed in the following way: -# cat | urlencode -l | urldecode -l -# should (and will) result in the original data -# -# o Large lines (or binary files) will break many AWK -# implementations. If you get the message -# awk: record `...' too long -# record number xxx -# consider using GNU AWK (gawk). -# -# o urlencode will always terminate it's output with an EOL -# character -# -# Thanks to Stefan Brozinski for pointing out a bug related to non-standard -# locales. -# -# See also -# urldecode -########################################################################## - -PN=`basename "$0"` # Program name -VER='1.4' - -: ${AWK=awk} - -Usage () { - echo >&2 "$PN - encode URL data, $VER -usage: $PN [-l] [file ...] - -l: encode line endings (result will be one line of output) - -The default is to encode each input line on its own." - exit 1 -} - -Msg () { - for MsgLine - do echo "$PN: $MsgLine" >&2 - done -} - -Fatal () { Msg "$@"; exit 1; } - -set -- `getopt hl "$@" 2>/dev/null` || Usage -[ $# -lt 1 ] && Usage # "getopt" detected an error - -EncodeEOL=no -while [ $# -gt 0 ] -do - case "$1" in - -l) EncodeEOL=yes;; - --) shift; break;; - -h) Usage;; - -*) Usage;; - *) break;; # First file name - esac - shift -done - -LANG=C export LANG -$AWK ' - BEGIN { - # We assume an awk implementation that is just plain dumb. - # We will convert an character to its ASCII value with the - # table ord[], and produce two-digit hexadecimal output - # without the printf("%02X") feature. - - EOL = "%0A" # "end of line" string (encoded) - split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ") - hextab [0] = 0 - for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0 - if ("'"$EncodeEOL"'" == "yes") EncodeEOL = 1; else EncodeEOL = 0 - } - { - encoded = "" - for ( i=1; i<=length ($0); ++i ) { - c = substr ($0, i, 1) - if ( c ~ /[a-zA-Z0-9.-]/ ) { - encoded = encoded c # safe character - } else if ( c == " " ) { - encoded = encoded "+" # special handling - } else { - # unsafe character, encode it as a two-digit hex-number - lo = ord [c] % 16 - hi = int (ord [c] / 16); - encoded = encoded "%" hextab [hi] hextab [lo] - } - } - if ( EncodeEOL ) { - printf ("%s", encoded EOL) - } else { - print encoded - } - } - END { - #if ( EncodeEOL ) print "" - } -' "$@" - diff --git a/bin/contrib/webserver.rc b/bin/contrib/webserver.rc deleted file mode 100755 index 8044565..0000000 --- a/bin/contrib/webserver.rc +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/rc - -# A web server in rc by maht -# Originally from http://www.proweb.co.uk/~matt/rc/webserver.rc - -ifs=' ' -request=`{sed 1q} - -url=$request(2) -file=`{echo $url | sed 's/http:\/\/[^\/]*//' | tr -d \012} - -if(test -d $file){ - file=$file ^'/index.html' -} -if(test -e $file) { - response='200' -} -if not { - response='404' - file='404.html' -} - -echo 'HTTP/1.1 ' ^$response -echo 'Date: ' `{date} -echo 'Server: rc shell' -echo 'Content-Length: ' `{cat $file | wc -c | tr -d ' '} -echo 'Content-Type: ' `{file -i $file | awk '{ print $2 }'} -echo 'Connection: close' -echo -cat $file diff --git a/bin/md2html.awk b/bin/md2html.awk new file mode 100755 index 0000000..81d1241 --- /dev/null +++ b/bin/md2html.awk @@ -0,0 +1,427 @@ +#!/bin/awk -f +# +# by: Jesus Galan (yiyus) 2009 +# +# Usage: md2html.awk file.md > file.html +# See: http://4l77.com/src/md2html.awk + +function eschtml(t) { + gsub("&", "\\&", t); + gsub("<", "\\<", t); + return t; +} + +function oprint(t){ + if(nr == 0) + print t; + else + otext = otext "\n" t; +} + +function subref(id){ + for(; nr > 0 && sub("<<" id, ref[id], otext); nr--); + if(nr == 0 && otext) { + print otext; + otext = ""; + } +} + +function nextil(t) { + if(!match(t, /[`<&\[*_\\-]|(\!\[)/)) + return t; + t1 = substr(t, 1, RSTART - 1); + tag = substr(t, RSTART, RLENGTH); + t2 = substr(t, RSTART + RLENGTH); + if(ilcode && tag != "`") + return eschtml(t1 tag) nextil(t2); + # Backslash escaping + if(tag == "\\"){ + if(match(t2, /^[\\`*_{}\[\]()#+\-\.!]/)){ + tag = substr(t2, 1, 1); + t2 = substr(t2, 2); + } + return t1 tag nextil(t2); + } + # Dashes + if(tag == "-"){ + if(sub(/^-/, "", t2)) + tag = "—"; + return t1 tag nextil(t2); + } + # Inline Code + if(tag == "`"){ + if(sub(/^`/, "", t2)){ + if(!match(t2, /``/)) + return t1 "”" nextil(t2); + ilcode2 = !ilcode2; + } + else if(ilcode2) + return t1 tag nextil(t2); + tag = ""; + if(ilcode){ + t1 = eschtml(t1); + tag = ""; + } + ilcode = !ilcode; + return t1 tag nextil(t2); + } + if(tag == "<"){ + # Autolinks + if(match(t2, /^[^ ]+[\.@][^ ]+>/)){ + url = eschtml(substr(t2, 1, RLENGTH - 1)); + t2 = substr(t2, RLENGTH + 1); + linktext = url; + if(match(url, /@/) && !match(url, /^mailto:/)) + url = "mailto:" url; + return t1 "" linktext "" nextil(t2); + } + # Html tags + if(match(t2, /^[A-Za-z\/!][^>]*>/)){ + tag = tag substr(t2, RSTART, RLENGTH); + t2 = substr(t2, RLENGTH + 1); + return t1 tag nextil(t2); + } + return t1 "<" nextil(t2); + } + # Html special entities + if(tag == "&"){ + if(match(t2, /^#?[A-Za-z0-9]+;/)){ + tag = tag substr(t2, RSTART, RLENGTH); + t2 = substr(t2, RLENGTH + 1); + return t1 tag nextil(t2); + } + return t1 "&" nextil(t2); + } + # Images + if(tag == "!["){ + if(!match(t2, /(\[.*\])|(\(.*\))/)) + return t1 tag nextil(t2); + match(t2, /^[^\]]*/); + alt = substr(t2, 1, RLENGTH); + t2 = substr(t2, RLENGTH + 2); + if(match(t2, /^\(/)){ + # Inline + sub(/^\(/, "", t2); + match(t2, /^[^\)]+/); + url = eschtml(substr(t2, 1, RLENGTH)); + t2 = substr(t2, RLENGTH + 2); + title = ""; + if(match(url, /[ ]+\".*\"[ ]*$/)) { + title = substr(url, RSTART, RLENGTH); + url = substr(url, 1, RSTART - 1); + match(title, /\".*\"/); + title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\""; + } + if(match(url, /^<.*>$/)) + url = substr(url, 2, RLENGTH - 2); + return t1 "\""" nextil(t2); + } + else{ + # Referenced + sub(/^ ?\[/, "", t2); + id = alt; + if(match(t2, /^[^\]]+/)) + id = substr(t2, 1, RLENGTH); + t2 = substr(t2, RLENGTH + 2); + if(ref[id]) + r = ref[id]; + else{ + r = "<<" id; + nr++; + } + return t1 "\""" nextil(t2); + } + } + # Links + if(tag == "["){ + if(!match(t2, /(\[.*\])|(\(.*\))/)) + return t1 tag nextil(t2); + match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/); + linktext = substr(t2, 1, RLENGTH); + t2 = substr(t2, RLENGTH + 2); + if(match(t2, /^\(/)){ + # Inline + match(t2, /^[^\)]+(\([^\)]+\)[^\)]*)*/); + url = substr(t2, 2, RLENGTH - 1); + pt2 = substr(t2, RLENGTH + 2); + title = ""; + if(match(url, /[ ]+\".*\"[ ]*$/)) { + title = substr(url, RSTART, RLENGTH); + url = substr(url, 1, RSTART - 1); + match(title, /\".*\"/); + title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\""; + } + if(match(url, /^<.*>$/)) + url = substr(url, 2, RLENGTH - 2); + url = eschtml(url); + return t1 "" nextil(linktext) "" nextil(pt2); + } + else{ + # Referenced + sub(/^ ?\[/, "", t2); + id = linktext; + if(match(t2, /^[^\]]+/)) + id = substr(t2, 1, RLENGTH); + t2 = substr(t2, RLENGTH + 2); + if(ref[id]) + r = ref[id]; + else{ + r = "<<" id; + nr++; + } + pt2 = t2; + return t1 "" nextil(linktext) "" nextil(pt2); + } + } + # Emphasis + if(match(tag, /[*_]/)){ + ntag = tag; + if(sub("^" tag, "", t2)){ + if(stag[ns] == tag && match(t2, "^" tag)) + t2 = tag t2; + else + ntag = tag tag + } + n = length(ntag); + tag = (n == 2) ? "strong" : "em"; + if(match(t1, / $/) && match(t2, /^ /)) + return t1 tag nextil(t2); + if(stag[ns] == ntag){ + tag = "/" tag; + ns--; + } + else + stag[++ns] = ntag; + tag = "<" tag ">"; + return t1 tag nextil(t2); + } +} + +function inline(t) { + ilcode = 0; + ilcode2 = 0; + ns = 0; + + return nextil(t); +} + +function printp(tag) { + if(!match(text, /^[ ]*$/)){ + text = inline(text); + if(tag != "") + oprint("<" tag ">" text ""); + else + oprint(text); + } + text = ""; +} + +BEGIN { + blank = 0; + code = 0; + hr = 0; + html = 0; + nl = 0; + nr = 0; + otext = ""; + text = ""; + par = "p"; +} + +# References +!code && /^ *\[[^\]]*\]:[ ]+/ { + sub(/^ *\[/, ""); + match($0, /\]/); + id = substr($0, 1, RSTART - 1); + sub(id "\\]:[ ]+", ""); + title = ""; + if(match($0, /\".*\"$/)) + title = "\" title=\"" substr($0, RSTART + 1, RLENGTH - 2); + sub(/[ ]+\".*\"$/, ""); + url = eschtml($0); + ref[id] = url title; + + subref(id); + next; +} + +# html +!html && /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\ +isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/ { + if(code) + oprint("

    "); + for(; !text && block[nl] == "blockquote"; nl--) + oprint(""); + match($0, /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\ + isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/); + htag = substr($0, 2, RLENGTH - 1); + if(!match($0, "(<\\/" htag ">)|((^
    $)")) + html = 1; + if(html && match($0, /^
    $/ || +(hr && />$/)) { + html = 0; + hr = 0; + oprint($0); + next; +} + +html { + oprint($0); + next; +} + +# List and quote blocks + +# Remove indentation +{ + for(nnl = 0; nnl < nl; nnl++) + if((match(block[nnl + 1], /[ou]l/) && !sub(/^( | )/, "")) || \ + (block[nnl + 1] == "blockquote" && !sub(/^> ?/, ""))) + break; +} +nnl < nl && !blank && text && ! /^ ? ? ?([*+-]|([0-9]+\.)+)( +| )/ { nnl = nl; } +# Quote blocks +{ + while(sub(/^> /, "")) + nblock[++nnl] = "blockquote"; +} +# Horizontal rules +{ hr = 0; } +(blank || (!text && !code)) && /^ ? ? ?([-*_][ ]*)([-*_][ ]*)([-*_][ ]*)+$/ { + if(code){ + oprint("
    "); + code = 0; + } + blank = 0; + nnl = 0; + hr = 1; +} +# List items +block[nl] ~ /[ou]l/ && /^$/ { + blank = 1; + next; +} +{ newli = 0; } +!hr && (nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?[*+-]( +| )/ { + sub(/^ ? ? ?[*+-]( +| )/, ""); + nnl++; + nblock[nnl] = "ul"; + newli = 1; +} +(nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?([0-9]+\.)+( +| )/ { + sub(/^ ? ? ?([0-9]+\.)+( +| )/, ""); + nnl++; + nblock[nnl] = "ol"; + newli = 1; +} +newli { + if(blank && nnl == nl && !par) + par = "p"; + blank = 0; + printp(par); + if(nnl == nl && block[nl] == nblock[nl]) + oprint("
  • "); +} +blank && ! /^$/ { + if(match(block[nnl], /[ou]l/) && !par) + par = "p"; + printp(par); + par = "p"; + blank = 0; +} + +# Close old blocks and open new ones +nnl != nl || nblock[nl] != block[nl] { + if(code){ + oprint(""); + code = 0; + } + printp(par); + b = (nnl > nl) ? nblock[nnl] : block[nnl]; + par = (match(b, /[ou]l/)) ? "" : "p"; +} +nnl < nl || (nnl == nl && nblock[nl] != block[nl]) { + for(; nl > nnl || (nnl == nl && pblock[nl] != block[nl]); nl--){ + if(match(block[nl], /[ou]l/)) + oprint("
  • "); + oprint(""); + } +} +nnl > nl { + for(; nl < nnl; nl++){ + block[nl + 1] = nblock[nl + 1]; + oprint("<" block[nl + 1] ">"); + if(match(block[nl + 1], /[ou]l/)) + oprint("
  • "); + } +} +hr { + oprint("
    "); + next; +} + +# Code blocks +code && /^$/ { + if(blanK) + oprint(""); + blank = 1; + next; +} +!text && sub(/^( | )/, "") { + if(blanK) + oprint(""); + blank = 0; + if(!code) + oprint("
    ");
    +	code = 1;
    +	$0 = eschtml($0);
    +	oprint($0);
    +	next;
    +}
    +code {
    +	oprint("
    "); + code = 0; +} + +# Setex-style Headers +text && /^=+$/ {printp("h1"); next;} +text && /^-+$/ {printp("h2"); next;} + +# Atx-Style headers +/^#+/ && (!newli || par=="p" || /^##/) { + for(n = 0; n < 6 && sub(/^# */, ""); n++) + sub(/#$/, ""); + par = "h" n; +} + +# Paragraph +/^$/ { + printp(par); + par = "p"; + next; +} + +# Add text +{ text = (text ? text " " : "") $0; } + +END { + if(code){ + oprint(""); + code = 0; + } + printp(par); + for(; nl > 0; nl--){ + if(match(block[nl], /[ou]l/)) + oprint("
  • "); + oprint(""); + } + gsub(/<<[^\"]*/, "", otext); + print(otext); +} diff --git a/bin/werc.rc b/bin/werc.rc index 0d006a3..1f83e96 100755 --- a/bin/werc.rc +++ b/bin/werc.rc @@ -20,7 +20,6 @@ path=(. /bin ./bin) res_tail='' http_content_type='text/html' ll_add handlers_bar_left nav_tree -werc_apps=( apps/* ) werc_root=`{pwd} sitesdir=sites @@ -29,9 +28,6 @@ sitesdir=sites if(test -f etc/initrc.local) . ./etc/initrc.local -for(a in $werc_apps) - . ./$a/app.rc - fn werc_exec_request { site=$SERVER_NAME base_url=http://$site:$SERVER_PORT @@ -61,7 +57,6 @@ fn werc_exec_request { if(~ $local_path */) { if(test -d $local_path) local_path=$local_path^'index' - # XXX: This redir might step on apps with synthetic dirs. if not if(ls `{basename -d $local_path}^* >/dev/null >[2]/dev/null) perm_redirect `{echo $req_path|sed 's,/+$,,'} } -- cgit v1.2.3