diff options
author | glenda <glenda@9front.local> | 2020-11-15 15:13:27 +0000 |
---|---|---|
committer | glenda <glenda@9front.local> | 2020-11-15 15:13:27 +0000 |
commit | 39318169e0b50551db511851829f9337c5fa6313 (patch) | |
tree | 65a0ef5c1da9677532fa8105293d017919473057 /bin/contrib |
Import site to git
Diffstat (limited to 'bin/contrib')
-rwxr-xr-x | bin/contrib/fix-rc-scripts | 27 | ||||
-rwxr-xr-x | bin/contrib/hgweb.config | 12 | ||||
-rwxr-xr-x | bin/contrib/hgwebdir.cgi | 47 | ||||
-rwxr-xr-x | bin/contrib/markdown.pl | 1447 | ||||
-rwxr-xr-x | bin/contrib/md2html.awk | 427 | ||||
-rwxr-xr-x | bin/contrib/rc-httpd/handlers/authorize | 6 | ||||
-rwxr-xr-x | bin/contrib/rc-httpd/handlers/cgi | 46 | ||||
-rwxr-xr-x | bin/contrib/rc-httpd/handlers/dir-index | 111 | ||||
-rwxr-xr-x | bin/contrib/rc-httpd/handlers/error | 43 | ||||
-rwxr-xr-x | bin/contrib/rc-httpd/handlers/redirect | 30 | ||||
-rwxr-xr-x | bin/contrib/rc-httpd/handlers/serve-static | 43 | ||||
-rwxr-xr-x | bin/contrib/rc-httpd/handlers/static-or-cgi | 14 | ||||
-rwxr-xr-x | bin/contrib/rc-httpd/handlers/static-or-index | 5 | ||||
-rwxr-xr-x | bin/contrib/rc-httpd/lib/urldecode.awk | 39 | ||||
-rwxr-xr-x | bin/contrib/rc-httpd/rc-httpd | 102 | ||||
-rwxr-xr-x | bin/contrib/rc-httpd/select-handler | 20 | ||||
-rwxr-xr-x | bin/contrib/tcp80 | 7 | ||||
-rwxr-xr-x | bin/contrib/urldecode.awk | 39 | ||||
-rwxr-xr-x | bin/contrib/urlencode.awk | 126 | ||||
-rwxr-xr-x | bin/contrib/webserver.rc | 30 |
20 files changed, 2621 insertions, 0 deletions
diff --git a/bin/contrib/fix-rc-scripts b/bin/contrib/fix-rc-scripts new file mode 100755 index 0000000..beb21c5 --- /dev/null +++ b/bin/contrib/fix-rc-scripts @@ -0,0 +1,27 @@ +#!/usr/local/plan9/bin/rc + +# Fix rc shell scripts to find rc without launching env every time. +# Invoke with rc and plan9 versions of grep and ed in $PATH + +# If your system lacks which (e.g. some gnu/linux) +# substitute the full path to rc in this line: +rc=/usr/local/plan9/bin/rc +firstline='#!'$"rc + +if(~ $#* 0) files = * +if not files = $* + +myname = `{basename $0} + +for(file in $files) { + if(test -d $file) $0 $file/* + if not if(~ $file *$myname) {} + if not if(sed 1q $file | grep '^#!/.*[/ ]rc$' > /dev/null) { + { + echo 1c + echo $firstline + echo . + echo wq + } | ed $file > /dev/null + } +} diff --git a/bin/contrib/hgweb.config b/bin/contrib/hgweb.config new file mode 100755 index 0000000..fba802b --- /dev/null +++ b/bin/contrib/hgweb.config @@ -0,0 +1,12 @@ +[web] +style = gitweb +allow_archive = bz2 + +#[paths] +#w9 = /gsoc/hg/w9/ + +[collections] +#allow_archive = bz2 zip +/gsoc/hg = /gsoc/hg/ +#/var/hg = /var/hg/ + diff --git a/bin/contrib/hgwebdir.cgi b/bin/contrib/hgwebdir.cgi new file mode 100755 index 0000000..5fe4b16 --- /dev/null +++ b/bin/contrib/hgwebdir.cgi @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# +# An example CGI script to export multiple hgweb repos, edit as necessary + +# send python tracebacks to the browser if an error occurs: +import cgitb +cgitb.enable() + +# adjust python path if not a system-wide install: +#import sys +#sys.path.insert(0, "/path/to/python/lib") + +# If you'd like to serve pages with UTF-8 instead of your default +# locale charset, you can do so by uncommenting the following lines. +# Note that this will cause your .hgrc files to be interpreted in +# UTF-8 and all your repo files to be displayed using UTF-8. +# +#import os +#os.environ["HGENCODING"] = "UTF-8" + +from mercurial.hgweb.hgwebdir_mod import hgwebdir +from mercurial.hgweb.request import wsgiapplication +import mercurial.hgweb.wsgicgi as wsgicgi + +# The config file looks like this. You can have paths to individual +# repos, collections of repos in a directory tree, or both. +# +# [paths] +# virtual/path = /real/path +# virtual/path = /real/path +# +# [collections] +# /prefix/to/strip/off = /root/of/tree/full/of/repos +# +# collections example: say directory tree /foo contains repos /foo/bar, +# /foo/quux/baz. Give this config section: +# [collections] +# /foo = /foo +# Then repos will list as bar and quux/baz. +# +# Alternatively you can pass a list of ('virtual/path', '/real/path') tuples +# or use a dictionary with entries like 'virtual/path': '/real/path' + +def make_web_app(): + return hgwebdir("hgweb.config") + +wsgicgi.launch(wsgiapplication(make_web_app)) diff --git a/bin/contrib/markdown.pl b/bin/contrib/markdown.pl new file mode 100755 index 0000000..3758a87 --- /dev/null +++ b/bin/contrib/markdown.pl @@ -0,0 +1,1447 @@ +#!/usr/bin/env perl +# +# Markdown -- A text-to-HTML conversion tool for web writers +# +# Copyright (c) 2004 John Gruber +# <http://daringfireball.net/projects/markdown/> +# +package Markdown; +require 5.006_000; +use strict; +use warnings; + +use Digest::MD5 qw(md5_hex); +use vars qw($VERSION); +$VERSION = '1.0.1'; +# Tue 14 Dec 2004 + +## Disabled; causes problems under Perl 5.6.1: +# use utf8; +# binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html + + +# +# Global default settings: +# +my $g_empty_element_suffix = " />"; # Change to ">" for HTML output +my $g_tab_width = 4; + + +# +# Globals: +# + +# Regex to match balanced [brackets]. See Friedl's +# "Mastering Regular Expressions", 2nd Ed., pp. 328-331. +my $g_nested_brackets; +$g_nested_brackets = qr{ + (?> # Atomic matching + [^\[\]]+ # Anything other than brackets + | + \[ + (??{ $g_nested_brackets }) # Recursive set of nested brackets + \] + )* +}x; + + +# Table of hash values for escaped characters: +my %g_escape_table; +foreach my $char (split //, '\\`*_{}[]()>#+-.!') { + $g_escape_table{$char} = md5_hex($char); +} + + +# Global hashes, used by various utility routines +my %g_urls; +my %g_titles; +my %g_html_blocks; + +# Used to track when we're inside an ordered or unordered list +# (see _ProcessListItems() for details): +my $g_list_level = 0; + + +#### Blosxom plug-in interface ########################################## + +# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine +# which posts Markdown should process, using a "meta-markup: markdown" +# header. If it's set to 0 (the default), Markdown will process all +# entries. +my $g_blosxom_use_meta = 0; + +sub start { 1; } +sub story { + my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; + + if ( (! $g_blosxom_use_meta) or + (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) + ){ + $$body_ref = Markdown($$body_ref); + } + 1; +} + + +#### Movable Type plug-in interface ##################################### +eval {require MT}; # Test to see if we're running in MT. +unless ($@) { + require MT; + import MT; + require MT::Template::Context; + import MT::Template::Context; + + eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. + unless ($@) { + require MT::Plugin; + import MT::Plugin; + my $plugin = new MT::Plugin({ + name => "Markdown", + description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)", + doc_link => 'http://daringfireball.net/projects/markdown/' + }); + MT->add_plugin( $plugin ); + } + + MT::Template::Context->add_container_tag(MarkdownOptions => sub { + my $ctx = shift; + my $args = shift; + my $builder = $ctx->stash('builder'); + my $tokens = $ctx->stash('tokens'); + + if (defined ($args->{'output'}) ) { + $ctx->stash('markdown_output', lc $args->{'output'}); + } + + defined (my $str = $builder->build($ctx, $tokens) ) + or return $ctx->error($builder->errstr); + $str; # return value + }); + + MT->add_text_filter('markdown' => { + label => 'Markdown', + docs => 'http://daringfireball.net/projects/markdown/', + on_format => sub { + my $text = shift; + my $ctx = shift; + my $raw = 0; + if (defined $ctx) { + my $output = $ctx->stash('markdown_output'); + if (defined $output && $output =~ m/^html/i) { + $g_empty_element_suffix = ">"; + $ctx->stash('markdown_output', ''); + } + elsif (defined $output && $output eq 'raw') { + $raw = 1; + $ctx->stash('markdown_output', ''); + } + else { + $raw = 0; + $g_empty_element_suffix = " />"; + } + } + $text = $raw ? $text : Markdown($text); + $text; + }, + }); + + # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: + my $smartypants; + + { + no warnings "once"; + $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; + } + + if ($smartypants) { + MT->add_text_filter('markdown_with_smartypants' => { + label => 'Markdown With SmartyPants', + docs => 'http://daringfireball.net/projects/markdown/', + on_format => sub { + my $text = shift; + my $ctx = shift; + if (defined $ctx) { + my $output = $ctx->stash('markdown_output'); + if (defined $output && $output eq 'html') { + $g_empty_element_suffix = ">"; + } + else { + $g_empty_element_suffix = " />"; + } + } + $text = Markdown($text); + $text = $smartypants->($text, '1'); + }, + }); + } +} +else { +#### BBEdit/command-line text filter interface ########################## +# Needs to be hidden from MT (and Blosxom when running in static mode). + + # We're only using $blosxom::version once; tell Perl not to warn us: + no warnings 'once'; + unless ( defined($blosxom::version) ) { + use warnings; + + #### Check for command-line switches: ################# + my %cli_opts; + use Getopt::Long; + Getopt::Long::Configure('pass_through'); + GetOptions(\%cli_opts, + 'version', + 'shortversion', + 'html4tags', + ); + if ($cli_opts{'version'}) { # Version info + print "\nThis is Markdown, version $VERSION.\n"; + print "Copyright 2004 John Gruber\n"; + print "http://daringfireball.net/projects/markdown/\n\n"; + exit 0; + } + if ($cli_opts{'shortversion'}) { # Just the version number string. + print $VERSION; + exit 0; + } + if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML + $g_empty_element_suffix = ">"; + } + + + #### Process incoming text: ########################### + my $text; + { + local $/; # Slurp the whole file + $text = <>; + } + print Markdown($text); + } +} + + + +sub Markdown { +# +# Main function. The order in which other subs are called here is +# essential. Link and image substitutions need to happen before +# _EscapeSpecialChars(), so that any *'s or _'s in the <a> +# and <img> tags get encoded. +# + my $text = shift; + + # Clear the global hashes. If we don't clear these, you get conflicts + # from other articles when generating a page which contains more than + # one article (e.g. an index page that shows the N most recent + # articles): + %g_urls = (); + %g_titles = (); + %g_html_blocks = (); + + + # Standardize line endings: + $text =~ s{\r\n}{\n}g; # DOS to Unix + $text =~ s{\r}{\n}g; # Mac to Unix + + # Make sure $text ends with a couple of newlines: + $text .= "\n\n"; + + # Convert all tabs to spaces. + $text = _Detab($text); + + # Strip any lines consisting only of spaces and tabs. + # This makes subsequent regexen easier to write, because we can + # match consecutive blank lines with /\n+/ instead of something + # contorted like /[ \t]*\n+/ . + $text =~ s/^[ \t]+$//mg; + + # Turn block-level HTML blocks into hash entries + $text = _HashHTMLBlocks($text); + + # Strip link definitions, store in hashes. + $text = _StripLinkDefinitions($text); + + $text = _RunBlockGamut($text); + + $text = _UnescapeSpecialChars($text); + + return $text . "\n"; +} + + +sub _StripLinkDefinitions { +# +# Strips link definitions from text, stores the URLs and titles in +# hash references. +# + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Link defs are in the form: ^[id]: url "optional title" + while ($text =~ s{ + ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1 + [ \t]* + \n? # maybe *one* newline + [ \t]* + <?(\S+?)>? # url = $2 + [ \t]* + \n? # maybe one newline + [ \t]* + (?: + (?<=\s) # lookbehind for whitespace + ["(] + (.+?) # title = $3 + [")] + [ \t]* + )? # title is optional + (?:\n+|\Z) + } + {}mx) { + $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive + if ($3) { + $g_titles{lc $1} = $3; + $g_titles{lc $1} =~ s/"/"/g; + } + } + + return $text; +} + + +sub _HashHTMLBlocks { + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Hashify HTML blocks: + # We only want to do this for block-level HTML tags, such as headers, + # lists, and tables. That's because we still want to wrap <p>s around + # "paragraphs" that are wrapped in non-block-level tags, such as anchors, + # phrase emphasis, and spans. The list of tags we're looking for is + # hard-coded: + my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/; + my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/; + + # First, look for nested blocks, e.g.: + # <div> + # <div> + # tags for inner block must be indented. + # </div> + # </div> + # + # The outermost tags must start at the left margin for this to match, and + # the inner nested divs must be indented. + # We need to do this before the next, more liberal match, because the next + # match will start at the first `<div>` and stop at the first `</div>`. + $text =~ s{ + ( # save in $1 + ^ # start of line (with /m) + <($block_tags_a) # start tag = $2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + </\2> # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egmx; + + + # + # Now match more liberally, simply from `\n<tag>` to `</tag>\n` + # + $text =~ s{ + ( # save in $1 + ^ # start of line (with /m) + <($block_tags_b) # start tag = $2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + .*</\2> # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egmx; + # Special case just for <hr />. It was easier to make a special case than + # to make the other regex more complicated. + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + <(hr) # start tag = $2 + \b # word break + ([^<>])*? # + /?> # the matching end tag + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + # Special case for standalone HTML comments: + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + (?s: + <! + (--.*?--\s*)+ + > + ) + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + + return $text; +} + + +sub _RunBlockGamut { +# +# These are all the transformations that form block-level +# tags like paragraphs, headers, and list items. +# + my $text = shift; + + $text = _DoHeaders($text); + + # Do Horizontal Rules: + $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx; + $text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx; + $text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx; + + $text = _DoLists($text); + + $text = _DoCodeBlocks($text); + + $text = _DoBlockQuotes($text); + + # We already ran _HashHTMLBlocks() before, in Markdown(), but that + # was to escape raw HTML in the original Markdown source. This time, + # we're escaping the markup we've just created, so that we don't wrap + # <p> tags around block-level tags. + $text = _HashHTMLBlocks($text); + + $text = _FormParagraphs($text); + + return $text; +} + + +sub _RunSpanGamut { +# +# These are all the transformations that occur *within* block-level +# tags like paragraphs, headers, and list items. +# + my $text = shift; + + $text = _DoCodeSpans($text); + + $text = _EscapeSpecialChars($text); + + # Process anchor and image tags. Images must come first, + # because ![foo][f] looks like an anchor. + $text = _DoImages($text); + $text = _DoAnchors($text); + + # Make links out of things like `<http://example.com/>` + # Must come after _DoAnchors(), because you can use < and > + # delimiters in inline links like [this](<url>). + $text = _DoAutoLinks($text); + + $text = _EncodeAmpsAndAngles($text); + + $text = _DoItalicsAndBold($text); + + # Do hard breaks: + $text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g; + + return $text; +} + + +sub _EscapeSpecialChars { + my $text = shift; + my $tokens ||= _TokenizeHTML($text); + + $text = ''; # rebuild $text from the tokens +# my $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags. +# my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!; + + foreach my $cur_token (@$tokens) { + if ($cur_token->[0] eq "tag") { + # Within tags, encode * and _ so they don't conflict + # with their use in Markdown for italics and strong. + # We're replacing each such character with its + # corresponding MD5 checksum value; this is likely + # overkill, but it should prevent us from colliding + # with the escape values by accident. + $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx; + $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx; + $text .= $cur_token->[1]; + } else { + my $t = $cur_token->[1]; + $t = _EncodeBackslashEscapes($t); + $text .= $t; + } + } + return $text; +} + + +sub _DoAnchors { +# +# Turn Markdown link shortcuts into XHTML <a> tags. +# + my $text = shift; + + # + # First, handle reference-style links: [link text] [id] + # + $text =~ s{ + ( # wrap whole match in $1 + \[ + ($g_nested_brackets) # link text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + ) + }{ + my $result; + my $whole_match = $1; + my $link_text = $2; + my $link_id = lc $3; + + if ($link_id eq "") { + $link_id = lc $link_text; # for shortcut links like [this][]. + } + + if (defined $g_urls{$link_id}) { + my $url = $g_urls{$link_id}; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "<a href=\"$url\""; + if ( defined $g_titles{$link_id} ) { + my $title = $g_titles{$link_id}; + $title =~ s! \* !$g_escape_table{'*'}!gx; + $title =~ s! _ !$g_escape_table{'_'}!gx; + $result .= " title=\"$title\""; + } + $result .= ">$link_text</a>"; + } + else { + $result = $whole_match; + } + $result; + }xsge; + + # + # Next, inline-style links: [link text](url "optional title") + # + $text =~ s{ + ( # wrap whole match in $1 + \[ + ($g_nested_brackets) # link text = $2 + \] + \( # literal paren + [ \t]* + <?(.*?)>? # href = $3 + [ \t]* + ( # $4 + (['"]) # quote char = $5 + (.*?) # Title = $6 + \5 # matching quote + )? # title is optional + \) + ) + }{ + my $result; + my $whole_match = $1; + my $link_text = $2; + my $url = $3; + my $title = $6; + + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "<a href=\"$url\""; + + if (defined $title) { + $title =~ s/"/"/g; + $title =~ s! \* !$g_escape_table{'*'}!gx; + $title =~ s! _ !$g_escape_table{'_'}!gx; + $result .= " title=\"$title\""; + } + + $result .= ">$link_text</a>"; + + $result; + }xsge; + + return $text; +} + + +sub _DoImages { +# +# Turn Markdown image shortcuts into <img> tags. +# + my $text = shift; + + # + # First, handle reference-style labeled images: ![alt text][id] + # + $text =~ s{ + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + + ) + }{ + my $result; + my $whole_match = $1; + my $alt_text = $2; + my $link_id = lc $3; + + if ($link_id eq "") { + $link_id = lc $alt_text; # for shortcut links like ![this][]. + } + + $alt_text =~ s/"/"/g; + if (defined $g_urls{$link_id}) { + my $url = $g_urls{$link_id}; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "<img src=\"$url\" alt=\"$alt_text\""; + if (defined $g_titles{$link_id}) { + my $title = $g_titles{$link_id}; + $title =~ s! \* !$g_escape_table{'*'}!gx; + $title =~ s! _ !$g_escape_table{'_'}!gx; + $result .= " title=\"$title\""; + } + $result .= $g_empty_element_suffix; + } + else { + # If there's no such link ID, leave intact: + $result = $whole_match; + } + + $result; + }xsge; + + # + # Next, handle inline images:  + # Don't forget: encode * and _ + + $text =~ s{ + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + \( # literal paren + [ \t]* + <?(\S+?)>? # src url = $3 + [ \t]* + ( # $4 + (['"]) # quote char = $5 + (.*?) # title = $6 + \5 # matching quote + [ \t]* + )? # title is optional + \) + ) + }{ + my $result; + my $whole_match = $1; + my $alt_text = $2; + my $url = $3; + my $title = ''; + if (defined($6)) { + $title = $6; + } + + $alt_text =~ s/"/"/g; + $title =~ s/"/"/g; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "<img src=\"$url\" alt=\"$alt_text\""; + if (defined $title) { + $title =~ s! \* !$g_escape_table{'*'}!gx; + $title =~ s! _ !$g_escape_table{'_'}!gx; + $result .= " title=\"$title\""; + } + $result .= $g_empty_element_suffix; + + $result; + }xsge; + + return $text; +} + + +sub _DoHeaders { + my $text = shift; + + # Setext-style headers: + # Header 1 + # ======== + # + # Header 2 + # -------- + # + $text =~ s{ ^(.+)[ \t]*\n=+[ \t]*\n+ }{ + "<h1>" . _RunSpanGamut($1) . "</h1>\n\n"; + }egmx; + + $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{ + "<h2>" . _RunSpanGamut($1) . "</h2>\n\n"; + }egmx; + + + # atx-style headers: + # # Header 1 + # ## Header 2 + # ## Header 2 with closing hashes ## + # ... + # ###### Header 6 + # + $text =~ s{ + ^(\#{1,6}) # $1 = string of #'s + [ \t]* + (.+?) # $2 = Header text + [ \t]* + \#* # optional closing #'s (not counted) + \n+ + }{ + my $h_level = length($1); + "<h$h_level>" . _RunSpanGamut($2) . "</h$h_level>\n\n"; + }egmx; + + return $text; +} + + +sub _DoLists { +# +# Form HTML ordered (numbered) and unordered (bulleted) lists. +# + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Re-usable patterns to match list item bullets and number markers: + my $marker_ul = qr/[*+-]/; + my $marker_ol = qr/\d+[.]/; + my $marker_any = qr/(?:$marker_ul|$marker_ol)/; + + # Re-usable pattern to match any entirel ul or ol list: + my $whole_list = qr{ + ( # $1 = whole list + ( # $2 + [ ]{0,$less_than_tab} + (${marker_any}) # $3 = first list item marker + [ \t]+ + ) + (?s:.+?) + ( # $4 + \z + | + \n{2,} + (?=\S) + (?! # Negative lookahead for another list item marker + [ \t]* + ${marker_any}[ \t]+ + ) + ) + ) + }mx; + + # We use a different prefix before nested lists than top-level lists. + # See extended comment in _ProcessListItems(). + # + # Note: There's a bit of duplication here. My original implementation + # created a scalar regex pattern as the conditional result of the test on + # $g_list_level, and then only ran the $text =~ s{...}{...}egmx + # substitution once, using the scalar as the pattern. This worked, + # everywhere except when running under MT on my hosting account at Pair + # Networks. There, this caused all rebuilds to be killed by the reaper (or + # perhaps they crashed, but that seems incredibly unlikely given that the + # same script on the same server ran fine *except* under MT. I've spent + # more time trying to figure out why this is happening than I'd like to + # admit. My only guess, backed up by the fact that this workaround works, + # is that Perl optimizes the substition when it can figure out that the + # pattern will never change, and when this optimization isn't on, we run + # afoul of the reaper. Thus, the slightly redundant code to that uses two + # static s/// patterns rather than one conditional pattern. + + if ($g_list_level) { + $text =~ s{ + ^ + $whole_list + }{ + my $list = $1; + my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; + # Turn double returns into triple returns, so that we can make a + # paragraph for the last item in a list, if necessary: + $list =~ s/\n{2,}/\n\n\n/g; + my $result = _ProcessListItems($list, $marker_any); + $result = "<$list_type>\n" . $result . "</$list_type>\n"; + $result; + }egmx; + } + else { + $text =~ s{ + (?:(?<=\n\n)|\A\n?) + $whole_list + }{ + my $list = $1; + my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; + # Turn double returns into triple returns, so that we can make a + # paragraph for the last item in a list, if necessary: + $list =~ s/\n{2,}/\n\n\n/g; + my $result = _ProcessListItems($list, $marker_any); + $result = "<$list_type>\n" . $result . "</$list_type>\n"; + $result; + }egmx; + } + + + return $text; +} + + +sub _ProcessListItems { +# +# Process the contents of a single ordered or unordered list, splitting it +# into individual list items. +# + + my $list_str = shift; + my $marker_any = shift; + + + # The $g_list_level global keeps track of when we're inside a list. + # Each time we enter a list, we increment it; when we leave a list, + # we decrement. If it's zero, we're not in a list anymore. + # + # We do this because when we're not inside a list, we want to treat + # something like this: + # + # I recommend upgrading to version + # 8. Oops, now this line is treated + # as a sub-list. + # + # As a single paragraph, despite the fact that the second line starts + # with a digit-period-space sequence. + # + # Whereas when we're inside a list (or sub-list), that line will be + # treated as the start of a sub-list. What a kludge, huh? This is + # an aspect of Markdown's syntax that's hard to parse perfectly + # without resorting to mind-reading. Perhaps the solution is to + # change the syntax rules such that sub-lists must start with a + # starting cardinal number; e.g. "1." or "a.". + + $g_list_level++; + + # trim trailing blank lines: + $list_str =~ s/\n{2,}\z/\n/; + + + $list_str =~ s{ + (\n)? # leading line = $1 + (^[ \t]*) # leading whitespace = $2 + ($marker_any) [ \t]+ # list marker = $3 + ((?s:.+?) # list item text = $4 + (\n{1,2})) + (?= \n* (\z | \2 ($marker_any) [ \t]+)) + }{ + my $item = $4; + my $leading_line = $1; + my $leading_space = $2; + + if ($leading_line or ($item =~ m/\n{2,}/)) { + $item = _RunBlockGamut(_Outdent($item)); + } + else { + # Recursion for sub-lists: + $item = _DoLists(_Outdent($item)); + chomp $item; + $item = _RunSpanGamut($item); + } + + "<li>" . $item . "</li>\n"; + }egmx; + + $g_list_level--; + return $list_str; +} + + + +sub _DoCodeBlocks { +# +# Process Markdown `<pre><code>` blocks. +# + + my $text = shift; + + $text =~ s{ + (?:\n\n|\A) + ( # $1 = the code block -- one or more lines, starting with a space/tab + (?: + (?:[ ]{$g_tab_width} | \t) # Lines must start with a tab or a tab-width of spaces + .*\n+ + )+ + ) + ((?=^[ ]{0,$g_tab_width}\S)|\Z) # Lookahead for non-space at line-start, or end of doc + }{ + my $codeblock = $1; + my $result; # return value + + $codeblock = _EncodeCode(_Outdent($codeblock)); + $codeblock = _Detab($codeblock); + $codeblock =~ s/\A\n+//; # trim leading newlines + $codeblock =~ s/\s+\z//; # trim trailing whitespace + + $result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n"; + + $result; + }egmx; + + return $text; +} + + +sub _DoCodeSpans { +# +# * Backtick quotes are used for <code></code> spans. +# +# * You can use multiple backticks as the delimiters if you want to +# include literal backticks in the code span. So, this input: +# +# Just type ``foo `bar` baz`` at the prompt. +# +# Will translate to: +# +# <p>Just type <code>foo `bar` baz</code> at the prompt.</p> +# +# There's no arbitrary limit to the number of backticks you +# can use as delimters. If you need three consecutive backticks +# in your code, use four for delimiters, etc. +# +# * You can use spaces to get literal backticks at the edges: +# +# ... type `` `bar` `` ... +# +# Turns to: +# +# ... type <code>`bar`</code> ... +# + + my $text = shift; + + $text =~ s@ + (`+) # $1 = Opening run of ` + (.+?) # $2 = The code block + (?<!`) + \1 # Matching closer + (?!`) + @ + my $c = "$2"; + $c =~ s/^[ \t]*//g; # leading whitespace + $c =~ s/[ \t]*$//g; # trailing whitespace + $c = _EncodeCode($c); + "<code>$c</code>"; + @egsx; + + return $text; +} + + +sub _EncodeCode { +# +# Encode/escape certain characters inside Markdown code runs. +# The point is that in code, these characters are literals, +# and lose their special Markdown meanings. +# + local $_ = shift; + + # Encode all ampersands; HTML entities are not + # entities within a Markdown code span. + s/&/&/g; + + # Encode $'s, but only if we're running under Blosxom. + # (Blosxom interpolates Perl variables in article bodies.) + { + no warnings 'once'; + if (defined($blosxom::version)) { + s/\$/$/g; + } + } + + + # Do the angle bracket song and dance: + s! < !<!gx; + s! > !>!gx; + + # Now, escape characters that are magic in Markdown: + s! \* !$g_escape_table{'*'}!gx; + s! _ !$g_escape_table{'_'}!gx; + s! { !$g_escape_table{'{'}!gx; + s! } !$g_escape_table{'}'}!gx; + s! \[ !$g_escape_table{'['}!gx; + s! \] !$g_escape_table{']'}!gx; + s! \\ !$g_escape_table{'\\'}!gx; + + return $_; +} + + +sub _DoItalicsAndBold { + my $text = shift; + + # <strong> must go first: + $text =~ s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 } + {<strong>$2</strong>}gsx; + + $text =~ s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 } + {<em>$2</em>}gsx; + + return $text; +} + + +sub _DoBlockQuotes { + my $text = shift; + + $text =~ s{ + ( # Wrap whole match in $1 + ( + ^[ \t]*>[ \t]? # '>' at the start of a line + .+\n # rest of the first line + (.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + ) + }{ + my $bq = $1; + $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting + $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines + $bq = _RunBlockGamut($bq); # recurse + + $bq =~ s/^/ /g; + # These leading spaces screw with <pre> content, so we need to fix that: + $bq =~ s{ + (\s*<pre>.+?</pre>) + }{ + my $pre = $1; + $pre =~ s/^ //mg; + $pre; + }egsx; + + "<blockquote>\n$bq\n</blockquote>\n\n"; + }egmx; + + + return $text; +} + + +sub _FormParagraphs { +# +# Params: +# $text - string to process with html <p> tags +# + my $text = shift; + + # Strip leading and trailing lines: + $text =~ s/\A\n+//; + $text =~ s/\n+\z//; + + my @grafs = split(/\n{2,}/, $text); + + # + # Wrap <p> tags. + # + foreach (@grafs) { + unless (defined( $g_html_blocks{$_} )) { + $_ = _RunSpanGamut($_); + s/^([ \t]*)/<p>/; + $_ .= "</p>"; + } + } + + # + # Unhashify HTML blocks + # + foreach (@grafs) { + if (defined( $g_html_blocks{$_} )) { + $_ = $g_html_blocks{$_}; + } + } + + return join "\n\n", @grafs; +} + + +sub _EncodeAmpsAndAngles { +# Smart processing for ampersands and angle brackets that need to be encoded. + + my $text = shift; + + # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: + # http://bumppo.net/projects/amputator/ + $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g; + + # Encode naked <'s + $text =~ s{<(?![a-z/?\$!])}{<}gi; + + return $text; +} + + +sub _EncodeBackslashEscapes { +# +# Parameter: String. +# Returns: The string, with after processing the following backslash +# escape sequences. +# + local $_ = shift; + + s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first. + s! \\` !$g_escape_table{'`'}!gx; + s! \\\* !$g_escape_table{'*'}!gx; + s! \\_ !$g_escape_table{'_'}!gx; + s! \\\{ !$g_escape_table{'{'}!gx; + s! \\\} !$g_escape_table{'}'}!gx; + s! \\\[ !$g_escape_table{'['}!gx; + s! \\\] !$g_escape_table{']'}!gx; + s! \\\( !$g_escape_table{'('}!gx; + s! \\\) !$g_escape_table{')'}!gx; + s! \\> !$g_escape_table{'>'}!gx; + s! \\\# !$g_escape_table{'#'}!gx; + s! \\\+ !$g_escape_table{'+'}!gx; + s! \\\- !$g_escape_table{'-'}!gx; + s! \\\. !$g_escape_table{'.'}!gx; + s{ \\! }{$g_escape_table{'!'}}gx; + + return $_; +} + + +sub _DoAutoLinks { + my $text = shift; + + $text =~ s{<((https?|ftp):[^'">\s]+)>}{<a href="$1">$1</a>}gi; + + # Email addresses: <address@domain.foo> + $text =~ s{ + < + (?:mailto:)? + ( + [-.\w]+ + \@ + [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ + ) + > + }{ + _EncodeEmailAddress( _UnescapeSpecialChars($1) ); + }egix; + + return $text; +} + + +sub _EncodeEmailAddress { +# +# Input: an email address, e.g. "foo@example.com" +# +# Output: the email address as a mailto link, with each character +# of the address encoded as either a decimal or hex entity, in +# the hopes of foiling most address harvesting spam bots. E.g.: +# +# <a href="mailto:foo@e +# xample.com">foo +# @example.com</a> +# +# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk +# mailing list: <http://tinyurl.com/yu7ue> +# + + my $addr = shift; + + srand; + my @encode = ( + sub { '&#' . ord(shift) . ';' }, + sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' }, + sub { shift }, + ); + + $addr = "mailto:" . $addr; + + $addr =~ s{(.)}{ + my $char = $1; + if ( $char eq '@' ) { + # this *must* be encoded. I insist. + $char = $encode[int rand 1]->($char); + } elsif ( $char ne ':' ) { + # leave ':' alone (to spot mailto: later) + my $r = rand; + # roughly 10% raw, 45% hex, 45% dec + $char = ( + $r > .9 ? $encode[2]->($char) : + $r < .45 ? $encode[1]->($char) : + $encode[0]->($char) + ); + } + $char; + }gex; + + $addr = qq{<a href="$addr">$addr</a>}; + $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part + + return $addr; +} + + +sub _UnescapeSpecialChars { +# +# Swap back in all the special characters we've hidden. +# + my $text = shift; + + while( my($char, $hash) = each(%g_escape_table) ) { + $text =~ s/$hash/$char/g; + } + return $text; +} + + +sub _TokenizeHTML { +# +# Parameter: String containing HTML markup. +# Returns: Reference to an array of the tokens comprising the input +# string. Each token is either a tag (possibly with nested, +# tags contained therein, such as <a href="<MTFoo>">, or a +# run of text between tags. Each element of the array is a +# two-element array; the first is either 'tag' or 'text'; +# the second is the actual value. +# +# +# Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin. +# <http://www.bradchoate.com/past/mtregex.php> +# + + my $str = shift; + my $pos = 0; + my $len = length $str; + my @tokens; + + my $depth = 6; + my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth); + my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) | # comment + (?s: <\? .*? \?> ) | # processing instruction + $nested_tags/ix; # nested tags + + while ($str =~ m/($match)/g) { + my $whole_tag = $1; + my $sec_start = pos $str; + my $tag_start = $sec_start - length $whole_tag; + if ($pos < $tag_start) { + push @tokens, ['text', substr($str, $pos, $tag_start - $pos)]; + } + push @tokens, ['tag', $whole_tag]; + $pos = pos $str; + } + push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len; + \@tokens; +} + + +sub _Outdent { +# +# Remove one level of line-leading tabs or spaces +# + my $text = shift; + + $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm; + return $text; +} + + +sub _Detab { +# +# Cribbed from a post by Bart Lateur: +# <http://www.nntp.perl.org/group/perl.macperl.anyperl/154> +# + my $text = shift; + + $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge; + return $text; +} + + +1; + +__END__ + + +=pod + +=head1 NAME + +B<Markdown> + + +=head1 SYNOPSIS + +B<Markdown.pl> [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ] + [ I<file> ... ] + + +=head1 DESCRIPTION + +Markdown is a text-to-HTML filter; it translates an easy-to-read / +easy-to-write structured text format into HTML. Markdown's text format +is most similar to that of plain text email, and supports features such +as headers, *emphasis*, code blocks, blockquotes, and links. + +Markdown's syntax is designed not as a generic markup language, but +specifically to serve as a front-end to (X)HTML. You can use span-level +HTML tags anywhere in a Markdown document, and you can use block level +HTML tags (like <div> and <table> as well). + +For more information about Markdown's syntax, see: + + http://daringfireball.net/projects/markdown/ + + +=head1 OPTIONS + +Use "--" to end switch parsing. For example, to open a file named "-z", use: + + Markdown.pl -- -z + +=over 4 + + +=item B<--html4tags> + +Use HTML 4 style for empty element tags, e.g.: + + <br> + +instead of Markdown's default XHTML style tags, e.g.: + + <br /> + + +=item B<-v>, B<--version> + +Display Markdown's version number and copyright information. + + +=item B<-s>, B<--shortversion> + +Display the short-form version number. + + +=back + + + +=head1 BUGS + +To file bug reports or feature requests (other than topics listed in the +Caveats section above) please send email to: + + support@daringfireball.net + +Please include with your report: (1) the example input; (2) the output +you expected; (3) the output Markdown actually produced. + + +=head1 VERSION HISTORY + +See the readme file for detailed release notes for this version. + +1.0.1 - 14 Dec 2004 + +1.0 - 28 Aug 2004 + + +=head1 AUTHOR + + John Gruber + http://daringfireball.net + + PHP port and other contributions by Michel Fortin + http://michelf.com + + +=head1 COPYRIGHT AND LICENSE + +Copyright (c) 2003-2004 John Gruber +<http://daringfireball.net/> +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name "Markdown" nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +This software is provided by the copyright holders and contributors "as +is" and any express or implied warranties, including, but not limited +to, the implied warranties of merchantability and fitness for a +particular purpose are disclaimed. In no event shall the copyright owner +or contributors be liable for any direct, indirect, incidental, special, +exemplary, or consequential damages (including, but not limited to, +procurement of substitute goods or services; loss of use, data, or +profits; or business interruption) however caused and on any theory of +liability, whether in contract, strict liability, or tort (including +negligence or otherwise) arising in any way out of the use of this +software, even if advised of the possibility of such damage. + +=cut diff --git a/bin/contrib/md2html.awk b/bin/contrib/md2html.awk new file mode 100755 index 0000000..81d1241 --- /dev/null +++ b/bin/contrib/md2html.awk @@ -0,0 +1,427 @@ +#!/bin/awk -f +# +# by: Jesus Galan (yiyus) 2009 +# +# Usage: md2html.awk file.md > file.html +# See: http://4l77.com/src/md2html.awk + +function eschtml(t) { + gsub("&", "\\&", t); + gsub("<", "\\<", t); + return t; +} + +function oprint(t){ + if(nr == 0) + print t; + else + otext = otext "\n" t; +} + +function subref(id){ + for(; nr > 0 && sub("<<" id, ref[id], otext); nr--); + if(nr == 0 && otext) { + print otext; + otext = ""; + } +} + +function nextil(t) { + if(!match(t, /[`<&\[*_\\-]|(\!\[)/)) + return t; + t1 = substr(t, 1, RSTART - 1); + tag = substr(t, RSTART, RLENGTH); + t2 = substr(t, RSTART + RLENGTH); + if(ilcode && tag != "`") + return eschtml(t1 tag) nextil(t2); + # Backslash escaping + if(tag == "\\"){ + if(match(t2, /^[\\`*_{}\[\]()#+\-\.!]/)){ + tag = substr(t2, 1, 1); + t2 = substr(t2, 2); + } + return t1 tag nextil(t2); + } + # Dashes + if(tag == "-"){ + if(sub(/^-/, "", t2)) + tag = "—"; + return t1 tag nextil(t2); + } + # Inline Code + if(tag == "`"){ + if(sub(/^`/, "", t2)){ + if(!match(t2, /``/)) + return t1 "”" nextil(t2); + ilcode2 = !ilcode2; + } + else if(ilcode2) + return t1 tag nextil(t2); + tag = "<code>"; + if(ilcode){ + t1 = eschtml(t1); + tag = "</code>"; + } + ilcode = !ilcode; + return t1 tag nextil(t2); + } + if(tag == "<"){ + # Autolinks + if(match(t2, /^[^ ]+[\.@][^ ]+>/)){ + url = eschtml(substr(t2, 1, RLENGTH - 1)); + t2 = substr(t2, RLENGTH + 1); + linktext = url; + if(match(url, /@/) && !match(url, /^mailto:/)) + url = "mailto:" url; + return t1 "<a href=\"" url "\">" linktext "</a>" nextil(t2); + } + # Html tags + if(match(t2, /^[A-Za-z\/!][^>]*>/)){ + tag = tag substr(t2, RSTART, RLENGTH); + t2 = substr(t2, RLENGTH + 1); + return t1 tag nextil(t2); + } + return t1 "<" nextil(t2); + } + # Html special entities + if(tag == "&"){ + if(match(t2, /^#?[A-Za-z0-9]+;/)){ + tag = tag substr(t2, RSTART, RLENGTH); + t2 = substr(t2, RLENGTH + 1); + return t1 tag nextil(t2); + } + return t1 "&" nextil(t2); + } + # Images + if(tag == "!["){ + if(!match(t2, /(\[.*\])|(\(.*\))/)) + return t1 tag nextil(t2); + match(t2, /^[^\]]*/); + alt = substr(t2, 1, RLENGTH); + t2 = substr(t2, RLENGTH + 2); + if(match(t2, /^\(/)){ + # Inline + sub(/^\(/, "", t2); + match(t2, /^[^\)]+/); + url = eschtml(substr(t2, 1, RLENGTH)); + t2 = substr(t2, RLENGTH + 2); + title = ""; + if(match(url, /[ ]+\".*\"[ ]*$/)) { + title = substr(url, RSTART, RLENGTH); + url = substr(url, 1, RSTART - 1); + match(title, /\".*\"/); + title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\""; + } + if(match(url, /^<.*>$/)) + url = substr(url, 2, RLENGTH - 2); + return t1 "<img src=\"" url "\" alt=\"" alt "\"" title " />" nextil(t2); + } + else{ + # Referenced + sub(/^ ?\[/, "", t2); + id = alt; + if(match(t2, /^[^\]]+/)) + id = substr(t2, 1, RLENGTH); + t2 = substr(t2, RLENGTH + 2); + if(ref[id]) + r = ref[id]; + else{ + r = "<<" id; + nr++; + } + return t1 "<img src=\"" r "\" alt=\"" alt "\" />" nextil(t2); + } + } + # Links + if(tag == "["){ + if(!match(t2, /(\[.*\])|(\(.*\))/)) + return t1 tag nextil(t2); + match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/); + linktext = substr(t2, 1, RLENGTH); + t2 = substr(t2, RLENGTH + 2); + if(match(t2, /^\(/)){ + # Inline + match(t2, /^[^\)]+(\([^\)]+\)[^\)]*)*/); + url = substr(t2, 2, RLENGTH - 1); + pt2 = substr(t2, RLENGTH + 2); + title = ""; + if(match(url, /[ ]+\".*\"[ ]*$/)) { + title = substr(url, RSTART, RLENGTH); + url = substr(url, 1, RSTART - 1); + match(title, /\".*\"/); + title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\""; + } + if(match(url, /^<.*>$/)) + url = substr(url, 2, RLENGTH - 2); + url = eschtml(url); + return t1 "<a href=\"" url "\"" title ">" nextil(linktext) "</a>" nextil(pt2); + } + else{ + # Referenced + sub(/^ ?\[/, "", t2); + id = linktext; + if(match(t2, /^[^\]]+/)) + id = substr(t2, 1, RLENGTH); + t2 = substr(t2, RLENGTH + 2); + if(ref[id]) + r = ref[id]; + else{ + r = "<<" id; + nr++; + } + pt2 = t2; + return t1 "<a href=\"" r "\" />" nextil(linktext) "</a>" nextil(pt2); + } + } + # Emphasis + if(match(tag, /[*_]/)){ + ntag = tag; + if(sub("^" tag, "", t2)){ + if(stag[ns] == tag && match(t2, "^" tag)) + t2 = tag t2; + else + ntag = tag tag + } + n = length(ntag); + tag = (n == 2) ? "strong" : "em"; + if(match(t1, / $/) && match(t2, /^ /)) + return t1 tag nextil(t2); + if(stag[ns] == ntag){ + tag = "/" tag; + ns--; + } + else + stag[++ns] = ntag; + tag = "<" tag ">"; + return t1 tag nextil(t2); + } +} + +function inline(t) { + ilcode = 0; + ilcode2 = 0; + ns = 0; + + return nextil(t); +} + +function printp(tag) { + if(!match(text, /^[ ]*$/)){ + text = inline(text); + if(tag != "") + oprint("<" tag ">" text "</" tag ">"); + else + oprint(text); + } + text = ""; +} + +BEGIN { + blank = 0; + code = 0; + hr = 0; + html = 0; + nl = 0; + nr = 0; + otext = ""; + text = ""; + par = "p"; +} + +# References +!code && /^ *\[[^\]]*\]:[ ]+/ { + sub(/^ *\[/, ""); + match($0, /\]/); + id = substr($0, 1, RSTART - 1); + sub(id "\\]:[ ]+", ""); + title = ""; + if(match($0, /\".*\"$/)) + title = "\" title=\"" substr($0, RSTART + 1, RLENGTH - 2); + sub(/[ ]+\".*\"$/, ""); + url = eschtml($0); + ref[id] = url title; + + subref(id); + next; +} + +# html +!html && /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\ +isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/ { + if(code) + oprint("</pre></code>"); + for(; !text && block[nl] == "blockquote"; nl--) + oprint("</blockquote>"); + match($0, /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\ + isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/); + htag = substr($0, 2, RLENGTH - 1); + if(!match($0, "(<\\/" htag ">)|((^<hr ?\\/?)|(--)>$)")) + html = 1; + if(html && match($0, /^<hr/)) + hr = 1; + oprint($0); + next; +} + +html && (/(^<\/(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\ +isindex|menu|noframes|noscript|ol|p|pre|table|ul).*)|(--)>$/ || +(hr && />$/)) { + html = 0; + hr = 0; + oprint($0); + next; +} + +html { + oprint($0); + next; +} + +# List and quote blocks + +# Remove indentation +{ + for(nnl = 0; nnl < nl; nnl++) + if((match(block[nnl + 1], /[ou]l/) && !sub(/^( | )/, "")) || \ + (block[nnl + 1] == "blockquote" && !sub(/^> ?/, ""))) + break; +} +nnl < nl && !blank && text && ! /^ ? ? ?([*+-]|([0-9]+\.)+)( +| )/ { nnl = nl; } +# Quote blocks +{ + while(sub(/^> /, "")) + nblock[++nnl] = "blockquote"; +} +# Horizontal rules +{ hr = 0; } +(blank || (!text && !code)) && /^ ? ? ?([-*_][ ]*)([-*_][ ]*)([-*_][ ]*)+$/ { + if(code){ + oprint("</pre></code>"); + code = 0; + } + blank = 0; + nnl = 0; + hr = 1; +} +# List items +block[nl] ~ /[ou]l/ && /^$/ { + blank = 1; + next; +} +{ newli = 0; } +!hr && (nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?[*+-]( +| )/ { + sub(/^ ? ? ?[*+-]( +| )/, ""); + nnl++; + nblock[nnl] = "ul"; + newli = 1; +} +(nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?([0-9]+\.)+( +| )/ { + sub(/^ ? ? ?([0-9]+\.)+( +| )/, ""); + nnl++; + nblock[nnl] = "ol"; + newli = 1; +} +newli { + if(blank && nnl == nl && !par) + par = "p"; + blank = 0; + printp(par); + if(nnl == nl && block[nl] == nblock[nl]) + oprint("</li><li>"); +} +blank && ! /^$/ { + if(match(block[nnl], /[ou]l/) && !par) + par = "p"; + printp(par); + par = "p"; + blank = 0; +} + +# Close old blocks and open new ones +nnl != nl || nblock[nl] != block[nl] { + if(code){ + oprint("</pre></code>"); + code = 0; + } + printp(par); + b = (nnl > nl) ? nblock[nnl] : block[nnl]; + par = (match(b, /[ou]l/)) ? "" : "p"; +} +nnl < nl || (nnl == nl && nblock[nl] != block[nl]) { + for(; nl > nnl || (nnl == nl && pblock[nl] != block[nl]); nl--){ + if(match(block[nl], /[ou]l/)) + oprint("</li>"); + oprint("</" block[nl] ">"); + } +} +nnl > nl { + for(; nl < nnl; nl++){ + block[nl + 1] = nblock[nl + 1]; + oprint("<" block[nl + 1] ">"); + if(match(block[nl + 1], /[ou]l/)) + oprint("<li>"); + } +} +hr { + oprint("<hr>"); + next; +} + +# Code blocks +code && /^$/ { + if(blanK) + oprint(""); + blank = 1; + next; +} +!text && sub(/^( | )/, "") { + if(blanK) + oprint(""); + blank = 0; + if(!code) + oprint("<code><pre>"); + code = 1; + $0 = eschtml($0); + oprint($0); + next; +} +code { + oprint("</pre></code>"); + code = 0; +} + +# Setex-style Headers +text && /^=+$/ {printp("h1"); next;} +text && /^-+$/ {printp("h2"); next;} + +# Atx-Style headers +/^#+/ && (!newli || par=="p" || /^##/) { + for(n = 0; n < 6 && sub(/^# */, ""); n++) + sub(/#$/, ""); + par = "h" n; +} + +# Paragraph +/^$/ { + printp(par); + par = "p"; + next; +} + +# Add text +{ text = (text ? text " " : "") $0; } + +END { + if(code){ + oprint("</pre></code>"); + code = 0; + } + printp(par); + for(; nl > 0; nl--){ + if(match(block[nl], /[ou]l/)) + oprint("</li>"); + oprint("</" block[nl] ">"); + } + gsub(/<<[^\"]*/, "", otext); + print(otext); +} diff --git a/bin/contrib/rc-httpd/handlers/authorize b/bin/contrib/rc-httpd/handlers/authorize new file mode 100755 index 0000000..ea4db3e --- /dev/null +++ b/bin/contrib/rc-httpd/handlers/authorize @@ -0,0 +1,6 @@ +#!/bin/rc +if(~ $REMOTE_USER ''){ + extra_headers=($extra_headers 'WWW-Authenticate: Basic realm="'$"SERVER_NAME'"') + error 401 + exit +} diff --git a/bin/contrib/rc-httpd/handlers/cgi b/bin/contrib/rc-httpd/handlers/cgi new file mode 100755 index 0000000..2c9a9b9 --- /dev/null +++ b/bin/contrib/rc-httpd/handlers/cgi @@ -0,0 +1,46 @@ +#!/bin/rc +fn filter_headers{ + response=(200 OK) + lines='' + done=false + while(~ $done false){ + line=`{getline} + head=`{echo $line | awk '{print tolower($1)}'} + if(~ $head status:*) + response=`{echo $line | awk '{$1="" ; print}'} + if not if(~ $line '') + done=true + if not + lines=$"lines^$"line^$cr^' +' + } + echo 'HTTP/1.1' $"response^$cr + echo -n $"lines + do_log $response(1) +} + +fn run_cgi { + path=$cgi_path exec $"cgi_bin $params || echo 'Status: 500' +} + +cgi_bin=$1 +cgi_dir=. +if(! ~ $#* 1) + cgi_dir=$*($#*) +if not if(~ $"cgi_bin /*){ + cgi_dir=`{basename -d $"cgi_bin} + cgi_dir=$"cgi_dir +} +if(! ~ $"cgi_bin */*) + cgi_bin=./$"cgi_bin +if(! builtin cd $"cgi_dir >[2]/dev/null || ! test -x $"cgi_bin){ + error 500 + exit +} + +run_cgi | { + filter_headers + emit_extra_headers + echo $cr + exec cat +} diff --git a/bin/contrib/rc-httpd/handlers/dir-index b/bin/contrib/rc-httpd/handlers/dir-index new file mode 100755 index 0000000..00ff8ce --- /dev/null +++ b/bin/contrib/rc-httpd/handlers/dir-index @@ -0,0 +1,111 @@ +#!/bin/rc +PATH_INFO=`{echo $PATH_INFO | urldecode.awk} +full_path=$"FS_ROOT^$"PATH_INFO +full_path=$"full_path +if(! test -d $full_path){ + error 404 + exit +} +if(! test -r $full_path -x $full_path){ + error 503 + exit +} +do_log 200 +builtin cd $full_path +if(~ $"NOINDEXFILE ^ $"NOINDEX ''){ + ifile=index.htm* + if(! ~ $ifile(1) *'*'){ + PATH_INFO=$ifile(1) + FS_ROOT='' + exec serve-static + } +} +title=`{echo $SITE_TITLE | sed s,%s,^$"PATH_INFO^,} +title=$"title +lso=() +switch($2){ +case size + # ls has no option to sort by size + # could pipe it through sort, I suppose +case date + lso=-t +} +echo 'HTTP/1.1 200 OK'^$cr +emit_extra_headers +echo 'Content-type: text/html'^$cr +echo $cr +echo '<html> +<head> +<title>'^$title^'</title> +<style type="text/css"> + .size { + text-align: right; + padding-right: 4pt; + } + .day { + text-align: right; + padding-right: 3pt; + } + .datetime { + text-align: right; + } + .name { + text-align: right; + padding-left: 3pt; + } +</style> +</head> +<body>' +echo '<h1>'^$title^'</h1>' +if(! ~ $PATH_INFO /) + echo '<a href="../">Parent directory</a>' +echo '<table>' +ls -lQ $lso | awk ' +function urlencode(loc){ + # very minimal encoding, just enough for our static-file purposes + url=loc + gsub("%", "%25", url) # this one first! + gsub("\\$", "%24", url) + gsub("&", "%26", url) + gsub("\\+", "%2B", url) + gsub("\\?", "%3F", url) + gsub(" ", "%20", url) + gsub("\"", "%22", url) + gsub("#", "%23", url) + return url +} +function hrsize(size){ + if(size > 1073741824) return sprintf("%.1fGB", size/1073741824) + if(size > 10485760) return sprintf("%iMB", size/1048576) + if(size > 1048576) return sprintf("%.1fMB", size/1048576) + if(size > 10240) return sprintf("%iKB", size/1024) + if(size > 1024) return sprintf("%.1fKB", size/1024) + return sprintf("%iB", size) +} +/^(-|a)/ { + print "<tr>" + print "<td class=\"size\">"hrsize($6)"</td>" + print "<td class=\"month\">"$7"</td>" + print "<td class=\"day\">"$8"</td>" + print "<td class=\"datetime\">"$9"</td>" + $1="" ; $2="" ; $3="" ; $4="" ; $5="" ; $6="" ; $7="" ; $8="" ; $9="" + sub("^ *?", "") + print "<td><a class=\"file name\" href=\""urlencode($0)"\">"$0"</a></td>" + print "</tr>" + $0="" +} +/^d/ { + print "<tr>" + print "<td class=\"size\"> </td>" + print "<td class=\"month\">"$7"</td>" + print "<td class=\"day\">"$8"</td>" + print "<td class=\"datetime\">"$9"</td>" + $1="" ; $2="" ; $3="" ; $4="" ; $5="" ; $6="" ; $7="" ; $8="" ; $9="" + sub("^ *?", "") + print "<td><a class=\"dir name\" href=\""urlencode($0)"/\">"$0"/</a></td>" + print "</tr>" +}' +echo '</table> + +</body> +</html>' diff --git a/bin/contrib/rc-httpd/handlers/error b/bin/contrib/rc-httpd/handlers/error new file mode 100755 index 0000000..282d870 --- /dev/null +++ b/bin/contrib/rc-httpd/handlers/error @@ -0,0 +1,43 @@ +#!/bin/rc +# DO NOT make this script callable directly from the web! +fn do_error{ + echo 'HTTP/1.1 '^$1^$cr + emit_extra_headers + echo 'Content-type: text/html'^$cr + echo $cr + echo '<html> +<head> +<title>'^$1^'</title> +</head> +<body> +<h1>'^$1^'</h1>' + echo $2 + echo '<p><i>rc-httpd at' $SERVER_NAME '</i>' + echo ' + </body> + </html> + ' +} + +fn 401{ + do_error '401 Unauthorized' \ + 'The requested path '^$"location^' requires authorization.' +} + +fn 404{ + do_error '404 Not Found' \ + 'The requested path '^$"location^' was not found on this server.' +} + +fn 500{ + do_error '500 Internal Server Error' \ + 'The server has encountered an internal misconfiguration and is unable to satisfy your request.' +} + +fn 503{ + do_error '503 Forbidden' \ + 'You do not have permission to access '^$"location^' on this server.' +} + +do_log $1 +$1 diff --git a/bin/contrib/rc-httpd/handlers/redirect b/bin/contrib/rc-httpd/handlers/redirect new file mode 100755 index 0000000..e223091 --- /dev/null +++ b/bin/contrib/rc-httpd/handlers/redirect @@ -0,0 +1,30 @@ +#!/bin/rc +if(~ $#2 0){ + error 500 + exit +} +switch($1){ +case perm* + do_log 301 + echo 'HTTP/1.1 301 Moved Permanently'^$cr +case temp* + do_log 302 + echo 'HTTP/1.1 302 Moved Temporarily'^$cr +case seeother + do_log 303 + echo 'HTTP/1.1 303 See Other'^$cr +case * + error 500 + exit +} +echo 'Location: ' ^ $2 ^ $cr +emit_extra_headers +echo 'Content-type: text/html'^$cr +echo $cr +echo '<html><body>' +if(~ $#3 0) + echo 'Browser did not accept redirect.' +if not + echo $3 +echo '<a href="'^$"location^'/">Click here</a>' +echo '</body></html>' diff --git a/bin/contrib/rc-httpd/handlers/serve-static b/bin/contrib/rc-httpd/handlers/serve-static new file mode 100755 index 0000000..00cc70a --- /dev/null +++ b/bin/contrib/rc-httpd/handlers/serve-static @@ -0,0 +1,43 @@ +#!/bin/rc +full_path=`{echo $"FS_ROOT^$"PATH_INFO | urldecode.awk} +full_path=$"full_path +if(~ $full_path */) + error 503 +if(test -d $full_path){ + redirect perm $"location^'/' \ + 'URL not quite right, and browser did not accept redirect.' + exit +} +if(! test -e $full_path){ + error 404 + exit +} +if(! test -r $full_path){ + error 503 + exit +} +do_log 200 +switch($full_path){ +case *.html *.htm + type=text/html +case *.css + type=text/css +case *.txt + type='text/plain; charset=utf-8' +case *.jpg *.jpeg + type=image/jpeg +case *.gif + type=image/gif +case *.png + type=image/png +case * + type=`{file -m $full_path || file -i $full_path} # GROSS +} +max_age=3600 # 1 hour +echo 'HTTP/1.1 200 OK'^$cr +emit_extra_headers +echo 'Content-type: '^$type^'; charset=utf-8'^$cr +echo 'Content-length: '^`{ls -l $full_path | awk '{print $6}'} +echo 'Cache-control: max-age='^$max_age^$cr +echo $cr +exec cat $full_path diff --git a/bin/contrib/rc-httpd/handlers/static-or-cgi b/bin/contrib/rc-httpd/handlers/static-or-cgi new file mode 100755 index 0000000..4d8a2d4 --- /dev/null +++ b/bin/contrib/rc-httpd/handlers/static-or-cgi @@ -0,0 +1,14 @@ +#!/bin/rc +cgiargs=$* + +fn error{ + if(~ $1 404) + exec cgi $cgiargs + if not + $rc_httpd_dir/handlers/error $1 +} + +if(~ $location */) + exec cgi $cgiargs +if not + exec serve-static diff --git a/bin/contrib/rc-httpd/handlers/static-or-index b/bin/contrib/rc-httpd/handlers/static-or-index new file mode 100755 index 0000000..f0904f8 --- /dev/null +++ b/bin/contrib/rc-httpd/handlers/static-or-index @@ -0,0 +1,5 @@ +#!/bin/rc +if(~ $PATH_INFO */) + exec dir-index $params +if not + exec serve-static diff --git a/bin/contrib/rc-httpd/lib/urldecode.awk b/bin/contrib/rc-httpd/lib/urldecode.awk new file mode 100755 index 0000000..1dadd00 --- /dev/null +++ b/bin/contrib/rc-httpd/lib/urldecode.awk @@ -0,0 +1,39 @@ +# taken from werc +BEGIN { + hextab ["0"] = 0; hextab ["8"] = 8; + hextab ["1"] = 1; hextab ["9"] = 9; + hextab ["2"] = 2; hextab ["A"] = hextab ["a"] = 10 + hextab ["3"] = 3; hextab ["B"] = hextab ["b"] = 11; + hextab ["4"] = 4; hextab ["C"] = hextab ["c"] = 12; + hextab ["5"] = 5; hextab ["D"] = hextab ["d"] = 13; + hextab ["6"] = 6; hextab ["E"] = hextab ["e"] = 14; + hextab ["7"] = 7; hextab ["F"] = hextab ["f"] = 15; +} +{ + decoded = "" + i = 1 + len = length ($0) + while ( i <= len ) { + c = substr ($0, i, 1) + if ( c == "%" ) { + if ( i+2 <= len ) { + c1 = substr ($0, i+1, 1) + c2 = substr ($0, i+2, 1) + if ( hextab [c1] == "" || hextab [c2] == "" ) { + print "WARNING: invalid hex encoding: %" c1 c2 | "cat >&2" + } else { + code = 0 + hextab [c1] * 16 + hextab [c2] + 0 + c = sprintf ("%c", code) + i = i + 2 + } + } else { + print "WARNING: invalid % encoding: " substr ($0, i, len - i) + } + } else if ( c == "+" ) { + c = " " + } + decoded = decoded c + ++i + } + printf "%s", decoded +} diff --git a/bin/contrib/rc-httpd/rc-httpd b/bin/contrib/rc-httpd/rc-httpd new file mode 100755 index 0000000..8e4fad9 --- /dev/null +++ b/bin/contrib/rc-httpd/rc-httpd @@ -0,0 +1,102 @@ +#!/bin/rc +rc_httpd_dir=/home/sl/www/werc/bin/contrib/rc-httpd +libdir = $rc_httpd_dir/lib +path=($PLAN9/bin $rc_httpd_dir/handlers $PATH) +cgi_path=$PLAN9/bin +SERVER_PORT=80 # default for CGI scripts, may be overridden by the Host header +extra_headers='Server: rc-httpd' +cr=
+ +fn do_log{ + echo `{date} :: $SERVER_NAME :: $request :: \ + $HTTP_USER_AGENT :: $1 :: $HTTP_REFERER >[1=2] +} + +fn emit_extra_headers{ + for(header in $extra_headers) + echo $"header^$cr +} + +fn getline{ read | sed 's/'^$"cr^'$//g' } + +fn terminate{ + echo `{date} connection terminated >[1=2] + exit terminate +} + +fn trim_input{ dd -bs 1 -count $CONTENT_LENGTH } + +request=`{getline} +if(~ $#request 0) + terminate +REQUEST_METHOD=$request(1) +REQUEST_URI=$request(2) +reqlines='' +HTTP_COOKIE='' +REMOTE_USER='' +done=false +chunked=no +while(~ $"done false){ + line=`{getline} + if(~ $#line 0) + done=true + reqlines=$"reqlines$"line' +' + h=`{echo $line | awk '{print tolower($1)}'} + switch($h){ + case '' + done=true + case host: + SERVER_NAME=$line(2) + case referer: + HTTP_REFERER=$line(2) + case user-agent: + HTTP_USER_AGENT=`{echo $line | sed 's;[^:]+:[ ]+;;'} + case content-length: + CONTENT_LENGTH=$line(2) + case content-type: + CONTENT_TYPE=$line(2) + case cookie: + cookie=`{echo $line | sed 's;^[^:]+:[ ]*;;'} + HTTP_COOKIE=$"HTTP_COOKIE^$"cookie^'; ' + case authorization: + REMOTE_USER=`{auth/httpauth $line(3)} + case transfer-encoding: + ~ $line(2) chunked && chunked=yes + } +} +if(~ $REQUEST_URI *://* //*){ + SERVER_NAME=`{echo $REQUEST_URI | sed ' + s;^[^:]+:;; + s;^//([^/]+).*;\1;'} + REQUEST_URI=`{echo $REQUEST_URI | sed ' + s;^[^:]+:;; + s;^//[^/]+/?;/;'} +} +QUERY_STRING=`{echo $REQUEST_URI | sed 's;[^?]*\??;;'} +params=`{echo $QUERY_STRING | sed 's;\+; ;g'} +location=`{echo $REQUEST_URI | sed 's;\?.*;;'} +location=`{echo $location | sed ' + s;[^/]+/\.\./;/;g + s;/\./;/;g + s;//+;/;g +'} +SERVER_NAME=`{echo $SERVER_NAME | sed 's;^(\[[^\]]+\]|[^:]+)\:([0-9]+)$;\1 \2;'} +if(~ $#SERVER_NAME 2){ + SERVER_PORT=$SERVER_NAME(2) + SERVER_NAME=$SERVER_NAME(1) +} +if(~ $REQUEST_METHOD (PUT POST)){ + if(! ~ $"CONTENT_LENGTH '') + trim_input | exec $rc_httpd_dir/select-handler + if not{ + if(~ $chunked yes){ + echo 'HTTP/1.1 411 Length required'^$cr + echo $cr + exit + } + exec $rc_httpd_dir/select-handler + } +} +if not + . $rc_httpd_dir/select-handler diff --git a/bin/contrib/rc-httpd/select-handler b/bin/contrib/rc-httpd/select-handler new file mode 100755 index 0000000..ec819d4 --- /dev/null +++ b/bin/contrib/rc-httpd/select-handler @@ -0,0 +1,20 @@ +#!/bin/rc +rfork n + +# Route requests to werc. +# Change paths to match your system. + +if(~ $SERVER_NAME 9base.werc.cat-v.org) + PLAN9=/usr/local/9base +if(~ $SERVER_NAME frontbase.werc.cat-v.org) + PLAN9=/usr/local/plan9front +if(~ $SERVER_NAME plan9port.werc.cat-v.org) + PLAN9=/usr/local/plan9 + +if(~ $SERVER_NAME *){ + PATH_INFO=$location + FS_ROOT=/home/sl/www/werc/sites/$SERVER_NAME + exec static-or-cgi /home/sl/www/werc/bin/werc.rc +} +if not + error 503 diff --git a/bin/contrib/tcp80 b/bin/contrib/tcp80 new file mode 100755 index 0000000..ae111a0 --- /dev/null +++ b/bin/contrib/tcp80 @@ -0,0 +1,7 @@ +#!/bin/rc +# For use with listen(8). +# Change paths to match your system. +# Eitdit rc-httpd/rc-httpd to match your system. +PLAN9=/usr/local/plan9 +PATH=($PATH /home/sl/www/werc/bin/contrib) +exec /home/sl/www/werc/bin/contrib/rc-httpd/rc-httpd >>[2]/var/log/rc-httpd diff --git a/bin/contrib/urldecode.awk b/bin/contrib/urldecode.awk new file mode 100755 index 0000000..bd791e3 --- /dev/null +++ b/bin/contrib/urldecode.awk @@ -0,0 +1,39 @@ +#!/bin/awk -f +BEGIN { + hextab ["0"] = 0; hextab ["8"] = 8; + hextab ["1"] = 1; hextab ["9"] = 9; + hextab ["2"] = 2; hextab ["A"] = hextab ["a"] = 10 + hextab ["3"] = 3; hextab ["B"] = hextab ["b"] = 11; + hextab ["4"] = 4; hextab ["C"] = hextab ["c"] = 12; + hextab ["5"] = 5; hextab ["D"] = hextab ["d"] = 13; + hextab ["6"] = 6; hextab ["E"] = hextab ["e"] = 14; + hextab ["7"] = 7; hextab ["F"] = hextab ["f"] = 15; +} +{ + decoded = "" + i = 1 + len = length ($0) + while ( i <= len ) { + c = substr ($0, i, 1) + if ( c == "%" ) { + if ( i+2 <= len ) { + c1 = substr ($0, i+1, 1) + c2 = substr ($0, i+2, 1) + if ( hextab [c1] == "" || hextab [c2] == "" ) { + print "WARNING: invalid hex encoding: %" c1 c2 | "cat >&2" + } else { + code = 0 + hextab [c1] * 16 + hextab [c2] + 0 + c = sprintf ("%c", code) + i = i + 2 + } + } else { + print "WARNING: invalid % encoding: " substr ($0, i, len - i) + } + } else if ( c == "+" ) { + c = " " + } + decoded = decoded c + ++i + } + print decoded +} diff --git a/bin/contrib/urlencode.awk b/bin/contrib/urlencode.awk new file mode 100755 index 0000000..d4d354d --- /dev/null +++ b/bin/contrib/urlencode.awk @@ -0,0 +1,126 @@ +# Taken from http://www.shelldorado.com/scripts/cmds/urlencode +########################################################################## +# Title : urlencode - encode URL data +# Author : Heiner Steven (heiner.steven@odn.de) +# Date : 2000-03-15 +# Requires : awk +# Categories : File Conversion, WWW, CGI +# SCCS-Id. : @(#) urlencode 1.4 06/10/29 +########################################################################## +# Description +# Encode data according to +# RFC 1738: "Uniform Resource Locators (URL)" and +# RFC 1866: "Hypertext Markup Language - 2.0" (HTML) +# +# This encoding is used i.e. for the MIME type +# "application/x-www-form-urlencoded" +# +# Notes +# o The default behaviour is not to encode the line endings. This +# may not be what was intended, because the result will be +# multiple lines of output (which cannot be used in an URL or a +# HTTP "POST" request). If the desired output should be one +# line, use the "-l" option. +# +# o The "-l" option assumes, that the end-of-line is denoted by +# the character LF (ASCII 10). This is not true for Windows or +# Mac systems, where the end of a line is denoted by the two +# characters CR LF (ASCII 13 10). +# We use this for symmetry; data processed in the following way: +# cat | urlencode -l | urldecode -l +# should (and will) result in the original data +# +# o Large lines (or binary files) will break many AWK +# implementations. If you get the message +# awk: record `...' too long +# record number xxx +# consider using GNU AWK (gawk). +# +# o urlencode will always terminate it's output with an EOL +# character +# +# Thanks to Stefan Brozinski for pointing out a bug related to non-standard +# locales. +# +# See also +# urldecode +########################################################################## + +PN=`basename "$0"` # Program name +VER='1.4' + +: ${AWK=awk} + +Usage () { + echo >&2 "$PN - encode URL data, $VER +usage: $PN [-l] [file ...] + -l: encode line endings (result will be one line of output) + +The default is to encode each input line on its own." + exit 1 +} + +Msg () { + for MsgLine + do echo "$PN: $MsgLine" >&2 + done +} + +Fatal () { Msg "$@"; exit 1; } + +set -- `getopt hl "$@" 2>/dev/null` || Usage +[ $# -lt 1 ] && Usage # "getopt" detected an error + +EncodeEOL=no +while [ $# -gt 0 ] +do + case "$1" in + -l) EncodeEOL=yes;; + --) shift; break;; + -h) Usage;; + -*) Usage;; + *) break;; # First file name + esac + shift +done + +LANG=C export LANG +$AWK ' + BEGIN { + # We assume an awk implementation that is just plain dumb. + # We will convert an character to its ASCII value with the + # table ord[], and produce two-digit hexadecimal output + # without the printf("%02X") feature. + + EOL = "%0A" # "end of line" string (encoded) + split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ") + hextab [0] = 0 + for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0 + if ("'"$EncodeEOL"'" == "yes") EncodeEOL = 1; else EncodeEOL = 0 + } + { + encoded = "" + for ( i=1; i<=length ($0); ++i ) { + c = substr ($0, i, 1) + if ( c ~ /[a-zA-Z0-9.-]/ ) { + encoded = encoded c # safe character + } else if ( c == " " ) { + encoded = encoded "+" # special handling + } else { + # unsafe character, encode it as a two-digit hex-number + lo = ord [c] % 16 + hi = int (ord [c] / 16); + encoded = encoded "%" hextab [hi] hextab [lo] + } + } + if ( EncodeEOL ) { + printf ("%s", encoded EOL) + } else { + print encoded + } + } + END { + #if ( EncodeEOL ) print "" + } +' "$@" + diff --git a/bin/contrib/webserver.rc b/bin/contrib/webserver.rc new file mode 100755 index 0000000..8044565 --- /dev/null +++ b/bin/contrib/webserver.rc @@ -0,0 +1,30 @@ +#!/bin/rc + +# A web server in rc by maht +# Originally from http://www.proweb.co.uk/~matt/rc/webserver.rc + +ifs=' ' +request=`{sed 1q} + +url=$request(2) +file=`{echo $url | sed 's/http:\/\/[^\/]*//' | tr -d \012} + +if(test -d $file){ + file=$file ^'/index.html' +} +if(test -e $file) { + response='200' +} +if not { + response='404' + file='404.html' +} + +echo 'HTTP/1.1 ' ^$response +echo 'Date: ' `{date} +echo 'Server: rc shell' +echo 'Content-Length: ' `{cat $file | wc -c | tr -d ' '} +echo 'Content-Type: ' `{file -i $file | awk '{ print $2 }'} +echo 'Connection: close' +echo +cat $file |