#!/usr/bin/perl
# Copyright 2008 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

# Modified version of RE2's make_perl_groups.pl.

# Generate table entries giving character ranges
# for POSIX/Perl character classes. Rather than
# figure out what the definition is, it is easier to ask
# Perl about each letter from 0-128 and write down
# its answer.

use strict;
use warnings;
use 5.010;    # the defined-or operator (//) used in ComputeClass needs 5.10

# POSIX bracket classes to tabulate; each is wrapped in an outer [...]
# before being matched, e.g. "[[:alnum:]]".
my @posixclasses = (
    "[:alnum:]",
    "[:alpha:]",
    "[:ascii:]",
    "[:blank:]",
    "[:cntrl:]",
    "[:digit:]",
    "[:graph:]",
    "[:lower:]",
    "[:print:]",
    "[:punct:]",
    "[:space:]",
    "[:upper:]",
    "[:word:]",
    "[:xdigit:]",
);

# Perl escape classes to tabulate (the strings are \d, \s and \w).
my @perlclasses = (
    "\\d",
    "\\s",
    "\\w",
);

# Forced answers, keyed by "<class>:<code point>", that take precedence
# over whatever the running Perl reports for that character.
my %overrides = (
    # Prior to Perl 5.18, \s did not match vertical tab.
    # RE2 preserves that original behaviour.
    "\\s:11" => 0,
);

# ComputeClass($class)
#
# Asks Perl which code points in 0..128 match the bracket expression
# "[$class]" and returns the answer as a list of [lo, hi] array refs,
# one per maximal run of consecutive matching code points, in
# ascending order.
#
# An entry in %overrides for "$class:$i" replaces Perl's answer for
# code point $i.
sub ComputeClass {
    my ($class) = @_;

    my $regexp = "[$class]";
    my @ranges;
    my $start = -1;    # first code point of the open run, or -1 if none

    # 0..128 are the code points actually probed. The trailing 256 is a
    # sentinel that never matches; it only exists to close a run that is
    # still open after 128, which makes such a run end at 255.
    for my $i (0 .. 128, 256) {
        my $matches = $i <= 128
            && ($overrides{"$class:$i"} // (chr($i) =~ $regexp));

        if ($matches) {
            $start = $i if $start < 0;
            next;
        }

        push @ranges, [$start, $i - 1] if $start >= 0;
        $start = -1;
    }

    return @ranges;
}

# PrintClass($cname, $name, @ranges)
#
# Prints the Go declaration "var code$cname = []rune{ ... }" holding the
# lo/hi pairs from @ranges (a list of [lo, hi] array refs), with $name
# shown in a trailing comment.
#
# Returns the two Go map entries, as one string, that refer to that
# table: `$name` with sign +1 and the negated class name with sign -1.
# The negated name is formed by inserting "^" after the first ":" when
# the name contains one ("[:alpha:]" -> "[:^alpha:]"), otherwise by
# upper-casing the ASCII letters ("\d" -> "\D").
sub PrintClass {
    my ($cname, $name, @ranges) = @_;

    print "var code$cname = []rune{ /* $name */\n";
    for my $range (@ranges) {
        printf "\t0x%x, 0x%x,\n", $range->[0], $range->[1];
    }
    print "}\n\n";

    my $negname = $name;
    if ($negname =~ /:/) {
        $negname =~ s/:/:^/;
    }
    else {
        $negname =~ tr/a-z/A-Z/;
    }

    return "\t`$name`: {+1, code$cname},\n"
         . "\t`$negname`: {-1, code$cname},\n";
}

# Running counter used to number the generated codeN tables; it is shared
# across all PrintClasses calls so the Go variable names never collide.
my $gen = 0;

# PrintClasses($cname, @classes)
#
# Emits one codeN table per entry of @classes (via ComputeClass and
# PrintClass), followed by the Go map "var ${cname}Group" that contains
# the positive and negated entry for every class.
#
# Returns the number of classes processed.
sub PrintClasses {
    my ($cname, @classes) = @_;

    my @entries;
    for my $cl (@classes) {
        my @ranges = ComputeClass($cl);
        push @entries, PrintClass(++$gen, $cl, @ranges);
    }

    print "var ${cname}Group = map[string]charGroup{\n";
    print for @entries;
    print "}\n";

    return scalar @entries;
}

# Header of the generated Go file.
# NOTE(review): this heredoc was damaged in the reviewed copy of the script;
# only "perl_groups.go", "package syntax" and the "EOF" terminator survived.
# The licence and "GENERATED BY" lines below are a reconstruction -- confirm
# them against the header of the checked-in perl_groups.go before committing.
print <<'EOF';
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
// make_perl_groups.pl >perl_groups.go

package syntax

EOF

PrintClasses("perl", @perlclasses);
PrintClasses("posix", @posixclasses);