...
Run Format

Text file src/regexp/syntax/make_perl_groups.pl

Documentation: regexp/syntax

     1	#!/usr/bin/perl
     2	# Copyright 2008 The Go Authors. All rights reserved.
     3	# Use of this source code is governed by a BSD-style
     4	# license that can be found in the LICENSE file.
     5	
     6	# Modified version of RE2's make_perl_groups.pl.
     7	
     8	# Generate table entries giving character ranges
     9	# for POSIX/Perl character classes.  Rather than
    10	# figure out what the definition is, it is easier to ask
    11	# Perl about each letter from 0-128 and write down
    12	# its answer.
    13	
    # POSIX bracket-class names for which a range table is generated.
    # Each name is later wrapped in an outer [...] to form the regexp
    # that is tested against every character.
    our @posixclasses = qw(
        [:alnum:]
        [:alpha:]
        [:ascii:]
        [:blank:]
        [:cntrl:]
        [:digit:]
        [:graph:]
        [:lower:]
        [:print:]
        [:punct:]
        [:space:]
        [:upper:]
        [:word:]
        [:xdigit:]
    );

    # Perl backslash classes for which a range table is generated.
    our @perlclasses = ('\d', '\s', '\w');

    # Forced membership answers, keyed by "<class>:<decimal code point>".
    # A defined value here takes precedence over what the running Perl says.
    our %overrides = (
        # Prior to Perl 5.18, \s did not match vertical tab.
        # RE2 preserves that original behaviour.
        '\s:11' => 0,
    );
    42	
    # ComputeClass($class)
    #
    # Asks Perl which code points in 0..128 belong to the character class
    # $class (e.g. "\\d" or "[:alpha:]") and returns the answer as a list of
    # array refs [$lo, $hi], each an inclusive run of matching code points in
    # ascending order.
    #
    # A defined entry in the global %overrides under the key "$class:$code"
    # replaces Perl's own answer for that code point.
    sub ComputeClass($) {
        my ($class) = @_;
        my $pattern = "[$class]";
        my @runs;
        my $run_start;    # undef while no run is open

        foreach my $code (0 .. 128) {
            my $forced = $overrides{"$class:$code"};
            # =~ binds tighter than ?:, so this is the override if there is
            # one and the result of the match otherwise.
            my $member = defined $forced ? $forced : chr($code) =~ $pattern;

            if ($member) {
                $run_start = $code unless defined $run_start;
                next;
            }

            push @runs, [$run_start, $code - 1] if defined $run_start;
            undef $run_start;
        }

        # A run that is still open after code point 128 is closed at 0xFF.
        push @runs, [$run_start, 255] if defined $run_start;

        return @runs;
    }
    63	
    # PrintClass($cname, $name, @ranges)
    #
    # Prints to STDOUT a Go rune-slice declaration named "code$cname" that
    # lists every [$lo, $hi] pair in @ranges as two hex literals per line,
    # with $name in a trailing comment on the opening line.
    #
    # Returns the text of two Go map entries that both refer to that slice:
    # one keyed by $name with sign +1, and one keyed by the negated class
    # name with sign -1.  The negated name is formed by inserting "^" after
    # the first ":" when $name contains a colon ("[:alpha:]" becomes
    # "[:^alpha:]"), and by upper-casing a-z otherwise ("\d" becomes "\D").
    sub PrintClass($$@) {
        my ($cname, $name, @ranges) = @_;

        print "var code$cname = []rune{  /* $name */\n";
        foreach my $range (@ranges) {
            printf "\t0x%x, 0x%x,\n", $range->[0], $range->[1];
        }
        print "}\n\n";

        # Lexical, so the name does not leak into a package global.
        my $negname = $name;
        if ($negname =~ /:/) {
            $negname =~ s/:/:^/;
        } else {
            $negname =~ tr/a-z/A-Z/;
        }

        return "\t`$name`: {+1, code$cname},\n" .
               "\t`$negname`: {-1, code$cname},\n";
    }
    82	
    # Running counter shared by every PrintClasses call; it is incremented
    # once per class so that each generated "codeN" slice gets its own number.
    my $gen = 0;

    # PrintClasses($cname, @classes)
    #
    # For each class in @classes, computes its ranges with ComputeClass and
    # prints its rune table with PrintClass, numbering the tables from the
    # shared $gen counter.  Then prints a Go map named "${cname}Group" whose
    # body is the concatenation of the entries PrintClass returned.
    #
    # Returns the number of entries written into the map.
    sub PrintClasses($@) {
        my ($cname, @classes) = @_;

        my @entries = map {
            my @ranges = ComputeClass($_);
            PrintClass(++$gen, $_, @ranges);
        } @classes;

        print "var ${cname}Group = map[string]charGroup{\n";
        print @entries;
        print "}\n";

        return scalar @entries;
    }
    99	
   # Fixed preamble of the generated Go file: licence header, the
   # "generated" notice, and the package clause.  Each element is written
   # out followed by a newline; empty elements produce the blank lines.
   my @header_lines = (
       '// Copyright 2013 The Go Authors. All rights reserved.',
       '// Use of this source code is governed by a BSD-style',
       '// license that can be found in the LICENSE file.',
       '',
       '// GENERATED BY make_perl_groups.pl; DO NOT EDIT.',
       '// make_perl_groups.pl >perl_groups.go',
       '',
       'package syntax',
       '',
   );
   print "$_\n" foreach @header_lines;

   # Perl classes are emitted first, then the POSIX classes, so the Perl
   # tables receive the lowest "codeN" numbers.
   PrintClasses("perl", @perlclasses);
   PrintClasses("posix", @posixclasses);

View as plain text