#!/usr/bin/perl
# Converts a y.output file from yacc to a LR(1) parse table.

# Copyright (c) 2000 by Ryan Kirkpatrick (softengr@rkirkpat.net). 
#   All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; see the file COPYING.  If not, write to
# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

# USAGE: The only parameters are a file name and an optional delimiter. If the
# delimiter has white space in it, I suggest you quote it on the command
# line. Make sure to seperate it by a whitespace from the '-d' option, and that
# it is before the filename. The output is then delimited by tab by default, or
# by what ever you specified. Known to work with GNU bison 1.25 and 1.28. If you
# find any bugs, fixed them and send me the correct script or a patch. :)

# To keep things sane.
use strict;

# Print a usage message.
sub Usage
{
  print "Usage: $0 [-d {delimiter}] {filename}\n";
  exit();
}

# Sort numerically.
sub numerically
{
  $a <=> $b;
}

# Double check command line parameters.
my $delimiter = "	";
if ($ARGV[0] eq "-d") { 
  shift @ARGV;
  $delimiter = shift @ARGV;
}
scalar(@ARGV) eq 1 || &Usage();
my $filename = shift @ARGV;

# Open the file.
open(YACC,$filename) || die "Can't open $filename! $!\n";

# Parse the file:
my $state;
my $number;
my @grammar;
my @terminals;
my @nonterminals;
my %table;
while (<YACC>) {

  # Check for the start line of the grammar list. Set if so.
  if ($_ =~ /^Grammar/) {
    $state = "grammar";
    next;
  }

  # Get rules if we are in such a state, and have found a rule.
  ($state eq "grammar" && $_ =~ /^rule/) && push(@grammar,$_);

  # Check for the start line of the terminal list. Set if so.
  if ($_ =~ /^Terminals/) {
    $state = "terminals";
    next;
  }

  # Get terminal if we are in such a state, and have found a terminal.
  if ($state eq "terminals" && $_ !~ /^$/ && $_ !~ /^Nonterminals, with/) {
    # Drop trailing characters.
    my $terminal = $_;
    $terminal =~ s/ \(.*//;
    chop($terminal);
    
    # Add to list of terminals if this is not the special error terminal.
    $terminal ne "error" && push(@terminals,$terminal);
  }

  # Check for the start line of the nonterminal list. Set if so.
  if ($_ =~ /^Nonterminals/) {
    $state = "nonterminals";
    next;
  }

  # Get nonterminal if we are in such a state, and have found a terminal.
  if ($state eq "nonterminals" && $_ !~ /^$/ && $_ !~ /^ / && $_ !~ /state 0/) {
    # Drop trailing characters.
    my $nonterminal = $_;
    $nonterminal =~ s/ \(.*//;
    chop($nonterminal);

    # Add to list of nonterminals.
    push(@nonterminals,$nonterminal);    
  }

  # Check for the start line of the state list. Set if so.
  if ($_ =~ /^state \d+/) {
    $state = "state";

    # Also get the state number, and create a new row in our LR(1) table structure.
    ($number) = $_ =~ /state ([0-9]*)/;
    my %row;
    $table{$number} = \%row;

    # Skip to next line.
    next;
  }

  # Get a state detail if we are in such a state, and have found a such a thing.
  if ($state eq "state" && $_ !~ /^$/ && $_ !~ /->/) {
    # Parse the line into different parts. State could be state to go to, 
    #   or rule to use in reduction.
    my ($symbol,$action,$state) = $_ =~ /^\s+([^\s]*)\s+([^\s,]*)\D*([0-9]+)/;

    # Check for specially formatted line.
    if ($action eq "") { 
      ($symbol,$action) = $_ =~ /^\s+([^\s]*)\s+([^\s,]*)/;
    }

    # Convert action to a short code.
    $action eq "accept" && do { $action = "a" };
    $action eq "shift" && do { $action = "s" };
    $action =~ /reduce/ && do { $action = "r" };
    $action eq "go" && do { $action = "" };

    # Now, put an entry in the LR(1) table structure.
    if ($action eq "") {
      # Just list the state to goto.
      $table{$number}{$symbol} = $state;
    } else {
      if ($state eq "") {
        # Just the action to be taken, usually 'accept'.
        $table{$number}{$symbol} = $action;
      } else {
        # Full version, action and state (or rule number).
        $table{$number}{$symbol} = $action . "," . $state;
      }
    }
  }
}

# Close the file.
close(YACC);

# Now print the LR(1) table.
my $terminal;
my $nonterminal;
my $rule;

# Print the header lines, including list of terminals and nonterminals.
print $delimiter, "Input";
for (my $i = 0; $i < scalar(@terminals); $i++) {
  print $delimiter;
}
print "Gotos\n";
print "State", $delimiter, join($delimiter,@terminals), $delimiter,
  join($delimiter,@nonterminals), "\n";

# Now print each row of teh table.
foreach $state (sort numerically keys %table) {
  # The state.
  print $state, $delimiter;

  # The terminal actions.
  foreach $terminal (@terminals) {
    if ($table{$state}{$terminal} ne "") {
      # Print the action for the state and input terminal.
      print $table{$state}{$terminal};
    } else {
      if  ($table{$state}{"\$default"} ne "") {
        # No action defined for state and input terminal, so print 
        #   the default action.
        print $table{$state}{"\$default"};
      } 
    }
    # Place seperator.
    print $delimiter;
  }

  # The non-terminal actions.
  foreach $nonterminal (@nonterminals) {
    print $table{$state}{$nonterminal}, $delimiter;
  }

  # End off the row.
  print "\n";
}

# Now just list the grammar rules for reference.
print "\nGrammar:\n";
foreach $rule (@grammar) {
  print $rule;
}

# Done!
