set FICKLE_VERSION 2.04
#!/usr/bin/tclsh

# $Id: fickle.tcl,v 1.6 2004/11/14 02:36:28 tang Exp $


#//#
# Fickle is a lexical analyzer generator written in pure Tcl.  It
# reads a <em>fickle specification file</em> to generate pure Tcl code
# that implements a scanner.  See the {@link README} file for complete
# instructions.  Additional information may be found at {@link
# http://mini.net/tcl/fickle}.
#
# @author Jason Tang (tang@jtang.org)
# @version 2.04
#//#

# Process a definition / directive on a single line.
proc handle_defs {line} {
    # trim whitespace and remove any comments
    set line [strip_comments [string trim $line]]
    if {$line == ""} {
        return
    }
    if {$line == "%\{"} {
        handle_literal_block
    } else {
        # extract the keyword to the left of the first space and the
        # arguments (if any) to the right
        if {[regexp -line {^(\S+)\s+(.*)} $line foo keyword args] == 0} {
            set keyword $line
            set args ""
        }
        switch -- $keyword {
            "%s" {
                foreach state_name [split $args] {
                    if {$state_name != ""} {
                        set ::state_table($state_name) $::INCLUSIVE
                    }
                }
            }
            "%x" {
                foreach state_name [split $args] {
                    if {$state_name != ""} {
                        set ::state_table($state_name) $::EXCLUSIVE
                    }
                }
            }
            "%option" {
                handle_options $args
            }
            "%buffersize" {
                if {$args == ""} {
                    fickle_error "%buffersize must have an integer parameter" $::PARAM_ERROR
                } elseif {[string is digit $args] && $args > 0} {
                    set ::BUFFER_SIZE $args
                } else {
                    fickle_error "%buffersize parameter must be positive integer" $::PARAM_ERROR
                }
            }
            default {
                # check if the directive is an option or a substitution
                if {[string index $keyword 0] == "%"} {
                    fickle_error "Unknown directive \"$keyword\"" $::SYNTAX_ERROR
                } else {
                    add_definition $line
                }
            }
        }
    }
}

# Copy everything between ^%\{$ to ^%\}$ to the destination file.
proc handle_literal_block {} {
    set end_defs 0
    while {$end_defs == 0} {
        if {[gets $::src line] < 0} {
            fickle_error "No terminator to verbatim section found " $::SYNTAX_ERROR
        } elseif {[string trim $line] == "%\}"} {
            set end_defs 1
        } else {
            if {[regexp {^\s*(public|private|protected)\s+variable} $line -> t]} {
                append ::variables "    $line\n"
            } else {
                append ::literal "$line\n"
            }
        }
        incr ::line_count
    }
}

# Examine each option (given by a %option directive) and set/unset
# flags as necessary.
proc handle_options {optargs} {
    foreach option [split $optargs] {
        if {$option == ""} {
            continue
        }
        if {$option == "default"} {
            # special construct to handle %option default (because I
            # can't match this in the switch statement below
            set ::suppress 0
            continue
        }
        switch -- $option {
            "caseful" -    "case-sensitive" -
            "nocaseless" - "nocase-insensitive" { set ::nocase 0 }
            "caseless" -  "case-insensitive" -
            "nocaseful" - "nocase-sensitive" { set ::nocase 1 }
            "debug"      { set ::debugmode 1 }
            "nodebug"   { set ::debugmode 0 }
            "nodefault" { set ::suppress 1 }
            "interactive"   { set ::interactive 1 }
            "nointeractive" { set ::interactive 0 }
            "verbose"   { set ::verbose 1 }
            "noverbose" { set ::verbose 0 }            
            "stack"   { set ::startstates 1 }
            "nostack" { set ::startstates 0 }
            "yylineno"   { set ::linenums 1 }
            "noyylineno" { set ::linenums 0 }
            "yywrap"   { set ::callyywrap 1 }
            "noyywrap" { set ::callyywrap 0 }
            "headers"   { set ::headers 1 }
            "noheaders" { set ::headers 0 }
            default {
                # note this is /not/ the same as %option default (see above)
                fickle_error "Unknown %option $option" $::PARAM_ERROR
            }
            
        }
    }
}

# Adds a definition to the substition table.
proc add_definition {line} {
    if {![regexp -line -- {\A\s*([a-zA-Z_]\S*)\s+(.+)} $line foo name pattern]} {
        fickle_error "Malformed definition" $::SYNTAX_ERROR
    }
    # make any substitutions within the pattern now
    foreach {sub_rule sub_pat} [array get ::sub_table] {
        # the quotes around the regexp below is necessary, to
        # allow for substitution of the sub_rule
        regsub -all -- "\{$sub_rule\}" $pattern "\($sub_pat\)" pattern
    }
    # double the backslashes (during the next round of substitution
    # the extras will go away)
    regsub -all -- {\\} $pattern {\\\\} pattern
    set ::sub_table($name) $pattern
}

# Actually build the scanner given a set of pattern / action pairs.
proc build_scanner {rules_buf} {
    # step 0: parse the rules buffer into individual rules and actions
    handle_rules_buf $rules_buf

    if $::interactive {
        set ::BUFFER_SIZE 1
    }
    
    # step 1: write scanner support functions
    write_scanner_utils
    
    # step 2: write the scanner to the destination file
    write_scanner
}

# Scan though the rules buffer, pulling out each pattern / action pair.
proc handle_rules_buf {rules_buf} {
    set regexp_list ""
    set num_rules 0
    while {[string length $rules_buf] > 0} {
        set line_start $::line_count
        # remove the next line from the buffer
        regexp -line -- {\A(.*)\n?} $rules_buf foo line
        set rules_buf [string range $rules_buf [string length $foo] end]
        # consume blank lines
        if {[string trim $line] == ""} {
            incr ::line_count
            continue
        }
        # extract the left hand side
        if {![regexp -line -- {\A\s*(\S+)(.*)} $line foo pattern line]} {
            fickle_error "No pattern found" $::SYNTAX_ERROR
        }
        # the pattern may contain spaces; use [info complete] to keep
        # appending to it
        set pattern_done 0
        while {!$pattern_done && $line != ""} {
            if [info complete $pattern] {
                set pattern_done 1
            } else {
                regexp -- {\A(\S*\s?)(.*)} $line foo p line
                append pattern $p
            }
        }
        if {!$pattern_done} {
            fickle_error "Pattern appears to be unterminated" $::SYNTAX_ERROR
        }
        set pattern [rewrite_pattern [string trim $pattern]]
        set orig_pattern $pattern
        
        # check the pattern to see if it has a start state
        set state_name ""
        if [regexp -- {\A<([^>]+)>} $pattern foo state_name] {
            if {!$::startstates} {
                fickle_error "Start state specified, but states were not enabled with `%option stack'" $::GRAMMAR_ERROR
            }
            # a state was found; remove it from the pattern
            regsub -- {\A<[^>]+>} $pattern "" pattern
            # check that the state was declared
            if {$state_name != "*" && ![info exists ::state_table($state_name)]} {
                fickle_error "Undeclared start state $state_name" $::GRAMMAR_ERROR
            }
        }
        # check if any textual substitutions are needed
        foreach sub_rule [array names ::sub_table] {
            # the quotes around the regexp below is necessary, to
            # allow for substitution of the sub_rule
            regsub -all -- "\{$sub_rule\}" $pattern "\($::sub_table($sub_rule)\)" pattern
        }

        # now determine the action; an action of just a vertical bar
        # means to use the subsequent action
        set action [string trimleft $line]
        if {[string trim $action] == ""} {
            fickle_error "Rule has no associated action" $::SYNTAX_ERROR
        } elseif {[string trim $action] == "|"} {
            # blank action means to use next one
            set action ""
        } else {
            # keep scanning through buffer until action is complete
            set num_lines 0
            set action_done 0
            while {!$action_done && $rules_buf != ""} {
                if [info complete $action] {
                    set action_done 1
                } else {
                    regexp -line -- {\A(.*)\n?} $rules_buf foo line
                    set rules_buf [string range $rules_buf [string length $foo] end]
                    append action "\n$line"
                    incr num_lines
                }
            }
            if {!$action_done && ![info complete $action]} {
                fickle_error "Unterminated action" $::SYNTAX_ERROR
            }
            # clean up the action, especially if it had curly braces
            # around the ends
            set action [string trim $action]
            if {[string index $action 0] == "{" && \
                [string index $action end] == "}"} {
                set action [string trim [string range $action 1 end-1]]
            }
            incr ::line_count $num_lines
        }
        lappend ::rule_table [list $orig_pattern $state_name $pattern $action $line_start]
        incr ::line_count
        if $::verbose {
            if {$state_name == ""} {
                set state "default state"
            } else {
                set state "state $state_name"
            }
            if {$action == ""} {
                set action "<fallthrough>"
            }
            puts stderr "Rule $num_rules: \[$pattern\] ($state) -> $action"
            incr num_rules
        }
    }
}

# Tcl style regexps are not 100% compatible with flex, so rewrite them
# here.
proc rewrite_pattern {pattern} {
    set in_quotes 0
    set in_brackets 0
    set in_escape 0
    foreach c [split $pattern {}] {
        if $in_escape {
            append newpattern $c
            set in_escape 0
            continue
        }
        if $in_quotes {
            if {$c == "\""} {
                set in_quotes 0
            } else {
                # metacharacters lose their meaning within quotes
                if [regexp -- {[.*\[\]^$\{\}+?|/\(\)]} $c foo] {
                    append newpattern "\\"
                }
                append newpattern $c
            }
            continue
        }
        switch -- $c {
            "\\" { append newpattern "\\"; set in_escape 1 }
            "\[" { append newpattern "\["; incr in_brackets }
            "\]" { append newpattern "\]"; incr in_brackets -1 }
            "\"" {
                if $in_brackets {
                    append newpattern "\\\""
                } else {
                    set in_quotes 1
                }
            }
            default {
                append newpattern $c
            }
        }
    }
    return $newpattern
}

######################################################################
# procedure to write scanner

# Writes all of the support procedures needed by the scanner during
# run time.
proc write_scanner_utils {} {
    global argv

    append ::output "
######
# Begin autogenerated fickle (version $::FICKLE_VERSION) routines.
# Although fickle itself is protected by the GNU Public License (GPL)
# all user-supplied functions are protected by their respective
# author's license.  See http://mini.net/tcl/fickle for other details.
######
"
    if $::callyywrap {
        if $::headers {
            append ::output "# If ${::p}wrap() returns false (zero), then it is assumed that the
# function has gone ahead and set up ${::p}in to point to another input
# file, and scanning continues.  If it returns true (non-zero), then
# the scanner terminates, returning 0 to its caller.  Note that in
# either case, the start condition remains unchanged; it does not
# revert to INITIAL.
#   -- from the flex(1) man page\n"
        }
        append ::output "itcl::body ${::classname}::${::p}wrap \{\} \{
    return 1
\}
"
    }
    if $::headers {
        append ::output "# ECHO copies ${::p}text to the scanner's output if no arguments are
# given.  The scanner writes its ECHO output to the ${::p}out global
# (default, stdout), which may be redefined by the user simply by
# assigning it to some other channel.
#   -- from the flex(1) man page\n"
    }
    append ::output "itcl::body ${::classname}::ECHO \{\{s \"\"\}\} \{
    if \{\$s == \"\"\} \{
        puts -nonewline \$${::p}out \$${::p}text
    \} else \{
        puts -nonewline \$${::p}out \$s
    \}
\}
"
    if $::headers {
        append ::output "# ${::P}_FLUSH_BUFFER flushes the scanner's internal buffer so that the
# next time the scanner attempts to match a token, it will first
# refill the buffer using ${::P}_INPUT.
#   -- from the flex(1) man page\n"
    }
    append ::output "itcl::body ${::classname}::${::P}_FLUSH_BUFFER \{\} \{
    set ${::p}_buffer \"\"
    set ${::p}index 0
    set ${::p}_done 0
\}
"
    if $::headers {
        append ::output "# ${::p}restart(new_file) may be called to point ${::p}in at the new input
# file.  The switch-over to the new file is immediate (any previously
# buffered-up input is lost).  Note that calling ${::p}restart with ${::p}in
# as an argument thus throws away the current input buffer and
# continues scanning the same input file.
#   -- from the flex(1) man page\n"
    }
    append ::output "itcl::body ${::classname}::${::p}restart \{new_file\} \{
    set ${::p}in \$new_file
    ${::P}_FLUSH_BUFFER
\}
"       
    if $::headers {
        append ::output "# The nature of how it gets its input can be controlled by defining
# the ${::P}_INPUT macro.  ${::P}_INPUT's calling sequence is
# \"${::P}_INPUT(buf,result,max_size)\".  Its action is to place up to
# max_size characters in the character array buf and return in the
# integer variable result either the number of characters read or the
# constant ${::P}_NULL (0 on Unix systems) to indicate EOF.  The default
# ${::P}_INPUT reads from the global file-pointer \"${::p}in\".
#   -- from the flex(1) man page\n"
    }
    append ::output "itcl::body ${::classname}::${::P}_INPUT \{buf result max_size\} \{
    upvar \$result ret_val
    upvar \$buf new_data
    if \{\$${::p}in != \"\"\} \{\n"
    if $::interactive {
        append ::output "        gets \$${::p}in new_data
        if \{!\[eof \$${::p}in\]\} \{
            append new_data \\n
        \}\n"
    } else {
        append ::output "        set new_data \[read \$${::p}in \$max_size\]\n"
    }
    append ::output "        set ret_val \[string length \$new_data\]
    \} else \{
        set new_data \"\"
        set ret_val 0
    \}
\}
"
    if $::headers {
        append ::output "# yy_scan_string sets up input buffers for scanning in-memory
# strings instead of files.  Note that switching input sources does
# not change the start condition.
#   -- from the flex(1) man page\n"
    }
    append ::output "itcl::body ${::classname}::${::p}_scan_string \{str\} \{
    append ${::p}_buffer \$str
    set ${::p}in \"\"
\}
"
    if $::headers {
        append ::output "# unput(c) puts the character c back onto the input stream.  It will
# be the next character scanned.
#   -- from the flex(1) man page\n"
    }
    append ::output "itcl::body ${::classname}::unput \{c\} \{
    set s \[string range $${::p}_buffer 0 \[expr \{$${::p}index - 1\}\]\]
    append s \$c
    set ${::p}_buffer \[append s \[string range $${::p}_buffer $${::p}index end\]\]
\}
"
    if $::headers {
        append ::output "# Returns all but the first n characters of the current token back to
# the input stream, where they will be rescanned when the scanner
# looks for the next match.  ${::p}text and ${::p}leng are adjusted
# appropriately.
#   -- from the flex(1) man page\n"
    }
    append ::output "itcl::body ${::classname}::${::p}less \{n\} \{
    set s \[string range \$${::p}_buffer 0 \[expr \{$${::p}index - 1\}\]\]
    append s \[string range \$${::p}text \$n end\]
    set ${::p}_buffer \[append s \[string range $${::p}_buffer $${::p}index end\]\]
    set ${::p}text \[string range $${::p}text 0 \[expr \{\$n - 1\}\]\]
    set ${::p}leng \[string length $${::p}text\]
\}
"
    if $::headers {
        append ::output "# input() reads the next character from the input stream.
#   -- from the flex(1) man page\n"
    }
    append ::output "itcl::body ${::classname}::input \{\} \{
    if \{\[string length \$${::p}_buffer\] - \$${::p}index < $::BUFFER_SIZE\} \{
       set new_buffer_size 0
       if \{\$${::p}_done == 0\} \{
           ${::P}_INPUT new_buffer new_buffer_size $::BUFFER_SIZE
           append ${::p}_buffer \$new_buffer
           if \{\$new_buffer_size == 0\} \{
               set ${::p}_done 1
           \}
       \}
       if \$${::p}_done \{\n"
    if $::callyywrap {
        append ::output "           if \{\[${::p}wrap\] == 0\} \{
               return \[input\]
           \} else"
    } else {
        append ::output "           "
    }
    append ::output "if \{\[string length $${::p}_buffer\] - $${::p}index == 0\} \{
               return \{\}
           \}
        \}
    \}
    set c \[string index $${::p}_buffer $${::p}index\]
    incr ${::p}index
    return \$c
\}
"
    if $::startstates {
        if $::headers {
            append ::output "# Pushes the current start condition onto the top of the start
# condition stack and switches to new_state as though you had used
# BEGIN new_state.
#   -- from the flex(1) man page\n"
        }
        append ::output "itcl::body ${::classname}::${::p}_push_state \{new_state\} \{
    lappend ${::p}_state_stack \$new_state
\}
"
        if $::headers {
            append ::output "# Pops off the top of the state stack; if the stack is now empty, then
# pushes the state \"INITIAL\".
#   -- from the flex(1) man page\n"
        }
        append ::output "itcl::body ${::classname}::${::p}_pop_state \{\} \{
    set ${::p}_state_stack \[lrange \$${::p}_state_stack 0 end-1\]
    if \{\$${::p}_state_stack == \"\"\} \{
        ${::p}_push_state INITIAL
    \}
\}
"
        if $::headers {
            append ::output "# Returns the top of the stack without altering the stack's contents.
#   -- from the flex(1) man page\n"
    }
    append ::output "itcl::body ${::classname}::${::p}_top_state \{\} \{
    return \[lindex \$${::p}_state_stack end\]
\}
"
        if $::headers {
            append ::output "# BEGIN followed by the name of a start condition places the scanner
# in the corresponding start condition. . . .Until the next BEGIN
# action is executed, rules with the given start condition will be
# active and rules with other start conditions will be inactive.  If
# the start condition is inclusive, then rules with no start
# conditions at all will also be active.  If it is exclusive, then
# only rules qualified with the start condition will be active.
#   -- from the flex(1) man page\n"
        }
        append ::output "itcl::body ${::classname}::BEGIN \{new_state\ \{prefix $::p\}\} \{
    eval set \${prefix}_state_stack \[lrange \${prefix}_state_stack 0 end-1\]
    eval lappend \${prefix}_state_stack \$new_state
\}
"
    }
    append ::variables "
    variable ${::p}text
    variable ${::p}leng
    variable ${::p}_buffer
    variable ${::p}index
    variable ${::p}_done
    variable ${::p}_state_stack
    variable ${::p}_state_table
    variable ${::p}lineno
    variable ${::p}_flex_debug
    public variable ${::p}in
    public variable ${::p}out
    "
    set ::constructor "# initialize values used by the lexer
        set ${::p}text {}
        set ${::p}leng 0
        set ${::p}_buffer \{\}
        set ${::p}index 0
        set ${::p}_done 0
"
    if $::startstates {
        append ::constructor "       
        set ${::p}_state_stack \{\}
        BEGIN INITIAL
        array set ${::p}_state_table \{[array get ::state_table]\}
        "
    }
    if $::linenums {
        append ::constructor "         set ${::p}lineno 1\n"
    }
    if $::debugmode {
        append ::constructor "         set ${::p}_flex_debug 1\n"
    }
    append ::constructor "if \{!\[info exists ${::p}in\]\} \{
            set ${::p}in \"stdin\"
        \}
        if \{!\[info exists ${::p}out\]\} \{
            set ${::p}out \"stdout\"
        \}
"
}


# Writes the actual scanner as a function called <code>yylex</code>.
# Note that this function may be renamed if the <code>-P</code> flag
# was given at the command line.
proc write_scanner {} {
    append ::output "######
# autogenerated ${::p}lex function created by fickle
######

# Whenever yylex() is called, it scans tokens from the global input
# file yyin (which defaults to stdin).  It continues until it either
# reaches an end-of-file (at which point it returns the value 0) or
# one of its actions executes a return statement.
#   -- from the flex(1) man page
itcl::body ${::classname}::${::p}lex \{\} \{
    #upvar #0 ::${::p}text ${::p}text
    #upvar #0 ::${::p}leng ${::p}leng
    while \{1\} \{\n"
    if $::startstates {
        append ::output "        set ${::p}_current_state \[${::p}_top_state\]\n"
    }
    append ::output "        if \{\[string length \$${::p}_buffer\] - \$${::p}index < $::BUFFER_SIZE\} \{
            if \{\$${::p}_done == 0\} \{
                set ${::p}_new_buffer \"\"
                ${::P}_INPUT ${::p}_new_buffer ${::p}_buffer_size $::BUFFER_SIZE
                append ${::p}_buffer \$${::p}_new_buffer
                if \{\$${::p}_buffer_size == 0 && \\
                        \[string length \$${::p}_buffer\] - \$${::p}index == 0\} \{
                    set ${::p}_done 1
                \}
            \}
            if \$${::p}_done \{\n"
    if $::debugmode {
        append ::output "                if \$${::p}_flex_debug \{
                    puts stderr \"   --reached end of input buffer\"
                \}\n"
    }
    if $::callyywrap {
        append ::output "                if \{\[${::p}wrap\] == 0\} \{
                    set ${::p}_done 0
                    continue
                \} else"
    } else {
        append ::output "                "
    }
    append ::output "if \{\[string length \$${::p}_buffer\] - \$${::p}index == 0\} \{
                    break
                \}
            \}            
        \}
        set ${::p}leng 0
        set ${::p}_matched_rule -1\n"
    
    # build up the if statements to determine which rule to execute;
    # lex is greedy and will use the rule that matches the most
    # strings
    if {$::nocase} {
        set scan_args "-nocase"
    } else {
        set scan_args ""
    }
    set rule_num 0
    foreach rule $::rule_table {
        foreach {orig_pattern state_name pattern action rule_line} $rule {}
        append ::output "        # rule $rule_num: $orig_pattern\n"
        append ::output "        if \{"
        if $::startstates {
            if {$state_name == ""} {
                append ::output "\$${::p}_state_table(\$${::p}_current_state) && \\\n                "
            } elseif {$state_name != "*"} {
                append ::output "\$${::p}_current_state == \"$state_name\" && \\\n                "
            }
        }
        append ::output "\[regexp -start \$${::p}index -indices -line $scan_args -- \{\\A($pattern)\} \$${::p}_buffer ${::p}_match\] > 0\ && \\
                \[lindex \$${::p}_match 1\] - \$${::p}index + 1 > \$${::p}leng\} \{
            set ${::p}text \[string range \$${::p}_buffer \$${::p}index \[lindex \$${::p}_match 1\]\]
            set ${::p}leng \[string length \$${::p}text\]
            set ${::p}_matched_rule $rule_num\n"
        if $::debugmode {
            append ::output "            set ${::p}rule_num \"rule at line $rule_line\"\n"
        }
        append ::output "        \}\n"
        incr rule_num
    }
    # now add the default case
    append ::output "        if \{\$${::p}_matched_rule == -1\} \{
            set ${::p}text \[string index \$${::p}_buffer \$${::p}index\]
            set ${::p}leng 1\n"
    if $::debugmode {
        append ::output "            set ${::p}rule_num \"default rule\"\n"
    }
    append ::output "        \}
        incr ${::p}index \$${::p}leng
        # workaround for Tcl's circumflex behavior
        if \{\[string index \$${::p}text end\] == \"\\n\"\} \{
            set ${::p}_buffer \[string range \$${::p}_buffer \$${::p}index end\]
            set ${::p}index 0
        \}\n"
    if $::debugmode {
        append ::output "        if \$${::p}_flex_debug \{
            puts stderr \"   --accepting \$${::p}rule_num (\\\"$${::p}text\\\")\"
        \}\n"
    }
    if $::linenums {
        append ::output "        set numlines \[expr \{\[llength \[split \$${::p}text \"\\n\"\]\] - 1\}\]\n"
    }
    append ::output "        switch -- \$${::p}_matched_rule \{\n"
    set rule_num 0
    foreach rule $::rule_table {
        append ::output "            $rule_num "
        if {[string length [lindex $rule 3]] == 0} {
            # action is empty, so use next pattern's action
            append ::output "-\n"
        } else {
            append ::output "\{\n[lindex $rule 3]\n            \}\n"
        }
        incr rule_num
    }
    append ::output "            default\n"
    if {$::suppress == 0} {
        append ::output "                \{ ECHO \}\n"
    } else {
        append ::output "                \{ puts stderr \"unmatched token: \$${::p}text"
        if $::startstates {
            append ::output " in state `\$${::p}_current_state'"
        }
        append ::output "\"; exit -1 \}\n"
    }
    append ::output "        \}\n"
    if $::linenums {
        append ::output "        incr ::${::p}lineno \$numlines\n"
    }
    append ::output "    \}
    return 0
\}
######
# end autogenerated fickle functions
######
"
}

######################################################################
# utility functions

# Given a line, returns a new line with any comments removed.
proc strip_comments {line} {
    regexp -- {\A([^\#]*)} $line foo line
    return $line
}

# If the first non-whitespace character on a line is a hash, then
# return an empty string; otherwise return the entire line.
proc strip_only_comments {line} {
    if [regexp -- {\A\s*\#} $line] {
        return ""
    } else {
        return $line
    }
}

# Retrives a parameter from the options list.  If no parameter exists
# then abort with an error very reminisicent of C's
# <code>getopt</code> function; otherwise increment
# <code>param_num</code> by one.
#
# @param param_list list of parameters from the command line
# @param param_num index into <code>param_list</code> to retrieve
# @param param_name name of the parameter, used when reporting an error
# @return the <code>$param_num</code>'th element into <code>$param_list</code>
proc get_param {param_list param_num param_name} {
    upvar $param_num pn
    incr pn
    if {$pn >= [llength $param_list]} {
        puts stderr "fickle: option requires an argument -- $param_name"
        exit $::PARAM_ERROR
    }
    return [lindex $param_list $pn]
}

# Display an error message to standard error along with where within
# the specification file it occurred.  Then abort this program.
proc fickle_error {message returnvalue} {
    puts -nonewline stderr $message
    puts stderr " (line $::line_count)"
    exit $returnvalue
}

# Display to a channel a brief summary of fickle command line options.
proc print_fickle_help {chan} {
    puts $chan "fickle: a Tcl lexical anaylzer generator
Usage: fickle \[options\] \[FILE\]
  FILE     a fickle specification file

Options:
  -h          print this help message and quit
  -v          be verbose while generating scanner
  -o FILE     specify name to write scanner
  -d          enable debug mode while running scanner
  -i          generate a case-insensitive scanner
  -l          keep track of line numbers in global variable yylineno
  -s          suppress default rule; unmatched input aborts with errors
  -t          write scanner to standard output
  -I          read input interactively
  -P PREFIX   change default yy prefix to PREFIX
  --version   print fickle version and quit

For more information see http://mini.net/tcl/fickle"
}

# Displays to standard out the fickle version, then exits program.
proc print_fickle_version {} {
    puts "fickle version $::FICKLE_VERSION"
    exit 0
}

######################################################################
# other fickle functions

# Parse the command line and set all global options.
proc fickle_args {argv} {
    set argvp 0
    set out_filename ""
    set write_to_stdout 0
    set ::callyywrap 1
    set ::debugmode 0
    set ::headers 1
    set ::interactive 0
    set ::nocase 0
    set ::linenums 0
    set ::startstates 0
    set ::suppress 0
    set ::BUFFER_SIZE 1024
    set ::p "yy"
    set ::P "YY"
    set ::verbose 0
    while {$argvp < [llength $argv]} {
        set arg [lindex $argv $argvp]
        switch -- $arg {
            "-d" { set ::debugmode 1 }
            "-h" - "--help" { print_fickle_help stdout; exit 0 }
            "-i" { set ::nocase 1 }
            "-l" { set ::linenums 1 }
            "-o" { set out_filename [get_param $argv argvp "o"] }
            "-s" { set ::suppress 1 }
            "-t" { set write_to_stdout 1 }
            "-v" { set ::verbose 1 }
            "-I" { set ::interactive 1 }
            "-P" {
                set prefix [get_param $argv argvp "P"]
                set ::p [string tolower $prefix]
                set ::P [string toupper $prefix]
            }
            "--version" { print_fickle_version }
            default {
                if {[string index $arg 0] != "-"} {
                    break
                } else {
                    puts stderr "fickle: unknown option $arg"
                    print_fickle_help stderr
                    exit $::PARAM_ERROR
                }
            }
        }
        incr argvp
    }
    if {$argvp >= [llength $argv]} {
        # read from stdin
        set ::src stdin
        set out_filename "lex.yy.tcl"
    } else {
        set in_filename [lindex $argv $argvp]
        if {$out_filename == ""} {
            set out_filename [file rootname $in_filename]
            append out_filename ".tcl"
        }
        if [catch {open $in_filename r} ::src] {
            puts stderr "Could not open specification file '$in_filename'."
            exit $::IO_ERROR
        }
    }
    if $write_to_stdout {
        set ::dest stdout
    } else {
        if [catch {open $out_filename w} ::dest] {
            puts stderr "Could not create output file '$out_filename'."
            exit $::IO_ERROR
        }
    }
}

# Actually do the scanner generation.
proc fickle_main {} {
    global argv
    set ::line_count 0
    if {[llength $argv] > 0} {
        set ::classname [file rootname [file tail [lindex $argv 0]]]
        regsub -all -nocase {[^A-Z0-9]} $::classname "" ::classname
    } else {
        set ::classname Scanner
    }
   # keep track of all rules found
    set ::rule_table ""
    
    # set up the INITIAL start state to be a normal inclusionary state
    set ::state_table(INITIAL) $::INCLUSIVE

    # keep track of where within the file I am:
    # definitions, rules, or subroutines
    set file_state definitions
    
    while {[gets $::src line] >= 0} {
        incr ::line_count
    
        if {$line == "%%"} {
            if {$file_state == "definitions"} {
                set file_state "rules"
            } elseif {$file_state == "rules"} {
                set file_state "subroutines"
            } else {
                fickle_error "Syntax error." $::SYNTAX_ERROR
            }
        } else {
            if {$file_state == "definitions"} {
                handle_defs $line
            } elseif {$file_state == "rules"} {
                # keep reading the rest of the file until EOF or
                # another '%%' appears
                set rules_buf [strip_only_comments $line]
                while {[gets $::src line] >= 0 && $file_state == "rules"} {
                    if {$line == "%%"} {
                        set file_state "subroutines"
                        break
                    } else {
                        append rules_buf "\n" [strip_only_comments $line]
                    }
                }
                build_scanner $rules_buf
                set file_state "subroutines"
            } else {
                # file_state is subroutines -- copy verbatim to output file
                append ::subroutines "$line\n"
            }
        }
    }
    puts $::dest     "
$::literal
package require Itcl
itcl::class $::classname {
    $::variables
    constructor {args} {
        eval configure \$args
        $::constructor
    }
    method ${::p}wrap {} {}
    method ECHO {{s \"\"}} {}
    method ${::P}_FLUSH_BUFFER {} {}
    method ${::p}restart {new_file} {}
    method ${::P}_INPUT {buf result max_size} {}
    method ${::p}_scan_string {str} {}
    method unput {c} {}
    method ${::p}less {n} {}
    method ${::p}lex {} {}
    method input {} {}
    "
    if $::startstates {
        puts $::dest "
    method ${::p}_push_state {new_state} {}
    method ${::p}_pop_state {} {}
    method ${::p}_top_state {} {}
    method BEGIN {newstate {prefix $::p}} {}
        "
 
    }
    puts $::dest "
}    
    $::output

$::subroutines
"
}

######################################################################
# start of actual script

set IO_ERROR 1
set SYNTAX_ERROR 2
set PARAM_ERROR 3
set GRAMMAR_ERROR 4

# two types of start states allowed:
set INCLUSIVE 1
set EXCLUSIVE 0

fickle_args $argv
fickle_main

