package syntax

import (
	"bytes"
	"fmt"
	"math"
)

// Similar to prog.go in Go's regexp/syntax package, which also carries the
// comment "may not belong in this package".

// File provides operator constants for use by the Builder and the Machine.

// Implementation notes:
//
// Regexps are built into Code values (historically called RegexCodes), which
// contain an operation array, a string table, a set table, and some constants.
//
// Each operation is one of the codes below, followed by the integer
// operands specified for each op.
//
// String and set operands are indices into the string table and the set
// table, respectively.

// InstOp is an instruction opcode: one of the operator constants declared
// below, optionally OR-ed with the modifier bits Rtl, Back, Back2 and Ci.
// The unmodified operator is recovered with op & Mask.
type InstOp int

// Operator constants and modifier bits.
//
// The numeric values are significant: codeStr is indexed by the operator
// value, and the modifier bits are OR-ed on top of it.
//
// Only Onerep carries the explicit InstOp type. Every other constant in this
// block has its own "= value" expression and is therefore an untyped integer
// constant, usable both as an InstOp and as a plain int.
const (
	// Trailing-comment columns: lef/back flags, operands, description or example.

	Onerep    InstOp = 0 // lef,back char,min,max    a {n}
	Notonerep        = 1 // lef,back char,min,max    .{n}
	Setrep           = 2 // lef,back set,min,max     [\d]{n}

	Oneloop    = 3 // lef,back char,min,max    a {,n}
	Notoneloop = 4 // lef,back char,min,max    .{,n}
	Setloop    = 5 // lef,back set,min,max     [\d]{,n}

	Onelazy    = 6 // lef,back char,min,max    a {,n}?
	Notonelazy = 7 // lef,back char,min,max    .{,n}?
	Setlazy    = 8 // lef,back set,min,max     [\d]{,n}?

	One    = 9  // lef      char            a
	Notone = 10 // lef      char            [^a]
	Set    = 11 // lef      set             [a-z\s]  \w \s \d

	Multi = 12 // lef      string          abcd
	Ref   = 13 // lef      group           \#

	Bol         = 14 //                          ^
	Eol         = 15 //                          $
	Boundary    = 16 //                          \b
	Nonboundary = 17 //                          \B
	Beginning   = 18 //                          \A
	Start       = 19 //                          \G
	EndZ        = 20 //                          \Z
	End         = 21 //                          \z  NOTE(review): was "\Z", same as EndZ; presumably \z - confirm

	Nothing = 22 //                          Reject!

	// Primitive control structures

	Lazybranch      = 23 // back     jump            straight first
	Branchmark      = 24 // back     jump            branch first for loop
	Lazybranchmark  = 25 // back     jump            straight first for loop
	Nullcount       = 26 // back     val             set counter, null mark
	Setcount        = 27 // back     val             set counter, make mark
	Branchcount     = 28 // back     jump,limit      branch++ if zero<=c<limit
	Lazybranchcount = 29 // back     jump,limit      same, but straight first
	Nullmark        = 30 // back                     save position
	Setmark         = 31 // back                     save position
	Capturemark     = 32 // back     group           define group
	Getmark         = 33 // back                     recall position
	Setjump         = 34 // back                     save backtrack state
	Backjump        = 35 //                          zap back to saved state
	Forejump        = 36 //                          zap backtracking state
	Testref         = 37 //                          backtrack if ref undefined
	Goto            = 38 //          jump            just go

	Prune = 39 //                          prune it baby
	Stop  = 40 //                          done!

	ECMABoundary    = 41 //                          \b
	NonECMABoundary = 42 //                          \B

	// Modifiers for alternate modes. These are single bits above Mask that
	// are combined with one of the operators above.

	Mask  = 63  // Mask to get unmodified ordinary operator
	Rtl   = 64  // bit to indicate that we're reverse scanning.
	Back  = 128 // bit to indicate that we're backtracking.
	Back2 = 256 // bit to indicate that we're backtracking on a second branch.
	Ci    = 512 // bit to indicate that we're case-insensitive.
)

// Code is a compiled regular expression: the operation array together with
// the string and set tables that its operands index into, plus capture-group
// bookkeeping and prefix/anchor information.
type Code struct {
	Codes       []int       // the code: each opcode followed by its integer operands
	Strings     [][]rune    // string table
	Sets        []*CharSet  // character set table
	TrackCount  int         // how many instructions use backtracking
	Caps        map[int]int // mapping of user group numbers -> impl group slots
	Capsize     int         // number of impl group slots
	FcPrefix    *Prefix     // the set of candidate first characters (may be nil)
	BmPrefix    *BmPrefix   // the fixed prefix string as a Boyer-Moore machine (may be nil)
	Anchors     AnchorLoc   // the set of zero-length start anchors (RegexFCD.Bol, etc)
	RightToLeft bool        // true if right to left
}

// opcodeBacktracks reports whether op, once its modifier bits have been
// stripped with Mask, is one of the operators classified as backtracking.
func opcodeBacktracks(op InstOp) bool {
	switch op & Mask {
	// Greedy and lazy loops.
	case Oneloop, Notoneloop, Setloop,
		Onelazy, Notonelazy, Setlazy:
		return true

	// Branches and counters.
	case Lazybranch, Branchmark, Lazybranchmark,
		Nullcount, Setcount, Branchcount, Lazybranchcount:
		return true

	// Marks and jumps.
	case Setmark, Capturemark, Getmark,
		Setjump, Backjump, Forejump, Goto:
		return true
	}

	return false
}

// opcodeSize returns how many consecutive entries of a code array the
// instruction identified by op occupies (the opcode itself included).
// Modifier bits are ignored. It panics if the masked value is not a known
// operator.
func opcodeSize(op InstOp) int {
	base := op & Mask

	switch base {
	// One entry.
	case Bol, Eol, Boundary, Nonboundary, ECMABoundary, NonECMABoundary,
		Beginning, Start, EndZ, End,
		Nullmark, Setmark, Getmark, Setjump, Backjump, Forejump,
		Nothing, Stop:
		return 1

	// Two entries.
	case One, Notone, Set, Multi, Ref, Testref,
		Goto, Lazybranch, Branchmark, Lazybranchmark,
		Nullcount, Setcount, Prune:
		return 2

	// Three entries.
	case Onerep, Notonerep, Setrep,
		Oneloop, Notoneloop, Setloop,
		Onelazy, Notonelazy, Setlazy,
		Capturemark, Branchcount, Lazybranchcount:
		return 3
	}

	panic(fmt.Errorf("Unexpected op code: %v", base))
}

// codeStr holds the display name of every operator. A name's position in the
// slice equals the numeric value of the operator constant it names, so the
// order must stay in step with the constant declarations.
var codeStr = []string{
	// 0-2
	"Onerep",
	"Notonerep",
	"Setrep",
	// 3-5
	"Oneloop",
	"Notoneloop",
	"Setloop",
	// 6-8
	"Onelazy",
	"Notonelazy",
	"Setlazy",
	// 9-11
	"One",
	"Notone",
	"Set",
	// 12-13
	"Multi",
	"Ref",
	// 14-21
	"Bol",
	"Eol",
	"Boundary",
	"Nonboundary",
	"Beginning",
	"Start",
	"EndZ",
	"End",
	// 22
	"Nothing",
	// 23-25
	"Lazybranch",
	"Branchmark",
	"Lazybranchmark",
	// 26-29
	"Nullcount",
	"Setcount",
	"Branchcount",
	"Lazybranchcount",
	// 30-33
	"Nullmark",
	"Setmark",
	"Capturemark",
	"Getmark",
	// 34-38
	"Setjump",
	"Backjump",
	"Forejump",
	"Testref",
	"Goto",
	// 39-40
	"Prune",
	"Stop",
	// 41-42
	"ECMABoundary",
	"NonECMABoundary",
}

// operatorDescription returns the display name of op: the operator name from
// codeStr followed by a "-Ci", "-Rtl", "-Back" and/or "-Back2" suffix for
// each modifier bit that is set, in that order.
//
// Mask leaves room for values that have no entry in codeStr. Such values are
// rendered as "Unknown(N)" rather than indexing past the end of the table, so
// describing a malformed instruction does not panic.
func operatorDescription(op InstOp) string {
	base := op & Mask

	var desc string
	if int(base) < len(codeStr) {
		desc = codeStr[base]
	} else {
		desc = fmt.Sprintf("Unknown(%d)", int(base))
	}

	if (op & Ci) != 0 {
		desc += "-Ci"
	}
	if (op & Rtl) != 0 {
		desc += "-Rtl"
	}
	if (op & Back) != 0 {
		desc += "-Back"
	}
	if (op & Back2) != 0 {
		desc += "-Back2"
	}

	return desc
}

// OpcodeDescription returns a human-readable, single-line description of the
// instruction that starts at the given offset in c.Codes.
//
// The line consists of the zero-padded offset, a "*" marker when the operator
// is a backtracking one (a space otherwise), the operator description, and
// the decoded operands in parentheses.
func (c *Code) OpcodeDescription(offset int) string {
	var out bytes.Buffer

	raw := InstOp(c.Codes[offset])
	base := raw & Mask

	marker := " "
	if opcodeBacktracks(base) {
		marker = "*"
	}
	fmt.Fprintf(&out, "%06d %s%s(", offset, marker, operatorDescription(raw))

	// First operand, when the operator has one worth showing.
	switch base {
	case One, Notone, Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy:
		out.WriteString("Ch = " + CharDescription(rune(c.Codes[offset+1])))

	case Set, Setrep, Setloop, Setlazy:
		out.WriteString("Set = " + c.Sets[c.Codes[offset+1]].String())

	case Multi:
		out.WriteString("String = " + string(c.Strings[c.Codes[offset+1]]))

	case Ref, Testref:
		fmt.Fprintf(&out, "Index = %d", c.Codes[offset+1])

	case Capturemark:
		fmt.Fprintf(&out, "Index = %d", c.Codes[offset+1])
		// -1 in the second operand means there is no "unindex" group to show.
		if unindex := c.Codes[offset+2]; unindex != -1 {
			fmt.Fprintf(&out, ", Unindex = %d", unindex)
		}

	case Nullcount, Setcount:
		fmt.Fprintf(&out, "Value = %d", c.Codes[offset+1])

	case Goto, Lazybranch, Branchmark, Lazybranchmark, Branchcount, Lazybranchcount:
		fmt.Fprintf(&out, "Addr = %d", c.Codes[offset+1])
	}

	// Second operand: a repeat count or a branch limit, printed the same way
	// under a different label. math.MaxInt32 is rendered as "inf".
	label := ""
	switch base {
	case Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy, Setrep, Setloop, Setlazy:
		label = ", Rep = "
	case Branchcount, Lazybranchcount:
		label = ", Limit = "
	}
	if label != "" {
		out.WriteString(label)
		if count := c.Codes[offset+2]; count == math.MaxInt32 {
			out.WriteString("inf")
		} else {
			fmt.Fprintf(&out, "%d", count)
		}
	}

	out.WriteString(")")

	return out.String()
}

// Dump returns a multi-line, human-readable listing of the program: a header
// with the scan direction, first-character set, prefix and anchors, then the
// Boyer-Moore prefix dump when a prefix is present, then one line per
// instruction as produced by OpcodeDescription.
func (c *Code) Dump() string {
	var out bytes.Buffer

	direction := "left-to-right"
	if c.RightToLeft {
		direction = "right-to-left"
	}
	fmt.Fprintln(&out, "Direction:  "+direction)

	if c.FcPrefix != nil {
		fmt.Fprintf(&out, "Firstchars: %v\n", c.FcPrefix.PrefixSet.String())
	} else {
		out.WriteString("Firstchars: n/a\n")
	}

	hasPrefix := c.BmPrefix != nil
	if hasPrefix {
		fmt.Fprintf(&out, "Prefix:     %v\n", Escape(c.BmPrefix.String()))
	} else {
		out.WriteString("Prefix:     n/a\n")
	}

	// The second newline is the blank line that separates the header from the body.
	fmt.Fprintf(&out, "Anchors:    %v\n\n", c.Anchors)

	if hasPrefix {
		fmt.Fprintln(&out, "BoyerMoore:")
		fmt.Fprintln(&out, c.BmPrefix.Dump("    "))
	}

	// Instructions have different lengths, so step by the size of the one
	// just printed.
	for pc := 0; pc < len(c.Codes); {
		fmt.Fprintln(&out, c.OpcodeDescription(pc))
		pc += opcodeSize(InstOp(c.Codes[pc]))
	}

	return out.String()
}