evaluate: add strategy record distance, misc improvements

7 years ago · baeb955841
parent ff878a9d79
commit baeb955841
4 changed files with 375 additions and 34 deletions
--- a/common/resh-common.go
+++ b/common/resh-common.go
@ -1,7 +1,10 @@
 package common

 import (
+	"encoding/json"
+	"errors"
 	"log"
+	"math"
 	"strconv"
 	"strings"

@ -86,10 +89,12 @@ type EnrichedRecord struct {
 	Record

 	// enriching fields - added "later"
-	Command      string `json:"command"`
-	FirstWord    string `json:"firstWord"`
-	Invalid      bool   `json:"invalid"`
-	SeqSessionID uint64 `json:"seqSessionId"`
+	Command         string   `json:"command"`
+	FirstWord       string   `json:"firstWord"`
+	Invalid         bool     `json:"invalid"`
+	SeqSessionID    uint64   `json:"seqSessionId"`
+	DebugThisRecord bool     `json:"debugThisRecord"`
+	Errors          []string `json:"errors"`
 	// SeqSessionID uint64 `json:"seqSessionId,omitempty"`
 }

@ -112,14 +117,33 @@ func ConvertRecord(r *FallbackRecord) Record {
 	}
 }

+// ToString - returns the record serialized as JSON
+func (r EnrichedRecord) ToString() (string, error) {
+	jsonRec, err := json.Marshal(r)
+	if err != nil {
+		return "marshalling error", err
+	}
+	return string(jsonRec), nil
+}
+
 // Enrich - adds additional fields to the record
 func (r Record) Enrich() EnrichedRecord {
 	record := EnrichedRecord{Record: r}
 	// Get command/first word from commandline
-	record.Command, record.FirstWord = GetCommandAndFirstWord(r.CmdLine)
-	err := r.Validate()
+	var err error
+	record.Command, record.FirstWord, err = GetCommandAndFirstWord(r.CmdLine)
 	if err != nil {
-		log.Println("Invalid command:", r.CmdLine)
+		record.Errors = append(record.Errors, "GetCommandAndFirstWord error:"+err.Error())
+		rec, _ := record.ToString()
+		log.Println("Invalid command:", rec)
+		record.Invalid = true
+		return record
+	}
+	err = r.Validate()
+	if err != nil {
+		record.Errors = append(record.Errors, "Validate error:"+err.Error())
+		rec, _ := record.ToString()
+		log.Println("Invalid command:", rec)
 		record.Invalid = true
 	}
 	return record
@ -128,18 +152,85 @@ func (r Record) Enrich() EnrichedRecord {

 // Validate - returns error if the record is invalid
 func (r *Record) Validate() error {
+	if r.RealtimeBefore == 0 || r.RealtimeAfter == 0 {
+		return errors.New("There is no Time")
+	}
+	if r.RealPwd == "" || r.RealPwdAfter == "" {
+		return errors.New("There is no Real Pwd")
+	}
+	if r.Pwd == "" || r.PwdAfter == "" {
+		return errors.New("There is no Pwd")
+	}
+
+	// TimezoneBefore
+	// TimezoneAfter
+
+	// RealtimeDuration
+	// RealtimeSinceSessionStart - TODO: add later
+	// RealtimeSinceBoot  - TODO: add later
+
+	// device extras
+	// Host
+	// Hosttype
+	// Ostype
+	// Machtype
+	// OsReleaseID
+	// OsReleaseVersionID
+	// OsReleaseIDLike
+	// OsReleaseName
+	// OsReleasePrettyName
+
+	// session extras
+	// Term
+	// Shlvl
+
+	// static info
+	// Lang
+	// LcAll
+
+	// meta
+	// ReshUUID
+	// ReshVersion
+	// ReshRevision
+
+	// added by sanitizer
+	// Sanitized
+	// CmdLength
 	return nil
 }

+// SetCmdLine sets cmdLine and related members
+func (r *EnrichedRecord) SetCmdLine(cmdLine string) {
+	r.CmdLine = cmdLine
+	r.CmdLength = len(cmdLine)
+	r.ExitCode = 0
+	var err error
+	r.Command, r.FirstWord, err = GetCommandAndFirstWord(cmdLine)
+	if err != nil {
+		r.Errors = append(r.Errors, "GetCommandAndFirstWord error:"+err.Error())
+		// log.Println("Invalid command:", r.CmdLine)
+		r.Invalid = true
+	}
+}
+
+// SetBeforeToAfter - set "before" members to "after" members
+func (r *EnrichedRecord) SetBeforeToAfter() {
+	r.Pwd = r.PwdAfter
+	r.RealPwd = r.RealPwdAfter
+	// r.TimezoneBefore = r.TimezoneAfter
+	// r.RealtimeBefore = r.RealtimeAfter
+	// r.RealtimeBeforeLocal = r.RealtimeAfterLocal
+}
+
 // GetCommandAndFirstWord func
-func GetCommandAndFirstWord(cmdLine string) (string, string) {
+func GetCommandAndFirstWord(cmdLine string) (string, string, error) {
 	args, err := shellwords.Parse(cmdLine)
 	if err != nil {
 		log.Println("shellwords Error:", err, " (cmdLine: <", cmdLine, "> )")
-		return "<shellwords_error>", "<shellwords_error>"
+		return "", "", err
 	}
 	if len(args) == 0 {
-		return "", ""
+		return "", "", nil
 	}
 	i := 0
 	for true {
@ -149,10 +240,140 @@ func GetCommandAndFirstWord(cmdLine string) (string, string) {
 			i++
 			continue
 		}
-		return args[i], args[0]
+		return args[i], args[0], nil
 	}
 	log.Fatal("GetCommandAndFirstWord error: this should not happen!")
-	return "ERROR", "ERROR"
+	return "ERROR", "ERROR", errors.New("this should not happen - contact developer ;)")
+}
+
+// DistParams is used to supply params to EnrichedRecord.DistanceTo()
+type DistParams struct {
+	ExitCode  float64
+	MachineID float64
+	SessionID float64
+	Login     float64
+	Shell     float64
+	Pwd       float64
+	RealPwd   float64
+	Git       float64
+	Time      float64
+}
+
+// DistanceTo - returns the distance to another record as a sum of per-field differences weighted by p
+func (r *EnrichedRecord) DistanceTo(r2 EnrichedRecord, p DistParams) float64 {
+	var dist float64
+	dist = 0
+
+	// lev distance or something? TODO later
+	// CmdLine
+
+	// exit code
+	if r.ExitCode != r2.ExitCode {
+		if r.ExitCode == 0 || r2.ExitCode == 0 {
+			// one success + one error -> 1
+			dist += 1 * p.ExitCode
+		} else {
+			// two different errors
+			dist += 0.5 * p.ExitCode
+		}
+	}
+
+	// machine/device
+	if r.MachineID != r2.MachineID {
+		dist += 1 * p.MachineID
+	}
+	// Uname
+
+	// session
+	if r.SessionID != r2.SessionID {
+		dist += 1 * p.SessionID
+	}
+	// Pid - add because of nested shells?
+	// SessionPid
+
+	// user
+	if r.Login != r2.Login {
+		dist += 1 * p.Login
+	}
+	// Home
+
+	// shell
+	if r.Shell != r2.Shell {
+		dist += 1 * p.Shell
+	}
+	// ShellEnv
+
+	// pwd
+	if r.Pwd != r2.Pwd {
+		// TODO: compare using hierarchy
+		// TODO: make more important
+		dist += 1 * p.Pwd
+	}
+	if r.RealPwd != r2.RealPwd {
+		// TODO: -||-
+		dist += 1 * p.RealPwd
+	}
+	// PwdAfter
+	// RealPwdAfter
+
+	// git
+	if r.GitDir != r2.GitDir {
+		dist += 1 * p.Git
+	}
+	if r.GitRealDir != r2.GitRealDir {
+		dist += 1 * p.Git
+	}
+	if r.GitOriginRemote != r2.GitOriginRemote {
+		dist += 1 * p.Git
+	}
+
+	// time
+	// this can actually get negative for differences of less than one second which is fine
+	// distance grows by p.Time with every order of magnitude of the time difference
+	distTime := math.Log10(math.Abs(r.RealtimeBefore-r2.RealtimeBefore)) * p.Time
+	if math.IsNaN(distTime) == false && math.IsInf(distTime, 0) == false {
+		dist += distTime
+	}
+	// RealtimeBeforeLocal
+	// RealtimeAfter
+	// RealtimeAfterLocal
+
+	// TimezoneBefore
+	// TimezoneAfter
+
+	// RealtimeDuration
+	// RealtimeSinceSessionStart - TODO: add later
+	// RealtimeSinceBoot  - TODO: add later
+
+	// device extras
+	// Host
+	// Hosttype
+	// Ostype
+	// Machtype
+	// OsReleaseID
+	// OsReleaseVersionID
+	// OsReleaseIDLike
+	// OsReleaseName
+	// OsReleasePrettyName
+
+	// session extras
+	// Term
+	// Shlvl
+
+	// static info
+	// Lang
+	// LcAll
+
+	// meta
+	// ReshUUID
+	// ReshVersion
+	// ReshRevision
+
+	// added by sanitizer
+	// Sanitized
+	// CmdLength
+
+	return dist
 }

 // Config struct
--- a/evaluate/resh-evaluate-plot.py
+++ b/evaluate/resh-evaluate-plot.py
@ -9,6 +9,7 @@ import matplotlib.pyplot as plt
 import matplotlib.path as mpath
 import numpy as np
 from graphviz import Digraph
+from datetime import datetime

 PLOT_WIDTH = 10 # inches
 PLOT_HEIGHT = 7 # inches
@ -274,7 +275,7 @@ def graph_cmdSequences(node_count=33, edge_minValue=0.05, view_graph=True):

 def plot_strategies_matches(plot_size=50, selected_strategies=[]):
    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
-    plt.title("Matches at distance")
+    plt.title("Matches at distance <{}>".format(datetime.now().strftime('%H:%M:%S')))
    plt.ylabel('%' + " of matches")
    plt.xlabel("Distance")
    legend = []
@ -349,7 +350,7 @@ def plot_strategies_matches(plot_size=50, selected_strategies=[]):

 def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]):
    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
-    plt.title("Average characters recalled at distance")
+    plt.title("Average characters recalled at distance <{}>".format(datetime.now().strftime('%H:%M:%S')))
    plt.ylabel("Average characters recalled")
    plt.xlabel("Distance")
    x_values = range(1, plot_size+1)
@ -420,7 +421,7 @@ def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]):

 def plot_strategies_charsRecalled_prefix(plot_size=50, selected_strategies=[]):
    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
-    plt.title("Average characters recalled at distance (including prefix matches)")
+    plt.title("Average characters recalled at distance (including prefix matches) <{}>".format(datetime.now().strftime('%H:%M:%S'))) 
    plt.ylabel("Average characters recalled (including prefix matches)")
    plt.xlabel("Distance")
    x_values = range(1, plot_size+1)
@ -493,17 +494,17 @@ def plot_strategies_charsRecalled_prefix(plot_size=50, selected_strategies=[]):
        plt.show()


-plot_cmdLineFrq_rank()
-plot_cmdFrq_rank()
+# plot_cmdLineFrq_rank()
+# plot_cmdFrq_rank()
        
-plot_cmdLineVocabularySize_cmdLinesEntered()
-plot_cmdVocabularySize_cmdLinesEntered()
+# plot_cmdLineVocabularySize_cmdLinesEntered()
+# plot_cmdVocabularySize_cmdLinesEntered()

 plot_strategies_matches(20)
 plot_strategies_charsRecalled(20)
 plot_strategies_charsRecalled_prefix(20)

-graph_cmdSequences(node_count=33, edge_minValue=0.048)
+# graph_cmdSequences(node_count=33, edge_minValue=0.048)

 # graph_cmdSequences(node_count=28, edge_minValue=0.06)

--- a/evaluate/resh-evaluate.go
+++ b/evaluate/resh-evaluate.go
@ -8,6 +8,7 @@ import (
 	"fmt"
 	"io/ioutil"
 	"log"
+	"math/rand"
 	"os"
 	"os/exec"
 	"os/user"
@ -48,6 +49,9 @@ func main() {
 		"Input data root, enables batch mode, looks for files matching --input option")
 	slow := flag.Bool("slow", false,
 		"Enables stuff that takes a long time (e.g. markov chain strategies).")
+	skipFailedCmds := flag.Bool("skip-failed-cmds", false,
+		"Skips records with non-zero exit status.")
+	debugRecords := flag.Float64("debug", 0, "Debug records - percentage of records that should be debugged.")

 	flag.Parse()

@ -77,7 +81,8 @@ func main() {
 		}
 	}

-	evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: maxCandidates, BatchMode: batchMode}
+	evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: maxCandidates,
+		BatchMode: batchMode, skipFailedCmds: *skipFailedCmds, debugRecords: *debugRecords}
 	if batchMode {
 		err := evaluator.initBatchMode(*input, *inputDataRoot)
 		if err != nil {
@ -95,29 +100,39 @@ func main() {
 	// dummy := strategyDummy{}
 	// strategies = append(strategies, &dummy)

-	recent := strategyRecent{}
+	strategies = append(strategies, &strategyRecent{})
+
 	frequent := strategyFrequent{}
 	frequent.init()
-	directory := strategyDirectorySensitive{}
-	directory.init()
+	strategies = append(strategies, &frequent)
+
 	random := strategyRandom{candidatesSize: maxCandidates}
 	random.init()
+	strategies = append(strategies, &random)

-	markovCmd := strategyMarkovChainCmd{order: 1}
-	markovCmd.init()
+	directory := strategyDirectorySensitive{}
+	directory.init()
+	strategies = append(strategies, &directory)
+
+	if *slow {
+		distanceStaticBest := strategyRecordDistance{
+			distParams: common.DistParams{SessionID: 1, Pwd: 10, RealPwd: 10, Time: 1},
+			label:      "10*pwd,10*realpwd,1*session,time",
+		}
+		strategies = append(strategies, &distanceStaticBest)

-	markovCmd2 := strategyMarkovChainCmd{order: 2}
-	markovCmd2.init()
+		markovCmd := strategyMarkovChainCmd{order: 1}
+		markovCmd.init()

-	markov := strategyMarkovChain{order: 1}
-	markov.init()
+		markovCmd2 := strategyMarkovChainCmd{order: 2}
+		markovCmd2.init()

-	markov2 := strategyMarkovChain{order: 2}
-	markov2.init()
+		markov := strategyMarkovChain{order: 1}
+		markov.init()

-	strategies = append(strategies, &recent, &frequent, &directory, &random)
+		markov2 := strategyMarkovChain{order: 2}
+		markov2.init()

-	if *slow {
 		strategies = append(strategies, &markovCmd2, &markovCmd, &markov2, &markov)
 	}

@ -175,6 +190,8 @@ type evaluator struct {
 	sanitizedInput bool
 	BatchMode      bool
 	maxCandidates  int
+	skipFailedCmds bool
+	debugRecords   float64
 	UsersRecords   []userRecords
 	Strategies     []strategyJSON
 }
@ -235,6 +252,10 @@ func (e *evaluator) processRecords() {
 					}
 					log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present")
 				}
+				e.UsersRecords[i].Devices[j].Records[k].SeqSessionID = id
+				if e.debugRecords > 0 && rand.Float64() < e.debugRecords {
+					e.UsersRecords[i].Devices[j].Records[k].DebugThisRecord = true
+				}
 			}
 			sort.SliceStable(e.UsersRecords[i].Devices[j].Records, func(x, y int) bool {
 				if device.Records[x].SeqSessionID == device.Records[y].SeqSessionID {
@ -253,8 +274,37 @@ func (e *evaluator) evaluate(strategy strategy) error {
 	for i := range e.UsersRecords {
 		for j := range e.UsersRecords[i].Devices {
 			bar := progressbar.New(len(e.UsersRecords[i].Devices[j].Records))
+			var prevRecord common.EnrichedRecord
 			for _, record := range e.UsersRecords[i].Devices[j].Records {
+				if e.skipFailedCmds && record.ExitCode != 0 {
+					continue
+				}
 				candidates := strategy.GetCandidates()
+				if record.DebugThisRecord {
+					log.Println()
+					log.Println("===================================================")
+					log.Println("STRATEGY:", title, "-", description)
+					log.Println("===================================================")
+					log.Println("Previous record:")
+					if prevRecord.RealtimeBefore == 0 {
+						log.Println("== NIL")
+					} else {
+						rec, _ := prevRecord.ToString()
+						log.Println(rec)
+					}
+					log.Println("---------------------------------------------------")
+					log.Println("Recommendations for:")
+					rec, _ := record.ToString()
+					log.Println(rec)
+					log.Println("---------------------------------------------------")
+					for i, candidate := range candidates {
+						if i > 10 {
+							break
+						}
+						log.Println(string(candidate))
+					}
+					log.Println("===================================================")
+				}

 				matchFound := false
 				longestPrefixMatchLength := 0
@ -289,6 +339,7 @@ func (e *evaluator) evaluate(strategy strategy) error {
 					return err
 				}
 				bar.Add(1)
+				prevRecord = record
 			}
 			strategy.ResetHistory()
 			fmt.Println()
--- a/evaluate/strategy-record-distance.go
+++ b/evaluate/strategy-record-distance.go
@ -0,0 +1,68 @@
+package main
+
+import (
+	"sort"
+	"strconv"
+
+	"github.com/curusarn/resh/common"
+)
+
+type strategyRecordDistance struct {
+	history    []common.EnrichedRecord
+	distParams common.DistParams
+	maxDepth   int
+	label      string
+}
+
+type strDistEntry struct {
+	cmdLine  string
+	distance float64
+}
+
+func (s *strategyRecordDistance) init() {
+	s.history = nil
+}
+
+func (s *strategyRecordDistance) GetTitleAndDescription() (string, string) {
+	return "record distance (depth:" + strconv.Itoa(s.maxDepth) + ";" + s.label + ")", "Use record distance to recommend commands"
+}
+
+func (s *strategyRecordDistance) GetCandidates() []string {
+	if len(s.history) == 0 {
+		return nil
+	}
+	var prevRecord common.EnrichedRecord
+	prevRecord = s.history[0]
+	prevRecord.SetCmdLine("")
+	prevRecord.SetBeforeToAfter()
+	var mapItems []strDistEntry
+	for i, record := range s.history {
+		if s.maxDepth != 0 && i > s.maxDepth {
+			break
+		}
+		distance := record.DistanceTo(prevRecord, s.distParams)
+		mapItems = append(mapItems, strDistEntry{record.CmdLine, distance})
+	}
+	sort.SliceStable(mapItems, func(i int, j int) bool { return mapItems[i].distance < mapItems[j].distance })
+	var hist []string
+	histSet := map[string]bool{}
+	for _, item := range mapItems {
+		if histSet[item.cmdLine] {
+			continue
+		}
+		histSet[item.cmdLine] = true
+		hist = append(hist, item.cmdLine)
+	}
+	return hist
+}
+
+func (s *strategyRecordDistance) AddHistoryRecord(record *common.EnrichedRecord) error {
+	// prepend record so that the most recently added record is at index 0
+	s.history = append([]common.EnrichedRecord{*record}, s.history...)
+	return nil
+}
+
+func (s *strategyRecordDistance) ResetHistory() error {
+	s.init()
+	return nil
+}