evaluate: add strategy record distance, misc improvements

7 years ago · baeb955841
parent ff878a9d79
commit baeb955841
4 changed files with 375 additions and 34 deletions
--- a/common/resh-common.go
+++ b/common/resh-common.go
@ -1,7 +1,10 @@
 package common
 import (
 	"encoding/json"
 	"errors"
 	"log"
 	"math"
 	"strconv"
 	"strings"
@ -86,10 +89,12 @@ type EnrichedRecord struct {
 	Record
 	// enriching fields - added "later"
-	Command      string `json:"command"`
+	Command         string   `json:"command"`
-	FirstWord    string `json:"firstWord"`
+	FirstWord       string   `json:"firstWord"`
-	Invalid      bool   `json:"invalid"`
+	Invalid         bool     `json:"invalid"`
-	SeqSessionID uint64 `json:"seqSessionId"`
+	SeqSessionID    uint64   `json:"seqSessionId"`
 	DebugThisRecord bool     `json:"debugThisRecord"`
 	Errors          []string `json:"errors"`
 	// SeqSessionID uint64 `json:"seqSessionId,omitempty"`
 }
@ -112,14 +117,33 @@ func ConvertRecord(r *FallbackRecord) Record {
 	}
 }
 // ToString - serializes the record to JSON and returns it as a string.
 // When marshalling fails, the placeholder "marshalling error" is returned together with the error.
 func (r EnrichedRecord) ToString() (string, error) {
 	encoded, err := json.Marshal(r)
 	if err == nil {
 		return string(encoded), nil
 	}
 	return "marshalling error", err
 }
 // Enrich - adds additional fields to the record
 func (r Record) Enrich() EnrichedRecord {
 	record := EnrichedRecord{Record: r}
 	// Get command/first word from commandline
-	record.Command, record.FirstWord = GetCommandAndFirstWord(r.CmdLine)
+	var err error
-	err := r.Validate()
+	record.Command, record.FirstWord, err = GetCommandAndFirstWord(r.CmdLine)
 	if err != nil {
-		log.Println("Invalid command:", r.CmdLine)
+		record.Errors = append(record.Errors, "GetCommandAndFirstWord error:"+err.Error())
 		rec, _ := record.ToString()
 		log.Println("Invalid command:", rec)
 		record.Invalid = true
 		return record
 	}
 	err = r.Validate()
 	if err != nil {
 		record.Errors = append(record.Errors, "Validate error:"+err.Error())
 		rec, _ := record.ToString()
 		log.Println("Invalid command:", rec)
 		record.Invalid = true
 	}
 	return record
@ -128,18 +152,85 @@ func (r Record) Enrich() EnrichedRecord {
 // Validate - checks that the mandatory members of the record are filled in.
 // It returns an error naming the first missing group (time, real pwd, pwd) or nil when all are present.
 func (r *Record) Validate() error {
 	switch {
 	case r.RealtimeBefore == 0 || r.RealtimeAfter == 0:
 		return errors.New("There is no Time")
 	case r.RealPwd == "" || r.RealPwdAfter == "":
 		return errors.New("There is no Real Pwd")
 	case r.Pwd == "" || r.PwdAfter == "":
 		return errors.New("There is no Pwd")
 	}
 	// The members listed below are currently not validated:
 	// TimezoneBefore
 	// TimezoneAfter
 	// RealtimeDuration
 	// RealtimeSinceSessionStart - TODO: add later
 	// RealtimeSinceBoot  - TODO: add later
 	// device extras
 	// Host
 	// Hosttype
 	// Ostype
 	// Machtype
 	// OsReleaseID
 	// OsReleaseVersionID
 	// OsReleaseIDLike
 	// OsReleaseName
 	// OsReleasePrettyName
 	// session extras
 	// Term
 	// Shlvl
 	// static info
 	// Lang
 	// LcAll
 	// meta
 	// ReshUUID
 	// ReshVersion
 	// ReshRevision
 	// added by sanitizer
 	// Sanitized
 	// CmdLength
 	return nil
 }
 // SetCmdLine - replaces the command line of the record and refreshes the members derived from it.
 // CmdLength becomes the byte length of cmdLine and ExitCode is reset to 0.
 // Command and FirstWord are taken from GetCommandAndFirstWord; if that call fails,
 // the error is appended to Errors and the record is marked as Invalid.
 func (r *EnrichedRecord) SetCmdLine(cmdLine string) {
 	r.CmdLine = cmdLine
 	r.CmdLength = len(cmdLine)
 	r.ExitCode = 0
 	command, firstWord, parseErr := GetCommandAndFirstWord(cmdLine)
 	r.Command, r.FirstWord = command, firstWord
 	if parseErr == nil {
 		return
 	}
 	// the failure is recorded on the record itself instead of being logged here
 	r.Errors = append(r.Errors, "GetCommandAndFirstWord error:"+parseErr.Error())
 	r.Invalid = true
 }
 // SetBeforeToAfter - overwrites the "before" members with the values of their "after" counterparts.
 // Only Pwd and RealPwd are copied; the timezone and realtime members are intentionally left as they are.
 func (r *EnrichedRecord) SetBeforeToAfter() {
 	r.Pwd, r.RealPwd = r.PwdAfter, r.RealPwdAfter
 	// not copied (kept for reference):
 	// r.TimezoneBefore = r.TimezoneAfter
 	// r.RealtimeBefore = r.RealtimeAfter
 	// r.RealtimeBeforeLocal = r.RealtimeAfterLocal
 }
 // GetCommandAndFirstWord - parses cmdLine with shellwords and returns the command, the first word and the parsing error (if any)
-func GetCommandAndFirstWord(cmdLine string) (string, string) {
+func GetCommandAndFirstWord(cmdLine string) (string, string, error) {
 	args, err := shellwords.Parse(cmdLine)
 	if err != nil {
 		log.Println("shellwords Error:", err, " (cmdLine: <", cmdLine, "> )")
-		return "<shellwords_error>", "<shellwords_error>"
+		return "", "", err
 	}
 	if len(args) == 0 {
-		return "", ""
+		return "", "", nil
 	}
 	i := 0
 	for true {
@ -149,10 +240,140 @@ func GetCommandAndFirstWord(cmdLine string) (string, string) {
 			i++
 			continue
 		}
-		return args[i], args[0]
+		return args[i], args[0], nil
 	}
 	log.Fatal("GetCommandAndFirstWord error: this should not happen!")
-	return "ERROR", "ERROR"
+	return "ERROR", "ERROR", errors.New("this should not happen - contact developer ;)")
 }
 // DistParams is used to supply params to EnrichedRecord.DistanceTo()
 // Each member is a weight (multiplier) for the distance contributed by one kind of difference
 // between two records; a weight of 0 removes that contribution.
 type DistParams struct {
 	// ExitCode - applied fully when exactly one exit code is 0, by half when both are non-zero but differ
 	ExitCode  float64
 	// MachineID - applied when the MachineID members differ
 	MachineID float64
 	// SessionID - applied when the SessionID members differ
 	SessionID float64
 	// Login - applied when the Login members differ
 	Login     float64
 	// Shell - applied when the Shell members differ
 	Shell     float64
 	// Pwd - applied when the Pwd members differ
 	Pwd       float64
 	// RealPwd - applied when the RealPwd members differ
 	RealPwd   float64
 	// Git - applied once for each differing member of GitDir, GitRealDir and GitOriginRemote
 	Git       float64
 	// Time - multiplies the base-10 logarithm of the absolute RealtimeBefore difference
 	Time      float64
 }
 // DistanceTo - returns the weighted distance between this record and r2.
 // Every compared member that differs adds its weight from p to the result; the time difference
 // adds log10(|RealtimeBefore difference|) * p.Time. Members listed in the plain comments below
 // are not part of the distance (yet). The result can be negative (see the time section).
 func (r *EnrichedRecord) DistanceTo(r2 EnrichedRecord, p DistParams) float64 {
 	var dist float64
 	dist = 0
 	// lev distance or something? TODO later
 	// CmdLine
 	// exit code
 	if r.ExitCode != r2.ExitCode {
 		if r.ExitCode == 0 || r2.ExitCode == 0 {
 			// one success + one error -> 1
 			dist += 1 * p.ExitCode
 		} else {
 			// two different errors -> 0.5
 			dist += 0.5 * p.ExitCode
 		}
 	}
 	// machine/device
 	if r.MachineID != r2.MachineID {
 		dist += 1 * p.MachineID
 	}
 	// Uname
 	// session
 	if r.SessionID != r2.SessionID {
 		dist += 1 * p.SessionID
 	}
 	// Pid - add because of nested shells?
 	// SessionPid
 	// user
 	if r.Login != r2.Login {
 		dist += 1 * p.Login
 	}
 	// Home
 	// shell
 	if r.Shell != r2.Shell {
 		dist += 1 * p.Shell
 	}
 	// ShellEnv
 	// pwd
 	if r.Pwd != r2.Pwd {
 		// TODO: compare using hierarchy
 		// TODO: make more important
 		dist += 1 * p.Pwd
 	}
 	if r.RealPwd != r2.RealPwd {
 		// TODO: same as for Pwd above
 		dist += 1 * p.RealPwd
 	}
 	// PwdAfter
 	// RealPwdAfter
 	// git - the same weight is applied separately to each of the three git members
 	if r.GitDir != r2.GitDir {
 		dist += 1 * p.Git
 	}
 	if r.GitRealDir != r2.GitRealDir {
 		dist += 1 * p.Git
 	}
 	if r.GitOriginRemote != r2.GitOriginRemote {
 		dist += 1 * p.Git
 	}
 	// time
 	// this can actually get negative for differences of less than one second which is fine
 	// distance grows by 1 (times p.Time) with every order of magnitude
 	distTime := math.Log10(math.Abs(r.RealtimeBefore-r2.RealtimeBefore)) * p.Time
 	// NaN and +-Inf results are skipped - e.g. identical timestamps give log10(0) = -Inf
 	if math.IsNaN(distTime) == false && math.IsInf(distTime, 0) == false {
 		dist += distTime
 	}
 	// RealtimeBeforeLocal
 	// RealtimeAfter
 	// RealtimeAfterLocal
 	// TimezoneBefore
 	// TimezoneAfter
 	// RealtimeDuration
 	// RealtimeSinceSessionStart - TODO: add later
 	// RealtimeSinceBoot  - TODO: add later
 	// device extras
 	// Host
 	// Hosttype
 	// Ostype
 	// Machtype
 	// OsReleaseID
 	// OsReleaseVersionID
 	// OsReleaseIDLike
 	// OsReleaseName
 	// OsReleasePrettyName
 	// session extras
 	// Term
 	// Shlvl
 	// static info
 	// Lang
 	// LcAll
 	// meta
 	// ReshUUID
 	// ReshVersion
 	// ReshRevision
 	// added by sanitizer
 	// Sanitized
 	// CmdLength
 	return dist
 }
 // Config struct
--- a/evaluate/resh-evaluate-plot.py
+++ b/evaluate/resh-evaluate-plot.py
@ -9,6 +9,7 @@ import matplotlib.pyplot as plt
 import matplotlib.path as mpath
 import numpy as np
 from graphviz import Digraph
 from datetime import datetime
 PLOT_WIDTH = 10 # inches
 PLOT_HEIGHT = 7 # inches
@ -274,7 +275,7 @@ def graph_cmdSequences(node_count=33, edge_minValue=0.05, view_graph=True):
 def plot_strategies_matches(plot_size=50, selected_strategies=[]):
    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
-    plt.title("Matches at distance")
+    plt.title("Matches at distance <{}>".format(datetime.now().strftime('%H:%M:%S')))
    plt.ylabel('%' + " of matches")
    plt.xlabel("Distance")
    legend = []
@ -349,7 +350,7 @@ def plot_strategies_matches(plot_size=50, selected_strategies=[]):
 def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]):
    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
-    plt.title("Average characters recalled at distance")
+    plt.title("Average characters recalled at distance <{}>".format(datetime.now().strftime('%H:%M:%S')))
    plt.ylabel("Average characters recalled")
    plt.xlabel("Distance")
    x_values = range(1, plot_size+1)
@ -420,7 +421,7 @@ def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]):
 def plot_strategies_charsRecalled_prefix(plot_size=50, selected_strategies=[]):
    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
-    plt.title("Average characters recalled at distance (including prefix matches)")
+    plt.title("Average characters recalled at distance (including prefix matches) <{}>".format(datetime.now().strftime('%H:%M:%S'))) 
    plt.ylabel("Average characters recalled (including prefix matches)")
    plt.xlabel("Distance")
    x_values = range(1, plot_size+1)
@ -493,17 +494,17 @@ def plot_strategies_charsRecalled_prefix(plot_size=50, selected_strategies=[]):
        plt.show()
-plot_cmdLineFrq_rank()
+# plot_cmdLineFrq_rank()
-plot_cmdFrq_rank()
+# plot_cmdFrq_rank()
-plot_cmdLineVocabularySize_cmdLinesEntered()
+# plot_cmdLineVocabularySize_cmdLinesEntered()
-plot_cmdVocabularySize_cmdLinesEntered()
+# plot_cmdVocabularySize_cmdLinesEntered()
 plot_strategies_matches(20)
 plot_strategies_charsRecalled(20)
 plot_strategies_charsRecalled_prefix(20)
-graph_cmdSequences(node_count=33, edge_minValue=0.048)
+# graph_cmdSequences(node_count=33, edge_minValue=0.048)
 # graph_cmdSequences(node_count=28, edge_minValue=0.06)
--- a/evaluate/resh-evaluate.go
+++ b/evaluate/resh-evaluate.go
@ -8,6 +8,7 @@ import (
 	"fmt"
 	"io/ioutil"
 	"log"
 	"math/rand"
 	"os"
 	"os/exec"
 	"os/user"
@ -48,6 +49,9 @@ func main() {
 		"Input data root, enables batch mode, looks for files matching --input option")
 	slow := flag.Bool("slow", false,
 		"Enables stuff that takes a long time (e.g. markov chain strategies).")
 	skipFailedCmds := flag.Bool("skip-failed-cmds", false,
 		"Skips records with non-zero exit status.")
 	debugRecords := flag.Float64("debug", 0, "Debug records - percentage of records that should be debugged.")
 	flag.Parse()
@ -77,7 +81,8 @@ func main() {
 		}
 	}
-	evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: maxCandidates, BatchMode: batchMode}
+	evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: maxCandidates,
 		BatchMode: batchMode, skipFailedCmds: *skipFailedCmds, debugRecords: *debugRecords}
 	if batchMode {
 		err := evaluator.initBatchMode(*input, *inputDataRoot)
 		if err != nil {
@ -95,29 +100,39 @@ func main() {
 	// dummy := strategyDummy{}
 	// strategies = append(strategies, &dummy)
-	recent := strategyRecent{}
+	strategies = append(strategies, &strategyRecent{})
 	frequent := strategyFrequent{}
 	frequent.init()
-	directory := strategyDirectorySensitive{}
+	strategies = append(strategies, &frequent)
-	directory.init()
+
 	random := strategyRandom{candidatesSize: maxCandidates}
 	random.init()
 	strategies = append(strategies, &random)
-	markovCmd := strategyMarkovChainCmd{order: 1}
+	directory := strategyDirectorySensitive{}
-	markovCmd.init()
+	directory.init()
 	strategies = append(strategies, &directory)
 	if *slow {
 		distanceStaticBest := strategyRecordDistance{
 			distParams: common.DistParams{SessionID: 1, Pwd: 10, RealPwd: 10, Time: 1},
 			label:      "10*pwd,10*realpwd,1*session,time",
 		}
 		strategies = append(strategies, &distanceStaticBest)
-	markovCmd2 := strategyMarkovChainCmd{order: 2}
+		markovCmd := strategyMarkovChainCmd{order: 1}
-	markovCmd2.init()
+		markovCmd.init()
-	markov := strategyMarkovChain{order: 1}
+		markovCmd2 := strategyMarkovChainCmd{order: 2}
-	markov.init()
+		markovCmd2.init()
-	markov2 := strategyMarkovChain{order: 2}
+		markov := strategyMarkovChain{order: 1}
-	markov2.init()
+		markov.init()
-	strategies = append(strategies, &recent, &frequent, &directory, &random)
+		markov2 := strategyMarkovChain{order: 2}
 		markov2.init()
 	if *slow {
 		strategies = append(strategies, &markovCmd2, &markovCmd, &markov2, &markov)
 	}
@ -175,6 +190,8 @@ type evaluator struct {
 	sanitizedInput bool
 	BatchMode      bool
 	maxCandidates  int
 	skipFailedCmds bool
 	debugRecords   float64
 	UsersRecords   []userRecords
 	Strategies     []strategyJSON
 }
@ -235,6 +252,10 @@ func (e *evaluator) processRecords() {
 					}
 					log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present")
 				}
 				e.UsersRecords[i].Devices[j].Records[k].SeqSessionID = id
 				if e.debugRecords > 0 && rand.Float64() < e.debugRecords {
 					e.UsersRecords[i].Devices[j].Records[k].DebugThisRecord = true
 				}
 			}
 			sort.SliceStable(e.UsersRecords[i].Devices[j].Records, func(x, y int) bool {
 				if device.Records[x].SeqSessionID == device.Records[y].SeqSessionID {
@ -253,8 +274,37 @@ func (e *evaluator) evaluate(strategy strategy) error {
 	for i := range e.UsersRecords {
 		for j := range e.UsersRecords[i].Devices {
 			bar := progressbar.New(len(e.UsersRecords[i].Devices[j].Records))
 			var prevRecord common.EnrichedRecord
 			for _, record := range e.UsersRecords[i].Devices[j].Records {
 				if e.skipFailedCmds && record.ExitCode != 0 {
 					continue
 				}
 				candidates := strategy.GetCandidates()
 				if record.DebugThisRecord {
 					log.Println()
 					log.Println("===================================================")
 					log.Println("STRATEGY:", title, "-", description)
 					log.Println("===================================================")
 					log.Println("Previous record:")
 					if prevRecord.RealtimeBefore == 0 {
 						log.Println("== NIL")
 					} else {
 						rec, _ := prevRecord.ToString()
 						log.Println(rec)
 					}
 					log.Println("---------------------------------------------------")
 					log.Println("Recommendations for:")
 					rec, _ := record.ToString()
 					log.Println(rec)
 					log.Println("---------------------------------------------------")
 					for i, candidate := range candidates {
 						if i > 10 {
 							break
 						}
 						log.Println(string(candidate))
 					}
 					log.Println("===================================================")
 				}
 				matchFound := false
 				longestPrefixMatchLength := 0
@ -289,6 +339,7 @@ func (e *evaluator) evaluate(strategy strategy) error {
 					return err
 				}
 				bar.Add(1)
 				prevRecord = record
 			}
 			strategy.ResetHistory()
 			fmt.Println()
--- a/evaluate/strategy-record-distance.go
+++ b/evaluate/strategy-record-distance.go
@ -0,0 +1,68 @@
 package main
 import (
 	"sort"
 	"strconv"
 	"github.com/curusarn/resh/common"
 )
 // strategyRecordDistance recommends command lines from history ordered by their
 // record distance (EnrichedRecord.DistanceTo) to the most recent record.
 type strategyRecordDistance struct {
 	// history holds the records added so far, newest first (AddHistoryRecord prepends)
 	history    []common.EnrichedRecord
 	// distParams are the weights passed to DistanceTo
 	distParams common.DistParams
 	// maxDepth bounds the history index inspected by GetCandidates; 0 means no limit
 	maxDepth   int
 	// label is included in the strategy title
 	label      string
 }
 // strDistEntry pairs a command line with the distance computed for its record.
 type strDistEntry struct {
 	cmdLine  string
 	distance float64
 }
 // init clears the stored history; distParams, maxDepth and label are left untouched.
 func (s *strategyRecordDistance) init() {
 	s.history = nil
 }
 // GetTitleAndDescription returns the strategy title (containing maxDepth and label)
 // and a short description of the strategy.
 func (s *strategyRecordDistance) GetTitleAndDescription() (string, string) {
 	depth := strconv.Itoa(s.maxDepth)
 	title := "record distance (depth:" + depth + ";" + s.label + ")"
 	description := "Use record distance to recommend commands"
 	return title, description
 }
 // GetCandidates returns the distinct command lines from history ordered by ascending
 // distance to a reference record. The reference is a copy of the newest record with an
 // empty command line and its "before" members set to the "after" ones.
 // Returns nil when there is no history.
 func (s *strategyRecordDistance) GetCandidates() []string {
 	if len(s.history) == 0 {
 		return nil
 	}
 	// copy of the newest record - the stored record itself is not replaced
 	reference := s.history[0]
 	reference.SetCmdLine("")
 	reference.SetBeforeToAfter()
 	var ranked []strDistEntry
 	for idx := range s.history {
 		// the bound is inclusive: indices 0..maxDepth are inspected when maxDepth is non-zero
 		if s.maxDepth != 0 && idx > s.maxDepth {
 			break
 		}
 		ranked = append(ranked, strDistEntry{
 			cmdLine:  s.history[idx].CmdLine,
 			distance: s.history[idx].DistanceTo(reference, s.distParams),
 		})
 	}
 	// stable sort - entries with equal distance keep their history (newest first) order
 	sort.SliceStable(ranked, func(a int, b int) bool { return ranked[a].distance < ranked[b].distance })
 	var candidates []string
 	seen := map[string]bool{}
 	for _, entry := range ranked {
 		if !seen[entry.cmdLine] {
 			// the first (closest) occurrence of a command line wins
 			seen[entry.cmdLine] = true
 			candidates = append(candidates, entry.cmdLine)
 		}
 	}
 	return candidates
 }
 // AddHistoryRecord stores a copy of record at the front of the history so that
 // the newest record is always at index 0. It always returns nil.
 func (s *strategyRecordDistance) AddHistoryRecord(record *common.EnrichedRecord) error {
 	updated := make([]common.EnrichedRecord, 0, len(s.history)+1)
 	updated = append(updated, *record)
 	updated = append(updated, s.history...)
 	s.history = updated
 	return nil
 }
 // ResetHistory drops all stored records by re-running init. It always returns nil.
 func (s *strategyRecordDistance) ResetHistory() error {
 	s.init()
 	return nil
 }