evaluate: add strategy record distance, misc improvements

pull/15/head
Simon Let 6 years ago
parent ff878a9d79
commit baeb955841
  1. 245
      common/resh-common.go
  2. 17
      evaluate/resh-evaluate-plot.py
  3. 79
      evaluate/resh-evaluate.go
  4. 68
      evaluate/strategy-record-distance.go

@ -1,7 +1,10 @@
package common package common
import ( import (
"encoding/json"
"errors"
"log" "log"
"math"
"strconv" "strconv"
"strings" "strings"
@ -86,10 +89,12 @@ type EnrichedRecord struct {
Record Record
// enriching fields - added "later" // enriching fields - added "later"
Command string `json:"command"` Command string `json:"command"`
FirstWord string `json:"firstWord"` FirstWord string `json:"firstWord"`
Invalid bool `json:"invalid"` Invalid bool `json:"invalid"`
SeqSessionID uint64 `json:"seqSessionId"` SeqSessionID uint64 `json:"seqSessionId"`
DebugThisRecord bool `json:"debugThisRecord"`
Errors []string `json:"errors"`
// SeqSessionID uint64 `json:"seqSessionId,omitempty"` // SeqSessionID uint64 `json:"seqSessionId,omitempty"`
} }
@ -112,14 +117,33 @@ func ConvertRecord(r *FallbackRecord) Record {
} }
} }
// ToString - returns the record serialized as a JSON string.
// When marshalling fails, the placeholder text "marshalling error" is
// returned together with the error reported by json.Marshal.
func (r EnrichedRecord) ToString() (string, error) {
	encoded, err := json.Marshal(r)
	if err == nil {
		return string(encoded), nil
	}
	return "marshalling error", err
}
// Enrich - adds additional fields to the record // Enrich - adds additional fields to the record
func (r Record) Enrich() EnrichedRecord { func (r Record) Enrich() EnrichedRecord {
record := EnrichedRecord{Record: r} record := EnrichedRecord{Record: r}
// Get command/first word from commandline // Get command/first word from commandline
record.Command, record.FirstWord = GetCommandAndFirstWord(r.CmdLine) var err error
err := r.Validate() record.Command, record.FirstWord, err = GetCommandAndFirstWord(r.CmdLine)
if err != nil { if err != nil {
log.Println("Invalid command:", r.CmdLine) record.Errors = append(record.Errors, "GetCommandAndFirstWord error:"+err.Error())
rec, _ := record.ToString()
log.Println("Invalid command:", rec)
record.Invalid = true
return record
}
err = r.Validate()
if err != nil {
record.Errors = append(record.Errors, "Validate error:"+err.Error())
rec, _ := record.ToString()
log.Println("Invalid command:", rec)
record.Invalid = true record.Invalid = true
} }
return record return record
@ -128,18 +152,85 @@ func (r Record) Enrich() EnrichedRecord {
// Validate - returns error if the record is invalid // Validate - returns error if the record is invalid
func (r *Record) Validate() error { func (r *Record) Validate() error {
if r.RealtimeBefore == 0 || r.RealtimeAfter == 0 {
return errors.New("There is no Time")
}
if r.RealPwd == "" || r.RealPwdAfter == "" {
return errors.New("There is no Real Pwd")
}
if r.Pwd == "" || r.PwdAfter == "" {
return errors.New("There is no Pwd")
}
// TimezoneBefore
// TimezoneAfter
// RealtimeDuration
// RealtimeSinceSessionStart - TODO: add later
// RealtimeSinceBoot - TODO: add later
// device extras
// Host
// Hosttype
// Ostype
// Machtype
// OsReleaseID
// OsReleaseVersionID
// OsReleaseIDLike
// OsReleaseName
// OsReleasePrettyName
// session extras
// Term
// Shlvl
// static info
// Lang
// LcAll
// meta
// ReshUUID
// ReshVersion
// ReshRevision
// added by sanitizer
// Sanitized
// CmdLength
return nil return nil
} }
// SetCmdLine replaces the command line of the record and refreshes every
// member derived from it: CmdLength becomes len(cmdLine), ExitCode is reset
// to 0, and Command / FirstWord are re-parsed with GetCommandAndFirstWord.
// If parsing fails, the error text is appended to Errors and the record is
// flagged as Invalid; nothing is logged here.
func (r *EnrichedRecord) SetCmdLine(cmdLine string) {
	r.CmdLine, r.CmdLength = cmdLine, len(cmdLine)
	r.ExitCode = 0

	command, firstWord, err := GetCommandAndFirstWord(cmdLine)
	r.Command, r.FirstWord = command, firstWord
	if err == nil {
		return
	}
	r.Errors = append(r.Errors, "GetCommandAndFirstWord error:"+err.Error())
	r.Invalid = true
}
// SetBeforeToAfter - overwrites the "before" working directories (Pwd and
// RealPwd) with their "after" counterparts (PwdAfter and RealPwdAfter).
// The timezone and realtime "before" members are deliberately left as they
// are; only the two directory members are carried over.
func (r *EnrichedRecord) SetBeforeToAfter() {
	r.Pwd, r.RealPwd = r.PwdAfter, r.RealPwdAfter
	// not carried over (yet): TimezoneBefore, RealtimeBefore, RealtimeBeforeLocal
}
// GetCommandAndFirstWord func // GetCommandAndFirstWord func
func GetCommandAndFirstWord(cmdLine string) (string, string) { func GetCommandAndFirstWord(cmdLine string) (string, string, error) {
args, err := shellwords.Parse(cmdLine) args, err := shellwords.Parse(cmdLine)
if err != nil { if err != nil {
log.Println("shellwords Error:", err, " (cmdLine: <", cmdLine, "> )") log.Println("shellwords Error:", err, " (cmdLine: <", cmdLine, "> )")
return "<shellwords_error>", "<shellwords_error>" return "", "", err
} }
if len(args) == 0 { if len(args) == 0 {
return "", "" return "", "", nil
} }
i := 0 i := 0
for true { for true {
@ -149,10 +240,140 @@ func GetCommandAndFirstWord(cmdLine string) (string, string) {
i++ i++
continue continue
} }
return args[i], args[0] return args[i], args[0], nil
} }
log.Fatal("GetCommandAndFirstWord error: this should not happen!") log.Fatal("GetCommandAndFirstWord error: this should not happen!")
return "ERROR", "ERROR" return "ERROR", "ERROR", errors.New("this should not happen - contact developer ;)")
}
// DistParams is used to supply params to EnrichedRecord.DistanceTo().
// Each member is the weight that DistanceTo multiplies into the penalty of
// the matching record attribute; a zero weight switches that attribute off.
type DistParams struct {
	// ExitCode weights differing exit codes (applied in full when exactly one
	// of the two commands succeeded, halved for two different failures).
	ExitCode float64

	// MachineID, SessionID, Login and Shell weight a mismatch of the
	// record member of the same name.
	MachineID, SessionID, Login, Shell float64

	// Pwd and RealPwd weight a mismatch of the respective working directory.
	Pwd, RealPwd float64

	// Git is applied once for each differing git member
	// (GitDir, GitRealDir, GitOriginRemote).
	Git float64

	// Time scales the base-10 logarithm of the absolute RealtimeBefore difference.
	Time float64
}
// DistanceTo another record
func (r *EnrichedRecord) DistanceTo(r2 EnrichedRecord, p DistParams) float64 {
var dist float64
dist = 0
// lev distance or something? TODO later
// CmdLine
// exit code
if r.ExitCode != r2.ExitCode {
if r.ExitCode == 0 || r2.ExitCode == 0 {
// one success + one error -> 1
dist += 1 * p.ExitCode
} else {
// two different errors
dist += 0.5 * p.ExitCode
}
}
// machine/device
if r.MachineID != r2.MachineID {
dist += 1 * p.MachineID
}
// Uname
// session
if r.SessionID != r2.SessionID {
dist += 1 * p.SessionID
}
// Pid - add because of nested shells?
// SessionPid
// user
if r.Login != r2.Login {
dist += 1 * p.Login
}
// Home
// shell
if r.Shell != r2.Shell {
dist += 1 * p.Shell
}
// ShellEnv
// pwd
if r.Pwd != r2.Pwd {
// TODO: compare using hierarchy
// TODO: make more important
dist += 1 * p.Pwd
}
if r.RealPwd != r2.RealPwd {
// TODO: -||-
dist += 1 * p.RealPwd
}
// PwdAfter
// RealPwdAfter
// git
if r.GitDir != r2.GitDir {
dist += 1 * p.Git
}
if r.GitRealDir != r2.GitRealDir {
dist += 1 * p.Git
}
if r.GitOriginRemote != r2.GitOriginRemote {
dist += 1 * p.Git
}
// time
// this can actually get negative for differences of less than one second which is fine
// distance grows by 1 with every order
distTime := math.Log10(math.Abs(r.RealtimeBefore-r2.RealtimeBefore)) * p.Time
if math.IsNaN(distTime) == false && math.IsInf(distTime, 0) == false {
dist += distTime
}
// RealtimeBeforeLocal
// RealtimeAfter
// RealtimeAfterLocal
// TimezoneBefore
// TimezoneAfter
// RealtimeDuration
// RealtimeSinceSessionStart - TODO: add later
// RealtimeSinceBoot - TODO: add later
// device extras
// Host
// Hosttype
// Ostype
// Machtype
// OsReleaseID
// OsReleaseVersionID
// OsReleaseIDLike
// OsReleaseName
// OsReleasePrettyName
// session extras
// Term
// Shlvl
// static info
// Lang
// LcAll
// meta
// ReshUUID
// ReshVersion
// ReshRevision
// added by sanitizatizer
// Sanitized
// CmdLength
return dist
} }
// Config struct // Config struct

@ -9,6 +9,7 @@ import matplotlib.pyplot as plt
import matplotlib.path as mpath import matplotlib.path as mpath
import numpy as np import numpy as np
from graphviz import Digraph from graphviz import Digraph
from datetime import datetime
PLOT_WIDTH = 10 # inches PLOT_WIDTH = 10 # inches
PLOT_HEIGHT = 7 # inches PLOT_HEIGHT = 7 # inches
@ -274,7 +275,7 @@ def graph_cmdSequences(node_count=33, edge_minValue=0.05, view_graph=True):
def plot_strategies_matches(plot_size=50, selected_strategies=[]): def plot_strategies_matches(plot_size=50, selected_strategies=[]):
plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
plt.title("Matches at distance") plt.title("Matches at distance <{}>".format(datetime.now().strftime('%H:%M:%S')))
plt.ylabel('%' + " of matches") plt.ylabel('%' + " of matches")
plt.xlabel("Distance") plt.xlabel("Distance")
legend = [] legend = []
@ -349,7 +350,7 @@ def plot_strategies_matches(plot_size=50, selected_strategies=[]):
def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]): def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]):
plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
plt.title("Average characters recalled at distance") plt.title("Average characters recalled at distance <{}>".format(datetime.now().strftime('%H:%M:%S')))
plt.ylabel("Average characters recalled") plt.ylabel("Average characters recalled")
plt.xlabel("Distance") plt.xlabel("Distance")
x_values = range(1, plot_size+1) x_values = range(1, plot_size+1)
@ -420,7 +421,7 @@ def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]):
def plot_strategies_charsRecalled_prefix(plot_size=50, selected_strategies=[]): def plot_strategies_charsRecalled_prefix(plot_size=50, selected_strategies=[]):
plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
plt.title("Average characters recalled at distance (including prefix matches)") plt.title("Average characters recalled at distance (including prefix matches) <{}>".format(datetime.now().strftime('%H:%M:%S')))
plt.ylabel("Average characters recalled (including prefix matches)") plt.ylabel("Average characters recalled (including prefix matches)")
plt.xlabel("Distance") plt.xlabel("Distance")
x_values = range(1, plot_size+1) x_values = range(1, plot_size+1)
@ -493,17 +494,17 @@ def plot_strategies_charsRecalled_prefix(plot_size=50, selected_strategies=[]):
plt.show() plt.show()
plot_cmdLineFrq_rank() # plot_cmdLineFrq_rank()
plot_cmdFrq_rank() # plot_cmdFrq_rank()
plot_cmdLineVocabularySize_cmdLinesEntered() # plot_cmdLineVocabularySize_cmdLinesEntered()
plot_cmdVocabularySize_cmdLinesEntered() # plot_cmdVocabularySize_cmdLinesEntered()
plot_strategies_matches(20) plot_strategies_matches(20)
plot_strategies_charsRecalled(20) plot_strategies_charsRecalled(20)
plot_strategies_charsRecalled_prefix(20) plot_strategies_charsRecalled_prefix(20)
graph_cmdSequences(node_count=33, edge_minValue=0.048) # graph_cmdSequences(node_count=33, edge_minValue=0.048)
# graph_cmdSequences(node_count=28, edge_minValue=0.06) # graph_cmdSequences(node_count=28, edge_minValue=0.06)

@ -8,6 +8,7 @@ import (
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"log" "log"
"math/rand"
"os" "os"
"os/exec" "os/exec"
"os/user" "os/user"
@ -48,6 +49,9 @@ func main() {
"Input data root, enables batch mode, looks for files matching --input option") "Input data root, enables batch mode, looks for files matching --input option")
slow := flag.Bool("slow", false, slow := flag.Bool("slow", false,
"Enables stuff that takes a long time (e.g. markov chain strategies).") "Enables stuff that takes a long time (e.g. markov chain strategies).")
skipFailedCmds := flag.Bool("skip-failed-cmds", false,
"Skips records with non-zero exit status.")
debugRecords := flag.Float64("debug", 0, "Debug records - percentage of records that should be debugged.")
flag.Parse() flag.Parse()
@ -77,7 +81,8 @@ func main() {
} }
} }
evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: maxCandidates, BatchMode: batchMode} evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: maxCandidates,
BatchMode: batchMode, skipFailedCmds: *skipFailedCmds, debugRecords: *debugRecords}
if batchMode { if batchMode {
err := evaluator.initBatchMode(*input, *inputDataRoot) err := evaluator.initBatchMode(*input, *inputDataRoot)
if err != nil { if err != nil {
@ -95,29 +100,39 @@ func main() {
// dummy := strategyDummy{} // dummy := strategyDummy{}
// strategies = append(strategies, &dummy) // strategies = append(strategies, &dummy)
recent := strategyRecent{} strategies = append(strategies, &strategyRecent{})
frequent := strategyFrequent{} frequent := strategyFrequent{}
frequent.init() frequent.init()
directory := strategyDirectorySensitive{} strategies = append(strategies, &frequent)
directory.init()
random := strategyRandom{candidatesSize: maxCandidates} random := strategyRandom{candidatesSize: maxCandidates}
random.init() random.init()
strategies = append(strategies, &random)
markovCmd := strategyMarkovChainCmd{order: 1} directory := strategyDirectorySensitive{}
markovCmd.init() directory.init()
strategies = append(strategies, &directory)
if *slow {
distanceStaticBest := strategyRecordDistance{
distParams: common.DistParams{SessionID: 1, Pwd: 10, RealPwd: 10, Time: 1},
label: "10*pwd,10*realpwd,1*session,time",
}
strategies = append(strategies, &distanceStaticBest)
markovCmd2 := strategyMarkovChainCmd{order: 2} markovCmd := strategyMarkovChainCmd{order: 1}
markovCmd2.init() markovCmd.init()
markov := strategyMarkovChain{order: 1} markovCmd2 := strategyMarkovChainCmd{order: 2}
markov.init() markovCmd2.init()
markov2 := strategyMarkovChain{order: 2} markov := strategyMarkovChain{order: 1}
markov2.init() markov.init()
strategies = append(strategies, &recent, &frequent, &directory, &random) markov2 := strategyMarkovChain{order: 2}
markov2.init()
if *slow {
strategies = append(strategies, &markovCmd2, &markovCmd, &markov2, &markov) strategies = append(strategies, &markovCmd2, &markovCmd, &markov2, &markov)
} }
@ -175,6 +190,8 @@ type evaluator struct {
sanitizedInput bool sanitizedInput bool
BatchMode bool BatchMode bool
maxCandidates int maxCandidates int
skipFailedCmds bool
debugRecords float64
UsersRecords []userRecords UsersRecords []userRecords
Strategies []strategyJSON Strategies []strategyJSON
} }
@ -235,6 +252,10 @@ func (e *evaluator) processRecords() {
} }
log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present") log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present")
} }
e.UsersRecords[i].Devices[j].Records[k].SeqSessionID = id
if e.debugRecords > 0 && rand.Float64() < e.debugRecords {
e.UsersRecords[i].Devices[j].Records[k].DebugThisRecord = true
}
} }
sort.SliceStable(e.UsersRecords[i].Devices[j].Records, func(x, y int) bool { sort.SliceStable(e.UsersRecords[i].Devices[j].Records, func(x, y int) bool {
if device.Records[x].SeqSessionID == device.Records[y].SeqSessionID { if device.Records[x].SeqSessionID == device.Records[y].SeqSessionID {
@ -253,8 +274,37 @@ func (e *evaluator) evaluate(strategy strategy) error {
for i := range e.UsersRecords { for i := range e.UsersRecords {
for j := range e.UsersRecords[i].Devices { for j := range e.UsersRecords[i].Devices {
bar := progressbar.New(len(e.UsersRecords[i].Devices[j].Records)) bar := progressbar.New(len(e.UsersRecords[i].Devices[j].Records))
var prevRecord common.EnrichedRecord
for _, record := range e.UsersRecords[i].Devices[j].Records { for _, record := range e.UsersRecords[i].Devices[j].Records {
if e.skipFailedCmds && record.ExitCode != 0 {
continue
}
candidates := strategy.GetCandidates() candidates := strategy.GetCandidates()
if record.DebugThisRecord {
log.Println()
log.Println("===================================================")
log.Println("STRATEGY:", title, "-", description)
log.Println("===================================================")
log.Println("Previous record:")
if prevRecord.RealtimeBefore == 0 {
log.Println("== NIL")
} else {
rec, _ := prevRecord.ToString()
log.Println(rec)
}
log.Println("---------------------------------------------------")
log.Println("Recommendations for:")
rec, _ := record.ToString()
log.Println(rec)
log.Println("---------------------------------------------------")
for i, candidate := range candidates {
if i > 10 {
break
}
log.Println(string(candidate))
}
log.Println("===================================================")
}
matchFound := false matchFound := false
longestPrefixMatchLength := 0 longestPrefixMatchLength := 0
@ -289,6 +339,7 @@ func (e *evaluator) evaluate(strategy strategy) error {
return err return err
} }
bar.Add(1) bar.Add(1)
prevRecord = record
} }
strategy.ResetHistory() strategy.ResetHistory()
fmt.Println() fmt.Println()

@ -0,0 +1,68 @@
package main
import (
"sort"
"strconv"
"github.com/curusarn/resh/common"
)
// strategyRecordDistance recommends previously seen command lines ordered by
// their record distance (EnrichedRecord.DistanceTo) to the current context.
type strategyRecordDistance struct {
	// history holds the records seen so far, newest first
	// (AddHistoryRecord prepends).
	history []common.EnrichedRecord
	// distParams are the weights handed to DistanceTo.
	distParams common.DistParams
	// maxDepth bounds how far into history GetCandidates looks; 0 means no bound.
	maxDepth int
	// label is a human-readable tag that is embedded in the strategy title.
	label string
}
// strDistEntry pairs a command line with the distance computed for the
// history record it was taken from.
type strDistEntry struct {
	cmdLine  string  // command line of the history record
	distance float64 // record distance; smaller sorts first
}
// init puts the strategy into its initial state by dropping all recorded
// history; distParams, maxDepth and label are left untouched.
func (s *strategyRecordDistance) init() {
	s.history = nil
}
// GetTitleAndDescription returns the strategy title, which embeds the
// configured maxDepth and label, followed by a fixed one-line description.
func (s *strategyRecordDistance) GetTitleAndDescription() (string, string) {
	depth := strconv.Itoa(s.maxDepth)
	title := "record distance (depth:" + depth + ";" + s.label + ")"
	description := "Use record distance to recommend commands"
	return title, description
}
// GetCandidates returns the command lines found in history ordered by
// ascending record distance to the context the next command is expected to
// run in. Each command line is listed once, at the position of its closest
// occurrence. With an empty history the result is nil.
func (s *strategyRecordDistance) GetCandidates() []string {
	if len(s.history) == 0 {
		return nil
	}

	// Build the reference record from the newest history entry (history is
	// newest-first): blank the command line and move the "after" working
	// directories into the "before" members.
	reference := s.history[0]
	reference.SetCmdLine("")
	reference.SetBeforeToAfter()

	var scored []strDistEntry
	for idx := 0; idx < len(s.history); idx++ {
		// NOTE(review): this bound lets maxDepth+1 records through
		// (indices 0..maxDepth) - confirm whether that is intended.
		if s.maxDepth != 0 && idx > s.maxDepth {
			break
		}
		past := &s.history[idx]
		scored = append(scored, strDistEntry{
			cmdLine:  past.CmdLine,
			distance: past.DistanceTo(reference, s.distParams),
		})
	}

	// Stable sort: entries with equal distance keep their newest-first order.
	sort.SliceStable(scored, func(a, b int) bool {
		return scored[a].distance < scored[b].distance
	})

	// Keep only the first (closest) occurrence of every command line.
	seen := make(map[string]bool)
	var hist []string
	for _, entry := range scored {
		if !seen[entry.cmdLine] {
			seen[entry.cmdLine] = true
			hist = append(hist, entry.cmdLine)
		}
	}
	return hist
}
// AddHistoryRecord stores a copy of record at the front of history, so that
// index 0 is always the most recently added record. It always returns nil.
func (s *strategyRecordDistance) AddHistoryRecord(record *common.EnrichedRecord) error {
	updated := make([]common.EnrichedRecord, 0, len(s.history)+1)
	updated = append(updated, *record)
	updated = append(updated, s.history...)
	s.history = updated
	return nil
}
// ResetHistory forgets all recorded history by re-running init.
// It always returns nil.
func (s *strategyRecordDistance) ResetHistory() error {
	s.init()
	return nil
}
Loading…
Cancel
Save