evaluate: add strategy record distance, misc improvements

pull/15/head
Simon Let 6 years ago
parent ff878a9d79
commit baeb955841
  1. 245
      common/resh-common.go
  2. 17
      evaluate/resh-evaluate-plot.py
  3. 79
      evaluate/resh-evaluate.go
  4. 68
      evaluate/strategy-record-distance.go

@ -1,7 +1,10 @@
package common
import (
"encoding/json"
"errors"
"log"
"math"
"strconv"
"strings"
@ -86,10 +89,12 @@ type EnrichedRecord struct {
Record
// enriching fields - added "later"
Command string `json:"command"`
FirstWord string `json:"firstWord"`
Invalid bool `json:"invalid"`
SeqSessionID uint64 `json:"seqSessionId"`
Command string `json:"command"`
FirstWord string `json:"firstWord"`
Invalid bool `json:"invalid"`
SeqSessionID uint64 `json:"seqSessionId"`
DebugThisRecord bool `json:"debugThisRecord"`
Errors []string `json:"errors"`
// SeqSessionID uint64 `json:"seqSessionId,omitempty"`
}
@ -112,14 +117,33 @@ func ConvertRecord(r *FallbackRecord) Record {
}
}
// ToString serializes the enriched record to its JSON representation.
//
// On success it returns the JSON text and a nil error. If json.Marshal fails,
// it returns the placeholder string "marshalling error" together with the
// marshalling error.
func (r EnrichedRecord) ToString() (string, error) {
	encoded, err := json.Marshal(r)
	if err == nil {
		return string(encoded), nil
	}
	return "marshalling error", err
}
// Enrich - adds additional fields to the record
func (r Record) Enrich() EnrichedRecord {
record := EnrichedRecord{Record: r}
// Get command/first word from commandline
record.Command, record.FirstWord = GetCommandAndFirstWord(r.CmdLine)
err := r.Validate()
var err error
record.Command, record.FirstWord, err = GetCommandAndFirstWord(r.CmdLine)
if err != nil {
log.Println("Invalid command:", r.CmdLine)
record.Errors = append(record.Errors, "GetCommandAndFirstWord error:"+err.Error())
rec, _ := record.ToString()
log.Println("Invalid command:", rec)
record.Invalid = true
return record
}
err = r.Validate()
if err != nil {
record.Errors = append(record.Errors, "Validate error:"+err.Error())
rec, _ := record.ToString()
log.Println("Invalid command:", rec)
record.Invalid = true
}
return record
@ -128,18 +152,85 @@ func (r Record) Enrich() EnrichedRecord {
// Validate checks that the record carries the fields required for it to be
// usable and returns an error describing the first missing piece.
//
// Checks, in order:
//   - RealtimeBefore and RealtimeAfter must both be non-zero,
//   - RealPwd and RealPwdAfter must both be non-empty,
//   - Pwd and PwdAfter must both be non-empty.
//
// It returns nil when all of the checks pass.
func (r *Record) Validate() error {
	switch {
	case r.RealtimeBefore == 0, r.RealtimeAfter == 0:
		return errors.New("There is no Time")
	case r.RealPwd == "", r.RealPwdAfter == "":
		return errors.New("There is no Real Pwd")
	case r.Pwd == "", r.PwdAfter == "":
		return errors.New("There is no Pwd")
	}

	// Fields that are currently NOT validated:
	//
	// time:
	//   TimezoneBefore, TimezoneAfter, RealtimeDuration,
	//   RealtimeSinceSessionStart (TODO: add later),
	//   RealtimeSinceBoot (TODO: add later)
	// device extras:
	//   Host, Hosttype, Ostype, Machtype, OsReleaseID, OsReleaseVersionID,
	//   OsReleaseIDLike, OsReleaseName, OsReleasePrettyName
	// session extras:
	//   Term, Shlvl
	// static info:
	//   Lang, LcAll
	// meta:
	//   ReshUUID, ReshVersion, ReshRevision
	// added by sanitizer:
	//   Sanitized, CmdLength
	return nil
}
// SetCmdLine replaces the record's command line and refreshes the members
// derived from it.
//
// It stores cmdLine, sets CmdLength to its byte length, resets ExitCode to 0
// and recomputes Command and FirstWord via GetCommandAndFirstWord. If that
// parsing fails, the error text is appended to Errors and the record is
// flagged as Invalid.
func (r *EnrichedRecord) SetCmdLine(cmdLine string) {
	r.CmdLine, r.CmdLength, r.ExitCode = cmdLine, len(cmdLine), 0

	// Command and FirstWord are overwritten even when parsing fails.
	command, firstWord, err := GetCommandAndFirstWord(cmdLine)
	r.Command, r.FirstWord = command, firstWord
	if err == nil {
		return
	}

	r.Errors = append(r.Errors, "GetCommandAndFirstWord error:"+err.Error())
	// log.Println("Invalid command:", r.CmdLine)
	r.Invalid = true
}
// SetBeforeToAfter overwrites the "before" members of the record with the
// values of their "after" counterparts.
//
// Only Pwd and RealPwd are copied at the moment; the timezone and realtime
// members are intentionally left untouched (see the disabled lines below).
func (r *EnrichedRecord) SetBeforeToAfter() {
	r.Pwd, r.RealPwd = r.PwdAfter, r.RealPwdAfter

	// r.TimezoneBefore = r.TimezoneAfter
	// r.RealtimeBefore = r.RealtimeAfter
	// r.RealtimeBeforeLocal = r.RealtimeAfterLocal
}
// GetCommandAndFirstWord func
func GetCommandAndFirstWord(cmdLine string) (string, string) {
func GetCommandAndFirstWord(cmdLine string) (string, string, error) {
args, err := shellwords.Parse(cmdLine)
if err != nil {
log.Println("shellwords Error:", err, " (cmdLine: <", cmdLine, "> )")
return "<shellwords_error>", "<shellwords_error>"
return "", "", err
}
if len(args) == 0 {
return "", ""
return "", "", nil
}
i := 0
for true {
@ -149,10 +240,140 @@ func GetCommandAndFirstWord(cmdLine string) (string, string) {
i++
continue
}
return args[i], args[0]
return args[i], args[0], nil
}
log.Fatal("GetCommandAndFirstWord error: this should not happen!")
return "ERROR", "ERROR"
return "ERROR", "ERROR", errors.New("this should not happen - contact developer ;)")
}
// DistParams is used to supply params to EnrichedRecord.DistanceTo().
//
// Every field is a weight: DistanceTo adds it (possibly scaled) to the
// resulting distance when the corresponding property of the two compared
// records differs. A weight of 0 effectively disables that property.
type DistParams struct {
	// ExitCode is added in full when exactly one of the records has exit
	// code 0, and at half weight when both exit codes are non-zero but
	// different.
	ExitCode float64
	// MachineID is added when the MachineID members differ.
	MachineID float64
	// SessionID is added when the SessionID members differ.
	SessionID float64
	// Login is added when the Login members differ.
	Login float64
	// Shell is added when the Shell members differ.
	Shell float64
	// Pwd is added when the Pwd members differ.
	Pwd float64
	// RealPwd is added when the RealPwd members differ.
	RealPwd float64
	// Git is added once for each of GitDir, GitRealDir and GitOriginRemote
	// that differs (so up to three times).
	Git float64
	// Time multiplies the base-10 logarithm of the absolute difference of
	// the RealtimeBefore members.
	Time float64
}
// DistanceTo returns a weighted dissimilarity score between r and r2.
//
// The score starts at zero and grows by the matching weight from p for every
// compared property whose values differ in the two records (exit code,
// machine, session, login, shell, pwd, real pwd and three git properties).
// On top of that a time component is added: log10 of the absolute difference
// of RealtimeBefore, multiplied by p.Time.
//
// The result may be negative, because the time component is negative for
// differences smaller than one.
func (r *EnrichedRecord) DistanceTo(r2 EnrichedRecord, p DistParams) float64 {
	dist := 0.0

	// addIf bumps the running distance by weight when the compared
	// properties differ.
	addIf := func(differs bool, weight float64) {
		if differs {
			dist += weight
		}
	}

	// CmdLine is not compared yet - lev distance or something? TODO later

	// exit code
	if r.ExitCode != r2.ExitCode {
		// two different errors -> half of the weight
		factor := 0.5
		if r.ExitCode == 0 || r2.ExitCode == 0 {
			// one success + one error -> full weight
			factor = 1
		}
		dist += factor * p.ExitCode
	}

	// machine/device (Uname is not compared)
	addIf(r.MachineID != r2.MachineID, p.MachineID)

	// session (Pid - add because of nested shells?; SessionPid is not compared)
	addIf(r.SessionID != r2.SessionID, p.SessionID)

	// user (Home is not compared)
	addIf(r.Login != r2.Login, p.Login)

	// shell (ShellEnv is not compared)
	addIf(r.Shell != r2.Shell, p.Shell)

	// pwd (PwdAfter and RealPwdAfter are not compared)
	// TODO: compare using hierarchy
	// TODO: make more important
	addIf(r.Pwd != r2.Pwd, p.Pwd)
	addIf(r.RealPwd != r2.RealPwd, p.RealPwd) // TODO: same as for Pwd

	// git - each differing property contributes the Git weight separately
	addIf(r.GitDir != r2.GitDir, p.Git)
	addIf(r.GitRealDir != r2.GitRealDir, p.Git)
	addIf(r.GitOriginRemote != r2.GitOriginRemote, p.Git)

	// time
	// The distance grows by 1 with every order of magnitude. It can get
	// negative for differences of less than one second, which is fine.
	// NaN and infinite values (e.g. log10 of a zero difference) are skipped.
	gap := math.Abs(r.RealtimeBefore - r2.RealtimeBefore)
	timeDist := math.Log10(gap) * p.Time
	if !math.IsNaN(timeDist) && !math.IsInf(timeDist, 0) {
		dist += timeDist
	}

	// Members that are currently NOT part of the distance:
	//
	// time:
	//   RealtimeBeforeLocal, RealtimeAfter, RealtimeAfterLocal,
	//   TimezoneBefore, TimezoneAfter, RealtimeDuration,
	//   RealtimeSinceSessionStart (TODO: add later),
	//   RealtimeSinceBoot (TODO: add later)
	// device extras:
	//   Host, Hosttype, Ostype, Machtype, OsReleaseID, OsReleaseVersionID,
	//   OsReleaseIDLike, OsReleaseName, OsReleasePrettyName
	// session extras:
	//   Term, Shlvl
	// static info:
	//   Lang, LcAll
	// meta:
	//   ReshUUID, ReshVersion, ReshRevision
	// added by sanitizer:
	//   Sanitized, CmdLength
	return dist
}
// Config struct

@ -9,6 +9,7 @@ import matplotlib.pyplot as plt
import matplotlib.path as mpath
import numpy as np
from graphviz import Digraph
from datetime import datetime
PLOT_WIDTH = 10 # inches
PLOT_HEIGHT = 7 # inches
@ -274,7 +275,7 @@ def graph_cmdSequences(node_count=33, edge_minValue=0.05, view_graph=True):
def plot_strategies_matches(plot_size=50, selected_strategies=[]):
plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
plt.title("Matches at distance")
plt.title("Matches at distance <{}>".format(datetime.now().strftime('%H:%M:%S')))
plt.ylabel('%' + " of matches")
plt.xlabel("Distance")
legend = []
@ -349,7 +350,7 @@ def plot_strategies_matches(plot_size=50, selected_strategies=[]):
def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]):
plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
plt.title("Average characters recalled at distance")
plt.title("Average characters recalled at distance <{}>".format(datetime.now().strftime('%H:%M:%S')))
plt.ylabel("Average characters recalled")
plt.xlabel("Distance")
x_values = range(1, plot_size+1)
@ -420,7 +421,7 @@ def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]):
def plot_strategies_charsRecalled_prefix(plot_size=50, selected_strategies=[]):
plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
plt.title("Average characters recalled at distance (including prefix matches)")
plt.title("Average characters recalled at distance (including prefix matches) <{}>".format(datetime.now().strftime('%H:%M:%S')))
plt.ylabel("Average characters recalled (including prefix matches)")
plt.xlabel("Distance")
x_values = range(1, plot_size+1)
@ -493,17 +494,17 @@ def plot_strategies_charsRecalled_prefix(plot_size=50, selected_strategies=[]):
plt.show()
plot_cmdLineFrq_rank()
plot_cmdFrq_rank()
# plot_cmdLineFrq_rank()
# plot_cmdFrq_rank()
plot_cmdLineVocabularySize_cmdLinesEntered()
plot_cmdVocabularySize_cmdLinesEntered()
# plot_cmdLineVocabularySize_cmdLinesEntered()
# plot_cmdVocabularySize_cmdLinesEntered()
plot_strategies_matches(20)
plot_strategies_charsRecalled(20)
plot_strategies_charsRecalled_prefix(20)
graph_cmdSequences(node_count=33, edge_minValue=0.048)
# graph_cmdSequences(node_count=33, edge_minValue=0.048)
# graph_cmdSequences(node_count=28, edge_minValue=0.06)

@ -8,6 +8,7 @@ import (
"fmt"
"io/ioutil"
"log"
"math/rand"
"os"
"os/exec"
"os/user"
@ -48,6 +49,9 @@ func main() {
"Input data root, enables batch mode, looks for files matching --input option")
slow := flag.Bool("slow", false,
"Enables stuff that takes a long time (e.g. markov chain strategies).")
skipFailedCmds := flag.Bool("skip-failed-cmds", false,
"Skips records with non-zero exit status.")
debugRecords := flag.Float64("debug", 0, "Debug records - percentage of records that should be debugged.")
flag.Parse()
@ -77,7 +81,8 @@ func main() {
}
}
evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: maxCandidates, BatchMode: batchMode}
evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: maxCandidates,
BatchMode: batchMode, skipFailedCmds: *skipFailedCmds, debugRecords: *debugRecords}
if batchMode {
err := evaluator.initBatchMode(*input, *inputDataRoot)
if err != nil {
@ -95,29 +100,39 @@ func main() {
// dummy := strategyDummy{}
// strategies = append(strategies, &dummy)
recent := strategyRecent{}
strategies = append(strategies, &strategyRecent{})
frequent := strategyFrequent{}
frequent.init()
directory := strategyDirectorySensitive{}
directory.init()
strategies = append(strategies, &frequent)
random := strategyRandom{candidatesSize: maxCandidates}
random.init()
strategies = append(strategies, &random)
markovCmd := strategyMarkovChainCmd{order: 1}
markovCmd.init()
directory := strategyDirectorySensitive{}
directory.init()
strategies = append(strategies, &directory)
if *slow {
distanceStaticBest := strategyRecordDistance{
distParams: common.DistParams{SessionID: 1, Pwd: 10, RealPwd: 10, Time: 1},
label: "10*pwd,10*realpwd,1*session,time",
}
strategies = append(strategies, &distanceStaticBest)
markovCmd2 := strategyMarkovChainCmd{order: 2}
markovCmd2.init()
markovCmd := strategyMarkovChainCmd{order: 1}
markovCmd.init()
markov := strategyMarkovChain{order: 1}
markov.init()
markovCmd2 := strategyMarkovChainCmd{order: 2}
markovCmd2.init()
markov2 := strategyMarkovChain{order: 2}
markov2.init()
markov := strategyMarkovChain{order: 1}
markov.init()
strategies = append(strategies, &recent, &frequent, &directory, &random)
markov2 := strategyMarkovChain{order: 2}
markov2.init()
if *slow {
strategies = append(strategies, &markovCmd2, &markovCmd, &markov2, &markov)
}
@ -175,6 +190,8 @@ type evaluator struct {
sanitizedInput bool
BatchMode bool
maxCandidates int
skipFailedCmds bool
debugRecords float64
UsersRecords []userRecords
Strategies []strategyJSON
}
@ -235,6 +252,10 @@ func (e *evaluator) processRecords() {
}
log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present")
}
e.UsersRecords[i].Devices[j].Records[k].SeqSessionID = id
if e.debugRecords > 0 && rand.Float64() < e.debugRecords {
e.UsersRecords[i].Devices[j].Records[k].DebugThisRecord = true
}
}
sort.SliceStable(e.UsersRecords[i].Devices[j].Records, func(x, y int) bool {
if device.Records[x].SeqSessionID == device.Records[y].SeqSessionID {
@ -253,8 +274,37 @@ func (e *evaluator) evaluate(strategy strategy) error {
for i := range e.UsersRecords {
for j := range e.UsersRecords[i].Devices {
bar := progressbar.New(len(e.UsersRecords[i].Devices[j].Records))
var prevRecord common.EnrichedRecord
for _, record := range e.UsersRecords[i].Devices[j].Records {
if e.skipFailedCmds && record.ExitCode != 0 {
continue
}
candidates := strategy.GetCandidates()
if record.DebugThisRecord {
log.Println()
log.Println("===================================================")
log.Println("STRATEGY:", title, "-", description)
log.Println("===================================================")
log.Println("Previous record:")
if prevRecord.RealtimeBefore == 0 {
log.Println("== NIL")
} else {
rec, _ := prevRecord.ToString()
log.Println(rec)
}
log.Println("---------------------------------------------------")
log.Println("Recommendations for:")
rec, _ := record.ToString()
log.Println(rec)
log.Println("---------------------------------------------------")
for i, candidate := range candidates {
if i > 10 {
break
}
log.Println(string(candidate))
}
log.Println("===================================================")
}
matchFound := false
longestPrefixMatchLength := 0
@ -289,6 +339,7 @@ func (e *evaluator) evaluate(strategy strategy) error {
return err
}
bar.Add(1)
prevRecord = record
}
strategy.ResetHistory()
fmt.Println()

@ -0,0 +1,68 @@
package main
import (
"sort"
"strconv"
"github.com/curusarn/resh/common"
)
// strategyRecordDistance recommends command lines from history ordered by
// their record distance (EnrichedRecord.DistanceTo) to the latest record.
type strategyRecordDistance struct {
	// history holds the records seen so far, most recent first
	// (AddHistoryRecord prepends).
	history []common.EnrichedRecord
	// distParams are the weights handed to DistanceTo.
	distParams common.DistParams
	// maxDepth limits how far into history GetCandidates looks;
	// 0 means no limit.
	maxDepth int
	// label is a free-form tag included in the strategy title.
	label string
}
// strDistEntry pairs a command line with the distance computed for the
// history record it came from.
type strDistEntry struct {
	cmdLine  string
	distance float64
}
// init puts the strategy into its initial state by dropping all stored
// history. The distance parameters, depth and label are left untouched.
func (s *strategyRecordDistance) init() {
	s.history = nil
}
// GetTitleAndDescription returns the strategy title and a short description.
// The title embeds the configured maxDepth and label so that differently
// parametrized instances can be told apart.
func (s *strategyRecordDistance) GetTitleAndDescription() (string, string) {
	const description = "Use record distance to recommend commands"

	depth := strconv.Itoa(s.maxDepth)
	title := "record distance (depth:" + depth + ";" + s.label + ")"
	return title, description
}
// GetCandidates returns the command lines from history ordered by ascending
// record distance to a reference record, with duplicates removed (the
// closest occurrence of each command line wins).
//
// It returns nil when there is no history yet.
func (s *strategyRecordDistance) GetCandidates() []string {
	if len(s.history) == 0 {
		return nil
	}

	// The reference record is a copy of the most recent record with an empty
	// command line and its "before" members replaced by the "after" ones -
	// presumably an approximation of the context the next command runs in.
	reference := s.history[0]
	reference.SetCmdLine("")
	reference.SetBeforeToAfter()

	scored := make([]strDistEntry, 0, len(s.history))
	for i := range s.history {
		// maxDepth == 0 disables the limit.
		// NOTE(review): with a non-zero maxDepth indexes 0..maxDepth are
		// scored, i.e. maxDepth+1 records - confirm this is intended.
		if s.maxDepth != 0 && i > s.maxDepth {
			break
		}
		past := &s.history[i]
		scored = append(scored, strDistEntry{
			cmdLine:  past.CmdLine,
			distance: past.DistanceTo(reference, s.distParams),
		})
	}

	// Stable sort keeps more recent records first among equal distances.
	sort.SliceStable(scored, func(a, b int) bool {
		return scored[a].distance < scored[b].distance
	})

	var candidates []string
	seen := make(map[string]bool, len(scored))
	for _, entry := range scored {
		if !seen[entry.cmdLine] {
			seen[entry.cmdLine] = true
			candidates = append(candidates, entry.cmdLine)
		}
	}
	return candidates
}
// AddHistoryRecord stores a copy of record at the front of the history so
// that index 0 is always the most recent record. It always returns nil.
func (s *strategyRecordDistance) AddHistoryRecord(record *common.EnrichedRecord) error {
	updated := make([]common.EnrichedRecord, 0, len(s.history)+1)
	updated = append(updated, *record)
	s.history = append(updated, s.history...)
	return nil
}
// ResetHistory forgets all records added so far by re-initializing the
// strategy. It always returns nil.
func (s *strategyRecordDistance) ResetHistory() error {
	s.init()
	return nil
}
Loading…
Cancel
Save