diff --git a/common/resh-common.go b/common/resh-common.go index 7f1d4a6..260c4be 100644 --- a/common/resh-common.go +++ b/common/resh-common.go @@ -1,7 +1,10 @@ package common import ( + "encoding/json" + "errors" "log" + "math" "strconv" "strings" @@ -86,10 +89,12 @@ type EnrichedRecord struct { Record // enriching fields - added "later" - Command string `json:"command"` - FirstWord string `json:"firstWord"` - Invalid bool `json:"invalid"` - SeqSessionID uint64 `json:"seqSessionId"` + Command string `json:"command"` + FirstWord string `json:"firstWord"` + Invalid bool `json:"invalid"` + SeqSessionID uint64 `json:"seqSessionId"` + DebugThisRecord bool `json:"debugThisRecord"` + Errors []string `json:"errors"` // SeqSessionID uint64 `json:"seqSessionId,omitempty"` } @@ -112,14 +117,33 @@ func ConvertRecord(r *FallbackRecord) Record { } } +// ToString - returns record the json +func (r EnrichedRecord) ToString() (string, error) { + jsonRec, err := json.Marshal(r) + if err != nil { + return "marshalling error", err + } + return string(jsonRec), nil +} + // Enrich - adds additional fields to the record func (r Record) Enrich() EnrichedRecord { record := EnrichedRecord{Record: r} // Get command/first word from commandline - record.Command, record.FirstWord = GetCommandAndFirstWord(r.CmdLine) - err := r.Validate() + var err error + record.Command, record.FirstWord, err = GetCommandAndFirstWord(r.CmdLine) if err != nil { - log.Println("Invalid command:", r.CmdLine) + record.Errors = append(record.Errors, "GetCommandAndFirstWord error:"+err.Error()) + rec, _ := record.ToString() + log.Println("Invalid command:", rec) + record.Invalid = true + return record + } + err = r.Validate() + if err != nil { + record.Errors = append(record.Errors, "Validate error:"+err.Error()) + rec, _ := record.ToString() + log.Println("Invalid command:", rec) record.Invalid = true } return record @@ -128,18 +152,85 @@ func (r Record) Enrich() EnrichedRecord { // Validate - returns error if the record is invalid func (r *Record) Validate() error { + if r.RealtimeBefore == 0 || r.RealtimeAfter == 0 { + return errors.New("There is no Time") + } + if r.RealPwd == "" || r.RealPwdAfter == "" { + return errors.New("There is no Real Pwd") + } + if r.Pwd == "" || r.PwdAfter == "" { + return errors.New("There is no Pwd") + } + + // TimezoneBefore + // TimezoneAfter + + // RealtimeDuration + // RealtimeSinceSessionStart - TODO: add later + // RealtimeSinceBoot - TODO: add later + + // device extras + // Host + // Hosttype + // Ostype + // Machtype + // OsReleaseID + // OsReleaseVersionID + // OsReleaseIDLike + // OsReleaseName + // OsReleasePrettyName + + // session extras + // Term + // Shlvl + + // static info + // Lang + // LcAll + + // meta + // ReshUUID + // ReshVersion + // ReshRevision + + // added by sanitizatizer + // Sanitized + // CmdLength return nil } +// SetCmdLine sets cmdLine and related members +func (r *EnrichedRecord) SetCmdLine(cmdLine string) { + r.CmdLine = cmdLine + r.CmdLength = len(cmdLine) + r.ExitCode = 0 + var err error + r.Command, r.FirstWord, err = GetCommandAndFirstWord(cmdLine) + if err != nil { + r.Errors = append(r.Errors, "GetCommandAndFirstWord error:"+err.Error()) + // log.Println("Invalid command:", r.CmdLine) + r.Invalid = true + } +} + +// SetBeforeToAfter - set "before" members to "after" members +func (r *EnrichedRecord) SetBeforeToAfter() { + r.Pwd = r.PwdAfter + r.RealPwd = r.RealPwdAfter + // r.TimezoneBefore = r.TimezoneAfter + // r.RealtimeBefore = r.RealtimeAfter + // r.RealtimeBeforeLocal = r.RealtimeAfterLocal +} + // GetCommandAndFirstWord func -func GetCommandAndFirstWord(cmdLine string) (string, string) { +func GetCommandAndFirstWord(cmdLine string) (string, string, error) { args, err := shellwords.Parse(cmdLine) if err != nil { log.Println("shellwords Error:", err, " (cmdLine: <", cmdLine, "> )") - return "", "" + return "", "", err } if len(args) == 0 { - return "", "" + return "", "", nil } i := 0 for true { @@ -149,10 +240,140 @@ func GetCommandAndFirstWord(cmdLine string) (string, string) { i++ continue } - return args[i], args[0] + return args[i], args[0], nil } log.Fatal("GetCommandAndFirstWord error: this should not happen!") - return "ERROR", "ERROR" + return "ERROR", "ERROR", errors.New("this should not happen - contact developer ;)") +} + +// DistParams is used to supply params to EnrichedRecord.DistanceTo() +type DistParams struct { + ExitCode float64 + MachineID float64 + SessionID float64 + Login float64 + Shell float64 + Pwd float64 + RealPwd float64 + Git float64 + Time float64 +} + +// DistanceTo another record +func (r *EnrichedRecord) DistanceTo(r2 EnrichedRecord, p DistParams) float64 { + var dist float64 + dist = 0 + + // lev distance or something? TODO later + // CmdLine + + // exit code + if r.ExitCode != r2.ExitCode { + if r.ExitCode == 0 || r2.ExitCode == 0 { + // one success + one error -> 1 + dist += 1 * p.ExitCode + } else { + // two different errors + dist += 0.5 * p.ExitCode + } + } + + // machine/device + if r.MachineID != r2.MachineID { + dist += 1 * p.MachineID + } + // Uname + + // session + if r.SessionID != r2.SessionID { + dist += 1 * p.SessionID + } + // Pid - add because of nested shells? + // SessionPid + + // user + if r.Login != r2.Login { + dist += 1 * p.Login + } + // Home + + // shell + if r.Shell != r2.Shell { + dist += 1 * p.Shell + } + // ShellEnv + + // pwd + if r.Pwd != r2.Pwd { + // TODO: compare using hierarchy + // TODO: make more important + dist += 1 * p.Pwd + } + if r.RealPwd != r2.RealPwd { + // TODO: -||- + dist += 1 * p.RealPwd + } + // PwdAfter + // RealPwdAfter + + // git + if r.GitDir != r2.GitDir { + dist += 1 * p.Git + } + if r.GitRealDir != r2.GitRealDir { + dist += 1 * p.Git + } + if r.GitOriginRemote != r2.GitOriginRemote { + dist += 1 * p.Git + } + + // time + // this can actually get negative for differences of less than one second which is fine + // distance grows by 1 with every order + distTime := math.Log10(math.Abs(r.RealtimeBefore-r2.RealtimeBefore)) * p.Time + if math.IsNaN(distTime) == false && math.IsInf(distTime, 0) == false { + dist += distTime + } + // RealtimeBeforeLocal + // RealtimeAfter + // RealtimeAfterLocal + + // TimezoneBefore + // TimezoneAfter + + // RealtimeDuration + // RealtimeSinceSessionStart - TODO: add later + // RealtimeSinceBoot - TODO: add later + + // device extras + // Host + // Hosttype + // Ostype + // Machtype + // OsReleaseID + // OsReleaseVersionID + // OsReleaseIDLike + // OsReleaseName + // OsReleasePrettyName + + // session extras + // Term + // Shlvl + + // static info + // Lang + // LcAll + + // meta + // ReshUUID + // ReshVersion + // ReshRevision + + // added by sanitizatizer + // Sanitized + // CmdLength + + return dist } // Config struct diff --git a/evaluate/resh-evaluate-plot.py b/evaluate/resh-evaluate-plot.py index bb4eb3e..7568800 100755 --- a/evaluate/resh-evaluate-plot.py +++ b/evaluate/resh-evaluate-plot.py @@ -9,6 +9,7 @@ import matplotlib.pyplot as plt import matplotlib.path as mpath import numpy as np from graphviz import Digraph +from datetime import datetime PLOT_WIDTH = 10 # inches PLOT_HEIGHT = 7 # inches @@ -274,7 +275,7 @@ def graph_cmdSequences(node_count=33, edge_minValue=0.05, view_graph=True): def plot_strategies_matches(plot_size=50, selected_strategies=[]): plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) - plt.title("Matches at distance") + plt.title("Matches at distance <{}>".format(datetime.now().strftime('%H:%M:%S'))) plt.ylabel('%' + " of matches") plt.xlabel("Distance") legend = [] @@ -349,7 +350,7 @@ def plot_strategies_matches(plot_size=50, selected_strategies=[]): def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]): plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) - plt.title("Average characters recalled at distance") + plt.title("Average characters recalled at distance <{}>".format(datetime.now().strftime('%H:%M:%S'))) plt.ylabel("Average characters recalled") plt.xlabel("Distance") x_values = range(1, plot_size+1) @@ -420,7 +421,7 @@ def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]): def plot_strategies_charsRecalled_prefix(plot_size=50, selected_strategies=[]): plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) - plt.title("Average characters recalled at distance (including prefix matches)") + plt.title("Average characters recalled at distance (including prefix matches) <{}>".format(datetime.now().strftime('%H:%M:%S'))) plt.ylabel("Average characters recalled (including prefix matches)") plt.xlabel("Distance") x_values = range(1, plot_size+1) @@ -493,17 +494,17 @@ def plot_strategies_charsRecalled_prefix(plot_size=50, selected_strategies=[]): plt.show() -plot_cmdLineFrq_rank() -plot_cmdFrq_rank() +# plot_cmdLineFrq_rank() +# plot_cmdFrq_rank() -plot_cmdLineVocabularySize_cmdLinesEntered() -plot_cmdVocabularySize_cmdLinesEntered() +# plot_cmdLineVocabularySize_cmdLinesEntered() +# plot_cmdVocabularySize_cmdLinesEntered() plot_strategies_matches(20) plot_strategies_charsRecalled(20) plot_strategies_charsRecalled_prefix(20) -graph_cmdSequences(node_count=33, edge_minValue=0.048) +# graph_cmdSequences(node_count=33, edge_minValue=0.048) # graph_cmdSequences(node_count=28, edge_minValue=0.06) diff --git a/evaluate/resh-evaluate.go b/evaluate/resh-evaluate.go index 39ddbb6..3fdd45a 100644 --- a/evaluate/resh-evaluate.go +++ b/evaluate/resh-evaluate.go @@ -8,6 +8,7 @@ import ( "fmt" "io/ioutil" "log" + "math/rand" "os" "os/exec" "os/user" @@ -48,6 +49,9 @@ func main() { "Input data root, enables batch mode, looks for files matching --input option") slow := flag.Bool("slow", false, "Enables stuff that takes a long time (e.g. markov chain strategies).") + skipFailedCmds := flag.Bool("skip-failed-cmds", false, + "Skips records with non-zero exit status.") + debugRecords := flag.Float64("debug", 0, "Debug records - percentage of records that should be debugged.") flag.Parse() @@ -77,7 +81,8 @@ func main() { } } - evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: maxCandidates, BatchMode: batchMode} + evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: maxCandidates, + BatchMode: batchMode, skipFailedCmds: *skipFailedCmds, debugRecords: *debugRecords} if batchMode { err := evaluator.initBatchMode(*input, *inputDataRoot) if err != nil { @@ -95,29 +100,39 @@ func main() { // dummy := strategyDummy{} // strategies = append(strategies, &dummy) - recent := strategyRecent{} + strategies = append(strategies, &strategyRecent{}) + frequent := strategyFrequent{} frequent.init() - directory := strategyDirectorySensitive{} - directory.init() + strategies = append(strategies, &frequent) + random := strategyRandom{candidatesSize: maxCandidates} random.init() + strategies = append(strategies, &random) - markovCmd := strategyMarkovChainCmd{order: 1} - markovCmd.init() + directory := strategyDirectorySensitive{} + directory.init() + strategies = append(strategies, &directory) + + if *slow { + distanceStaticBest := strategyRecordDistance{ + distParams: common.DistParams{SessionID: 1, Pwd: 10, RealPwd: 10, Time: 1}, + label: "10*pwd,10*realpwd,1*session,time", + } + strategies = append(strategies, &distanceStaticBest) - markovCmd2 := strategyMarkovChainCmd{order: 2} - markovCmd2.init() + markovCmd := strategyMarkovChainCmd{order: 1} + markovCmd.init() - markov := strategyMarkovChain{order: 1} - markov.init() + markovCmd2 := strategyMarkovChainCmd{order: 2} + markovCmd2.init() - markov2 := strategyMarkovChain{order: 2} - markov2.init() + markov := strategyMarkovChain{order: 1} + markov.init() - strategies = append(strategies, &recent, &frequent, &directory, &random) + markov2 := strategyMarkovChain{order: 2} + markov2.init() - if *slow { strategies = append(strategies, &markovCmd2, &markovCmd, &markov2, &markov) } @@ -175,6 +190,8 @@ type evaluator struct { sanitizedInput bool BatchMode bool maxCandidates int + skipFailedCmds bool + debugRecords float64 UsersRecords []userRecords Strategies []strategyJSON } @@ -235,6 +252,10 @@ func (e *evaluator) processRecords() { } log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present") } + e.UsersRecords[i].Devices[j].Records[k].SeqSessionID = id + if e.debugRecords > 0 && rand.Float64() < e.debugRecords { + e.UsersRecords[i].Devices[j].Records[k].DebugThisRecord = true + } } sort.SliceStable(e.UsersRecords[i].Devices[j].Records, func(x, y int) bool { if device.Records[x].SeqSessionID == device.Records[y].SeqSessionID { @@ -253,8 +274,37 @@ func (e *evaluator) evaluate(strategy strategy) error { for i := range e.UsersRecords { for j := range e.UsersRecords[i].Devices { bar := progressbar.New(len(e.UsersRecords[i].Devices[j].Records)) + var prevRecord common.EnrichedRecord for _, record := range e.UsersRecords[i].Devices[j].Records { + if e.skipFailedCmds && record.ExitCode != 0 { + continue + } candidates := strategy.GetCandidates() + if record.DebugThisRecord { + log.Println() + log.Println("===================================================") + log.Println("STRATEGY:", title, "-", description) + log.Println("===================================================") + log.Println("Previous record:") + if prevRecord.RealtimeBefore == 0 { + log.Println("== NIL") + } else { + rec, _ := prevRecord.ToString() + log.Println(rec) + } + log.Println("---------------------------------------------------") + log.Println("Recommendations for:") + rec, _ := record.ToString() + log.Println(rec) + log.Println("---------------------------------------------------") + for i, candidate := range candidates { + if i > 10 { + break + } + log.Println(string(candidate)) + } + log.Println("===================================================") + } matchFound := false longestPrefixMatchLength := 0 @@ -289,6 +339,7 @@ func (e *evaluator) evaluate(strategy strategy) error { return err } bar.Add(1) + prevRecord = record } strategy.ResetHistory() fmt.Println() diff --git a/evaluate/strategy-record-distance.go b/evaluate/strategy-record-distance.go new file mode 100644 index 0000000..07593e0 --- /dev/null +++ b/evaluate/strategy-record-distance.go @@ -0,0 +1,68 @@ +package main + +import ( + "sort" + "strconv" + + "github.com/curusarn/resh/common" +) + +type strategyRecordDistance struct { + history []common.EnrichedRecord + distParams common.DistParams + maxDepth int + label string +} + +type strDistEntry struct { + cmdLine string + distance float64 +} + +func (s *strategyRecordDistance) init() { + s.history = nil +} + +func (s *strategyRecordDistance) GetTitleAndDescription() (string, string) { + return "record distance (depth:" + strconv.Itoa(s.maxDepth) + ";" + s.label + ")", "Use record distance to recommend commands" +} + +func (s *strategyRecordDistance) GetCandidates() []string { + if len(s.history) == 0 { + return nil + } + var prevRecord common.EnrichedRecord + prevRecord = s.history[0] + prevRecord.SetCmdLine("") + prevRecord.SetBeforeToAfter() + var mapItems []strDistEntry + for i, record := range s.history { + if s.maxDepth != 0 && i > s.maxDepth { + break + } + distance := record.DistanceTo(prevRecord, s.distParams) + mapItems = append(mapItems, strDistEntry{record.CmdLine, distance}) + } + sort.SliceStable(mapItems, func(i int, j int) bool { return mapItems[i].distance < mapItems[j].distance }) + var hist []string + histSet := map[string]bool{} + for _, item := range mapItems { + if histSet[item.cmdLine] { + continue + } + histSet[item.cmdLine] = true + hist = append(hist, item.cmdLine) + } + return hist +} + +func (s *strategyRecordDistance) AddHistoryRecord(record *common.EnrichedRecord) error { + // append record to front + s.history = append([]common.EnrichedRecord{*record}, s.history...) + return nil +} + +func (s *strategyRecordDistance) ResetHistory() error { + s.init() + return nil +}