diff --git a/cmd/collect/main.go b/cmd/collect/main.go index 41d635b..75cc61d 100644 --- a/cmd/collect/main.go +++ b/cmd/collect/main.go @@ -11,6 +11,7 @@ import ( "os" "github.com/BurntSushi/toml" + "github.com/curusarn/resh/pkg/cfg" "github.com/curusarn/resh/pkg/records" // "os/exec" @@ -34,7 +35,7 @@ func main() { machineIDPath := "/etc/machine-id" - var config records.Config + var config cfg.Config if _, err := toml.DecodeFile(configPath, &config); err != nil { log.Fatal("Error reading config:", err) } diff --git a/cmd/daemon/main.go b/cmd/daemon/main.go index e2f7997..9411350 100644 --- a/cmd/daemon/main.go +++ b/cmd/daemon/main.go @@ -14,6 +14,7 @@ import ( "strings" "github.com/BurntSushi/toml" + "github.com/curusarn/resh/pkg/cfg" "github.com/curusarn/resh/pkg/records" ) @@ -43,7 +44,7 @@ func main() { log.SetOutput(f) log.SetPrefix(strconv.Itoa(os.Getpid()) + " | ") - var config records.Config + var config cfg.Config if _, err := toml.DecodeFile(configPath, &config); err != nil { log.Println("Error reading config", err) return diff --git a/cmd/evaluate/main.go b/cmd/evaluate/main.go index 0290f59..6e013cc 100644 --- a/cmd/evaluate/main.go +++ b/cmd/evaluate/main.go @@ -13,7 +13,6 @@ import ( "os/exec" "os/user" "path/filepath" - "sort" "github.com/curusarn/resh/pkg/records" "github.com/jpillora/longestcommon" @@ -49,7 +48,7 @@ func main() { inputDataRoot := flag.String("input-data-root", "", "Input data root, enables batch mode, looks for files matching --input option") slow := flag.Bool("slow", false, - "Enables stuff that takes a long time (e.g. markov chain strategies).") + "Enables strategies that takes a long time (e.g. markov chain strategies).") skipFailedCmds := flag.Bool("skip-failed-cmds", false, "Skips records with non-zero exit status.") debugRecords := flag.Float64("debug", 0, "Debug records - percentage of records that should be debugged.") @@ -96,32 +95,33 @@ func main() { } } - var strategies []strategy + var simpleStrategies []ISimpleStrategy + var strategies []IStrategy // dummy := strategyDummy{} - // strategies = append(strategies, &dummy) + // simpleStrategies = append(simpleStrategies, &dummy) - strategies = append(strategies, &strategyRecent{}) + simpleStrategies = append(simpleStrategies, &strategyRecent{}) - frequent := strategyFrequent{} - frequent.init() - strategies = append(strategies, &frequent) + // frequent := strategyFrequent{} + // frequent.init() + // simpleStrategies = append(simpleStrategies, &frequent) - random := strategyRandom{candidatesSize: maxCandidates} - random.init() - strategies = append(strategies, &random) + // random := strategyRandom{candidatesSize: maxCandidates} + // random.init() + // simpleStrategies = append(simpleStrategies, &random) directory := strategyDirectorySensitive{} directory.init() - strategies = append(strategies, &directory) + simpleStrategies = append(simpleStrategies, &directory) - dynamicDist := strategyDynamicRecordDistance{ + dynamicDistG := strategyDynamicRecordDistance{ maxDepth: 3000, - distParams: records.DistParams{Pwd: 10, RealPwd: 10, SessionID: 1, Time: 1}, - label: "10*pwd,10*realpwd,session,time", + distParams: records.DistParams{Pwd: 10, RealPwd: 10, SessionID: 1, Time: 1, Git: 10}, + label: "10*pwd,10*realpwd,session,time,10*git", } - dynamicDist.init() - strategies = append(strategies, &dynamicDist) + dynamicDistG.init() + strategies = append(strategies, &dynamicDistG) distanceStaticBest := strategyRecordDistance{ maxDepth: 3000, @@ -130,6 +130,10 @@ func main() { } strategies = append(strategies, &distanceStaticBest) + recentBash := strategyRecentBash{} + recentBash.init() + strategies = append(strategies, &recentBash) + if *slow { markovCmd := strategyMarkovChainCmd{order: 1} @@ -144,7 +148,11 @@ func main() { markov2 := strategyMarkovChain{order: 2} markov2.init() - strategies = append(strategies, &markovCmd2, &markovCmd, &markov2, &markov) + simpleStrategies = append(simpleStrategies, &markovCmd2, &markovCmd, &markov2, &markov) + } + + for _, strat := range simpleStrategies { + strategies = append(strategies, NewSimpleStrategyWrapper(strat)) } for _, strat := range strategies { @@ -157,13 +165,45 @@ func main() { evaluator.calculateStatsAndPlot(*plottingScript) } -type strategy interface { +type ISimpleStrategy interface { GetTitleAndDescription() (string, string) GetCandidates() []string AddHistoryRecord(record *records.EnrichedRecord) error ResetHistory() error } +type IStrategy interface { + GetTitleAndDescription() (string, string) + GetCandidates(r records.EnrichedRecord) []string + AddHistoryRecord(record *records.EnrichedRecord) error + ResetHistory() error +} + +type simpleStrategyWrapper struct { + strategy ISimpleStrategy +} + +// NewSimpleStrategyWrapper returns IStrategy created by wrapping given ISimpleStrategy +func NewSimpleStrategyWrapper(strategy ISimpleStrategy) *simpleStrategyWrapper { + return &simpleStrategyWrapper{strategy: strategy} +} + +func (s *simpleStrategyWrapper) GetTitleAndDescription() (string, string) { + return s.strategy.GetTitleAndDescription() +} + +func (s *simpleStrategyWrapper) GetCandidates(r records.EnrichedRecord) []string { + return s.strategy.GetCandidates() +} + +func (s *simpleStrategyWrapper) AddHistoryRecord(r *records.EnrichedRecord) error { + return s.strategy.AddHistoryRecord(r) +} + +func (s *simpleStrategyWrapper) ResetHistory() error { + return s.strategy.ResetHistory() +} + type matchJSON struct { Match bool Distance int @@ -209,7 +249,7 @@ type evaluator struct { func (e *evaluator) initBatchMode(input string, inputDataRoot string) error { e.UsersRecords = e.loadHistoryRecordsBatchMode(input, inputDataRoot) - e.processRecords() + e.preprocessRecords() return nil } @@ -219,7 +259,7 @@ func (e *evaluator) init(inputPath string) error { user := userRecords{} user.Devices = append(user.Devices, device) e.UsersRecords = append(e.UsersRecords, user) - e.processRecords() + e.preprocessRecords() return nil } @@ -241,44 +281,61 @@ func (e *evaluator) calculateStatsAndPlot(scriptName string) { } } -// enrich records and add them to serializable structure -func (e *evaluator) processRecords() { - for i := range e.UsersRecords { - for j, device := range e.UsersRecords[i].Devices { - sessionIDs := map[string]uint64{} - var nextID uint64 - nextID = 1 // start with 1 because 0 won't get saved to json - for k, record := range e.UsersRecords[i].Devices[j].Records { - id, found := sessionIDs[record.SessionID] - if found == false { - id = nextID - sessionIDs[record.SessionID] = id - nextID++ - } - e.UsersRecords[i].Devices[j].Records[k].SeqSessionID = id - // assert - if record.Sanitized != e.sanitizedInput { - if e.sanitizedInput { - log.Fatal("ASSERT failed: '--sanitized-input' is present but data is not sanitized") - } - log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present") - } - e.UsersRecords[i].Devices[j].Records[k].SeqSessionID = id - if e.debugRecords > 0 && rand.Float64() < e.debugRecords { - e.UsersRecords[i].Devices[j].Records[k].DebugThisRecord = true - } +func (e *evaluator) preprocessDeviceRecords(device deviceRecords) deviceRecords { + sessionIDs := map[string]uint64{} + var nextID uint64 + nextID = 1 // start with 1 because 0 won't get saved to json + for k, record := range device.Records { + id, found := sessionIDs[record.SessionID] + if found == false { + id = nextID + sessionIDs[record.SessionID] = id + nextID++ + } + device.Records[k].SeqSessionID = id + // assert + if record.Sanitized != e.sanitizedInput { + if e.sanitizedInput { + log.Fatal("ASSERT failed: '--sanitized-input' is present but data is not sanitized") } - sort.SliceStable(e.UsersRecords[i].Devices[j].Records, func(x, y int) bool { - if device.Records[x].SeqSessionID == device.Records[y].SeqSessionID { - return device.Records[x].RealtimeAfterLocal < device.Records[y].RealtimeAfterLocal - } - return device.Records[x].SeqSessionID < device.Records[y].SeqSessionID - }) + log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present") + } + device.Records[k].SeqSessionID = id + if e.debugRecords > 0 && rand.Float64() < e.debugRecords { + device.Records[k].DebugThisRecord = true + } + } + // sort.SliceStable(device.Records, func(x, y int) bool { + // if device.Records[x].SeqSessionID == device.Records[y].SeqSessionID { + // return device.Records[x].RealtimeAfterLocal < device.Records[y].RealtimeAfterLocal + // } + // return device.Records[x].SeqSessionID < device.Records[y].SeqSessionID + // }) + + // iterate from back and mark last record of each session + sessionIDSet := map[string]bool{} + for i := len(device.Records) - 1; i >= 0; i-- { + var record *records.EnrichedRecord + record = &device.Records[i] + if sessionIDSet[record.SessionID] { + continue + } + sessionIDSet[record.SessionID] = true + record.LastRecordOfSession = true + } + return device +} + +// enrich records and add sequential session ID +func (e *evaluator) preprocessRecords() { + for i := range e.UsersRecords { + for j := range e.UsersRecords[i].Devices { + e.UsersRecords[i].Devices[j] = e.preprocessDeviceRecords(e.UsersRecords[i].Devices[j]) } } } -func (e *evaluator) evaluate(strategy strategy) error { +func (e *evaluator) evaluate(strategy IStrategy) error { title, description := strategy.GetTitleAndDescription() log.Println("Evaluating strategy:", title, "-", description) strategyData := strategyJSON{Title: title, Description: description} @@ -290,7 +347,7 @@ func (e *evaluator) evaluate(strategy strategy) error { if e.skipFailedCmds && record.ExitCode != 0 { continue } - candidates := strategy.GetCandidates() + candidates := strategy.GetCandidates(records.Stripped(record)) if record.DebugThisRecord { log.Println() log.Println("===================================================") diff --git a/cmd/evaluate/strategy-dynamic-record-distance.go b/cmd/evaluate/strategy-dynamic-record-distance.go index fdaa820..0a107e9 100644 --- a/cmd/evaluate/strategy-dynamic-record-distance.go +++ b/cmd/evaluate/strategy-dynamic-record-distance.go @@ -9,12 +9,13 @@ import ( ) type strategyDynamicRecordDistance struct { - history []records.EnrichedRecord - distParams records.DistParams - pwdHistogram map[string]int - realPwdHistogram map[string]int - maxDepth int - label string + history []records.EnrichedRecord + distParams records.DistParams + pwdHistogram map[string]int + realPwdHistogram map[string]int + gitOriginHistogram map[string]int + maxDepth int + label string } type strDynDistEntry struct { @@ -26,6 +27,7 @@ func (s *strategyDynamicRecordDistance) init() { s.history = nil s.pwdHistogram = map[string]int{} s.realPwdHistogram = map[string]int{} + s.gitOriginHistogram = map[string]int{} } func (s *strategyDynamicRecordDistance) GetTitleAndDescription() (string, string) { @@ -36,26 +38,23 @@ func (s *strategyDynamicRecordDistance) idf(count int) float64 { return math.Log(float64(len(s.history)) / float64(count)) } -func (s *strategyDynamicRecordDistance) GetCandidates() []string { +func (s *strategyDynamicRecordDistance) GetCandidates(strippedRecord records.EnrichedRecord) []string { if len(s.history) == 0 { return nil } - var prevRecord records.EnrichedRecord - prevRecord = s.history[0] - prevRecord.SetCmdLine("") - prevRecord.SetBeforeToAfter() var mapItems []strDynDistEntry for i, record := range s.history { if s.maxDepth != 0 && i > s.maxDepth { break } distParams := records.DistParams{ - Pwd: s.distParams.Pwd * s.idf(s.pwdHistogram[prevRecord.PwdAfter]), - RealPwd: s.distParams.RealPwd * s.idf(s.realPwdHistogram[prevRecord.RealPwdAfter]), + Pwd: s.distParams.Pwd * s.idf(s.pwdHistogram[strippedRecord.PwdAfter]), + RealPwd: s.distParams.RealPwd * s.idf(s.realPwdHistogram[strippedRecord.RealPwdAfter]), + Git: s.distParams.Git * s.idf(s.gitOriginHistogram[strippedRecord.GitOriginRemote]), Time: s.distParams.Time, SessionID: s.distParams.SessionID, } - distance := record.DistanceTo(prevRecord, distParams) + distance := record.DistanceTo(strippedRecord, distParams) mapItems = append(mapItems, strDynDistEntry{record.CmdLine, distance}) } sort.SliceStable(mapItems, func(i int, j int) bool { return mapItems[i].distance < mapItems[j].distance }) @@ -76,6 +75,7 @@ func (s *strategyDynamicRecordDistance) AddHistoryRecord(record *records.Enriche s.history = append([]records.EnrichedRecord{*record}, s.history...) s.pwdHistogram[record.Pwd]++ s.realPwdHistogram[record.RealPwd]++ + s.gitOriginHistogram[record.GitOriginRemote]++ return nil } diff --git a/cmd/evaluate/strategy-recent-bash.go b/cmd/evaluate/strategy-recent-bash.go new file mode 100644 index 0000000..7a83632 --- /dev/null +++ b/cmd/evaluate/strategy-recent-bash.go @@ -0,0 +1,50 @@ +package main + +import "github.com/curusarn/resh/pkg/records" + +type strategyRecentBash struct { + histfile []string + histfileSnapshot map[string][]string + history map[string][]string +} + +func (s *strategyRecentBash) init() { + s.histfileSnapshot = map[string][]string{} + s.history = map[string][]string{} +} + +func (s *strategyRecentBash) GetTitleAndDescription() (string, string) { + return "recent (bash-like)", "Behave like bash" +} + +func (s *strategyRecentBash) GetCandidates(strippedRecord records.EnrichedRecord) []string { + // populate the local history from histfile + if s.histfileSnapshot[strippedRecord.SessionID] == nil { + s.histfileSnapshot[strippedRecord.SessionID] = s.histfile + } + return append(s.history[strippedRecord.SessionID], s.histfileSnapshot[strippedRecord.SessionID]...) +} + +func (s *strategyRecentBash) AddHistoryRecord(record *records.EnrichedRecord) error { + // remove previous occurance of record + for i, cmd := range s.history[record.SessionID] { + if cmd == record.CmdLine { + s.history[record.SessionID] = append(s.history[record.SessionID][:i], s.history[record.SessionID][i+1:]...) + } + } + // append new record + s.history[record.SessionID] = append([]string{record.CmdLine}, s.history[record.SessionID]...) + + if record.LastRecordOfSession { + // append history of the session to histfile and clear session history + s.histfile = append(s.history[record.SessionID], s.histfile...) + s.histfileSnapshot[record.SessionID] = nil + s.history[record.SessionID] = nil + } + return nil +} + +func (s *strategyRecentBash) ResetHistory() error { + s.init() + return nil +} diff --git a/cmd/evaluate/strategy-record-distance.go b/cmd/evaluate/strategy-record-distance.go index 31ed8e0..d2b8696 100644 --- a/cmd/evaluate/strategy-record-distance.go +++ b/cmd/evaluate/strategy-record-distance.go @@ -27,20 +27,16 @@ func (s *strategyRecordDistance) GetTitleAndDescription() (string, string) { return "record distance (depth:" + strconv.Itoa(s.maxDepth) + ";" + s.label + ")", "Use record distance to recommend commands" } -func (s *strategyRecordDistance) GetCandidates() []string { +func (s *strategyRecordDistance) GetCandidates(strippedRecord records.EnrichedRecord) []string { if len(s.history) == 0 { return nil } - var prevRecord records.EnrichedRecord - prevRecord = s.history[0] - prevRecord.SetCmdLine("") - prevRecord.SetBeforeToAfter() var mapItems []strDistEntry for i, record := range s.history { if s.maxDepth != 0 && i > s.maxDepth { break } - distance := record.DistanceTo(prevRecord, s.distParams) + distance := record.DistanceTo(strippedRecord, s.distParams) mapItems = append(mapItems, strDistEntry{record.CmdLine, distance}) } sort.SliceStable(mapItems, func(i int, j int) bool { return mapItems[i].distance < mapItems[j].distance }) diff --git a/pkg/cfg/cfg.go b/pkg/cfg/cfg.go new file mode 100644 index 0000000..8373306 --- /dev/null +++ b/pkg/cfg/cfg.go @@ -0,0 +1,6 @@ +package cfg + +// Config struct +type Config struct { + Port int +} diff --git a/pkg/records/records.go b/pkg/records/records.go index c63950d..32ea789 100644 --- a/pkg/records/records.go +++ b/pkg/records/records.go @@ -89,12 +89,13 @@ type EnrichedRecord struct { Record // enriching fields - added "later" - Command string `json:"command"` - FirstWord string `json:"firstWord"` - Invalid bool `json:"invalid"` - SeqSessionID uint64 `json:"seqSessionId"` - DebugThisRecord bool `json:"debugThisRecord"` - Errors []string `json:"errors"` + Command string `json:"command"` + FirstWord string `json:"firstWord"` + Invalid bool `json:"invalid"` + SeqSessionID uint64 `json:"seqSessionId"` + LastRecordOfSession bool `json:"lastRecordOfSession"` + DebugThisRecord bool `json:"debugThisRecord"` + Errors []string `json:"errors"` // SeqSessionID uint64 `json:"seqSessionId,omitempty"` } @@ -213,6 +214,22 @@ func (r *EnrichedRecord) SetCmdLine(cmdLine string) { } } +// Stripped returns record stripped of all info that is not available during prediction +func Stripped(r EnrichedRecord) EnrichedRecord { + // clear the cmd itself + r.SetCmdLine("") + // replace after info with before info + r.PwdAfter = r.Pwd + r.RealPwdAfter = r.RealPwd + r.TimezoneAfter = r.TimezoneBefore + r.RealtimeAfter = r.RealtimeBefore + r.RealtimeAfterLocal = r.RealtimeBeforeLocal + // clear some more stuff + r.RealtimeDuration = 0 + r.LastRecordOfSession = false + return r +} + // SetBeforeToAfter - set "before" members to "after" members func (r *EnrichedRecord) SetBeforeToAfter() { r.Pwd = r.PwdAfter @@ -375,8 +392,3 @@ func (r *EnrichedRecord) DistanceTo(r2 EnrichedRecord, p DistParams) float64 { return dist } - -// Config struct -type Config struct { - Port int -}