split cfg from records, add bash-like strategy, improvements

pull/15/head
Simon Let 6 years ago
parent c229caced9
commit 96cf2ae032
  1. 3
      cmd/collect/main.go
  2. 3
      cmd/daemon/main.go
  3. 167
      cmd/evaluate/main.go
  4. 28
      cmd/evaluate/strategy-dynamic-record-distance.go
  5. 50
      cmd/evaluate/strategy-recent-bash.go
  6. 8
      cmd/evaluate/strategy-record-distance.go
  7. 6
      pkg/cfg/cfg.go
  8. 34
      pkg/records/records.go

@ -11,6 +11,7 @@ import (
"os" "os"
"github.com/BurntSushi/toml" "github.com/BurntSushi/toml"
"github.com/curusarn/resh/pkg/cfg"
"github.com/curusarn/resh/pkg/records" "github.com/curusarn/resh/pkg/records"
// "os/exec" // "os/exec"
@ -34,7 +35,7 @@ func main() {
machineIDPath := "/etc/machine-id" machineIDPath := "/etc/machine-id"
var config records.Config var config cfg.Config
if _, err := toml.DecodeFile(configPath, &config); err != nil { if _, err := toml.DecodeFile(configPath, &config); err != nil {
log.Fatal("Error reading config:", err) log.Fatal("Error reading config:", err)
} }

@ -14,6 +14,7 @@ import (
"strings" "strings"
"github.com/BurntSushi/toml" "github.com/BurntSushi/toml"
"github.com/curusarn/resh/pkg/cfg"
"github.com/curusarn/resh/pkg/records" "github.com/curusarn/resh/pkg/records"
) )
@ -43,7 +44,7 @@ func main() {
log.SetOutput(f) log.SetOutput(f)
log.SetPrefix(strconv.Itoa(os.Getpid()) + " | ") log.SetPrefix(strconv.Itoa(os.Getpid()) + " | ")
var config records.Config var config cfg.Config
if _, err := toml.DecodeFile(configPath, &config); err != nil { if _, err := toml.DecodeFile(configPath, &config); err != nil {
log.Println("Error reading config", err) log.Println("Error reading config", err)
return return

@ -13,7 +13,6 @@ import (
"os/exec" "os/exec"
"os/user" "os/user"
"path/filepath" "path/filepath"
"sort"
"github.com/curusarn/resh/pkg/records" "github.com/curusarn/resh/pkg/records"
"github.com/jpillora/longestcommon" "github.com/jpillora/longestcommon"
@ -49,7 +48,7 @@ func main() {
inputDataRoot := flag.String("input-data-root", "", inputDataRoot := flag.String("input-data-root", "",
"Input data root, enables batch mode, looks for files matching --input option") "Input data root, enables batch mode, looks for files matching --input option")
slow := flag.Bool("slow", false, slow := flag.Bool("slow", false,
"Enables stuff that takes a long time (e.g. markov chain strategies).") "Enables strategies that takes a long time (e.g. markov chain strategies).")
skipFailedCmds := flag.Bool("skip-failed-cmds", false, skipFailedCmds := flag.Bool("skip-failed-cmds", false,
"Skips records with non-zero exit status.") "Skips records with non-zero exit status.")
debugRecords := flag.Float64("debug", 0, "Debug records - percentage of records that should be debugged.") debugRecords := flag.Float64("debug", 0, "Debug records - percentage of records that should be debugged.")
@ -96,32 +95,33 @@ func main() {
} }
} }
var strategies []strategy var simpleStrategies []ISimpleStrategy
var strategies []IStrategy
// dummy := strategyDummy{} // dummy := strategyDummy{}
// strategies = append(strategies, &dummy) // simpleStrategies = append(simpleStrategies, &dummy)
strategies = append(strategies, &strategyRecent{}) simpleStrategies = append(simpleStrategies, &strategyRecent{})
frequent := strategyFrequent{} // frequent := strategyFrequent{}
frequent.init() // frequent.init()
strategies = append(strategies, &frequent) // simpleStrategies = append(simpleStrategies, &frequent)
random := strategyRandom{candidatesSize: maxCandidates} // random := strategyRandom{candidatesSize: maxCandidates}
random.init() // random.init()
strategies = append(strategies, &random) // simpleStrategies = append(simpleStrategies, &random)
directory := strategyDirectorySensitive{} directory := strategyDirectorySensitive{}
directory.init() directory.init()
strategies = append(strategies, &directory) simpleStrategies = append(simpleStrategies, &directory)
dynamicDist := strategyDynamicRecordDistance{ dynamicDistG := strategyDynamicRecordDistance{
maxDepth: 3000, maxDepth: 3000,
distParams: records.DistParams{Pwd: 10, RealPwd: 10, SessionID: 1, Time: 1}, distParams: records.DistParams{Pwd: 10, RealPwd: 10, SessionID: 1, Time: 1, Git: 10},
label: "10*pwd,10*realpwd,session,time", label: "10*pwd,10*realpwd,session,time,10*git",
} }
dynamicDist.init() dynamicDistG.init()
strategies = append(strategies, &dynamicDist) strategies = append(strategies, &dynamicDistG)
distanceStaticBest := strategyRecordDistance{ distanceStaticBest := strategyRecordDistance{
maxDepth: 3000, maxDepth: 3000,
@ -130,6 +130,10 @@ func main() {
} }
strategies = append(strategies, &distanceStaticBest) strategies = append(strategies, &distanceStaticBest)
recentBash := strategyRecentBash{}
recentBash.init()
strategies = append(strategies, &recentBash)
if *slow { if *slow {
markovCmd := strategyMarkovChainCmd{order: 1} markovCmd := strategyMarkovChainCmd{order: 1}
@ -144,7 +148,11 @@ func main() {
markov2 := strategyMarkovChain{order: 2} markov2 := strategyMarkovChain{order: 2}
markov2.init() markov2.init()
strategies = append(strategies, &markovCmd2, &markovCmd, &markov2, &markov) simpleStrategies = append(simpleStrategies, &markovCmd2, &markovCmd, &markov2, &markov)
}
for _, strat := range simpleStrategies {
strategies = append(strategies, NewSimpleStrategyWrapper(strat))
} }
for _, strat := range strategies { for _, strat := range strategies {
@ -157,13 +165,45 @@ func main() {
evaluator.calculateStatsAndPlot(*plottingScript) evaluator.calculateStatsAndPlot(*plottingScript)
} }
type strategy interface { type ISimpleStrategy interface {
GetTitleAndDescription() (string, string) GetTitleAndDescription() (string, string)
GetCandidates() []string GetCandidates() []string
AddHistoryRecord(record *records.EnrichedRecord) error AddHistoryRecord(record *records.EnrichedRecord) error
ResetHistory() error ResetHistory() error
} }
// IStrategy is the interface the evaluator works with: evaluate() accepts an
// IStrategy and asks it for candidates before each evaluated record.
type IStrategy interface {
// GetTitleAndDescription returns the strategy title followed by its description
// (the evaluator logs them and stores them in the strategy results).
GetTitleAndDescription() (string, string)
// GetCandidates returns the commands suggested for the given record.
// The evaluator passes records.Stripped(record), i.e. the record without the
// command line and the other information unknown at prediction time.
GetCandidates(r records.EnrichedRecord) []string
// AddHistoryRecord feeds a record into the strategy's history.
// NOTE(review): presumably called after the record has been evaluated - confirm against evaluate().
AddHistoryRecord(record *records.EnrichedRecord) error
// ResetHistory drops the history accumulated so far.
// NOTE(review): presumably called between independent record sets - confirm against evaluate().
ResetHistory() error
}
// simpleStrategyWrapper adapts an ISimpleStrategy (whose GetCandidates takes no
// arguments) to the IStrategy interface.
type simpleStrategyWrapper struct {
// strategy is the wrapped simple strategy every call is forwarded to.
strategy ISimpleStrategy
}
// NewSimpleStrategyWrapper wraps the given ISimpleStrategy and returns a pointer to
// the wrapper; the wrapper's method set matches IStrategy, so the result can be
// used wherever an IStrategy is expected.
func NewSimpleStrategyWrapper(strategy ISimpleStrategy) *simpleStrategyWrapper {
return &simpleStrategyWrapper{strategy: strategy}
}
// GetTitleAndDescription forwards to the wrapped strategy and returns its
// title and description unchanged.
func (s *simpleStrategyWrapper) GetTitleAndDescription() (string, string) {
return s.strategy.GetTitleAndDescription()
}
// GetCandidates returns the candidates of the wrapped strategy.
// The record r is ignored: ISimpleStrategy.GetCandidates takes no arguments.
func (s *simpleStrategyWrapper) GetCandidates(r records.EnrichedRecord) []string {
return s.strategy.GetCandidates()
}
// AddHistoryRecord forwards the record to the wrapped strategy and returns
// whatever error the wrapped strategy reports.
func (s *simpleStrategyWrapper) AddHistoryRecord(r *records.EnrichedRecord) error {
return s.strategy.AddHistoryRecord(r)
}
// ResetHistory forwards the reset to the wrapped strategy and returns
// whatever error the wrapped strategy reports.
func (s *simpleStrategyWrapper) ResetHistory() error {
return s.strategy.ResetHistory()
}
type matchJSON struct { type matchJSON struct {
Match bool Match bool
Distance int Distance int
@ -209,7 +249,7 @@ type evaluator struct {
func (e *evaluator) initBatchMode(input string, inputDataRoot string) error { func (e *evaluator) initBatchMode(input string, inputDataRoot string) error {
e.UsersRecords = e.loadHistoryRecordsBatchMode(input, inputDataRoot) e.UsersRecords = e.loadHistoryRecordsBatchMode(input, inputDataRoot)
e.processRecords() e.preprocessRecords()
return nil return nil
} }
@ -219,7 +259,7 @@ func (e *evaluator) init(inputPath string) error {
user := userRecords{} user := userRecords{}
user.Devices = append(user.Devices, device) user.Devices = append(user.Devices, device)
e.UsersRecords = append(e.UsersRecords, user) e.UsersRecords = append(e.UsersRecords, user)
e.processRecords() e.preprocessRecords()
return nil return nil
} }
@ -241,44 +281,61 @@ func (e *evaluator) calculateStatsAndPlot(scriptName string) {
} }
} }
// enrich records and add them to serializable structure func (e *evaluator) preprocessDeviceRecords(device deviceRecords) deviceRecords {
func (e *evaluator) processRecords() { sessionIDs := map[string]uint64{}
for i := range e.UsersRecords { var nextID uint64
for j, device := range e.UsersRecords[i].Devices { nextID = 1 // start with 1 because 0 won't get saved to json
sessionIDs := map[string]uint64{} for k, record := range device.Records {
var nextID uint64 id, found := sessionIDs[record.SessionID]
nextID = 1 // start with 1 because 0 won't get saved to json if found == false {
for k, record := range e.UsersRecords[i].Devices[j].Records { id = nextID
id, found := sessionIDs[record.SessionID] sessionIDs[record.SessionID] = id
if found == false { nextID++
id = nextID }
sessionIDs[record.SessionID] = id device.Records[k].SeqSessionID = id
nextID++ // assert
} if record.Sanitized != e.sanitizedInput {
e.UsersRecords[i].Devices[j].Records[k].SeqSessionID = id if e.sanitizedInput {
// assert log.Fatal("ASSERT failed: '--sanitized-input' is present but data is not sanitized")
if record.Sanitized != e.sanitizedInput {
if e.sanitizedInput {
log.Fatal("ASSERT failed: '--sanitized-input' is present but data is not sanitized")
}
log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present")
}
e.UsersRecords[i].Devices[j].Records[k].SeqSessionID = id
if e.debugRecords > 0 && rand.Float64() < e.debugRecords {
e.UsersRecords[i].Devices[j].Records[k].DebugThisRecord = true
}
} }
sort.SliceStable(e.UsersRecords[i].Devices[j].Records, func(x, y int) bool { log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present")
if device.Records[x].SeqSessionID == device.Records[y].SeqSessionID { }
return device.Records[x].RealtimeAfterLocal < device.Records[y].RealtimeAfterLocal device.Records[k].SeqSessionID = id
} if e.debugRecords > 0 && rand.Float64() < e.debugRecords {
return device.Records[x].SeqSessionID < device.Records[y].SeqSessionID device.Records[k].DebugThisRecord = true
}) }
}
// sort.SliceStable(device.Records, func(x, y int) bool {
// if device.Records[x].SeqSessionID == device.Records[y].SeqSessionID {
// return device.Records[x].RealtimeAfterLocal < device.Records[y].RealtimeAfterLocal
// }
// return device.Records[x].SeqSessionID < device.Records[y].SeqSessionID
// })
// iterate from back and mark last record of each session
sessionIDSet := map[string]bool{}
for i := len(device.Records) - 1; i >= 0; i-- {
var record *records.EnrichedRecord
record = &device.Records[i]
if sessionIDSet[record.SessionID] {
continue
}
sessionIDSet[record.SessionID] = true
record.LastRecordOfSession = true
}
return device
}
// enrich records and add sequential session ID
func (e *evaluator) preprocessRecords() {
for i := range e.UsersRecords {
for j := range e.UsersRecords[i].Devices {
e.UsersRecords[i].Devices[j] = e.preprocessDeviceRecords(e.UsersRecords[i].Devices[j])
} }
} }
} }
func (e *evaluator) evaluate(strategy strategy) error { func (e *evaluator) evaluate(strategy IStrategy) error {
title, description := strategy.GetTitleAndDescription() title, description := strategy.GetTitleAndDescription()
log.Println("Evaluating strategy:", title, "-", description) log.Println("Evaluating strategy:", title, "-", description)
strategyData := strategyJSON{Title: title, Description: description} strategyData := strategyJSON{Title: title, Description: description}
@ -290,7 +347,7 @@ func (e *evaluator) evaluate(strategy strategy) error {
if e.skipFailedCmds && record.ExitCode != 0 { if e.skipFailedCmds && record.ExitCode != 0 {
continue continue
} }
candidates := strategy.GetCandidates() candidates := strategy.GetCandidates(records.Stripped(record))
if record.DebugThisRecord { if record.DebugThisRecord {
log.Println() log.Println()
log.Println("===================================================") log.Println("===================================================")

@ -9,12 +9,13 @@ import (
) )
type strategyDynamicRecordDistance struct { type strategyDynamicRecordDistance struct {
history []records.EnrichedRecord history []records.EnrichedRecord
distParams records.DistParams distParams records.DistParams
pwdHistogram map[string]int pwdHistogram map[string]int
realPwdHistogram map[string]int realPwdHistogram map[string]int
maxDepth int gitOriginHistogram map[string]int
label string maxDepth int
label string
} }
type strDynDistEntry struct { type strDynDistEntry struct {
@ -26,6 +27,7 @@ func (s *strategyDynamicRecordDistance) init() {
s.history = nil s.history = nil
s.pwdHistogram = map[string]int{} s.pwdHistogram = map[string]int{}
s.realPwdHistogram = map[string]int{} s.realPwdHistogram = map[string]int{}
s.gitOriginHistogram = map[string]int{}
} }
func (s *strategyDynamicRecordDistance) GetTitleAndDescription() (string, string) { func (s *strategyDynamicRecordDistance) GetTitleAndDescription() (string, string) {
@ -36,26 +38,23 @@ func (s *strategyDynamicRecordDistance) idf(count int) float64 {
return math.Log(float64(len(s.history)) / float64(count)) return math.Log(float64(len(s.history)) / float64(count))
} }
func (s *strategyDynamicRecordDistance) GetCandidates() []string { func (s *strategyDynamicRecordDistance) GetCandidates(strippedRecord records.EnrichedRecord) []string {
if len(s.history) == 0 { if len(s.history) == 0 {
return nil return nil
} }
var prevRecord records.EnrichedRecord
prevRecord = s.history[0]
prevRecord.SetCmdLine("")
prevRecord.SetBeforeToAfter()
var mapItems []strDynDistEntry var mapItems []strDynDistEntry
for i, record := range s.history { for i, record := range s.history {
if s.maxDepth != 0 && i > s.maxDepth { if s.maxDepth != 0 && i > s.maxDepth {
break break
} }
distParams := records.DistParams{ distParams := records.DistParams{
Pwd: s.distParams.Pwd * s.idf(s.pwdHistogram[prevRecord.PwdAfter]), Pwd: s.distParams.Pwd * s.idf(s.pwdHistogram[strippedRecord.PwdAfter]),
RealPwd: s.distParams.RealPwd * s.idf(s.realPwdHistogram[prevRecord.RealPwdAfter]), RealPwd: s.distParams.RealPwd * s.idf(s.realPwdHistogram[strippedRecord.RealPwdAfter]),
Git: s.distParams.Git * s.idf(s.gitOriginHistogram[strippedRecord.GitOriginRemote]),
Time: s.distParams.Time, Time: s.distParams.Time,
SessionID: s.distParams.SessionID, SessionID: s.distParams.SessionID,
} }
distance := record.DistanceTo(prevRecord, distParams) distance := record.DistanceTo(strippedRecord, distParams)
mapItems = append(mapItems, strDynDistEntry{record.CmdLine, distance}) mapItems = append(mapItems, strDynDistEntry{record.CmdLine, distance})
} }
sort.SliceStable(mapItems, func(i int, j int) bool { return mapItems[i].distance < mapItems[j].distance }) sort.SliceStable(mapItems, func(i int, j int) bool { return mapItems[i].distance < mapItems[j].distance })
@ -76,6 +75,7 @@ func (s *strategyDynamicRecordDistance) AddHistoryRecord(record *records.Enriche
s.history = append([]records.EnrichedRecord{*record}, s.history...) s.history = append([]records.EnrichedRecord{*record}, s.history...)
s.pwdHistogram[record.Pwd]++ s.pwdHistogram[record.Pwd]++
s.realPwdHistogram[record.RealPwd]++ s.realPwdHistogram[record.RealPwd]++
s.gitOriginHistogram[record.GitOriginRemote]++
return nil return nil
} }

@ -0,0 +1,50 @@
package main
import "github.com/curusarn/resh/pkg/records"
// strategyRecentBash recommends commands the way bash history does: each
// session keeps its own in-memory history and additionally sees a snapshot of
// the shared history file, which only receives a session's commands once that
// session has ended.
type strategyRecentBash struct {
	// histfile is the shared history file, newest command first.
	histfile []string
	// histfileSnapshot maps a session ID to the histfile contents that session sees.
	histfileSnapshot map[string][]string
	// history maps a session ID to the commands of that session, newest first.
	history map[string][]string
}

// init puts the strategy into its empty state. It is also the reset path used
// by ResetHistory, so it has to clear histfile as well - otherwise commands
// written to the history file during one run leak into the next one.
func (s *strategyRecentBash) init() {
	s.histfile = nil
	s.histfileSnapshot = map[string][]string{}
	s.history = map[string][]string{}
}
// GetTitleAndDescription returns the fixed title and description of the
// bash-like recent strategy (title first, description second).
func (s *strategyRecentBash) GetTitleAndDescription() (string, string) {
	const (
		title       = "recent (bash-like)"
		description = "Behave like bash"
	)
	return title, description
}
// GetCandidates returns the commands visible to the session of strippedRecord:
// first the session's own history (newest first), then the snapshot of the
// history file that the session took the first time it asked for candidates.
//
// The returned slice is freshly allocated, so callers may modify it without
// affecting the strategy's internal state.
func (s *strategyRecentBash) GetCandidates(strippedRecord records.EnrichedRecord) []string {
	sessionID := strippedRecord.SessionID

	// Snapshot the histfile exactly once per session. Key presence is checked
	// (not nil-ness): a session that started while the histfile was still empty
	// holds a nil snapshot and must not pick up commands that other sessions
	// write to the histfile later on.
	snapshot, loaded := s.histfileSnapshot[sessionID]
	if !loaded {
		snapshot = s.histfile
		s.histfileSnapshot[sessionID] = snapshot
	}

	sessionHistory := s.history[sessionID]
	total := len(sessionHistory) + len(snapshot)
	if total == 0 {
		return nil
	}
	// Copy into a new slice instead of appending onto the stored history:
	// appending could reuse the history's spare capacity and hand out a slice
	// that shares its backing array.
	candidates := make([]string, 0, total)
	candidates = append(candidates, sessionHistory...)
	candidates = append(candidates, snapshot...)
	return candidates
}
// AddHistoryRecord puts the record's command line at the front of its
// session's history, dropping any earlier occurrence of the same command line.
// When the record is the last one of its session, the session history is
// prepended to the shared histfile and the per-session state is cleared.
// It always returns nil.
func (s *strategyRecentBash) AddHistoryRecord(record *records.EnrichedRecord) error {
	sessionID := record.SessionID
	previous := s.history[sessionID]

	// Build the updated history in a fresh slice: new command first, followed
	// by the previous entries minus any occurrence of the same command.
	// (Deleting from the slice while ranging over it would keep iterating over
	// shifted and stale elements and can panic on an out-of-range slice.)
	updated := make([]string, 0, len(previous)+1)
	updated = append(updated, record.CmdLine)
	for _, cmd := range previous {
		if cmd != record.CmdLine {
			updated = append(updated, cmd)
		}
	}
	s.history[sessionID] = updated

	if record.LastRecordOfSession {
		// Session ended: write its history in front of the histfile. A new
		// slice is allocated so that snapshots still referencing the old
		// histfile contents are left untouched.
		merged := make([]string, 0, len(updated)+len(s.histfile))
		merged = append(merged, updated...)
		merged = append(merged, s.histfile...)
		s.histfile = merged
		// clear the per-session state
		s.histfileSnapshot[sessionID] = nil
		s.history[sessionID] = nil
	}
	return nil
}
// ResetHistory resets the strategy by re-running init.
// It always returns nil.
func (s *strategyRecentBash) ResetHistory() error {
s.init()
return nil
}

@ -27,20 +27,16 @@ func (s *strategyRecordDistance) GetTitleAndDescription() (string, string) {
return "record distance (depth:" + strconv.Itoa(s.maxDepth) + ";" + s.label + ")", "Use record distance to recommend commands" return "record distance (depth:" + strconv.Itoa(s.maxDepth) + ";" + s.label + ")", "Use record distance to recommend commands"
} }
func (s *strategyRecordDistance) GetCandidates() []string { func (s *strategyRecordDistance) GetCandidates(strippedRecord records.EnrichedRecord) []string {
if len(s.history) == 0 { if len(s.history) == 0 {
return nil return nil
} }
var prevRecord records.EnrichedRecord
prevRecord = s.history[0]
prevRecord.SetCmdLine("")
prevRecord.SetBeforeToAfter()
var mapItems []strDistEntry var mapItems []strDistEntry
for i, record := range s.history { for i, record := range s.history {
if s.maxDepth != 0 && i > s.maxDepth { if s.maxDepth != 0 && i > s.maxDepth {
break break
} }
distance := record.DistanceTo(prevRecord, s.distParams) distance := record.DistanceTo(strippedRecord, s.distParams)
mapItems = append(mapItems, strDistEntry{record.CmdLine, distance}) mapItems = append(mapItems, strDistEntry{record.CmdLine, distance})
} }
sort.SliceStable(mapItems, func(i int, j int) bool { return mapItems[i].distance < mapItems[j].distance }) sort.SliceStable(mapItems, func(i int, j int) bool { return mapItems[i].distance < mapItems[j].distance })

@ -0,0 +1,6 @@
package cfg
// Config is the structure the TOML config file is decoded into
// (the collect and daemon commands pass it to toml.DecodeFile).
type Config struct {
// Port is a port number read from the config file.
// NOTE(review): presumably the port the daemon listens on - confirm against the daemon code.
Port int
}

@ -89,12 +89,13 @@ type EnrichedRecord struct {
Record Record
// enriching fields - added "later" // enriching fields - added "later"
Command string `json:"command"` Command string `json:"command"`
FirstWord string `json:"firstWord"` FirstWord string `json:"firstWord"`
Invalid bool `json:"invalid"` Invalid bool `json:"invalid"`
SeqSessionID uint64 `json:"seqSessionId"` SeqSessionID uint64 `json:"seqSessionId"`
DebugThisRecord bool `json:"debugThisRecord"` LastRecordOfSession bool `json:"lastRecordOfSession"`
Errors []string `json:"errors"` DebugThisRecord bool `json:"debugThisRecord"`
Errors []string `json:"errors"`
// SeqSessionID uint64 `json:"seqSessionId,omitempty"` // SeqSessionID uint64 `json:"seqSessionId,omitempty"`
} }
@ -213,6 +214,22 @@ func (r *EnrichedRecord) SetCmdLine(cmdLine string) {
} }
} }
// Stripped returns record stripped of all info that is not available during prediction
//
// The record is received by value, so the caller's record is not modified by
// the field assignments below; the modified copy is returned.
func Stripped(r EnrichedRecord) EnrichedRecord {
// clear the cmd itself
r.SetCmdLine("")
// replace after info with before info
// (the "after" values are only known once the command has finished)
r.PwdAfter = r.Pwd
r.RealPwdAfter = r.RealPwd
r.TimezoneAfter = r.TimezoneBefore
r.RealtimeAfter = r.RealtimeBefore
r.RealtimeAfterLocal = r.RealtimeBeforeLocal
// clear some more stuff
// (the duration and the last-record-of-session flag are likewise set to their zero values)
r.RealtimeDuration = 0
r.LastRecordOfSession = false
return r
}
// SetBeforeToAfter - set "before" members to "after" members // SetBeforeToAfter - set "before" members to "after" members
func (r *EnrichedRecord) SetBeforeToAfter() { func (r *EnrichedRecord) SetBeforeToAfter() {
r.Pwd = r.PwdAfter r.Pwd = r.PwdAfter
@ -375,8 +392,3 @@ func (r *EnrichedRecord) DistanceTo(r2 EnrichedRecord, p DistParams) float64 {
return dist return dist
} }
// Config struct
type Config struct {
Port int
}

Loading…
Cancel
Save