diff --git a/common/resh-common.go b/common/resh-common.go index 908cf9b..7f1d4a6 100644 --- a/common/resh-common.go +++ b/common/resh-common.go @@ -3,6 +3,7 @@ package common import ( "log" "strconv" + "strings" "github.com/mattn/go-shellwords" ) @@ -85,6 +86,7 @@ type EnrichedRecord struct { Record // enriching fields - added "later" + Command string `json:"command"` FirstWord string `json:"firstWord"` Invalid bool `json:"invalid"` SeqSessionID uint64 `json:"seqSessionId"` @@ -114,7 +116,7 @@ func ConvertRecord(r *FallbackRecord) Record { func (r Record) Enrich() EnrichedRecord { record := EnrichedRecord{Record: r} // Get command/first word from commandline - record.FirstWord = GetCommandFromCommandLine(r.CmdLine) + record.Command, record.FirstWord = GetCommandAndFirstWord(r.CmdLine) err := r.Validate() if err != nil { log.Println("Invalid command:", r.CmdLine) @@ -129,17 +131,28 @@ func (r *Record) Validate() error { return nil } -// GetCommandFromCommandLine func -func GetCommandFromCommandLine(cmdLine string) string { +// GetCommandAndFirstWord func +func GetCommandAndFirstWord(cmdLine string) (string, string) { args, err := shellwords.Parse(cmdLine) if err != nil { log.Println("shellwords Error:", err, " (cmdLine: <", cmdLine, "> )") - return "" + return "", "" } - if len(args) > 0 { - return args[0] + if len(args) == 0 { + return "", "" } - return "" + i := 0 + for true { + // commands in shell sometimes look like this `variable=something command argument otherArgument --option` + // to get the command we skip over tokens that contain '=' + if strings.ContainsRune(args[i], '=') && len(args) > i+1 { + i++ + continue + } + return args[i], args[0] + } + log.Fatal("GetCommandAndFirstWord error: this should not happen!") + return "ERROR", "ERROR" } // Config struct diff --git a/evaluate/resh-evaluate-plot.py b/evaluate/resh-evaluate-plot.py index f652ec5..bb4eb3e 100755 --- a/evaluate/resh-evaluate-plot.py +++ b/evaluate/resh-evaluate-plot.py @@ -80,7 +80,7 @@ def plot_cmdLineFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False): def plot_cmdFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False): cmd_count = defaultdict(int) for record in DATA_records: - cmd = record["firstWord"] + cmd = record["command"] if cmd == "": continue cmd_count[cmd] += 1 @@ -110,7 +110,7 @@ def plot_cmdVocabularySize_cmdLinesEntered(): cmd_vocabulary = set() y_cmd_count = [0] for record in DATA_records: - cmd = record["firstWord"] + cmd = record["command"] if cmd in cmd_vocabulary: # repeat last value y_cmd_count.append(y_cmd_count[-1]) @@ -173,7 +173,7 @@ def graph_cmdSequences(node_count=33, edge_minValue=0.05, view_graph=True): cmd_count[START_CMD] += 1 prev_cmd = START_CMD for record in session: - cmd = record["firstWord"] + cmd = record["command"] cmdSeq_count[prev_cmd][cmd] += 1 cmd_count[cmd] += 1 if cmd not in cmd_id: @@ -347,7 +347,6 @@ def plot_strategies_matches(plot_size=50, selected_strategies=[]): plt.show() - def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]): plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) plt.title("Average characters recalled at distance") @@ -419,6 +418,81 @@ def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]): plt.show() +def plot_strategies_charsRecalled_prefix(plot_size=50, selected_strategies=[]): + plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) + plt.title("Average characters recalled at distance (including prefix matches)") + plt.ylabel("Average characters recalled (including prefix matches)") + plt.xlabel("Distance") + x_values = range(1, plot_size+1) + legend = [] + saved_charsRecalled_total = None + saved_dataPoint_count = None + for strategy in data["Strategies"]: + strategy_title = strategy["Title"] + # strategy_description = strategy["Description"] + + dataPoint_count = 0 + matches_total = 0 + charsRecalled = [0] * plot_size + charsRecalled_total = 0 + + for multiMatch in strategy["PrefixMatches"]: + dataPoint_count += 1 + + if not multiMatch["Match"]: + continue + matches_total += 1 + + last_charsRecalled = 0 + for match in multiMatch["Entries"]: + + chars = match["CharsRecalled"] + charsIncrease = chars - last_charsRecalled + assert(charsIncrease > 0) + charsRecalled_total += charsIncrease + + dist = match["Distance"] + if dist > plot_size: + continue + + charsRecalled[dist-1] += charsIncrease + last_charsRecalled = chars + + # recent is very simple strategy so we will believe + # that there is no bug in it and we can use it to determine total + if strategy_title == "recent": + saved_charsRecalled_total = charsRecalled_total + saved_dataPoint_count = dataPoint_count + + if len(selected_strategies) and strategy_title not in selected_strategies: + continue + + acc = 0 + charsRecalled_cumulative = [] + for x in charsRecalled: + acc += x + charsRecalled_cumulative.append(acc) + charsRecalled_average = list(map(lambda x: x / dataPoint_count, charsRecalled_cumulative)) + + plt.plot(x_values, charsRecalled_average, 'o-') + legend.append(strategy_title) + + assert(saved_charsRecalled_total is not None) + assert(saved_dataPoint_count is not None) + max_values = [saved_charsRecalled_total / saved_dataPoint_count] * len(x_values) + plt.plot(x_values, max_values, 'r-') + legend.append("maximum possible") + + x_ticks = list(range(1, plot_size+1, 2)) + x_labels = x_ticks[:] + plt.xticks(x_ticks, x_labels) + plt.legend(legend, loc="best") + if async_draw: + plt.draw() + else: + plt.show() + + plot_cmdLineFrq_rank() plot_cmdFrq_rank() @@ -427,6 +501,7 @@ plot_cmdVocabularySize_cmdLinesEntered() plot_strategies_matches(20) plot_strategies_charsRecalled(20) +plot_strategies_charsRecalled_prefix(20) graph_cmdSequences(node_count=33, edge_minValue=0.048) @@ -437,6 +512,7 @@ graph_cmdSequences(node_count=33, edge_minValue=0.048) # e *= 0.001 # graph_cmdSequences(node_count=n, edge_minValue=e, view_graph=False) +# be careful and check if labels fit the display + if async_draw: plt.show() -# be careful and check if labels fit the display \ No newline at end of file diff --git a/evaluate/resh-evaluate.go b/evaluate/resh-evaluate.go index ce04081..39ddbb6 100644 --- a/evaluate/resh-evaluate.go +++ b/evaluate/resh-evaluate.go @@ -15,6 +15,8 @@ import ( "sort" "github.com/curusarn/resh/common" + "github.com/jpillora/longestcommon" + "github.com/schollz/progressbar" ) // Version from git set during build @@ -24,6 +26,8 @@ var Version string var Revision string func main() { + const maxCandidates = 50 + usr, _ := user.Current() dir := usr.HomeDir historyPath := filepath.Join(dir, ".resh_history.json") @@ -42,6 +46,8 @@ func main() { plottingScript := flag.String("plotting-script", "resh-evaluate-plot.py", "Script to use for plotting") inputDataRoot := flag.String("input-data-root", "", "Input data root, enables batch mode, looks for files matching --input option") + slow := flag.Bool("slow", false, + "Enables stuff that takes a long time (e.g. markov chain strategies).") flag.Parse() @@ -71,7 +77,7 @@ func main() { } } - evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: 50, BatchMode: batchMode} + evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: maxCandidates, BatchMode: batchMode} if batchMode { err := evaluator.initBatchMode(*input, *inputDataRoot) if err != nil { @@ -94,8 +100,26 @@ func main() { frequent.init() directory := strategyDirectorySensitive{} directory.init() + random := strategyRandom{candidatesSize: maxCandidates} + random.init() + + markovCmd := strategyMarkovChainCmd{order: 1} + markovCmd.init() + + markovCmd2 := strategyMarkovChainCmd{order: 2} + markovCmd2.init() + + markov := strategyMarkovChain{order: 1} + markov.init() - strategies = append(strategies, &recent, &frequent, &directory) + markov2 := strategyMarkovChain{order: 2} + markov2.init() + + strategies = append(strategies, &recent, &frequent, &directory, &random) + + if *slow { + strategies = append(strategies, &markovCmd2, &markovCmd, &markov2, &markov) + } for _, strat := range strategies { err := evaluator.evaluate(strat) @@ -120,10 +144,21 @@ type matchJSON struct { CharsRecalled int } +type multiMatchItemJSON struct { + Distance int + CharsRecalled int +} + +type multiMatchJSON struct { + Match bool + Entries []multiMatchItemJSON +} + type strategyJSON struct { - Title string - Description string - Matches []matchJSON + Title string + Description string + Matches []matchJSON + PrefixMatches []multiMatchJSON } type deviceRecords struct { @@ -213,35 +248,50 @@ func (e *evaluator) processRecords() { func (e *evaluator) evaluate(strategy strategy) error { title, description := strategy.GetTitleAndDescription() + log.Println("Evaluating strategy:", title, "-", description) strategyData := strategyJSON{Title: title, Description: description} for i := range e.UsersRecords { for j := range e.UsersRecords[i].Devices { + bar := progressbar.New(len(e.UsersRecords[i].Devices[j].Records)) for _, record := range e.UsersRecords[i].Devices[j].Records { candidates := strategy.GetCandidates() matchFound := false + longestPrefixMatchLength := 0 + multiMatch := multiMatchJSON{} for i, candidate := range candidates { // make an option (--calculate-total) to turn this on/off ? // if i >= e.maxCandidates { // break // } + commonPrefixLength := len(longestcommon.Prefix([]string{candidate, record.CmdLine})) + if commonPrefixLength > longestPrefixMatchLength { + longestPrefixMatchLength = commonPrefixLength + prefixMatch := multiMatchItemJSON{Distance: i + 1, CharsRecalled: commonPrefixLength} + multiMatch.Match = true + multiMatch.Entries = append(multiMatch.Entries, prefixMatch) + } if candidate == record.CmdLine { match := matchJSON{Match: true, Distance: i + 1, CharsRecalled: record.CmdLength} - strategyData.Matches = append(strategyData.Matches, match) matchFound = true + strategyData.Matches = append(strategyData.Matches, match) + strategyData.PrefixMatches = append(strategyData.PrefixMatches, multiMatch) break } } if matchFound == false { strategyData.Matches = append(strategyData.Matches, matchJSON{}) + strategyData.PrefixMatches = append(strategyData.PrefixMatches, multiMatch) } err := strategy.AddHistoryRecord(&record) if err != nil { log.Println("Error while evauating", err) return err } + bar.Add(1) } strategy.ResetHistory() + fmt.Println() } } e.Strategies = append(e.Strategies, strategyData) diff --git a/evaluate/strategy-directory-sensitive.go b/evaluate/strategy-directory-sensitive.go index 2915fc3..f1ae106 100644 --- a/evaluate/strategy-directory-sensitive.go +++ b/evaluate/strategy-directory-sensitive.go @@ -1,8 +1,6 @@ package main -import ( - "github.com/curusarn/resh/common" -) +import "github.com/curusarn/resh/common" type strategyDirectorySensitive struct { history map[string][]string diff --git a/evaluate/strategy-frequent.go b/evaluate/strategy-frequent.go index b88ab91..5480779 100644 --- a/evaluate/strategy-frequent.go +++ b/evaluate/strategy-frequent.go @@ -42,6 +42,6 @@ func (s *strategyFrequent) AddHistoryRecord(record *common.EnrichedRecord) error } func (s *strategyFrequent) ResetHistory() error { - s.history = map[string]int{} + s.init() return nil } diff --git a/evaluate/strategy-markov-chain-cmd.go b/evaluate/strategy-markov-chain-cmd.go new file mode 100644 index 0000000..ec7a230 --- /dev/null +++ b/evaluate/strategy-markov-chain-cmd.go @@ -0,0 +1,91 @@ +package main + +import ( + "sort" + "strconv" + + "github.com/curusarn/resh/common" + "github.com/mb-14/gomarkov" +) + +type strategyMarkovChainCmd struct { + order int + history []strMarkCmdHistoryEntry + historyCmds []string +} + +type strMarkCmdHistoryEntry struct { + cmd string + cmdLine string +} + +type strMarkCmdEntry struct { + cmd string + transProb float64 +} + +func (s *strategyMarkovChainCmd) init() { + s.history = nil + s.historyCmds = nil +} + +func (s *strategyMarkovChainCmd) GetTitleAndDescription() (string, string) { + return "command-based markov chain (order " + strconv.Itoa(s.order) + ")", "Use command-based markov chain to recommend commands" +} + +func (s *strategyMarkovChainCmd) GetCandidates() []string { + if len(s.history) < s.order { + var hist []string + for _, item := range s.history { + hist = append(hist, item.cmdLine) + } + return hist + } + chain := gomarkov.NewChain(s.order) + + chain.Add(s.historyCmds) + + cmdsSet := map[string]bool{} + var entries []strMarkCmdEntry + for _, cmd := range s.historyCmds { + if cmdsSet[cmd] { + continue + } + cmdsSet[cmd] = true + prob, _ := chain.TransitionProbability(cmd, s.historyCmds[len(s.historyCmds)-s.order:]) + entries = append(entries, strMarkCmdEntry{cmd: cmd, transProb: prob}) + } + sort.Slice(entries, func(i int, j int) bool { return entries[i].transProb > entries[j].transProb }) + var hist []string + histSet := map[string]bool{} + for i := len(s.history) - 1; i >= 0; i-- { + if histSet[s.history[i].cmdLine] { + continue + } + histSet[s.history[i].cmdLine] = true + if s.history[i].cmd == entries[0].cmd { + hist = append(hist, s.history[i].cmdLine) + } + } + // log.Println("################") + // log.Println(s.history[len(s.history)-s.order:]) + // log.Println(" -> ") + // x := math.Min(float64(len(hist)), 3) + // log.Println(entries[:int(x)]) + // x = math.Min(float64(len(hist)), 5) + // log.Println(hist[:int(x)]) + // log.Println("################") + return hist +} + +func (s *strategyMarkovChainCmd) AddHistoryRecord(record *common.EnrichedRecord) error { + s.history = append(s.history, strMarkCmdHistoryEntry{cmdLine: record.CmdLine, cmd: record.Command}) + s.historyCmds = append(s.historyCmds, record.Command) + // s.historySet[record.CmdLine] = true + return nil +} + +func (s *strategyMarkovChainCmd) ResetHistory() error { + s.init() + return nil +} diff --git a/evaluate/strategy-markov-chain.go b/evaluate/strategy-markov-chain.go new file mode 100644 index 0000000..adabbb4 --- /dev/null +++ b/evaluate/strategy-markov-chain.go @@ -0,0 +1,70 @@ +package main + +import ( + "sort" + "strconv" + + "github.com/curusarn/resh/common" + "github.com/mb-14/gomarkov" +) + +type strategyMarkovChain struct { + order int + history []string +} + +type strMarkEntry struct { + cmdLine string + transProb float64 +} + +func (s *strategyMarkovChain) init() { + s.history = nil +} + +func (s *strategyMarkovChain) GetTitleAndDescription() (string, string) { + return "markov chain (order " + strconv.Itoa(s.order) + ")", "Use markov chain to recommend commands" +} + +func (s *strategyMarkovChain) GetCandidates() []string { + if len(s.history) < s.order { + return s.history + } + chain := gomarkov.NewChain(s.order) + + chain.Add(s.history) + + cmdLinesSet := map[string]bool{} + var entries []strMarkEntry + for _, cmdLine := range s.history { + if cmdLinesSet[cmdLine] { + continue + } + cmdLinesSet[cmdLine] = true + prob, _ := chain.TransitionProbability(cmdLine, s.history[len(s.history)-s.order:]) + entries = append(entries, strMarkEntry{cmdLine: cmdLine, transProb: prob}) + } + sort.Slice(entries, func(i int, j int) bool { return entries[i].transProb > entries[j].transProb }) + var hist []string + for _, item := range entries { + hist = append(hist, item.cmdLine) + } + // log.Println("################") + // log.Println(s.history[len(s.history)-s.order:]) + // log.Println(" -> ") + // x := math.Min(float64(len(hist)), 5) + // log.Println(hist[:int(x)]) + // log.Println("################") + return hist +} + +func (s *strategyMarkovChain) AddHistoryRecord(record *common.EnrichedRecord) error { + s.history = append(s.history, record.CmdLine) + // s.historySet[record.CmdLine] = true + return nil +} + +func (s *strategyMarkovChain) ResetHistory() error { + s.init() + return nil +} diff --git a/evaluate/strategy-random.go b/evaluate/strategy-random.go new file mode 100644 index 0000000..c4d7b27 --- /dev/null +++ b/evaluate/strategy-random.go @@ -0,0 +1,51 @@ +package main + +import ( + "math/rand" + "time" + + "github.com/curusarn/resh/common" +) + +type strategyRandom struct { + candidatesSize int + history []string + historySet map[string]bool +} + +func (s *strategyRandom) init() { + s.history = nil + s.historySet = map[string]bool{} +} + +func (s *strategyRandom) GetTitleAndDescription() (string, string) { + return "random", "Use random commands" +} + +func (s *strategyRandom) GetCandidates() []string { + seed := time.Now().UnixNano() + rand.Seed(seed) + var candidates []string + candidateSet := map[string]bool{} + for len(candidates) < s.candidatesSize && len(candidates)*2 < len(s.historySet) { + x := rand.Intn(len(s.history)) + candidate := s.history[x] + if candidateSet[candidate] == false { + candidateSet[candidate] = true + candidates = append(candidates, candidate) + continue + } + } + return candidates +} + +func (s *strategyRandom) AddHistoryRecord(record *common.EnrichedRecord) error { + s.history = append([]string{record.CmdLine}, s.history...) + s.historySet[record.CmdLine] = true + return nil +} + +func (s *strategyRandom) ResetHistory() error { + s.init() + return nil +} diff --git a/go.mod b/go.mod index 9c901e1..b13b13d 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,10 @@ go 1.12 require ( github.com/BurntSushi/toml v0.3.1 github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect + github.com/jpillora/longestcommon v0.0.0-20161227235612-adb9d91ee629 github.com/mattn/go-shellwords v1.0.6 + github.com/mb-14/gomarkov v0.0.0-20190125094512-044dd0dcb5e7 + github.com/schollz/progressbar v1.0.0 github.com/wcharczuk/go-chart v2.0.1+incompatible github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa golang.org/x/image v0.0.0-20190902063713-cb417be4ba39 // indirect diff --git a/go.sum b/go.sum index 92beac2..b5684fc 100644 --- a/go.sum +++ b/go.sum @@ -2,8 +2,14 @@ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= +github.com/jpillora/longestcommon v0.0.0-20161227235612-adb9d91ee629 h1:1dSBUfGlorLAua2CRx0zFN7kQsTpE2DQSmr7rrTNgY8= +github.com/jpillora/longestcommon v0.0.0-20161227235612-adb9d91ee629/go.mod h1:mb5nS4uRANwOJSZj8rlCWAfAcGi72GGMIXx+xGOjA7M= github.com/mattn/go-shellwords v1.0.6 h1:9Jok5pILi5S1MnDirGVTufYGtksUs/V2BWUP3ZkeUUI= github.com/mattn/go-shellwords v1.0.6/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o= +github.com/mb-14/gomarkov v0.0.0-20190125094512-044dd0dcb5e7 h1:VsJjhYhufMGXICLwLYr8mFVMp8/A+YqmagMHnG/BA/4= +github.com/mb-14/gomarkov v0.0.0-20190125094512-044dd0dcb5e7/go.mod h1:zQmHoMvvVJb7cxyt1wGT77lqUaeOFXlogOppOr4uHVo= +github.com/schollz/progressbar v1.0.0 h1:gbyFReLHDkZo8mxy/dLWMr+Mpb1MokGJ1FqCiqacjZM= +github.com/schollz/progressbar v1.0.0/go.mod h1:/l9I7PC3L3erOuz54ghIRKUEFcosiWfLvJv+Eq26UMs= github.com/wcharczuk/go-chart v2.0.1+incompatible h1:0pz39ZAycJFF7ju/1mepnk26RLVLBCWz1STcD3doU0A= github.com/wcharczuk/go-chart v2.0.1+incompatible/go.mod h1:PF5tmL4EIx/7Wf+hEkpCqYi5He4u90sw+0+6FhrryuE= github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa h1:rW+Lu6281ed/4XGuVIa4/YebTRNvoUJlfJ44ktEVwZk=