From 039573e240332a70b1696faf34dade588d6fefb8 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Tue, 10 Sep 2019 02:34:31 +0200 Subject: [PATCH] process data in golang then analyze and plot in python --- Makefile | 4 +- common/resh-common.go | 3 +- evaluate/resh-evaluate-plot.py | 41 +++++++++++ evaluate/resh-evaluate.go | 120 ++++++++++++++++++++++----------- evaluate/results.go | 99 --------------------------- evaluate/statistics.go | 117 -------------------------------- version | 2 +- 7 files changed, 128 insertions(+), 258 deletions(-) create mode 100755 evaluate/resh-evaluate-plot.py delete mode 100644 evaluate/results.go delete mode 100644 evaluate/statistics.go diff --git a/Makefile b/Makefile index 918a124..d5b4bb0 100644 --- a/Makefile +++ b/Makefile @@ -115,8 +115,8 @@ resh-collect: collect/resh-collect.go common/resh-common.go version resh-sanitize-history: sanitize-history/resh-sanitize-history.go common/resh-common.go version go build ${GOFLAGS} -o $@ $< -resh-evaluate: evaluate/resh-evaluate.go evaluate/results.go evaluate/statistics.go evaluate/strategy-*.go common/resh-common.go version - go build ${GOFLAGS} -o $@ $< evaluate/results.go evaluate/statistics.go evaluate/strategy-*.go +resh-evaluate: evaluate/resh-evaluate.go evaluate/strategy-*.go common/resh-common.go version + go build ${GOFLAGS} -o $@ $< evaluate/strategy-*.go $(HOME)/.resh $(HOME)/.resh/bin $(HOME)/.config: # Creating dirs ... 
diff --git a/common/resh-common.go b/common/resh-common.go index 1bd3f4a..481486d 100644 --- a/common/resh-common.go +++ b/common/resh-common.go @@ -218,7 +218,8 @@ func (r *Record) Enrich() { func GetCommandFromCommandLine(cmdLine string) string { args, err := shellwords.Parse(cmdLine) if err != nil { - log.Fatal("shellwords Error:", err) + log.Println("shellwords Error:", err, " (cmdLine: <", cmdLine, "> )") + return "" } if len(args) > 0 { return args[0] diff --git a/evaluate/resh-evaluate-plot.py b/evaluate/resh-evaluate-plot.py new file mode 100755 index 0000000..8999e0b --- /dev/null +++ b/evaluate/resh-evaluate-plot.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 + +import sys +import json +from collections import defaultdict +import matplotlib.pyplot as plt +import matplotlib.path as mpath +import numpy as np + + +def addRank(data): + return list(enumerate(data, start=1)) + + +data = json.load(sys.stdin) +# for strategy in data["Strategies"]: +# print(json.dumps(strategy)) + +cmd_count = defaultdict(int) +cmdLine_count = defaultdict(int) + +for record in data["Records"]: + cmd_count[record["firstWord"]] += 1 + cmdLine_count[record["cmdLine"]] += 1 + + +cmdFrq = list(map(lambda x: x[1] / len(data["Records"]), sorted(cmd_count.items(), key=lambda x: x[1], reverse=True))) +cmdLineFrq = list(map(lambda x: x[1] / len(data["Records"]), sorted(cmdLine_count.items(), key=lambda x: x[1], reverse=True))) + +print(cmdFrq) +print("#################") +#print(cmdLineFrq_rank) + +plt.plot(range(1, len(cmdFrq)+1), cmdFrq) +plt.title("Command frequency") +#plt.xticks(range(1, len(cmdFrq)+1)) +plt.show() + +plt.plot(range(1, len(cmdLineFrq)+1), cmdLineFrq) +plt.title("Commandline frequency") +plt.show() \ No newline at end of file diff --git a/evaluate/resh-evaluate.go b/evaluate/resh-evaluate.go index 930a694..30c5a9f 100644 --- a/evaluate/resh-evaluate.go +++ b/evaluate/resh-evaluate.go @@ -2,11 +2,13 @@ package main import ( "bufio" + "bytes" "encoding/json" "flag" "fmt" 
"log" "os" + "os/exec" "os/user" "path/filepath" @@ -24,15 +26,17 @@ func main() { dir := usr.HomeDir historyPath := filepath.Join(dir, ".resh_history.json") sanitizedHistoryPath := filepath.Join(dir, "resh_history_sanitized.json") + // tmpPath := "/tmp/resh-evaluate-tmp.json" showVersion := flag.Bool("version", false, "Show version and exit") showRevision := flag.Bool("revision", false, "Show git revision and exit") inputPath := flag.String("input", "", "Input file (default: "+historyPath+"OR"+sanitizedHistoryPath+ " depending on --sanitized-input option)") - outputPath := flag.String("output", "", "Output file (default: use stdout)") + outputDir := flag.String("output", "/tmp/resh-evaluate", "Output directory") sanitizedInput := flag.Bool("sanitized-input", false, "Handle input as sanitized (also changes default value for input argument)") + plottingScript := flag.String("plotting-script", "resh-evaluate-plot.py", "Script to use for plotting") flag.Parse() @@ -54,20 +58,7 @@ func main() { os.Exit(0) } - var writer *bufio.Writer - if *outputPath != "" { - outputFile, err := os.Create(*outputPath) - if err != nil { - log.Fatal("Create() output file error:", err) - } - defer outputFile.Close() - writer = bufio.NewWriter(outputFile) - } else { - writer = bufio.NewWriter(os.Stdout) - } - defer writer.Flush() - - evaluator := evaluator{sanitizedInput: *sanitizedInput, writer: writer, maxCandidates: 50} + evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: 50} err := evaluator.init(*inputPath) if err != nil { log.Fatal("Evaluator init() error:", err) @@ -87,6 +78,18 @@ func main() { log.Println("Evaluator evaluate() error:", err) } } + // evaluator.dumpJSON(tmpPath) + + // run python script to stat and plot/ + cmd := exec.Command("echo", *outputDir) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + log.Printf("") + err = cmd.Run() + if err != nil { + log.Printf("Command finished with error: %v", err) + } + 
evaluator.calculateStatsAndPlot(*plottingScript) } type strategy interface { @@ -96,42 +99,93 @@ type strategy interface { ResetHistory() error } +type matchJSON struct { + Match bool + Distance int + CharsRecalled int +} + +type strategyJSON struct { + Title string + Description string + Matches []matchJSON +} + +type evaluateJSON struct { + Strategies []strategyJSON + Records []common.Record +} + type evaluator struct { sanitizedInput bool - writer *bufio.Writer maxCandidates int historyRecords []common.Record + data evaluateJSON } func (e *evaluator) init(inputPath string) error { e.historyRecords = e.loadHistoryRecords(inputPath) + e.processRecords() return nil } -func (e *evaluator) evaluate(strategy strategy) error { - res := results{writer: e.writer, size: e.maxCandidates + 1} - stats := statistics{} - res.init() - stats.init() +func (e *evaluator) calculateStatsAndPlot(scriptName string) { + evalJSON, err := json.Marshal(e.data) + if err != nil { + log.Fatal("json marshal error", err) + } + buffer := bytes.Buffer{} + buffer.Write(evalJSON) + // run python script to stat and plot/ + cmd := exec.Command(scriptName) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Stdin = &buffer + log.Printf("...") + err = cmd.Run() + if err != nil { + log.Printf("Command finished with error: %v", err) + } +} + +// enrich records and add them to serializable structure +func (e *evaluator) processRecords() { + for _, record := range e.historyRecords { + + // assert + if record.Sanitized != e.sanitizedInput { + if e.sanitizedInput { + log.Fatal("ASSERT failed: '--sanitized-input' is present but data is not sanitized") + } + log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present") + } + + record.Enrich() + e.data.Records = append(e.data.Records, record) + } +} +func (e *evaluator) evaluate(strategy strategy) error { + title, description := strategy.GetTitleAndDescription() + strategyData := strategyJSON{Title: title, Description: description} 
for _, record := range e.historyRecords { - stats.addCmdLine(record.CmdLine, record.CmdLength) candidates := strategy.GetCandidates() - match := false + matchFound := false for i, candidate := range candidates { // make an option (--calculate-total) to turn this on/off ? // if i >= e.maxCandidates { // break // } if candidate == record.CmdLine { - res.addMatch(i+1, record.CmdLength) - match = true + match := matchJSON{Match: true, Distance: i + 1, CharsRecalled: record.CmdLength} + strategyData.Matches = append(strategyData.Matches, match) + matchFound = true break } } - if match == false { - res.addMiss() + if matchFound == false { + strategyData.Matches = append(strategyData.Matches, matchJSON{}) } err := strategy.AddHistoryRecord(&record) if err != nil { @@ -139,17 +193,7 @@ func (e *evaluator) evaluate(strategy strategy) error { return err } } - title, description := strategy.GetTitleAndDescription() - n, err := e.writer.WriteString(title + " - " + description + "\n") - if err != nil { - log.Fatal(err) - } - if n == 0 { - log.Fatal("Nothing was written", n) - } - // print results - res.printCumulative() - stats.graphCmdFrequencyAsFuncOfRank() + e.data.Strategies = append(e.data.Strategies, strategyData) return nil } diff --git a/evaluate/results.go b/evaluate/results.go deleted file mode 100644 index 5a82aba..0000000 --- a/evaluate/results.go +++ /dev/null @@ -1,99 +0,0 @@ -package main - -import ( - "bufio" - "fmt" - "log" - "math" - "strconv" -) - -type results struct { - writer *bufio.Writer - size int - matches []int // matches[N] -> # of matches at distance N - matchesTotal int - charactersRecalled []int - charactersRecalledTotal int - dataPointCount int -} - -func (r *results) init() { - r.matches = make([]int, r.size) - r.charactersRecalled = make([]int, r.size) -} - -func (r *results) addMatch(distance int, cmdLength int) { - if distance >= r.size { - // --calculate-total - // log.Fatal("Match distance is greater than size of statistics") - 
r.matchesTotal++ - r.charactersRecalledTotal += cmdLength - return - } - r.matches[distance]++ - r.matchesTotal++ - r.charactersRecalled[distance] += cmdLength - r.charactersRecalledTotal += cmdLength - r.dataPointCount++ -} - -func (r *results) addMiss() { - r.dataPointCount++ -} - -func (r *results) printCumulative() { - matchesPercent := 0.0 - out := "### Matches ###\n" - for i := 0; i < r.size; i++ { - matchesPercent += 100 * float64(r.matches[i]) / float64(r.dataPointCount) - out += strconv.Itoa(i) + " ->" - out += fmt.Sprintf(" (%.1f %%)\n", matchesPercent) - for j := 0; j < int(math.Round(matchesPercent)); j++ { - out += "#" - } - out += "\n" - } - matchesPercent = 100 * float64(r.matchesTotal) / float64(r.dataPointCount) - out += "TOTAL ->" - out += fmt.Sprintf(" (%.1f %%)\n", matchesPercent) - for j := 0; j < int(math.Round(matchesPercent)); j++ { - out += "#" - } - out += "\n" - - n, err := r.writer.WriteString(string(out) + "\n\n") - if err != nil { - log.Fatal(err) - } - if n == 0 { - log.Fatal("Nothing was written", n) - } - - charsRecall := 0.0 - out = "### Characters recalled per submission ###\n" - for i := 0; i < r.size; i++ { - charsRecall += float64(r.charactersRecalled[i]) / float64(r.dataPointCount) - out += strconv.Itoa(i) + " ->" - out += fmt.Sprintf(" (%.2f)\n", charsRecall) - for j := 0; j < int(math.Round(charsRecall)); j++ { - out += "#" - } - out += "\n" - } - charsRecall = float64(r.charactersRecalledTotal) / float64(r.dataPointCount) - out += "TOTAL ->" - out += fmt.Sprintf(" (%.2f)\n", charsRecall) - for j := 0; j < int(math.Round(charsRecall)); j++ { - out += "#" - } - out += "\n" - - n, err = r.writer.WriteString(string(out) + "\n\n") - if err != nil { - log.Fatal(err) - } - if n == 0 { - log.Fatal("Nothing was written", n) - } -} diff --git a/evaluate/statistics.go b/evaluate/statistics.go deleted file mode 100644 index 0a71857..0000000 --- a/evaluate/statistics.go +++ /dev/null @@ -1,117 +0,0 @@ -package main - -import ( - "bytes" 
- "io/ioutil" - "log" - "sort" - - "github.com/wcharczuk/go-chart" -) - -type statistics struct { - //size int - dataPointCount int - cmdLineCount map[string]int -} - -func (s *statistics) init() { - s.cmdLineCount = make(map[string]int) -} - -func (s *statistics) addCmdLine(cmdLine string, cmdLength int) { - s.cmdLineCount[cmdLine]++ - s.dataPointCount++ -} - -func (s *statistics) graphCmdFrequencyAsFuncOfRank() { - - var xValues []float64 - var yValues []float64 - - sortedValues := sortMapByvalue(s.cmdLineCount) - sortedValues = sortedValues[:100] // cut off at rank 100 - - normalizeCoeficient := float64(s.dataPointCount) / float64(sortedValues[0].Value) - for i, pair := range sortedValues { - rank := i + 1 - frequency := float64(pair.Value) / float64(s.dataPointCount) - normalizeFrequency := frequency * normalizeCoeficient - - xValues = append(xValues, float64(rank)) - yValues = append(yValues, normalizeFrequency) - } - - graphName := "cmdFrqAsFuncOfRank" - graph := chart.Chart{ - XAxis: chart.XAxis{ - Style: chart.StyleShow(), //enables / displays the x-axis - Ticks: []chart.Tick{ - {0.0, "0"}, - {1.0, "1"}, - {2.0, "2"}, - {3.0, "3"}, - {4.0, "4"}, - {5.0, "5"}, - {10.0, "10"}, - {15.0, "15"}, - {20.0, "20"}, - {25.0, "25"}, - {30.0, "30"}, - {35.0, "35"}, - {40.0, "40"}, - {45.0, "45"}, - {50.0, "50"}, - }, - }, - YAxis: chart.YAxis{ - AxisType: chart.YAxisSecondary, - Style: chart.StyleShow(), //enables / displays the y-axis - }, - Series: []chart.Series{ - chart.ContinuousSeries{ - Style: chart.Style{ - Show: true, - StrokeColor: chart.GetDefaultColor(0).WithAlpha(64), - FillColor: chart.GetDefaultColor(0).WithAlpha(64), - DotColor: chart.GetDefaultColor(0), - DotWidth: 3.0, - }, - XValues: xValues, - YValues: yValues, - }, - }, - } - - buffer := bytes.NewBuffer([]byte{}) - err := graph.Render(chart.PNG, buffer) - if err != nil { - log.Fatal("chart.Render error:", err) - } - ioutil.WriteFile("/tmp/resh-graph_"+graphName+".png", buffer.Bytes(), 0644) -} - 
-func sortMapByvalue(input map[string]int) []Pair { - p := make(PairList, len(input)) - - i := 0 - for k, v := range input { - p[i] = Pair{k, v} - i++ - } - sort.Sort(sort.Reverse(p)) - return p -} - -// Pair - A data structure to hold key/value pairs -type Pair struct { - Key string - Value int -} - -// PairList - A slice of pairs that implements sort.Interface to sort by values -type PairList []Pair - -func (p PairList) Len() int { return len(p) } -func (p PairList) Swap(i, j int) { p[i], p[j] = p[j], p[i] } -func (p PairList) Less(i, j int) bool { return p[i].Value < p[j].Value } diff --git a/version b/version index 524cb55..45a1b3f 100644 --- a/version +++ b/version @@ -1 +1 @@ -1.1.1 +1.1.2