process data in golang then analyze and plot in python

6 years ago · 039573e240
parent 6f7f505420
commit 039573e240
7 changed files with 128 additions and 258 deletions
--- a/4
+++ b/4
@ -115,8 +115,8 @@ resh-collect: collect/resh-collect.go common/resh-common.go version
 resh-sanitize-history: sanitize-history/resh-sanitize-history.go common/resh-common.go version
 	go build ${GOFLAGS} -o $@ $<

-resh-evaluate: evaluate/resh-evaluate.go evaluate/results.go evaluate/statistics.go evaluate/strategy-*.go common/resh-common.go version
-	go build ${GOFLAGS} -o $@ $< evaluate/results.go evaluate/statistics.go evaluate/strategy-*.go 
+resh-evaluate: evaluate/resh-evaluate.go evaluate/strategy-*.go common/resh-common.go version
+	go build ${GOFLAGS} -o $@ $< evaluate/strategy-*.go 

 $(HOME)/.resh $(HOME)/.resh/bin $(HOME)/.config:
 	# Creating dirs ...
--- a/common/resh-common.go
+++ b/common/resh-common.go
@ -218,7 +218,8 @@ func (r *Record) Enrich() {
 func GetCommandFromCommandLine(cmdLine string) string {
 	args, err := shellwords.Parse(cmdLine)
 	if err != nil {
-		log.Fatal("shellwords Error:", err)
+		log.Println("shellwords Error:", err, " (cmdLine: <", cmdLine, "> )")
+		return "<error>"
 	}
 	if len(args) > 0 {
 		return args[0]
--- a/evaluate/resh-evaluate-plot.py
+++ b/evaluate/resh-evaluate-plot.py
@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+
+import sys
+import json
+from collections import defaultdict
+import matplotlib.pyplot as plt
+import matplotlib.path as mpath
+import numpy as np
+
+
+def addRank(data):
+    return list(enumerate(data, start=1))
+
+
+data = json.load(sys.stdin)
+# for strategy in data["Strategies"]:
+#     print(json.dumps(strategy))
+
+cmd_count = defaultdict(int)
+cmdLine_count = defaultdict(int)
+
+for record in data["Records"]:
+    cmd_count[record["firstWord"]] += 1
+    cmdLine_count[record["cmdLine"]] += 1
+
+
+cmdFrq = list(map(lambda x: x[1] / len(data["Records"]), sorted(cmd_count.items(), key=lambda x: x[1], reverse=True)))
+cmdLineFrq = list(map(lambda x: x[1] / len(data["Records"]), sorted(cmdLine_count.items(), key=lambda x: x[1], reverse=True)))
+
+print(cmdFrq)
+print("#################")
+#print(cmdLineFrq_rank)
+
+plt.plot(range(1, len(cmdFrq)+1), cmdFrq)
+plt.title("Command frequency")
+#plt.xticks(range(1, len(cmdFrq)+1))
+plt.show()
+
+plt.plot(range(1, len(cmdLineFrq)+1), cmdLineFrq)
+plt.title("Commandline frequency")
+plt.show()
--- a/evaluate/resh-evaluate.go
+++ b/evaluate/resh-evaluate.go
@ -2,11 +2,13 @@ package main

 import (
 	"bufio"
+	"bytes"
 	"encoding/json"
 	"flag"
 	"fmt"
 	"log"
 	"os"
+	"os/exec"
 	"os/user"
 	"path/filepath"

@ -24,15 +26,17 @@ func main() {
 	dir := usr.HomeDir
 	historyPath := filepath.Join(dir, ".resh_history.json")
 	sanitizedHistoryPath := filepath.Join(dir, "resh_history_sanitized.json")
+	// tmpPath := "/tmp/resh-evaluate-tmp.json"

 	showVersion := flag.Bool("version", false, "Show version and exit")
 	showRevision := flag.Bool("revision", false, "Show git revision and exit")
 	inputPath := flag.String("input", "",
 		"Input file (default: "+historyPath+"OR"+sanitizedHistoryPath+
 			" depending on --sanitized-input option)")
-	outputPath := flag.String("output", "", "Output file (default: use stdout)")
+	outputDir := flag.String("output", "/tmp/resh-evaluate", "Output directory")
 	sanitizedInput := flag.Bool("sanitized-input", false,
 		"Handle input as sanitized (also changes default value for input argument)")
+	plottingScript := flag.String("plotting-script", "resh-evaluate-plot.py", "Script to use for plotting")

 	flag.Parse()

@ -54,20 +58,7 @@ func main() {
 		os.Exit(0)
 	}

-	var writer *bufio.Writer
-	if *outputPath != "" {
-		outputFile, err := os.Create(*outputPath)
-		if err != nil {
-			log.Fatal("Create() output file error:", err)
-		}
-		defer outputFile.Close()
-		writer = bufio.NewWriter(outputFile)
-	} else {
-		writer = bufio.NewWriter(os.Stdout)
-	}
-	defer writer.Flush()
-
-	evaluator := evaluator{sanitizedInput: *sanitizedInput, writer: writer, maxCandidates: 50}
+	evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: 50}
 	err := evaluator.init(*inputPath)
 	if err != nil {
 		log.Fatal("Evaluator init() error:", err)
@ -87,6 +78,18 @@ func main() {
 			log.Println("Evaluator evaluate() error:", err)
 		}
 	}
+	// evaluator.dumpJSON(tmpPath)
+
+	// run python script to stat and plot/
+	cmd := exec.Command("echo", *outputDir)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	log.Printf("")
+	err = cmd.Run()
+	if err != nil {
+		log.Printf("Command finished with error: %v", err)
+	}
+	evaluator.calculateStatsAndPlot(*plottingScript)
 }

 type strategy interface {
@ -96,42 +99,93 @@ type strategy interface {
 	ResetHistory() error
 }

+type matchJSON struct {
+	Match         bool
+	Distance      int
+	CharsRecalled int
+}
+
+type strategyJSON struct {
+	Title       string
+	Description string
+	Matches     []matchJSON
+}
+
+type evaluateJSON struct {
+	Strategies []strategyJSON
+	Records    []common.Record
+}
+
 type evaluator struct {
 	sanitizedInput bool
-	writer         *bufio.Writer
 	maxCandidates  int
 	historyRecords []common.Record
+	data           evaluateJSON
 }

 func (e *evaluator) init(inputPath string) error {
 	e.historyRecords = e.loadHistoryRecords(inputPath)
+	e.processRecords()
 	return nil
 }

-func (e *evaluator) evaluate(strategy strategy) error {
-	res := results{writer: e.writer, size: e.maxCandidates + 1}
-	stats := statistics{}
-	res.init()
-	stats.init()
+func (e *evaluator) calculateStatsAndPlot(scriptName string) {
+	evalJSON, err := json.Marshal(e.data)
+	if err != nil {
+		log.Fatal("json marshal error", err)
+	}
+	buffer := bytes.Buffer{}
+	buffer.Write(evalJSON)
+	// run python script to stat and plot/
+	cmd := exec.Command(scriptName)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	cmd.Stdin = &buffer
+	log.Printf("...")
+	err = cmd.Run()
+	if err != nil {
+		log.Printf("Command finished with error: %v", err)
+	}
+}
+
+// enrich records and add them to serializable structure
+func (e *evaluator) processRecords() {
+	for _, record := range e.historyRecords {
+
+		// assert
+		if record.Sanitized != e.sanitizedInput {
+			if e.sanitizedInput {
+				log.Fatal("ASSERT failed: '--sanitized-input' is present but data is not sanitized")
+			}
+			log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present")
+		}
+
+		record.Enrich()
+		e.data.Records = append(e.data.Records, record)
+	}
+}

+func (e *evaluator) evaluate(strategy strategy) error {
+	title, description := strategy.GetTitleAndDescription()
+	strategyData := strategyJSON{Title: title, Description: description}
 	for _, record := range e.historyRecords {
-		stats.addCmdLine(record.CmdLine, record.CmdLength)
 		candidates := strategy.GetCandidates()

-		match := false
+		matchFound := false
 		for i, candidate := range candidates {
 			// make an option (--calculate-total) to turn this on/off ?
 			// if i >= e.maxCandidates {
 			// 	break
 			// }
 			if candidate == record.CmdLine {
-				res.addMatch(i+1, record.CmdLength)
-				match = true
+				match := matchJSON{Match: true, Distance: i + 1, CharsRecalled: record.CmdLength}
+				strategyData.Matches = append(strategyData.Matches, match)
+				matchFound = true
 				break
 			}
 		}
-		if match == false {
-			res.addMiss()
+		if matchFound == false {
+			strategyData.Matches = append(strategyData.Matches, matchJSON{})
 		}
 		err := strategy.AddHistoryRecord(&record)
 		if err != nil {
@ -139,17 +193,7 @@ func (e *evaluator) evaluate(strategy strategy) error {
 			return err
 		}
 	}
-	title, description := strategy.GetTitleAndDescription()
-	n, err := e.writer.WriteString(title + " - " + description + "\n")
-	if err != nil {
-		log.Fatal(err)
-	}
-	if n == 0 {
-		log.Fatal("Nothing was written", n)
-	}
-	// print results
-	res.printCumulative()
-	stats.graphCmdFrequencyAsFuncOfRank()
+	e.data.Strategies = append(e.data.Strategies, strategyData)
 	return nil
 }

--- a/evaluate/results.go
+++ b/evaluate/results.go
@ -1,99 +0,0 @@
-package main
-
-import (
-	"bufio"
-	"fmt"
-	"log"
-	"math"
-	"strconv"
-)
-
-type results struct {
-	writer                  *bufio.Writer
-	size                    int
-	matches                 []int // matches[N] -> # of matches at distance N
-	matchesTotal            int
-	charactersRecalled      []int
-	charactersRecalledTotal int
-	dataPointCount          int
-}
-
-func (r *results) init() {
-	r.matches = make([]int, r.size)
-	r.charactersRecalled = make([]int, r.size)
-}
-
-func (r *results) addMatch(distance int, cmdLength int) {
-	if distance >= r.size {
-		// --calculate-total
-		// log.Fatal("Match distance is greater than size of statistics")
-		r.matchesTotal++
-		r.charactersRecalledTotal += cmdLength
-		return
-	}
-	r.matches[distance]++
-	r.matchesTotal++
-	r.charactersRecalled[distance] += cmdLength
-	r.charactersRecalledTotal += cmdLength
-	r.dataPointCount++
-}
-
-func (r *results) addMiss() {
-	r.dataPointCount++
-}
-
-func (r *results) printCumulative() {
-	matchesPercent := 0.0
-	out := "### Matches ###\n"
-	for i := 0; i < r.size; i++ {
-		matchesPercent += 100 * float64(r.matches[i]) / float64(r.dataPointCount)
-		out += strconv.Itoa(i) + " ->"
-		out += fmt.Sprintf(" (%.1f %%)\n", matchesPercent)
-		for j := 0; j < int(math.Round(matchesPercent)); j++ {
-			out += "#"
-		}
-		out += "\n"
-	}
-	matchesPercent = 100 * float64(r.matchesTotal) / float64(r.dataPointCount)
-	out += "TOTAL ->"
-	out += fmt.Sprintf(" (%.1f %%)\n", matchesPercent)
-	for j := 0; j < int(math.Round(matchesPercent)); j++ {
-		out += "#"
-	}
-	out += "\n"
-
-	n, err := r.writer.WriteString(string(out) + "\n\n")
-	if err != nil {
-		log.Fatal(err)
-	}
-	if n == 0 {
-		log.Fatal("Nothing was written", n)
-	}
-
-	charsRecall := 0.0
-	out = "### Characters recalled per submission ###\n"
-	for i := 0; i < r.size; i++ {
-		charsRecall += float64(r.charactersRecalled[i]) / float64(r.dataPointCount)
-		out += strconv.Itoa(i) + " ->"
-		out += fmt.Sprintf(" (%.2f)\n", charsRecall)
-		for j := 0; j < int(math.Round(charsRecall)); j++ {
-			out += "#"
-		}
-		out += "\n"
-	}
-	charsRecall = float64(r.charactersRecalledTotal) / float64(r.dataPointCount)
-	out += "TOTAL ->"
-	out += fmt.Sprintf(" (%.2f)\n", charsRecall)
-	for j := 0; j < int(math.Round(charsRecall)); j++ {
-		out += "#"
-	}
-	out += "\n"
-
-	n, err = r.writer.WriteString(string(out) + "\n\n")
-	if err != nil {
-		log.Fatal(err)
-	}
-	if n == 0 {
-		log.Fatal("Nothing was written", n)
-	}
-}
--- a/evaluate/statistics.go
+++ b/evaluate/statistics.go
@ -1,117 +0,0 @@
-package main
-
-import (
-	"bytes"
-	"io/ioutil"
-	"log"
-	"sort"
-
-	"github.com/wcharczuk/go-chart"
-)
-
-type statistics struct {
-	//size                    int
-	dataPointCount int
-	cmdLineCount   map[string]int
-}
-
-func (s *statistics) init() {
-	s.cmdLineCount = make(map[string]int)
-}
-
-func (s *statistics) addCmdLine(cmdLine string, cmdLength int) {
-	s.cmdLineCount[cmdLine]++
-	s.dataPointCount++
-}
-
-func (s *statistics) graphCmdFrequencyAsFuncOfRank() {
-
-	var xValues []float64
-	var yValues []float64
-
-	sortedValues := sortMapByvalue(s.cmdLineCount)
-	sortedValues = sortedValues[:100] // cut off at rank 100
-
-	normalizeCoeficient := float64(s.dataPointCount) / float64(sortedValues[0].Value)
-	for i, pair := range sortedValues {
-		rank := i + 1
-		frequency := float64(pair.Value) / float64(s.dataPointCount)
-		normalizeFrequency := frequency * normalizeCoeficient
-
-		xValues = append(xValues, float64(rank))
-		yValues = append(yValues, normalizeFrequency)
-	}
-
-	graphName := "cmdFrqAsFuncOfRank"
-	graph := chart.Chart{
-		XAxis: chart.XAxis{
-			Style: chart.StyleShow(), //enables / displays the x-axis
-			Ticks: []chart.Tick{
-				{0.0, "0"},
-				{1.0, "1"},
-				{2.0, "2"},
-				{3.0, "3"},
-				{4.0, "4"},
-				{5.0, "5"},
-				{10.0, "10"},
-				{15.0, "15"},
-				{20.0, "20"},
-				{25.0, "25"},
-				{30.0, "30"},
-				{35.0, "35"},
-				{40.0, "40"},
-				{45.0, "45"},
-				{50.0, "50"},
-			},
-		},
-		YAxis: chart.YAxis{
-			AxisType: chart.YAxisSecondary,
-			Style:    chart.StyleShow(), //enables / displays the y-axis
-		},
-		Series: []chart.Series{
-			chart.ContinuousSeries{
-				Style: chart.Style{
-					Show:        true,
-					StrokeColor: chart.GetDefaultColor(0).WithAlpha(64),
-					FillColor:   chart.GetDefaultColor(0).WithAlpha(64),
-					DotColor:    chart.GetDefaultColor(0),
-					DotWidth:    3.0,
-				},
-				XValues: xValues,
-				YValues: yValues,
-			},
-		},
-	}
-
-	buffer := bytes.NewBuffer([]byte{})
-	err := graph.Render(chart.PNG, buffer)
-	if err != nil {
-		log.Fatal("chart.Render error:", err)
-	}
-	ioutil.WriteFile("/tmp/resh-graph_"+graphName+".png", buffer.Bytes(), 0644)
-}
-
-func sortMapByvalue(input map[string]int) []Pair {
-	p := make(PairList, len(input))
-
-	i := 0
-	for k, v := range input {
-		p[i] = Pair{k, v}
-		i++
-	}
-	sort.Sort(sort.Reverse(p))
-	return p
-}
-
-// Pair - A data structure to hold key/value pairs
-type Pair struct {
-	Key   string
-	Value int
-}
-
-// PairList - A slice of pairs that implements sort.Interface to sort by values
-type PairList []Pair
-
-func (p PairList) Len() int           { return len(p) }
-func (p PairList) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
-func (p PairList) Less(i, j int) bool { return p[i].Value < p[j].Value }
--- a/2
+++ b/2
@ -1 +1 @@
-1.1.1
+1.1.2
 @ -1 +1 @@
 .1.1
 .1.2