process data in golang then analyze and plot in python

pull/13/head
Simon Let 6 years ago
parent 6f7f505420
commit 039573e240
  1. 4
      Makefile
  2. 3
      common/resh-common.go
  3. 41
      evaluate/resh-evaluate-plot.py
  4. 120
      evaluate/resh-evaluate.go
  5. 99
      evaluate/results.go
  6. 117
      evaluate/statistics.go
  7. 2
      version

@ -115,8 +115,8 @@ resh-collect: collect/resh-collect.go common/resh-common.go version
resh-sanitize-history: sanitize-history/resh-sanitize-history.go common/resh-common.go version
go build ${GOFLAGS} -o $@ $<
resh-evaluate: evaluate/resh-evaluate.go evaluate/results.go evaluate/statistics.go evaluate/strategy-*.go common/resh-common.go version
go build ${GOFLAGS} -o $@ $< evaluate/results.go evaluate/statistics.go evaluate/strategy-*.go
resh-evaluate: evaluate/resh-evaluate.go evaluate/strategy-*.go common/resh-common.go version
go build ${GOFLAGS} -o $@ $< evaluate/strategy-*.go
$(HOME)/.resh $(HOME)/.resh/bin $(HOME)/.config:
# Creating dirs ...

@ -218,7 +218,8 @@ func (r *Record) Enrich() {
func GetCommandFromCommandLine(cmdLine string) string {
args, err := shellwords.Parse(cmdLine)
if err != nil {
log.Fatal("shellwords Error:", err)
log.Println("shellwords Error:", err, " (cmdLine: <", cmdLine, "> )")
return "<error>"
}
if len(args) > 0 {
return args[0]

@ -0,0 +1,41 @@
#!/usr/bin/env python3
import sys
import json
from collections import defaultdict
import matplotlib.pyplot as plt
import matplotlib.path as mpath
import numpy as np
def addRank(data):
    """Pair every item of ``data`` with its 1-based rank.

    Returns a list of ``(rank, item)`` tuples in the iteration order of
    ``data``; the first item gets rank 1.
    """
    ranked = []
    for rank, item in enumerate(data, start=1):
        ranked.append((rank, item))
    return ranked
# Read the evaluation dump (JSON) from standard input.
data = json.load(sys.stdin)
records = data["Records"]
record_total = len(records)

# for strategy in data["Strategies"]:
#     print(json.dumps(strategy))

# Count how many times each first word and each full command line occurs.
cmd_count = defaultdict(int)
cmdLine_count = defaultdict(int)
for record in records:
    cmd_count[record["firstWord"]] += 1
    cmdLine_count[record["cmdLine"]] += 1

# Relative frequencies ordered from the most frequent entry to the least
# frequent one; the position in the list (plus one) is the rank.
cmdFrq = [count / record_total
          for count in sorted(cmd_count.values(), reverse=True)]
cmdLineFrq = [count / record_total
              for count in sorted(cmdLine_count.values(), reverse=True)]

print(cmdFrq)
print("#################")

# One figure per series: frequency as a function of rank.
for title, frequencies in (("Command frequency", cmdFrq),
                           ("Commandline frequency", cmdLineFrq)):
    ranks = range(1, len(frequencies) + 1)
    plt.plot(ranks, frequencies)
    plt.title(title)
    # plt.xticks(ranks)
    plt.show()

@ -2,11 +2,13 @@ package main
import (
"bufio"
"bytes"
"encoding/json"
"flag"
"fmt"
"log"
"os"
"os/exec"
"os/user"
"path/filepath"
@ -24,15 +26,17 @@ func main() {
dir := usr.HomeDir
historyPath := filepath.Join(dir, ".resh_history.json")
sanitizedHistoryPath := filepath.Join(dir, "resh_history_sanitized.json")
// tmpPath := "/tmp/resh-evaluate-tmp.json"
showVersion := flag.Bool("version", false, "Show version and exit")
showRevision := flag.Bool("revision", false, "Show git revision and exit")
inputPath := flag.String("input", "",
"Input file (default: "+historyPath+"OR"+sanitizedHistoryPath+
" depending on --sanitized-input option)")
outputPath := flag.String("output", "", "Output file (default: use stdout)")
outputDir := flag.String("output", "/tmp/resh-evaluate", "Output directory")
sanitizedInput := flag.Bool("sanitized-input", false,
"Handle input as sanitized (also changes default value for input argument)")
plottingScript := flag.String("plotting-script", "resh-evaluate-plot.py", "Script to use for plotting")
flag.Parse()
@ -54,20 +58,7 @@ func main() {
os.Exit(0)
}
var writer *bufio.Writer
if *outputPath != "" {
outputFile, err := os.Create(*outputPath)
if err != nil {
log.Fatal("Create() output file error:", err)
}
defer outputFile.Close()
writer = bufio.NewWriter(outputFile)
} else {
writer = bufio.NewWriter(os.Stdout)
}
defer writer.Flush()
evaluator := evaluator{sanitizedInput: *sanitizedInput, writer: writer, maxCandidates: 50}
evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: 50}
err := evaluator.init(*inputPath)
if err != nil {
log.Fatal("Evaluator init() error:", err)
@ -87,6 +78,18 @@ func main() {
log.Println("Evaluator evaluate() error:", err)
}
}
// evaluator.dumpJSON(tmpPath)
// run python script to stat and plot/
cmd := exec.Command("echo", *outputDir)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
log.Printf("")
err = cmd.Run()
if err != nil {
log.Printf("Command finished with error: %v", err)
}
evaluator.calculateStatsAndPlot(*plottingScript)
}
type strategy interface {
@ -96,42 +99,93 @@ type strategy interface {
ResetHistory() error
}
// matchJSON is the serialized outcome of comparing one history record with the
// candidates offered by a strategy. evaluate appends the zero value (all fields
// zero/false) when no candidate equals the record's command line.
type matchJSON struct {
// Match is set to true when a candidate equal to the command line was found.
Match bool
// Distance is the 1-based position of the matching candidate (index + 1).
Distance int
// CharsRecalled is filled from the CmdLength of the matched record.
CharsRecalled int
}
// strategyJSON is the serialized evaluation of a single strategy.
type strategyJSON struct {
// Title and Description are the values returned by the strategy's
// GetTitleAndDescription method.
Title string
Description string
// Matches receives one entry per evaluated history record (a hit or the
// zero-valued miss).
Matches []matchJSON
}
// evaluateJSON is the top-level document that calculateStatsAndPlot marshals
// and sends to the plotting script. The fields carry no json tags, so the
// field names themselves ("Strategies", "Records") are the JSON keys.
type evaluateJSON struct {
// Strategies holds one entry per strategy passed to evaluate.
Strategies []strategyJSON
// Records holds the enriched history records added by processRecords.
Records []common.Record
}
// evaluator holds the loaded history together with the data collected while
// strategies are evaluated against it.
type evaluator struct {
// sanitizedInput is compared with each record's Sanitized flag in
// processRecords; a mismatch aborts the program.
sanitizedInput bool
// writer is handed to the text-based results printer.
writer *bufio.Writer
// maxCandidates is set to 50 by main; the per-record candidate cut-off that
// would use it is currently commented out in evaluate.
maxCandidates int
// historyRecords is populated by init from the input file.
historyRecords []common.Record
// data is the structure that gets serialized for the plotting script.
data evaluateJSON
}
// init loads the history records from inputPath into e.historyRecords and then
// runs processRecords over them.
// As written it always returns nil.
// NOTE(review): loadHistoryRecords is not visible here; confirm how it reports
// read/parse failures, since none are propagated through this return value.
func (e *evaluator) init(inputPath string) error {
e.historyRecords = e.loadHistoryRecords(inputPath)
e.processRecords()
return nil
}
func (e *evaluator) evaluate(strategy strategy) error {
res := results{writer: e.writer, size: e.maxCandidates + 1}
stats := statistics{}
res.init()
stats.init()
// calculateStatsAndPlot marshals the collected evaluation data to JSON and
// feeds it to the standard input of the executable scriptName. The child's
// stdout and stderr are forwarded to this process.
//
// A marshalling failure terminates the program; a failure of the script itself
// is only logged.
func (e *evaluator) calculateStatsAndPlot(scriptName string) {
	payload, err := json.Marshal(e.data)
	if err != nil {
		log.Fatal("json marshal error", err)
	}

	// run the script that computes the statistics and draws the plots
	plot := exec.Command(scriptName)
	plot.Stdin = bytes.NewBuffer(payload)
	plot.Stdout = os.Stdout
	plot.Stderr = os.Stderr

	log.Printf("...")
	if err := plot.Run(); err != nil {
		log.Printf("Command finished with error: %v", err)
	}
}
// processRecords checks every loaded history record against the
// --sanitized-input setting, enriches a copy of it and appends that copy to the
// serializable data. The program is aborted on the first record whose Sanitized
// flag disagrees with the setting.
func (e *evaluator) processRecords() {
	for idx := range e.historyRecords {
		// work on a copy so e.historyRecords itself stays as loaded
		rec := e.historyRecords[idx]

		// assert
		switch {
		case rec.Sanitized == e.sanitizedInput:
			// flag and data agree
		case e.sanitizedInput:
			log.Fatal("ASSERT failed: '--sanitized-input' is present but data is not sanitized")
		default:
			log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present")
		}

		rec.Enrich()
		e.data.Records = append(e.data.Records, rec)
	}
}
func (e *evaluator) evaluate(strategy strategy) error {
title, description := strategy.GetTitleAndDescription()
strategyData := strategyJSON{Title: title, Description: description}
for _, record := range e.historyRecords {
stats.addCmdLine(record.CmdLine, record.CmdLength)
candidates := strategy.GetCandidates()
match := false
matchFound := false
for i, candidate := range candidates {
// make an option (--calculate-total) to turn this on/off ?
// if i >= e.maxCandidates {
// break
// }
if candidate == record.CmdLine {
res.addMatch(i+1, record.CmdLength)
match = true
match := matchJSON{Match: true, Distance: i + 1, CharsRecalled: record.CmdLength}
strategyData.Matches = append(strategyData.Matches, match)
matchFound = true
break
}
}
if match == false {
res.addMiss()
if matchFound == false {
strategyData.Matches = append(strategyData.Matches, matchJSON{})
}
err := strategy.AddHistoryRecord(&record)
if err != nil {
@ -139,17 +193,7 @@ func (e *evaluator) evaluate(strategy strategy) error {
return err
}
}
title, description := strategy.GetTitleAndDescription()
n, err := e.writer.WriteString(title + " - " + description + "\n")
if err != nil {
log.Fatal(err)
}
if n == 0 {
log.Fatal("Nothing was written", n)
}
// print results
res.printCumulative()
stats.graphCmdFrequencyAsFuncOfRank()
e.data.Strategies = append(e.data.Strategies, strategyData)
return nil
}

@ -1,99 +0,0 @@
package main
import (
"bufio"
"fmt"
"log"
"math"
"strconv"
)
// results accumulates, per match distance, how often a strategy recalled the
// submitted command line and how many characters that saved. printCumulative
// renders the collected numbers to writer.
type results struct {
	// writer receives the textual report.
	writer *bufio.Writer
	// size is the number of distance buckets; distances >= size only count
	// towards the totals.
	size int
	// matches[N] -> # of matches at distance N
	matches []int
	// matchesTotal counts matches at any distance.
	matchesTotal int
	// charactersRecalled[N] -> summed command length of matches at distance N
	charactersRecalled []int
	// charactersRecalledTotal sums the command length of matches at any distance.
	charactersRecalledTotal int
	// dataPointCount is the number of recorded submissions (matches + misses);
	// it is the denominator of every ratio in the report.
	dataPointCount int
}

// init allocates the per-distance buckets; size must be set beforehand.
func (r *results) init() {
	r.matches = make([]int, r.size)
	r.charactersRecalled = make([]int, r.size)
}

// addMatch records a submission that was recalled at the given distance and
// whose command line is cmdLength characters long.
//
// Every call counts as one data point. Matches with distance >= size are not
// stored in a bucket but still contribute to the totals; previously such
// matches were left out of dataPointCount, which inflated every reported ratio
// (the TOTAL match percentage could exceed 100 %).
func (r *results) addMatch(distance int, cmdLength int) {
	r.dataPointCount++
	r.matchesTotal++
	r.charactersRecalledTotal += cmdLength

	if distance >= r.size {
		// --calculate-total: too far away for a bucket, totals only
		return
	}
	r.matches[distance]++
	r.charactersRecalled[distance] += cmdLength
}

// addMiss records a submission for which no candidate matched.
func (r *results) addMiss() {
	r.dataPointCount++
}
// printCumulative writes two text histograms to r.writer: the cumulative share
// of matches (in percent) and the cumulative number of characters recalled per
// submission, each listed for distances 0..size-1 and followed by a TOTAL row.
// Every row is followed by a bar of '#' characters, one per rounded unit.
// A write error terminates the program.
func (r *results) printCumulative() {
	total := float64(r.dataPointCount)

	// bar draws one '#' per rounded unit of value and terminates the line.
	bar := func(value float64) string {
		line := ""
		for left := int(math.Round(value)); left > 0; left-- {
			line += "#"
		}
		return line + "\n"
	}

	// emit writes one finished section followed by an empty line pair.
	emit := func(section string) {
		written, err := r.writer.WriteString(section + "\n\n")
		if err != nil {
			log.Fatal(err)
		}
		if written == 0 {
			log.Fatal("Nothing was written", written)
		}
	}

	// section 1: cumulative match percentage
	section := "### Matches ###\n"
	percent := 0.0
	for distance := 0; distance < r.size; distance++ {
		percent += 100 * float64(r.matches[distance]) / total
		section += strconv.Itoa(distance) + " ->"
		section += fmt.Sprintf(" (%.1f %%)\n", percent)
		section += bar(percent)
	}
	percent = 100 * float64(r.matchesTotal) / total
	section += "TOTAL ->"
	section += fmt.Sprintf(" (%.1f %%)\n", percent)
	section += bar(percent)
	emit(section)

	// section 2: cumulative characters recalled per submission
	section = "### Characters recalled per submission ###\n"
	recalled := 0.0
	for distance := 0; distance < r.size; distance++ {
		recalled += float64(r.charactersRecalled[distance]) / total
		section += strconv.Itoa(distance) + " ->"
		section += fmt.Sprintf(" (%.2f)\n", recalled)
		section += bar(recalled)
	}
	recalled = float64(r.charactersRecalledTotal) / total
	section += "TOTAL ->"
	section += fmt.Sprintf(" (%.2f)\n", recalled)
	section += bar(recalled)
	emit(section)
}

@ -1,117 +0,0 @@
package main
import (
"bytes"
"io/ioutil"
"log"
"sort"
"github.com/wcharczuk/go-chart"
)
// statistics counts how often each command line was submitted.
type statistics struct {
	//size int

	// dataPointCount is the number of addCmdLine calls.
	dataPointCount int
	// cmdLineCount maps a command line to its number of occurrences.
	cmdLineCount map[string]int
}

// init prepares the occurrence map; it must run before addCmdLine.
func (s *statistics) init() {
	s.cmdLineCount = map[string]int{}
}

// addCmdLine records one submission of cmdLine. cmdLength is accepted but not
// used.
func (s *statistics) addCmdLine(cmdLine string, cmdLength int) {
	s.cmdLineCount[cmdLine] += 1
	s.dataPointCount += 1
}
// graphCmdFrequencyAsFuncOfRank renders the frequency of the most common
// command lines as a function of their rank (1 = most frequent) and stores the
// chart as /tmp/resh-graph_cmdFrqAsFuncOfRank.png.
//
// At most the 100 most frequent command lines are plotted. Frequencies are
// normalized so that the most frequent command line has the value 1. Nothing is
// rendered when no command line has been recorded. A render error terminates
// the program; a failure to write the file is logged.
func (s *statistics) graphCmdFrequencyAsFuncOfRank() {
	const maxRank = 100 // cut off at rank 100
	graphName := "cmdFrqAsFuncOfRank"

	sortedValues := sortMapByvalue(s.cmdLineCount)
	if len(sortedValues) == 0 {
		// no data: there is no top entry to normalize against
		log.Println("no command lines recorded - skipping graph", graphName)
		return
	}
	if len(sortedValues) > maxRank {
		// slicing unconditionally would panic with fewer than maxRank entries
		sortedValues = sortedValues[:maxRank]
	}

	xValues := make([]float64, 0, len(sortedValues))
	yValues := make([]float64, 0, len(sortedValues))
	// scales the relative frequency so that the top-ranked entry maps to 1
	normalizeCoeficient := float64(s.dataPointCount) / float64(sortedValues[0].Value)
	for i, pair := range sortedValues {
		rank := i + 1
		frequency := float64(pair.Value) / float64(s.dataPointCount)
		normalizeFrequency := frequency * normalizeCoeficient
		xValues = append(xValues, float64(rank))
		yValues = append(yValues, normalizeFrequency)
	}

	graph := chart.Chart{
		XAxis: chart.XAxis{
			Style: chart.StyleShow(), //enables / displays the x-axis
			Ticks: []chart.Tick{
				{0.0, "0"},
				{1.0, "1"},
				{2.0, "2"},
				{3.0, "3"},
				{4.0, "4"},
				{5.0, "5"},
				{10.0, "10"},
				{15.0, "15"},
				{20.0, "20"},
				{25.0, "25"},
				{30.0, "30"},
				{35.0, "35"},
				{40.0, "40"},
				{45.0, "45"},
				{50.0, "50"},
			},
		},
		YAxis: chart.YAxis{
			AxisType: chart.YAxisSecondary,
			Style:    chart.StyleShow(), //enables / displays the y-axis
		},
		Series: []chart.Series{
			chart.ContinuousSeries{
				Style: chart.Style{
					Show:        true,
					StrokeColor: chart.GetDefaultColor(0).WithAlpha(64),
					FillColor:   chart.GetDefaultColor(0).WithAlpha(64),
					DotColor:    chart.GetDefaultColor(0),
					DotWidth:    3.0,
				},
				XValues: xValues,
				YValues: yValues,
			},
		},
	}

	buffer := bytes.NewBuffer([]byte{})
	err := graph.Render(chart.PNG, buffer)
	if err != nil {
		log.Fatal("chart.Render error:", err)
	}
	err = ioutil.WriteFile("/tmp/resh-graph_"+graphName+".png", buffer.Bytes(), 0644)
	if err != nil {
		// previously ignored; report it so a missing graph is explainable
		log.Println("graph WriteFile error:", err)
	}
}
// sortMapByvalue converts input into key/value pairs ordered by descending
// value. The relative order of pairs with equal values is unspecified.
func sortMapByvalue(input map[string]int) []Pair {
	pairs := make(PairList, 0, len(input))
	for key, value := range input {
		pairs = append(pairs, Pair{Key: key, Value: value})
	}
	// PairList orders ascending by value; reverse it to get the largest first
	sort.Sort(sort.Reverse(pairs))
	return pairs
}

// Pair - A data structure to hold key/value pairs
type Pair struct {
	Key   string
	Value int
}

// PairList - A slice of pairs that implements sort.Interface to sort by values
type PairList []Pair

// Len reports the number of pairs.
func (pl PairList) Len() int {
	return len(pl)
}

// Swap exchanges the pairs at positions a and b.
func (pl PairList) Swap(a, b int) {
	pl[a], pl[b] = pl[b], pl[a]
}

// Less reports whether the pair at a has a smaller value than the pair at b.
func (pl PairList) Less(a, b int) bool {
	return pl[a].Value < pl[b].Value
}

@ -1 +1 @@
1.1.1
1.1.2

Loading…
Cancel
Save