diff --git a/common/resh-common.go b/common/resh-common.go index e23fe52..69fb7cf 100644 --- a/common/resh-common.go +++ b/common/resh-common.go @@ -224,7 +224,6 @@ func (r *Record) Enrich() { // Validate - returns error if the record is invalid func (r *Record) Validate() error { - return nil } diff --git a/evaluate/resh-evaluate-plot.py b/evaluate/resh-evaluate-plot.py index cfb7624..c82db98 100755 --- a/evaluate/resh-evaluate-plot.py +++ b/evaluate/resh-evaluate-plot.py @@ -16,6 +16,24 @@ PLOT_HEIGHT = 7 # inches PLOT_SIZE_zipf = 20 data = json.load(sys.stdin) + +DATA_records = [] +DATA_records_by_session = defaultdict(list) +for user in data["UsersRecords"]: + for device in user["Devices"]: + for record in device["Records"]: + if record["invalid"]: + continue + + DATA_records.append(record) + DATA_records_by_session[record["sessionPid"]].append(record) + +DATA_records = list(sorted(DATA_records, key=lambda x: x["realtimeBeforeLocal"])) + +for pid, session in DATA_records_by_session.items(): + session = list(sorted(session, key=lambda x: x["realtimeBeforeLocal"])) + + # for strategy in data["Strategies"]: # print(json.dumps(strategy)) @@ -33,10 +51,7 @@ def trim(text, length, add_elipse=True): # Figure 3.1. The normalized command frequency, compared with Zipf. def plot_cmdLineFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False): cmdLine_count = defaultdict(int) - for record in data["Records"]: - if record["invalid"]: - continue - + for record in DATA_records: cmdLine_count[record["cmdLine"]] += 1 tmp = sorted(cmdLine_count.items(), key=lambda x: x[1], reverse=True)[:plotSize] @@ -60,10 +75,7 @@ def plot_cmdLineFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False): # similar to ~ Figure 3.1. The normalized command frequency, compared with Zipf. def plot_cmdFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False): cmd_count = defaultdict(int) - for record in data["Records"]: - if record["invalid"]: - continue - + for record in DATA_records: cmd = record["firstWord"] if cmd == "": continue @@ -90,10 +102,7 @@ def plot_cmdFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False): def plot_cmdVocabularySize_cmdLinesEntered(): cmd_vocabulary = set() y_cmd_count = [0] - for record in data["Records"]: - if record["invalid"]: - continue - + for record in DATA_records: cmd = record["firstWord"] if cmd in cmd_vocabulary: # repeat last value @@ -103,7 +112,7 @@ def plot_cmdVocabularySize_cmdLinesEntered(): # append last value +1 y_cmd_count.append(y_cmd_count[-1] + 1) - print(cmd_vocabulary) + # print(cmd_vocabulary) x_cmds_entered = range(0, len(y_cmd_count)) plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) @@ -118,23 +127,27 @@ def plot_cmdVocabularySize_cmdLinesEntered(): # Ball diameters are proportional to stationary probability. Lines indicate significant dependencies, # solid ones being more probable (p < .0001) and dashed ones less probable (.005 < p < .0001). def graph_cmdSequences(node_count=33, edge_minValue=0.05): + START_CMD = "_start_" cmd_count = defaultdict(int) cmdSeq_count = defaultdict(lambda: defaultdict(int)) cmd_id = dict() - prev_cmd = "" # XXX: not actually session init yet - cmd_id[prev_cmd] = str(-1) - for x, record in enumerate(data["Records"]): - if record["invalid"]: - continue - - cmd = record["firstWord"] - cmdSeq_count[prev_cmd][cmd] += 1 - cmd_count[cmd] += 1 - cmd_id[cmd] = str(x) - prev_cmd = cmd + x = 0 + cmd_id[START_CMD] = str(x) + for pid, session in DATA_records_by_session.items(): + cmd_count[START_CMD] += 1 + prev_cmd = START_CMD + for record in session: + cmd = record["firstWord"] + cmdSeq_count[prev_cmd][cmd] += 1 + cmd_count[cmd] += 1 + if cmd not in cmd_id: + x += 1 + cmd_id[cmd] = str(x) + prev_cmd = cmd # get `node_count` of largest nodes sorted_cmd_count = sorted(cmd_count.items(), key=lambda x: x[1], reverse=True) + print(sorted_cmd_count) cmds_to_graph = list(map(lambda x: x[0], sorted_cmd_count))[:node_count] # use 3 biggest nodes as a reference point for scaling @@ -298,13 +311,15 @@ def plot_strategy_recency(): +# plot_cmdLineFrq_rank() +# plot_cmdFrq_rank() +# plot_cmdVocabularySize_cmdLinesEntered() + # plot_strategy_recency() -graph_cmdSequences(node_count=28, edge_minValue=0.06) -# plot_cmdVocabularySize_cmdLinesEntered() -# plot_cmdLineFrq_rank() -# plot_cmdFrq_rank() +graph_cmdSequences() +# graph_cmdSequences(node_count=28, edge_minValue=0.06) # be careful and check if labels fit the display \ No newline at end of file diff --git a/evaluate/resh-evaluate.go b/evaluate/resh-evaluate.go index 3917a8b..07bb34c 100644 --- a/evaluate/resh-evaluate.go +++ b/evaluate/resh-evaluate.go @@ -6,6 +6,7 @@ import ( "encoding/json" "flag" "fmt" + "io/ioutil" "log" "os" "os/exec" @@ -25,30 +26,25 @@ func main() { usr, _ := user.Current() dir := usr.HomeDir historyPath := filepath.Join(dir, ".resh_history.json") + historyPathBatchMode := filepath.Join(dir, "resh_history.json") sanitizedHistoryPath := filepath.Join(dir, "resh_history_sanitized.json") // tmpPath := "/tmp/resh-evaluate-tmp.json" showVersion := flag.Bool("version", false, "Show version and exit") showRevision := flag.Bool("revision", false, "Show git revision and exit") - inputPath := flag.String("input", "", + input := flag.String("input", "", "Input file (default: "+historyPath+"OR"+sanitizedHistoryPath+ " depending on --sanitized-input option)") // outputDir := flag.String("output", "/tmp/resh-evaluate", "Output directory") sanitizedInput := flag.Bool("sanitized-input", false, "Handle input as sanitized (also changes default value for input argument)") plottingScript := flag.String("plotting-script", "resh-evaluate-plot.py", "Script to use for plotting") + inputDataRoot := flag.String("input-data-root", "", + "Input data root, enables batch mode, looks for files matching --input option") flag.Parse() - // set default input - if *inputPath == "" { - if *sanitizedInput { - *inputPath = sanitizedHistoryPath - } else { - *inputPath = historyPath - } - } - + // handle show{Version,Revision} options if *showVersion == true { fmt.Println(Version) os.Exit(0) @@ -58,10 +54,33 @@ func main() { os.Exit(0) } - evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: 50} - err := evaluator.init(*inputPath) - if err != nil { - log.Fatal("Evaluator init() error:", err) + // handle batch mode + batchMode := false + if *inputDataRoot != "" { + batchMode = true + } + // set default input + if *input == "" { + if *sanitizedInput { + *input = sanitizedHistoryPath + } else if batchMode { + *input = historyPathBatchMode + } else { + *input = historyPath + } + } + + evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: 50, BatchMode: batchMode} + if batchMode { + err := evaluator.initBatchMode(*input, *inputDataRoot) + if err != nil { + log.Fatal("Evaluator initBatchMode() error:", err) + } + } else { + err := evaluator.init(*input) + if err != nil { + log.Fatal("Evaluator init() error:", err) + } } var strategies []strategy @@ -73,12 +92,11 @@ func main() { strategies = append(strategies, &recent) for _, strat := range strategies { - err = evaluator.evaluate(strat) + err := evaluator.evaluate(strat) if err != nil { log.Println("Evaluator evaluate() error:", err) } } - // evaluator.dumpJSON(tmpPath) evaluator.calculateStatsAndPlot(*plottingScript) } @@ -102,26 +120,42 @@ type strategyJSON struct { Matches []matchJSON } -type evaluateJSON struct { - Strategies []strategyJSON - Records []common.Record +type deviceRecords struct { + Name string + Records []common.Record +} + +type userRecords struct { + Name string + Devices []deviceRecords } type evaluator struct { sanitizedInput bool + BatchMode bool maxCandidates int - historyRecords []common.Record - data evaluateJSON + UsersRecords []userRecords + Strategies []strategyJSON +} + +func (e *evaluator) initBatchMode(input string, inputDataRoot string) error { + e.UsersRecords = e.loadHistoryRecordsBatchMode(input, inputDataRoot) + e.processRecords() + return nil } func (e *evaluator) init(inputPath string) error { - e.historyRecords = e.loadHistoryRecords(inputPath) + records := e.loadHistoryRecords(inputPath) + device := deviceRecords{Records: records} + user := userRecords{} + user.Devices = append(user.Devices, device) + e.UsersRecords = append(e.UsersRecords, user) e.processRecords() return nil } func (e *evaluator) calculateStatsAndPlot(scriptName string) { - evalJSON, err := json.Marshal(e.data) + evalJSON, err := json.Marshal(e) if err != nil { log.Fatal("json marshal error", err) } @@ -140,25 +174,28 @@ func (e *evaluator) calculateStatsAndPlot(scriptName string) { // enrich records and add them to serializable structure func (e *evaluator) processRecords() { - for _, record := range e.historyRecords { + for i := range e.UsersRecords { + for j := range e.UsersRecords[i].Devices { + for k, record := range e.UsersRecords[i].Devices[j].Records { + // assert + if record.Sanitized != e.sanitizedInput { + if e.sanitizedInput { + log.Fatal("ASSERT failed: '--sanitized-input' is present but data is not sanitized") + } + log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present") + } - // assert - if record.Sanitized != e.sanitizedInput { - if e.sanitizedInput { - log.Fatal("ASSERT failed: '--sanitized-input' is present but data is not sanitized") + e.UsersRecords[i].Devices[j].Records[k].Enrich() + // device.Records = append(device.Records, record) } - log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present") } - - record.Enrich() - e.data.Records = append(e.data.Records, record) } } func (e *evaluator) evaluate(strategy strategy) error { title, description := strategy.GetTitleAndDescription() strategyData := strategyJSON{Title: title, Description: description} - for _, record := range e.historyRecords { + for _, record := range e.UsersRecords[0].Devices[0].Records { candidates := strategy.GetCandidates() matchFound := false @@ -183,10 +220,67 @@ func (e *evaluator) evaluate(strategy strategy) error { return err } } - e.data.Strategies = append(e.data.Strategies, strategyData) + e.Strategies = append(e.Strategies, strategyData) return nil } +func (e *evaluator) loadHistoryRecordsBatchMode(fname string, dataRootPath string) []userRecords { + var records []userRecords + info, err := os.Stat(dataRootPath) + if err != nil { + log.Fatal("Error: Directory", dataRootPath, "does not exist - exiting! (", err, ")") + } + if info.IsDir() == false { + log.Fatal("Error:", dataRootPath, "is not a directory - exiting!") + } + users, err := ioutil.ReadDir(dataRootPath) + if err != nil { + log.Fatal("Could not read directory:", dataRootPath) + } + fmt.Println("Listing users in <", dataRootPath, ">...") + for _, user := range users { + userRecords := userRecords{Name: user.Name()} + userFullPath := filepath.Join(dataRootPath, user.Name()) + if user.IsDir() == false { + log.Println("Warn: Unexpected file (not a directory) <", userFullPath, "> - skipping.") + continue + } + fmt.Println() + fmt.Printf("*- %s\n", user.Name()) + devices, err := ioutil.ReadDir(userFullPath) + if err != nil { + log.Fatal("Could not read directory:", userFullPath) + } + for _, device := range devices { + deviceRecords := deviceRecords{Name: device.Name()} + deviceFullPath := filepath.Join(userFullPath, device.Name()) + if device.IsDir() == false { + log.Println("Warn: Unexpected file (not a directory) <", deviceFullPath, "> - skipping.") + continue + } + fmt.Printf(" \\- %s\n", device.Name()) + files, err := ioutil.ReadDir(deviceFullPath) + if err != nil { + log.Fatal("Could not read directory:", deviceFullPath) + } + for _, file := range files { + fileFullPath := filepath.Join(deviceFullPath, file.Name()) + if file.Name() == fname { + fmt.Printf(" \\- %s - loading ...", file.Name()) + // load the data + deviceRecords.Records = e.loadHistoryRecords(fileFullPath) + fmt.Println(" OK ✓") + } else { + fmt.Printf(" \\- %s - skipped\n", file.Name()) + } + } + userRecords.Devices = append(userRecords.Devices, deviceRecords) + } + records = append(records, userRecords) + } + return records +} + func (e *evaluator) loadHistoryRecords(fname string) []common.Record { file, err := os.Open(fname) if err != nil {