From 20e24bfa6e4f290db1ba3c7f7b6d21e5349777a3 Mon Sep 17 00:00:00 2001
From: Simon Let <simon.a.let@gmail.com>
Date: Fri, 20 Mar 2020 13:55:03 +0100
Subject: [PATCH] plot noncumulative

---
 scripts/resh-evaluate-plot.py | 178 +++++++++++++++++++++++++++++++++-
 1 file changed, 176 insertions(+), 2 deletions(-)

diff --git a/scripts/resh-evaluate-plot.py b/scripts/resh-evaluate-plot.py
index 6fb4ae7..c17dd44 100755
--- a/scripts/resh-evaluate-plot.py
+++ b/scripts/resh-evaluate-plot.py
@@ -596,6 +596,158 @@ def plot_strategies_charsRecalled_prefix(plot_size=50, selected_strategies=[]):
         plt.show()
 
 
+def plot_strategies_charsRecalled_noncummulative(plot_size=50, selected_strategies=("recent (bash-like)",), show_strat_title=False):
+    """Plot the average characters recalled at each exact distance.
+
+    The point at distance d only counts matches found at exactly d; averages
+    are taken over all data points of a strategy, matched or not. The data
+    must contain a strategy titled "recent", otherwise an assert fails.
+
+    plot_size -- largest distance shown on the x axis
+    selected_strategies -- titles of strategies to draw; empty draws all
+    show_strat_title -- when true, add a legend with the strategy titles
+    """
+    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
+    plt.title("Average characters recalled at distance (noncumulative) <{}>".format(datetime.now().strftime('%H:%M:%S')))
+    plt.ylabel("Average characters recalled")
+    plt.xlabel("Distance")
+    x_values = range(1, plot_size+1)
+    legend = []
+    saved_charsRecalled_total = None
+    saved_dataPoint_count = None
+    for strategy in data["Strategies"]:
+        strategy_title = strategy["Title"]
+        dataPoint_count = 0
+        charsRecalled = [0] * plot_size
+        charsRecalled_total = 0
+
+        for match in strategy["Matches"]:
+            dataPoint_count += 1
+            if not match["Match"]:
+                continue
+
+            chars = match["CharsRecalled"]
+            charsRecalled_total += chars
+
+            dist = match["Distance"]
+            # Skip distances outside the plot. A distance below 1 would
+            # otherwise index the list from its end and corrupt a bucket.
+            if not 1 <= dist <= plot_size:
+                continue
+
+            charsRecalled[dist-1] += chars
+
+        # recent is very simple strategy so we will believe
+        #       that there is no bug in it and we can use it to determine total
+        if strategy_title == "recent":
+            saved_charsRecalled_total = charsRecalled_total
+            saved_dataPoint_count = dataPoint_count
+
+        if len(selected_strategies) and strategy_title not in selected_strategies:
+            continue
+
+        # A strategy without data points has all-zero sums; avoid dividing by 0.
+        divisor = max(dataPoint_count, 1)
+        charsRecalled_average = [total / divisor for total in charsRecalled]
+
+        plt.plot(x_values, charsRecalled_average, 'o-')
+        legend.append(strategy_title)
+
+    assert(saved_charsRecalled_total is not None)
+    assert(saved_dataPoint_count is not None)
+    # max_values = [saved_charsRecalled_total / saved_dataPoint_count] * len(x_values)
+    # print("% >>> Max avg recalled characters = {}".format(max_values[0]))
+    # plt.plot(x_values, max_values, 'r-')
+    # legend.append("maximum possible")
+
+    x_ticks = list(range(1, plot_size+1, 2))
+    plt.xticks(x_ticks, x_ticks[:])
+    if show_strat_title:
+        plt.legend(legend, loc="best")
+    if async_draw:
+        plt.draw()
+    else:
+        plt.show()
+
+
+def plot_strategies_charsRecalled_prefix_noncummulative(plot_size=50, selected_strategies=("recent (bash-like)",), show_strat_title=False):
+    """Plot average characters recalled per exact distance, prefix matches included.
+
+    Each prefix match contributes its entries one by one; an entry adds only
+    the characters it recalls beyond the previous entry of the same match.
+    Averages are taken over all data points of a strategy, matched or not.
+    The data must contain a strategy titled "recent", otherwise an assert fails.
+
+    plot_size -- largest distance shown on the x axis
+    selected_strategies -- titles of strategies to draw; empty draws all
+    show_strat_title -- when true, add a legend with the strategy titles
+    """
+    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
+    plt.title("Average characters recalled at distance (including prefix matches) (noncumulative) <{}>".format(datetime.now().strftime('%H:%M:%S')))
+    plt.ylabel("Average characters recalled (including prefix matches)")
+    plt.xlabel("Distance")
+    x_values = range(1, plot_size+1)
+    legend = []
+    saved_charsRecalled_total = None
+    saved_dataPoint_count = None
+    for strategy in data["Strategies"]:
+        strategy_title = strategy["Title"]
+        dataPoint_count = 0
+        charsRecalled = [0] * plot_size
+        charsRecalled_total = 0
+
+        for multiMatch in strategy["PrefixMatches"]:
+            dataPoint_count += 1
+            if not multiMatch["Match"]:
+                continue
+
+            last_charsRecalled = 0
+            for match in multiMatch["Entries"]:
+                chars = match["CharsRecalled"]
+                charsIncrease = chars - last_charsRecalled
+                assert(charsIncrease > 0)
+                charsRecalled_total += charsIncrease
+                # Advance the baseline before the distance check so that an entry
+                # outside the plot is not counted again by the entries after it.
+                last_charsRecalled = chars
+                dist = match["Distance"]
+                if not 1 <= dist <= plot_size:  # a distance below 1 would index from the end
+                    continue
+                charsRecalled[dist-1] += charsIncrease
+
+        # recent is very simple strategy so we will believe
+        #       that there is no bug in it and we can use it to determine total
+        if strategy_title == "recent":
+            saved_charsRecalled_total = charsRecalled_total
+            saved_dataPoint_count = dataPoint_count
+
+        if len(selected_strategies) and strategy_title not in selected_strategies:
+            continue
+
+        # A strategy without data points has all-zero sums; avoid dividing by 0.
+        divisor = max(dataPoint_count, 1)
+        charsRecalled_average = [total / divisor for total in charsRecalled]
+
+        plt.plot(x_values, charsRecalled_average, 'o-')
+        legend.append(strategy_title)
+
+    assert(saved_charsRecalled_total is not None)
+    assert(saved_dataPoint_count is not None)
+    # max_values = [saved_charsRecalled_total / saved_dataPoint_count] * len(x_values)
+    # print("% >>> Max avg recalled characters (including prefix matches) = {}".format(max_values[0]))
+    # plt.plot(x_values, max_values, 'r-')
+    # legend.append("maximum possible")
+
+    x_ticks = list(range(1, plot_size+1, 2))
+    plt.xticks(x_ticks, x_ticks[:])
+    if show_strat_title:
+        plt.legend(legend, loc="best")
+    if async_draw:
+        plt.draw()
+    else:
+        plt.show()
+
+
 def print_top_cmds(num_cmds=20):
     cmd_count = defaultdict(int)
     cmd_total = 0
@@ -615,6 +767,7 @@ def print_top_cmds(num_cmds=20):
     # print(sorted_cmd_count)
     # cmds_to_graph = list(map(lambda x: x[0], sorted_cmd_count))[:cmd_count]
 
+
 def print_top_cmds_by_user(num_cmds=20):
     for user in DATA_records_by_user.items():
         name, records = user
@@ -635,10 +788,28 @@ def print_top_cmds_by_user(num_cmds=20):
         # print(sorted_cmd_count)
         # cmds_to_graph = list(map(lambda x: x[0], sorted_cmd_count))[:cmd_count]
 
+
+def print_avg_cmdline_length():
+    """Print the average length of all non-empty command lines.
+
+    Reads every record of every session in DATA_records_by_session. Prints
+    an average of 0 when there is no non-empty command line at all.
+    """
+    cmd_lengths = [
+        len(record["cmdLine"])
+        for session in DATA_records_by_session.values()
+        for record in session
+        if record["cmdLine"] != ""
+    ]
+    cmd_len_avg = sum(cmd_lengths) / len(cmd_lengths) if cmd_lengths else 0
+    print("% ALL avg cmdline = {}".format(cmd_len_avg))
+
+
 # plot_cmdLineFrq_rank()
-plot_cmdFrq_rank()
+# plot_cmdFrq_rank()
 print_top_cmds(30)
 print_top_cmds_by_user(30)
+print_avg_cmdline_length()
 #         
 # plot_cmdLineVocabularySize_cmdLinesEntered()
 # plot_cmdVocabularySize_cmdLinesEntered()
@@ -646,7 +817,10 @@ print_top_cmds_by_user(30)
 # plot_strategies_matches(20)
 # plot_strategies_charsRecalled(20)
 # plot_strategies_charsRecalled_prefix(20)
-# 
+recent_strats=("recent", "recent (bash-like)")
+# plot_strategies_charsRecalled_noncummulative(20, selected_strategies=recent_strats)
+plot_strategies_charsRecalled_noncummulative(20)
+plot_strategies_charsRecalled_prefix_noncummulative(20)
 # graph_cmdSequences(node_count=33, edge_minValue=0.048)
 # 
 # graph_cmdSequences(node_count=28, edge_minValue=0.06)