Merge pull request #13 from curusarn/dev_2

sanitization release
evaluation progress
pull/15/head
Šimon Let 6 years ago committed by GitHub
commit 22a0bf3f5e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      .gitignore
  2. 51
      Makefile
  3. 7
      README.md
  4. 3
      collect/resh-collect.go
  5. 180
      common/resh-common.go
  6. 8
      daemon/resh-daemon.go
  7. 438
      evaluate/resh-evaluate-plot.py
  8. 340
      evaluate/resh-evaluate.go
  9. 42
      evaluate/strategy-directory-sensitive.go
  10. 24
      evaluate/strategy-dummy.go
  11. 47
      evaluate/strategy-frequent.go
  12. 32
      evaluate/strategy-recent.go
  13. 9
      go.mod
  14. 11
      go.sum
  15. 424
      sanitize-history/resh-sanitize-history.go
  16. 7
      sanitizer_data/copyright_information.md
  17. 1195
      sanitizer_data/whitelist.txt
  18. 15
      shellrc.sh
  19. 2
      version

2
.gitignore vendored

@ -1,2 +1,4 @@
resh-collect
resh-daemon
resh-sanitize-history
resh-evaluate

@ -6,8 +6,43 @@ GOFLAGS=-ldflags "-X main.Version=${VERSION} -X main.Revision=${REVISION}"
autoinstall:
./install_helper.sh
# sanitize: runs resh-sanitize-history twice and writes two files into the home directory:
#   ~/resh_history_sanitized.json        (-trim-hashes 0)
#   ~/resh_history_sanitized_trim12.json (-trim-hashes 12)
# resh-sanitize-history is invoked by bare name, so it has to be reachable through PATH.
# NOTE(review): the `#` lines inside the recipe look like they double as user-facing output
# (make echoes recipe lines that are not prefixed with `@`) - confirm before rewording them.
sanitize:
#
#
# I'm going to create a sanitized version of your resh history.
# Everything is done locally - your history won't leave this machine.
# The way this works is that any sensitive information in your history is going to be replaced with its SHA1 hash.
# There is also going to be a second version with hashes trimed to 12 characters for readability
#
#
# > full hashes: ~/resh_history_sanitized.json
# > 12 char hashes: ~/resh_history_sanitized_trim12.json
#
#
# Encountered any issues? Got questions? -> Hit me up at https://github.com/curusarn/resh/issues
#
#
# Running history sanitization ...
resh-sanitize-history -trim-hashes 0 --output ~/resh_history_sanitized.json
resh-sanitize-history -trim-hashes 12 --output ~/resh_history_sanitized_trim12.json
#
#
# SUCCESS - ALL DONE!
#
#
# PLEASE HAVE A LOOK AT THE RESULT USING THESE COMMANDS:
#
# > pretty print JSON:
@echo 'cat ~/resh_history_sanitized_trim12.json | jq'
#
# > only show executed commands, don't show metadata:
@echo "cat ~/resh_history_sanitized_trim12.json | jq '.[\"cmdLine\"]'"
#
#
#
build: submodules resh-collect resh-daemon
build: submodules resh-collect resh-daemon resh-sanitize-history resh-evaluate
rebuild:
make clean
@ -23,6 +58,8 @@ install: build submodules/bash-preexec/bash-preexec.sh shellrc.sh config.toml uu
cp -f shellrc.sh ~/.resh/shellrc
cp -f uuid.sh ~/.resh/bin/resh-uuid
cp -f resh-* ~/.resh/bin/
cp -f evaluate/resh-evaluate-plot.py ~/.resh/bin/
cp -fr sanitizer_data ~/.resh/
# backward compatibility: We have a new location for resh history file
[ ! -f ~/.resh/history.json ] || mv ~/.resh/history.json ~/.resh_history.json
# Adding resh shellrc to .bashrc ...
@ -40,24 +77,31 @@ install: build submodules/bash-preexec/bash-preexec.sh shellrc.sh config.toml uu
# Final touch
touch ~/.resh_history.json
#
#
#
##########################################################
# #
# SUCCESS - thank you for trying out this project! #
# #
##########################################################
#
#
# WHAT'S NEXT
# Please RESTART ALL OPEN TERMINAL WINDOWS (or reload your rc files)
# Your resh history is located in `~/.resh_history.json`
# You can look at it using e.g. `tail -f ~/.resh_history.json | jq`
#
#
# ISSUES
# If anything looks broken create an issue: https://github.com/curusarn/resh/issues
# You can uninstall this at any time by running `rm -rf ~/.resh/`
# You won't lose any collected history by removing `~/.resh` directory
#
#
# Please give me some contact info using this form: https://forms.gle/227SoyJ5c2iteKt98
#
#
#
uninstall:
# Uninstalling ...
@ -69,6 +113,11 @@ resh-daemon: daemon/resh-daemon.go common/resh-common.go version
# Build the resh-collect binary. Only the first prerequisite ($<) is handed to `go build`;
# common/resh-common.go and the version file are listed so that changing them triggers a rebuild.
resh-collect: collect/resh-collect.go common/resh-common.go version
go build ${GOFLAGS} -o $@ $<
# Build the resh-sanitize-history binary. Only the first prerequisite ($<) is handed to `go build`;
# common/resh-common.go and the version file are listed so that changing them triggers a rebuild.
resh-sanitize-history: sanitize-history/resh-sanitize-history.go common/resh-common.go version
go build ${GOFLAGS} -o $@ $<
# Build the resh-evaluate binary from the main file ($<) plus every evaluate/strategy-*.go file.
# The glob is repeated in the recipe because $< names only the first prerequisite.
resh-evaluate: evaluate/resh-evaluate.go evaluate/strategy-*.go common/resh-common.go version
go build ${GOFLAGS} -o $@ $< evaluate/strategy-*.go
$(HOME)/.resh $(HOME)/.resh/bin $(HOME)/.config:
# Creating dirs ...

@ -4,7 +4,7 @@
This project is the first phase of my Master project.
It records shell history with rich set of metadata and saves it locally. (device, dir, git, ... see example below)
It records shell history with rich set of metadata and saves it locally. (device, directory, git, time, terminal session pid, ... see example below)
It doesn't change the way your shell and your shell history behaves.
@ -17,8 +17,9 @@ If you are not happy with it you can uninstall it with a single command (`rm -rf
The ultimate point of my thesis is to provide a context-based replacement/enhancement for bash and zsh shell history.
The idea is to:
- Save each command with metadata (device, dir, gitdir, ...)
- Recommend history based on saved metadata (e.g. it will be easier to get to commands specific to your project)
- Save each command with metadata (device, directory, git, time, terminal session pid, ... see example below)
- Recommend history based on saved metadata
- e.g. it will be easier to get to commands specific to the project you are currently working on (based on directory, git repository url, ...)
- Provide a simple way to search whole history by command itself and/or metadata (e.g. imagine searching by project, directory, device, ...)
- Synchronize history across devices
- Provide an API (to make the project easily extensible)

@ -20,7 +20,10 @@ import (
"strings"
)
// Version from git set during build
// (the Makefile's GOFLAGS pass it to the linker as `-X main.Version=${VERSION}`)
var Version string
// Revision from git set during build
// (the Makefile's GOFLAGS pass it to the linker as `-X main.Revision=${REVISION}`)
var Revision string
func main() {

@ -1,5 +1,13 @@
package common
import (
"log"
"strconv"
"github.com/mattn/go-shellwords"
)
// Record representing single executed command with its metadata
type Record struct {
// core
CmdLine string `json:"cmdLine"`
@ -60,8 +68,180 @@ type Record struct {
ReshUuid string `json:"reshUuid"`
ReshVersion string `json:"reshVersion"`
ReshRevision string `json:"reshRevision"`
// added by sanitizatizer
Sanitized bool `json:"sanitized"`
CmdLength int `json:"cmdLength,omitempty"`
// enriching fields - added "later"
FirstWord string `json:"firstWord,omitempty"`
Invalid bool `json:"invalid,omitempty"`
SeqSessionID uint64 `json:"seqSessionID,omitempty"`
}
// FallbackRecord when record is too old and can't be parsed into regular Record
//
// It mirrors the fields of Record; the difference is that Cols and Lines are
// decoded as int here. Use ConvertRecord to turn a FallbackRecord into a Record.
type FallbackRecord struct {
// older version of the record where cols and lines are int
// core
CmdLine string `json:"cmdLine"`
ExitCode int `json:"exitCode"`
Shell string `json:"shell"`
Uname string `json:"uname"`
SessionId string `json:"sessionId"`
// posix
Cols int `json:"cols"` // notice the int type
Lines int `json:"lines"` // notice the int type
Home string `json:"home"`
Lang string `json:"lang"`
LcAll string `json:"lcAll"`
Login string `json:"login"`
//Path string `json:"path"`
Pwd string `json:"pwd"`
PwdAfter string `json:"pwdAfter"`
ShellEnv string `json:"shellEnv"`
Term string `json:"term"`
// non-posix
RealPwd string `json:"realPwd"`
RealPwdAfter string `json:"realPwdAfter"`
Pid int `json:"pid"`
SessionPid int `json:"sessionPid"`
Host string `json:"host"`
Hosttype string `json:"hosttype"`
Ostype string `json:"ostype"`
Machtype string `json:"machtype"`
Shlvl int `json:"shlvl"`
// before after
TimezoneBefore string `json:"timezoneBefore"`
TimezoneAfter string `json:"timezoneAfter"`
RealtimeBefore float64 `json:"realtimeBefore"`
RealtimeAfter float64 `json:"realtimeAfter"`
RealtimeBeforeLocal float64 `json:"realtimeBeforeLocal"`
RealtimeAfterLocal float64 `json:"realtimeAfterLocal"`
RealtimeDuration float64 `json:"realtimeDuration"`
RealtimeSinceSessionStart float64 `json:"realtimeSinceSessionStart"`
RealtimeSinceBoot float64 `json:"realtimeSinceBoot"`
//Logs []string `json: "logs"`
GitDir string `json:"gitDir"`
GitRealDir string `json:"gitRealDir"`
GitOriginRemote string `json:"gitOriginRemote"`
MachineId string `json:"machineId"`
OsReleaseId string `json:"osReleaseId"`
OsReleaseVersionId string `json:"osReleaseVersionId"`
OsReleaseIdLike string `json:"osReleaseIdLike"`
OsReleaseName string `json:"osReleaseName"`
OsReleasePrettyName string `json:"osReleasePrettyName"`
ReshUuid string `json:"reshUuid"`
ReshVersion string `json:"reshVersion"`
ReshRevision string `json:"reshRevision"`
}
// ConvertRecord from FallbackRecord to Record
//
// Every field is copied as-is except Cols and Lines, which are converted from
// int to their decimal string form. Fields of Record that are not listed in
// the literal below keep their zero values.
func ConvertRecord(r *FallbackRecord) Record {
return Record{
// core
CmdLine: r.CmdLine,
ExitCode: r.ExitCode,
Shell: r.Shell,
Uname: r.Uname,
SessionId: r.SessionId,
// posix
// these two lines are the only reason we are doing this
Cols: strconv.Itoa(r.Cols),
Lines: strconv.Itoa(r.Lines),
Home: r.Home,
Lang: r.Lang,
LcAll: r.LcAll,
Login: r.Login,
// Path: r.path,
Pwd: r.Pwd,
PwdAfter: r.PwdAfter,
ShellEnv: r.ShellEnv,
Term: r.Term,
// non-posix
RealPwd: r.RealPwd,
RealPwdAfter: r.RealPwdAfter,
Pid: r.Pid,
SessionPid: r.SessionPid,
Host: r.Host,
Hosttype: r.Hosttype,
Ostype: r.Ostype,
Machtype: r.Machtype,
Shlvl: r.Shlvl,
// before after
TimezoneBefore: r.TimezoneBefore,
TimezoneAfter: r.TimezoneAfter,
RealtimeBefore: r.RealtimeBefore,
RealtimeAfter: r.RealtimeAfter,
RealtimeBeforeLocal: r.RealtimeBeforeLocal,
RealtimeAfterLocal: r.RealtimeAfterLocal,
RealtimeDuration: r.RealtimeDuration,
RealtimeSinceSessionStart: r.RealtimeSinceSessionStart,
RealtimeSinceBoot: r.RealtimeSinceBoot,
GitDir: r.GitDir,
GitRealDir: r.GitRealDir,
GitOriginRemote: r.GitOriginRemote,
MachineId: r.MachineId,
OsReleaseId: r.OsReleaseId,
OsReleaseVersionId: r.OsReleaseVersionId,
OsReleaseIdLike: r.OsReleaseIdLike,
OsReleaseName: r.OsReleaseName,
OsReleasePrettyName: r.OsReleasePrettyName,
ReshUuid: r.ReshUuid,
ReshVersion: r.ReshVersion,
ReshRevision: r.ReshRevision,
}
}
// Enrich - adds additional fields to the record
//
// FirstWord is set to the first word of CmdLine (see GetCommandFromCommandLine)
// and Invalid is set to true exactly when Validate reports an error.
func (r *Record) Enrich() {
	// Get command/first word from commandline
	r.FirstWord = GetCommandFromCommandLine(r.CmdLine)

	// Start from "valid" and flip the flag only when validation fails.
	// The reset has to happen BEFORE the check: resetting it afterwards
	// would overwrite the result of a failed validation.
	r.Invalid = false
	if err := r.Validate(); err != nil {
		log.Println("Invalid command:", r.CmdLine)
		r.Invalid = true
	}
	// TODO: Detect and mark simple commands r.Simple
}
// Validate - returns error if the record is invalid
// NOTE: no checks are implemented yet - nil is returned for every record.
func (r *Record) Validate() error {
return nil
}
// GetCommandFromCommandLine returns the first word of cmdLine as split by
// shellwords.Parse. It returns "<error>" (and logs the problem) when the
// command line cannot be parsed and "" when parsing yields no words.
func GetCommandFromCommandLine(cmdLine string) string {
	words, parseErr := shellwords.Parse(cmdLine)
	switch {
	case parseErr != nil:
		log.Println("shellwords Error:", parseErr, " (cmdLine: <", cmdLine, "> )")
		return "<error>"
	case len(words) == 0:
		return ""
	}
	return words[0]
}
// Config struct
// NOTE(review): presumably decoded from the project's config.toml and Port is the
// port the daemon listens on - confirm against the code that loads it.
type Config struct {
Port int
}

@ -3,8 +3,6 @@ package main
import (
"encoding/json"
//"flag"
"github.com/BurntSushi/toml"
common "github.com/curusarn/resh/common"
"io/ioutil"
"log"
"net/http"
@ -14,9 +12,15 @@ import (
"path/filepath"
"strconv"
"strings"
"github.com/BurntSushi/toml"
common "github.com/curusarn/resh/common"
)
// Version from git set during build
// (the Makefile's GOFLAGS pass it to the linker as `-X main.Version=${VERSION}`)
var Version string
// Revision from git set during build
// (the Makefile's GOFLAGS pass it to the linker as `-X main.Revision=${REVISION}`)
var Revision string
func main() {

@ -0,0 +1,438 @@
#!/usr/bin/env python3
import traceback
import sys
import json
from collections import defaultdict
import matplotlib.pyplot as plt
import matplotlib.path as mpath
import numpy as np
from graphviz import Digraph
PLOT_WIDTH = 10 # inches
PLOT_HEIGHT = 7 # inches

PLOT_SIZE_zipf = 20

# The evaluator pipes its whole state to this script as one JSON document on stdin.
data = json.load(sys.stdin)

# DATA_records: every valid record of every user/device, ordered by "realtimeAfterLocal".
# DATA_records_by_session: the same records grouped by "sessionId", each group ordered by time.
DATA_records = []
DATA_records_by_session = defaultdict(list)
for user in data["UsersRecords"]:
    for device in user["Devices"]:
        for record in device["Records"]:
            # "invalid" is serialized with `omitempty`, so it is absent for valid records
            if record.get("invalid", False):
                continue

            DATA_records.append(record)
            DATA_records_by_session[record["sessionId"]].append(record)

DATA_records = list(sorted(DATA_records, key=lambda x: x["realtimeAfterLocal"]))

# Sort each session in place. Rebinding the loop variable to a sorted copy
# would leave the lists stored in the dictionary unsorted.
for session in DATA_records_by_session.values():
    session.sort(key=lambda x: x["realtimeAfterLocal"])

# TODO: this should be a cmdline option
async_draw = True

# for strategy in data["Strategies"]:
# print(json.dumps(strategy))
def zipf(length):
    # Reference curve for the rank plots: 1, 1/2, 1/4, ... (1 / 2**i for i in 0..length-1).
    # NOTE(review): this is a geometric sequence rather than the 1/rank sequence
    # usually meant by "Zipf" - confirm that this is the intended reference curve.
    return [1 / 2**exponent for exponent in range(length)]
def trim(text, length, add_elipse=True):
    # Shorten `text` to at most `length` characters.
    # When `add_elipse` is set and the text had to be shortened, the last kept
    # character is replaced by a single ellipsis character so that the reader
    # can tell the label was cut; the result is still `length` characters long.
    if add_elipse and len(text) > length:
        return text[:length-1] + "…"
    return text[:length]
# Figure 3.1. The normalized command frequency, compared with Zipf.
# Plots the `plotSize` most frequent command lines; frequencies are divided by
# the count of the most frequent one. `show_labels` puts trimmed command lines on the x axis.
def plot_cmdLineFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False):
    occurrences = defaultdict(int)
    for rec in DATA_records:
        occurrences[rec["cmdLine"]] += 1

    # most frequent first, cut to the requested size
    top = sorted(occurrences.items(), key=lambda item: item[1], reverse=True)[:plotSize]
    cmdLineFrq = [count / top[0][1] for _, count in top]
    labels = [trim(cmdLine, 7) for cmdLine, _ in top]
    ranks = range(1, len(cmdLineFrq)+1)

    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
    plt.plot(ranks, zipf(len(ranks)), '-')
    plt.plot(ranks, cmdLineFrq, 'o-')
    plt.title("Commandline frequency / rank")
    plt.ylabel("Normalized commandline frequency")
    plt.xlabel("Commandline rank")
    plt.legend(("Zipf", "Commandline"), loc="best")
    if show_labels:
        plt.xticks(ranks, labels, rotation=-60)
    # TODO: make xticks integral
    if not async_draw:
        plt.show()
    else:
        plt.draw()
# similar to ~ Figure 3.1. The normalized command frequency, compared with Zipf.
# Plots the `plotSize` most frequent commands (first words); frequencies are divided by
# the count of the most frequent one. `show_labels` puts trimmed commands on the x axis.
def plot_cmdFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False):
    cmd_count = defaultdict(int)
    for record in DATA_records:
        # "firstWord" is serialized with `omitempty`: an empty first word is
        # missing from the JSON instead of being present as ""
        cmd = record.get("firstWord", "")
        if cmd == "":
            continue
        cmd_count[cmd] += 1

    tmp = sorted(cmd_count.items(), key=lambda x: x[1], reverse=True)[:plotSize]
    cmdFrq = list(map(lambda x: x[1] / tmp[0][1], tmp))
    labels = list(map(lambda x: trim(x[0], 7), tmp))
    ranks = range(1, len(cmdFrq)+1)

    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
    plt.plot(ranks, zipf(len(ranks)), 'o-')
    plt.plot(ranks, cmdFrq, 'o-')
    plt.title("Command frequency / rank")
    plt.ylabel("Normalized command frequency")
    plt.xlabel("Command rank")
    plt.legend(("Zipf", "Command"), loc="best")
    if show_labels:
        plt.xticks(ranks, labels, rotation=-60)
    # TODO: make xticks integral
    if async_draw:
        plt.draw()
    else:
        plt.show()
# Figure 3.2. Command vocabulary size vs. the number of command lines entered for four individuals.
# Walks DATA_records in order and plots how many distinct commands (first words)
# have been seen after each command line.
def plot_cmdVocabularySize_cmdLinesEntered():
    cmd_vocabulary = set()
    y_cmd_count = [0]
    for record in DATA_records:
        # "firstWord" is serialized with `omitempty`, so it can be missing from the record
        cmd = record.get("firstWord", "")
        if cmd in cmd_vocabulary:
            # repeat last value
            y_cmd_count.append(y_cmd_count[-1])
        else:
            cmd_vocabulary.add(cmd)
            # append last value +1
            y_cmd_count.append(y_cmd_count[-1] + 1)

    # print(cmd_vocabulary)
    x_cmds_entered = range(0, len(y_cmd_count))

    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
    plt.plot(x_cmds_entered, y_cmd_count, '-')
    plt.title("Command vocabulary size vs. the number of command lines entered")
    plt.ylabel("Command vocabulary size")
    plt.xlabel("# of command lines entered")
    if async_draw:
        plt.draw()
    else:
        plt.show()
# Figure 5.6. Command line vocabulary size vs. the number of commands entered for four typical individuals.
# Walks DATA_records in order and plots how many distinct command lines
# have been seen after each command line.
def plot_cmdLineVocabularySize_cmdLinesEntered():
    seen_cmdLines = set()
    vocabulary_sizes = [0]
    for rec in DATA_records:
        seen_cmdLines.add(rec["cmdLine"])
        # the set only grows when the command line is new, so its size is
        # the running vocabulary size
        vocabulary_sizes.append(len(seen_cmdLines))

    # print(seen_cmdLines)
    entered = range(len(vocabulary_sizes))

    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
    plt.plot(entered, vocabulary_sizes, '-')
    plt.title("Command line vocabulary size vs. the number of command lines entered")
    plt.ylabel("Command line vocabulary size")
    plt.xlabel("# of command lines entered")
    if not async_draw:
        plt.show()
    else:
        plt.draw()
# Figure 3.3. Sequential structure of UNIX command usage, from Figure 4 in Hanson et al. (1984).
# Ball diameters are proportional to stationary probability. Lines indicate significant dependencies,
# solid ones being more probable (p < .0001) and dashed ones less probable (.005 < p < .0001).
#
# node_count    - only the `node_count` most frequent commands become nodes
# edge_minValue - an edge cmd -> cmd2 is drawn only if cmd2 follows cmd in at least
#                 this fraction of the occurrences of cmd
def graph_cmdSequences(node_count=33, edge_minValue=0.05):
    START_CMD = "_start_"
    cmd_count = defaultdict(int)
    cmdSeq_count = defaultdict(lambda: defaultdict(int))
    cmd_id = dict()
    x = 0
    cmd_id[START_CMD] = str(x)
    for pid, session in DATA_records_by_session.items():
        # every session starts at the artificial start node
        cmd_count[START_CMD] += 1
        prev_cmd = START_CMD
        for record in session:
            # "firstWord" is serialized with `omitempty`, so it can be missing from the record
            cmd = record.get("firstWord", "")
            cmdSeq_count[prev_cmd][cmd] += 1
            cmd_count[cmd] += 1
            if cmd not in cmd_id:
                x += 1
                cmd_id[cmd] = str(x)
            prev_cmd = cmd

    # get `node_count` of largest nodes
    sorted_cmd_count = sorted(cmd_count.items(), key=lambda x: x[1], reverse=True)
    print(sorted_cmd_count)
    cmds_to_graph = list(map(lambda x: x[0], sorted_cmd_count))[:node_count]
    if not cmds_to_graph:
        # nothing to draw (no sessions or node_count == 0)
        return
    # membership is tested for every edge below
    cmds_to_graph_set = set(cmds_to_graph)

    # use (up to) 3 biggest nodes as a reference point for scaling;
    # with exactly three nodes this is 3 / (1st + 2nd + 3rd)
    reference_counts = [cmd_count[cmd] for cmd in cmds_to_graph[:3]]
    count2scale_coef = len(reference_counts) / sum(reference_counts)

    # scaling constant
    # affects node size and node label
    base_scaling_factor = 21
    # extra scaling for experiments - not really useful imho
    # affects everything nodes, edges, node labels, treshold for turning label into xlabel, xlabel size, ...
    extra_scaling_factor = 1.0
    for attempt in range(0, 10):
        # graphviz is not the most reliable piece of software
        # -> retry on fail but scale nodes down by 1%
        scaling_factor = base_scaling_factor * (1 - attempt * 0.01)

        # overlap: scale -> solve overlap by scaling the graph
        # overlap_shrink -> try to shrink the graph a bit after you are done
        # splines -> don't draw edges over nodes
        # sep -> extra margin kept around nodes while overlaps are removed (0.25 here)
        graph_attr={'overlap':'scale', 'overlap_shrink':'true',
                    'splines':'true', 'sep':'0.25'}
        graph = Digraph(name='command_sequentiality', engine='neato', graph_attr=graph_attr)

        # iterate over all nodes
        for cmd in cmds_to_graph:
            seq = cmdSeq_count[cmd]
            count = cmd_count[cmd]

            # iterate over all "following" commands (for each node)
            for seq_entry in seq.items():
                cmd2, seq_count = seq_entry
                relative_seq_count = seq_count / count

                # check if "follow" command is supposed to be in the graph
                if cmd2 not in cmds_to_graph_set:
                    continue
                # check if the edge value is high enough
                if relative_seq_count < edge_minValue:
                    continue

                # create starting node and end node for the edge
                # duplicates don't matter
                for id_, cmd_ in ((cmd_id[cmd], cmd), (cmd_id[cmd2], cmd2)):
                    count_ = cmd_count[cmd_]
                    scale_ = count_ * count2scale_coef * scaling_factor * extra_scaling_factor
                    width_ = 0.08 * scale_
                    fontsize_ = 8.5 * scale_ / (len(cmd_) + 3)

                    width_ = str(width_)
                    if fontsize_ < 12 * extra_scaling_factor:
                        # label would be too small to read inside the node -> draw it next to the node
                        graph.node(id_, ' ', shape='circle', fixedsize='true', fontname='monospace bold',
                                   width=width_, fontsize=str(12 * extra_scaling_factor), forcelabels='true', xlabel=cmd_)
                    else:
                        fontsize_ = str(fontsize_)
                        graph.node(id_, cmd_, shape='circle', fixedsize='true', fontname='monospace bold',
                                   width=width_, fontsize=fontsize_, forcelabels='true', labelloc='c')

                # value of the edge (percentage) 1.0 is max
                scale_ = seq_count / cmd_count[cmd]
                penwidth_ = str((0.5 + 4.5 * scale_) * extra_scaling_factor)
                #penwidth_bold_ = str(8 * scale_)
                if scale_ > 0.5:
                    graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='true', splines='curved',
                               penwidth=penwidth_, style='bold')
                elif scale_ > 0.2:
                    graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='true', splines='curved',
                               penwidth=penwidth_, arrowhead='open')
                elif scale_ > 0.1:
                    graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='true', splines='curved',
                               penwidth=penwidth_, style='dashed', arrowhead='open')
                else:
                    graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', splines='curved',
                               penwidth=penwidth_, style='dotted', arrowhead='empty')

        # graphviz sometimes fails - see above
        try:
            graph.view()
            # graph.render('/tmp/resh-graphviz-cmdSeq.gv', view=True)
            break
        except Exception as e:
            trace = traceback.format_exc()
            print("GRAPHVIZ EXCEPTION: <{}>\nGRAPHVIZ TRACE: <{}>".format(str(e), trace))
# Plots, for every strategy, the cumulative percentage of records that were matched
# within a given distance (1..plot_size), plus a horizontal "maximum possible" line.
#
# plot_size           - largest distance shown on the x axis
# selected_strategies - titles of the strategies to plot; empty means "all"
#
# The "maximum possible" line is derived from the strategy titled "recent", which
# therefore has to be present in the data even when it is not selected for plotting.
def plot_strategies_matches(plot_size=50, selected_strategies=()):
    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
    plt.title("Matches at distance")
    plt.ylabel('%' + " of matches")
    plt.xlabel("Distance")
    legend = []
    x_values = range(1, plot_size+1)
    saved_matches_total = None
    saved_dataPoint_count = None
    for strategy in data["Strategies"] or []:
        strategy_title = strategy["Title"]
        # strategy_description = strategy["Description"]

        dataPoint_count = 0
        matches = [0] * plot_size
        matches_total = 0

        # a strategy without any data points is serialized as `null`
        for match in strategy["Matches"] or []:
            dataPoint_count += 1

            if not match["Match"]:
                continue

            matches_total += 1
            dist = match["Distance"]
            if dist > plot_size:
                continue

            matches[dist-1] += 1

        # recent is very simple strategy so we will believe
        # that there is no bug in it and we can use it to determine total
        # (this has to happen before the selection filter below, otherwise the
        # totals are never collected when "recent" is not selected)
        if strategy_title == "recent":
            saved_matches_total = matches_total
            saved_dataPoint_count = dataPoint_count

        if len(selected_strategies) and strategy_title not in selected_strategies:
            continue

        if dataPoint_count == 0:
            # nothing to plot and nothing to divide by
            continue

        acc = 0
        matches_cumulative = []
        for x in matches:
            acc += x
            matches_cumulative.append(acc)
        # matches_cumulative.append(matches_total)
        matches_percent = list(map(lambda x: 100 * x / dataPoint_count, matches_cumulative))

        plt.plot(x_values, matches_percent, 'o-')
        legend.append(strategy_title)

    assert(saved_matches_total is not None)
    assert(saved_dataPoint_count is not None)
    if saved_dataPoint_count:
        max_values = [100 * saved_matches_total / saved_dataPoint_count] * len(x_values)
        plt.plot(x_values, max_values, 'r-')
        legend.append("maximum possible")

    x_ticks = list(range(1, plot_size+1, 2))
    x_labels = x_ticks[:]
    plt.xticks(x_ticks, x_labels)
    plt.legend(legend, loc="best")
    if async_draw:
        plt.draw()
    else:
        plt.show()
# Plots, for every strategy, the cumulative number of recalled characters within a given
# distance (1..plot_size) divided by the number of data points, plus a horizontal
# "maximum possible" line.
#
# plot_size           - largest distance shown on the x axis
# selected_strategies - titles of the strategies to plot; empty means "all"
#
# The "maximum possible" line is derived from the strategy titled "recent", which
# therefore has to be present in the data even when it is not selected for plotting.
def plot_strategies_charsRecalled(plot_size=50, selected_strategies=()):
    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
    plt.title("Average characters recalled at distance")
    plt.ylabel("Average characters recalled")
    plt.xlabel("Distance")
    x_values = range(1, plot_size+1)
    legend = []
    saved_charsRecalled_total = None
    saved_dataPoint_count = None
    for strategy in data["Strategies"] or []:
        strategy_title = strategy["Title"]
        # strategy_description = strategy["Description"]

        dataPoint_count = 0
        charsRecalled = [0] * plot_size
        charsRecalled_total = 0

        # a strategy without any data points is serialized as `null`
        for match in strategy["Matches"] or []:
            dataPoint_count += 1

            if not match["Match"]:
                continue

            chars = match["CharsRecalled"]
            charsRecalled_total += chars
            dist = match["Distance"]
            if dist > plot_size:
                continue

            charsRecalled[dist-1] += chars

        # recent is very simple strategy so we will believe
        # that there is no bug in it and we can use it to determine total
        if strategy_title == "recent":
            saved_charsRecalled_total = charsRecalled_total
            saved_dataPoint_count = dataPoint_count

        if len(selected_strategies) and strategy_title not in selected_strategies:
            continue

        if dataPoint_count == 0:
            # nothing to plot and nothing to divide by
            continue

        acc = 0
        charsRecalled_cumulative = []
        for x in charsRecalled:
            acc += x
            charsRecalled_cumulative.append(acc)
        charsRecalled_average = list(map(lambda x: x / dataPoint_count, charsRecalled_cumulative))

        plt.plot(x_values, charsRecalled_average, 'o-')
        legend.append(strategy_title)

    assert(saved_charsRecalled_total is not None)
    assert(saved_dataPoint_count is not None)
    if saved_dataPoint_count:
        max_values = [saved_charsRecalled_total / saved_dataPoint_count] * len(x_values)
        plt.plot(x_values, max_values, 'r-')
        legend.append("maximum possible")

    x_ticks = list(range(1, plot_size+1, 2))
    x_labels = x_ticks[:]
    plt.xticks(x_ticks, x_labels)
    plt.legend(legend, loc="best")
    if async_draw:
        plt.draw()
    else:
        plt.show()
# Script entry point: render the command-sequence graph, then create all figures.
# graph_cmdSequences(node_count=33, edge_minValue=0.05)
graph_cmdSequences(node_count=28, edge_minValue=0.06)
plot_cmdLineFrq_rank()
plot_cmdFrq_rank()
plot_cmdLineVocabularySize_cmdLinesEntered()
plot_cmdVocabularySize_cmdLinesEntered()
plot_strategies_matches(20)
plot_strategies_charsRecalled(20)
# with async_draw the plot functions only call plt.draw(); show all figures at once here
if async_draw:
    plt.show()
# be careful and check if labels fit the display

@ -0,0 +1,340 @@
package main
import (
"bufio"
"bytes"
"encoding/json"
"flag"
"fmt"
"io/ioutil"
"log"
"os"
"os/exec"
"os/user"
"path/filepath"
"sort"
"github.com/curusarn/resh/common"
)
// Version from git set during build
// (the Makefile's GOFLAGS pass it to the linker as `-X main.Version=${VERSION}`)
var Version string
// Revision from git set during build
// (the Makefile's GOFLAGS pass it to the linker as `-X main.Revision=${REVISION}`)
var Revision string
// main parses the command line, loads the history (a single file, or a whole
// directory tree in batch mode), runs every strategy over it and finally
// hands the collected results to the plotting script.
func main() {
	usr, err := user.Current()
	if err != nil {
		// without the current user there is no home directory to build the default paths from
		log.Fatal("Could not determine current user:", err)
	}
	dir := usr.HomeDir
	historyPath := filepath.Join(dir, ".resh_history.json")
	historyPathBatchMode := filepath.Join(dir, "resh_history.json")
	sanitizedHistoryPath := filepath.Join(dir, "resh_history_sanitized.json")
	// tmpPath := "/tmp/resh-evaluate-tmp.json"

	showVersion := flag.Bool("version", false, "Show version and exit")
	showRevision := flag.Bool("revision", false, "Show git revision and exit")
	input := flag.String("input", "",
		"Input file (default: "+historyPath+" OR "+sanitizedHistoryPath+
			" depending on --sanitized-input option)")
	// outputDir := flag.String("output", "/tmp/resh-evaluate", "Output directory")
	sanitizedInput := flag.Bool("sanitized-input", false,
		"Handle input as sanitized (also changes default value for input argument)")
	plottingScript := flag.String("plotting-script", "resh-evaluate-plot.py", "Script to use for plotting")
	inputDataRoot := flag.String("input-data-root", "",
		"Input data root, enables batch mode, looks for files matching --input option")

	flag.Parse()

	// handle show{Version,Revision} options
	if *showVersion {
		fmt.Println(Version)
		os.Exit(0)
	}
	if *showRevision {
		fmt.Println(Revision)
		os.Exit(0)
	}

	// handle batch mode
	batchMode := *inputDataRoot != ""

	// set default input
	if *input == "" {
		if *sanitizedInput {
			*input = sanitizedHistoryPath
		} else if batchMode {
			*input = historyPathBatchMode
		} else {
			*input = historyPath
		}
	}

	ev := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: 50, BatchMode: batchMode}
	if batchMode {
		err := ev.initBatchMode(*input, *inputDataRoot)
		if err != nil {
			log.Fatal("Evaluator initBatchMode() error:", err)
		}
	} else {
		err := ev.init(*input)
		if err != nil {
			log.Fatal("Evaluator init() error:", err)
		}
	}

	var strategies []strategy

	// dummy := strategyDummy{}
	// strategies = append(strategies, &dummy)

	recent := strategyRecent{}
	frequent := strategyFrequent{}
	frequent.init()
	directory := strategyDirectorySensitive{}
	directory.init()

	strategies = append(strategies, &recent, &frequent, &directory)

	for _, strat := range strategies {
		// a failing strategy is reported but does not stop the remaining ones
		err := ev.evaluate(strat)
		if err != nil {
			log.Println("Evaluator evaluate() error:", err)
		}
	}

	ev.calculateStatsAndPlot(*plottingScript)
}
// strategy is a history recommendation strategy that the evaluator can score.
type strategy interface {
// GetTitleAndDescription returns the title and the description stored with the results.
GetTitleAndDescription() (string, string)
// GetCandidates returns the current candidate command lines; the evaluator
// reports the (1-based) position of the first matching candidate as the distance.
GetCandidates() []string
// AddHistoryRecord is called by the evaluator with each record after it was scored.
AddHistoryRecord(record *common.Record) error
ResetHistory() error
}
// matchJSON is the outcome of scoring one record against a strategy.
// The zero value is what the evaluator stores when no candidate matched.
type matchJSON struct {
Match bool // true if one of the candidates equaled the command line
Distance int // 1-based position of the matching candidate
CharsRecalled int // CmdLength of the matched record
}
// strategyJSON holds the results of one strategy: its title and description
// plus one matchJSON entry per evaluated record.
type strategyJSON struct {
Title string
Description string
Matches []matchJSON
}
// deviceRecords holds the history records loaded for one device.
// Name is the device directory name in batch mode and is left empty otherwise.
type deviceRecords struct {
Name string
Records []common.Record
}
// userRecords groups the devices of one user.
// Name is the user directory name in batch mode and is left empty otherwise.
type userRecords struct {
Name string
Devices []deviceRecords
}
// evaluator holds the loaded history and the results of the evaluated strategies.
// The whole struct is passed to json.Marshal for the plotting script; only the
// exported fields (BatchMode, UsersRecords, Strategies) end up in that JSON.
type evaluator struct {
sanitizedInput bool // whether the input is expected to be sanitized
BatchMode bool
maxCandidates int
UsersRecords []userRecords
Strategies []strategyJSON
}
// initBatchMode loads the history files named `input` found under
// `inputDataRoot` and post-processes the loaded records.
// It always returns nil; loading problems terminate the program instead.
func (e *evaluator) initBatchMode(input string, inputDataRoot string) error {
	loaded := e.loadHistoryRecordsBatchMode(input, inputDataRoot)
	e.UsersRecords = loaded
	e.processRecords()
	return nil
}
// init loads a single history file as the only device of a single unnamed
// user and post-processes the loaded records.
// It always returns nil; loading problems terminate the program instead.
func (e *evaluator) init(inputPath string) error {
	soleDevice := deviceRecords{Records: e.loadHistoryRecords(inputPath)}
	soleUser := userRecords{Devices: []deviceRecords{soleDevice}}
	e.UsersRecords = append(e.UsersRecords, soleUser)
	e.processRecords()
	return nil
}
// calculateStatsAndPlot serializes the evaluator to JSON and feeds it to the
// standard input of `scriptName`. The script's stdout and stderr are forwarded
// to ours. A marshalling failure is fatal, a failing script is only logged.
func (e *evaluator) calculateStatsAndPlot(scriptName string) {
	payload, marshalErr := json.Marshal(e)
	if marshalErr != nil {
		log.Fatal("json marshal error", marshalErr)
	}

	// run python script to stat and plot/
	plot := exec.Command(scriptName)
	plot.Stdin = bytes.NewBuffer(payload)
	plot.Stdout = os.Stdout
	plot.Stderr = os.Stderr
	if runErr := plot.Run(); runErr != nil {
		log.Printf("Command finished with error: %v", runErr)
	}
}
// enrich records and add them to serializable structure
//
// For every device the records get a sequential session id (sessions are
// numbered 0, 1, 2, ... in order of first appearance), are checked against the
// '--sanitized-input' setting, enriched, and finally sorted by session id and
// then by RealtimeAfterLocal.
func (e *evaluator) processRecords() {
	for i := range e.UsersRecords {
		for j := range e.UsersRecords[i].Devices {
			records := e.UsersRecords[i].Devices[j].Records
			sessionIDs := map[string]uint64{}
			var nextID uint64
			for k := range records {
				// work on the slice element itself - a copy taken by `range`
				// would silently drop the SeqSessionID assigned below
				record := &records[k]
				id, found := sessionIDs[record.SessionId]
				if !found {
					id = nextID
					sessionIDs[record.SessionId] = id
					nextID++
				}
				record.SeqSessionID = id
				// assert
				if record.Sanitized != e.sanitizedInput {
					if e.sanitizedInput {
						log.Fatal("ASSERT failed: '--sanitized-input' is present but data is not sanitized")
					}
					log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present")
				}
				record.Enrich()
			}
			// sorting in place is enough: `records` shares its backing array
			// with the slice stored in the evaluator
			sort.SliceStable(records, func(x, y int) bool {
				if records[x].SeqSessionID == records[y].SeqSessionID {
					return records[x].RealtimeAfterLocal < records[y].RealtimeAfterLocal
				}
				return records[x].SeqSessionID < records[y].SeqSessionID
			})
		}
	}
}
// evaluate replays the records of the first device of the first user against
// the strategy. For every record it asks the strategy for candidates, stores
// whether (and at which 1-based distance) the record's command line was among
// them, and only then feeds the record to the strategy.
// The collected results are appended to e.Strategies.
//
// An error is returned when there is no data to evaluate or when the strategy
// rejects a record; in both cases nothing is appended to e.Strategies.
func (e *evaluator) evaluate(strategy strategy) error {
	title, description := strategy.GetTitleAndDescription()
	strategyData := strategyJSON{Title: title, Description: description}
	if len(e.UsersRecords) == 0 || len(e.UsersRecords[0].Devices) == 0 {
		// batch mode can legitimately end up without any user/device
		return fmt.Errorf("no records to evaluate strategy '%s' on", title)
	}
	records := e.UsersRecords[0].Devices[0].Records
	for idx := range records {
		// fresh copy per iteration: the strategy gets a pointer that is neither
		// shared between iterations nor pointing into the evaluator's own data
		record := records[idx]
		candidates := strategy.GetCandidates()

		matchFound := false
		for i, candidate := range candidates {
			// make an option (--calculate-total) to turn this on/off ?
			// if i >= e.maxCandidates {
			// break
			// }
			if candidate == record.CmdLine {
				match := matchJSON{Match: true, Distance: i + 1, CharsRecalled: record.CmdLength}
				strategyData.Matches = append(strategyData.Matches, match)
				matchFound = true
				break
			}
		}
		if !matchFound {
			strategyData.Matches = append(strategyData.Matches, matchJSON{})
		}
		err := strategy.AddHistoryRecord(&record)
		if err != nil {
			log.Println("Error while evauating", err)
			return err
		}
	}
	e.Strategies = append(e.Strategies, strategyData)
	return nil
}
// loadHistoryRecordsBatchMode walks a `dataRootPath/<user>/<device>/` directory
// tree and, for every device directory, loads the file called `fname`.
// Entries that are not directories are skipped with a warning on the user and
// device level; other files inside a device directory are reported as skipped.
// Progress is printed to stdout. Unreadable directories terminate the program.
func (e *evaluator) loadHistoryRecordsBatchMode(fname string, dataRootPath string) []userRecords {
	var loaded []userRecords
	rootInfo, err := os.Stat(dataRootPath)
	if err != nil {
		log.Fatal("Error: Directory", dataRootPath, "does not exist - exiting! (", err, ")")
	}
	if !rootInfo.IsDir() {
		log.Fatal("Error:", dataRootPath, "is not a directory - exiting!")
	}
	userDirs, err := ioutil.ReadDir(dataRootPath)
	if err != nil {
		log.Fatal("Could not read directory:", dataRootPath)
	}
	fmt.Println("Listing users in <", dataRootPath, ">...")
	for _, userDir := range userDirs {
		userPath := filepath.Join(dataRootPath, userDir.Name())
		if !userDir.IsDir() {
			log.Println("Warn: Unexpected file (not a directory) <", userPath, "> - skipping.")
			continue
		}
		usr := userRecords{Name: userDir.Name()}
		fmt.Println()
		fmt.Printf("*- %s\n", userDir.Name())
		deviceDirs, err := ioutil.ReadDir(userPath)
		if err != nil {
			log.Fatal("Could not read directory:", userPath)
		}
		for _, deviceDir := range deviceDirs {
			devicePath := filepath.Join(userPath, deviceDir.Name())
			if !deviceDir.IsDir() {
				log.Println("Warn: Unexpected file (not a directory) <", devicePath, "> - skipping.")
				continue
			}
			dev := deviceRecords{Name: deviceDir.Name()}
			fmt.Printf(" \\- %s\n", deviceDir.Name())
			entries, err := ioutil.ReadDir(devicePath)
			if err != nil {
				log.Fatal("Could not read directory:", devicePath)
			}
			for _, entry := range entries {
				if entry.Name() != fname {
					fmt.Printf(" \\- %s - skipped\n", entry.Name())
					continue
				}
				fmt.Printf(" \\- %s - loading ...", entry.Name())
				// load the data
				dev.Records = e.loadHistoryRecords(filepath.Join(devicePath, entry.Name()))
				fmt.Println(" OK ✓")
			}
			usr.Devices = append(usr.Devices, dev)
		}
		loaded = append(loaded, usr)
	}
	return loaded
}
// loadHistoryRecords reads a history file that holds one JSON record per line.
// Lines that do not decode into common.Record are retried as
// common.FallbackRecord and converted. For raw (non-sanitized) input CmdLength
// is filled in from the length of CmdLine.
//
// Any problem (file can't be opened or read, a line can't be decoded, the
// cmdLength assertions fail) terminates the program.
func (e *evaluator) loadHistoryRecords(fname string) []common.Record {
	file, err := os.Open(fname)
	if err != nil {
		log.Fatal("Open() resh history file error:", err)
	}
	defer file.Close()

	var records []common.Record
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		record := common.Record{}
		fallbackRecord := common.FallbackRecord{}
		line := scanner.Text()
		err = json.Unmarshal([]byte(line), &record)
		if err != nil {
			// the line might be a record in the older format
			err = json.Unmarshal([]byte(line), &fallbackRecord)
			if err != nil {
				log.Println("Line:", line)
				log.Fatal("Decoding error:", err)
			}
			record = common.ConvertRecord(&fallbackRecord)
		}
		if !e.sanitizedInput {
			if record.CmdLength != 0 {
				log.Fatal("Assert failed - 'cmdLength' is set in raw data. Maybe you want to use '--sanitized-input' option?")
			}
			record.CmdLength = len(record.CmdLine)
		}
		if record.CmdLength == 0 {
			log.Fatal("Assert failed - 'cmdLength' is unset in the data. This should not happen.")
		}
		records = append(records, record)
	}
	// Scan() also returns false on read errors and on lines longer than the
	// scanner's buffer - without this check the rest of the file would be
	// dropped silently.
	if err := scanner.Err(); err != nil {
		log.Fatal("Reading resh history file error:", err)
	}
	return records
}

@ -0,0 +1,42 @@
package main
import (
"github.com/curusarn/resh/common"
)
// strategyDirectorySensitive suggests recent commands that were recorded in
// the directory the user ended up in after the last command.
type strategyDirectorySensitive struct {
	// history maps a working directory (record.Pwd) to the command lines
	// recorded there, most recently added first
	history map[string][]string
	// lastPwd is the PwdAfter of the most recently added record
	lastPwd string
}
// init allocates an empty per-directory history map.
func (s *strategyDirectorySensitive) init() {
	s.history = make(map[string][]string)
}
// GetTitleAndDescription returns the title of the strategy followed by its
// one-line description.
func (s *strategyDirectorySensitive) GetTitleAndDescription() (string, string) {
	const (
		title       = "directory sensitive (recent)"
		description = "Use recent commands executed is the same directory"
	)
	return title, description
}
// GetCandidates returns the commands recorded for the directory stored in
// lastPwd; the result is nil when nothing was recorded for that directory.
func (s *strategyDirectorySensitive) GetCandidates() []string {
	candidates := s.history[s.lastPwd]
	return candidates
}
// AddHistoryRecord moves record.CmdLine to the front of the history kept for
// record.Pwd and remembers record.PwdAfter as the directory that
// GetCandidates will use next. It always returns nil.
func (s *strategyDirectorySensitive) AddHistoryRecord(record *common.Record) error {
	// keep the zero value usable even if init() was not called
	if s.history == nil {
		s.history = map[string][]string{}
	}
	// work on history for PWD
	pwd := record.Pwd
	previous := s.history[pwd]

	// Build a fresh slice instead of deleting in place while ranging:
	// the new record goes first, followed by every other command in the
	// original order. This also leaves slices that were already handed out by
	// GetCandidates untouched.
	updated := make([]string, 0, len(previous)+1)
	updated = append(updated, record.CmdLine)
	for _, cmd := range previous {
		if cmd != record.CmdLine {
			updated = append(updated, cmd)
		}
	}
	s.history[pwd] = updated
	s.lastPwd = record.PwdAfter
	return nil
}
// ResetHistory replaces the per-directory history with a new empty map.
// lastPwd is left as it is. It always returns nil.
func (s *strategyDirectorySensitive) ResetHistory() error {
	s.history = make(map[string][]string)
	return nil
}

@ -0,0 +1,24 @@
package main
import "github.com/curusarn/resh/common"
// strategyDummy is a strategy that never suggests anything.
type strategyDummy struct {
	// history collects the command lines passed to AddHistoryRecord
	history []string
}
// GetTitleAndDescription returns the title of the strategy followed by its
// one-line description.
func (s *strategyDummy) GetTitleAndDescription() (string, string) {
	const (
		title       = "dummy"
		description = "Return empty candidate list"
	)
	return title, description
}
// GetCandidates always returns a nil candidate list.
func (s *strategyDummy) GetCandidates() []string {
	var noCandidates []string
	return noCandidates
}
// AddHistoryRecord appends record.CmdLine to the end of the stored history.
// It always returns nil.
func (s *strategyDummy) AddHistoryRecord(record *common.Record) error {
	cmdLine := record.CmdLine
	s.history = append(s.history, cmdLine)
	return nil
}
// ResetHistory drops everything collected by AddHistoryRecord so the stored
// history does not keep growing across resets. It always returns nil.
func (s *strategyDummy) ResetHistory() error {
	s.history = nil
	return nil
}

@ -0,0 +1,47 @@
package main
import (
"sort"
"github.com/curusarn/resh/common"
)
// strategyFrequent suggests commands ordered by how often they were recorded.
type strategyFrequent struct {
	// history maps a command line to the number of times it was added
	history map[string]int
}
// strFrqEntry is a single (command line, count) pair; it is used to sort the
// content of the strategyFrequent history map.
type strFrqEntry struct {
	// cmdLine is the command line text
	cmdLine string
	// count is the number of times cmdLine was recorded
	count int
}
// init allocates an empty command counter map.
func (s *strategyFrequent) init() {
	s.history = make(map[string]int)
}
// GetTitleAndDescription returns the title of the strategy followed by its
// one-line description.
func (s *strategyFrequent) GetTitleAndDescription() (string, string) {
	const (
		title       = "frequent"
		description = "Use frequent commands"
	)
	return title, description
}
// GetCandidates returns all recorded command lines ordered from the most
// frequent to the least frequent one. Commands with the same count are
// ordered by their text so that repeated runs give the same result.
// It returns nil when nothing has been recorded.
func (s *strategyFrequent) GetCandidates() []string {
	if len(s.history) == 0 {
		return nil
	}
	entries := make([]strFrqEntry, 0, len(s.history))
	for cmdLine, count := range s.history {
		entries = append(entries, strFrqEntry{cmdLine, count})
	}
	sort.Slice(entries, func(i int, j int) bool {
		if entries[i].count != entries[j].count {
			return entries[i].count > entries[j].count
		}
		// tie-break: map iteration order is random and sort.Slice is not
		// stable, so equal counts would otherwise come out in random order
		return entries[i].cmdLine < entries[j].cmdLine
	})
	hist := make([]string, 0, len(entries))
	for _, entry := range entries {
		hist = append(hist, entry.cmdLine)
	}
	return hist
}
// AddHistoryRecord increases the counter kept for record.CmdLine by one.
// It always returns nil.
func (s *strategyFrequent) AddHistoryRecord(record *common.Record) error {
	// writing to a nil map panics - keep the zero value usable even if
	// init() was not called
	if s.history == nil {
		s.history = map[string]int{}
	}
	s.history[record.CmdLine]++
	return nil
}
// ResetHistory replaces the command counters with a new empty map.
// It always returns nil.
func (s *strategyFrequent) ResetHistory() error {
	s.history = make(map[string]int)
	return nil
}

@ -0,0 +1,32 @@
package main
import "github.com/curusarn/resh/common"
// strategyRecent suggests commands ordered from the most recently added one.
type strategyRecent struct {
	// history holds the recorded command lines, most recently added first
	history []string
}
// GetTitleAndDescription returns the title of the strategy followed by its
// one-line description.
func (s *strategyRecent) GetTitleAndDescription() (string, string) {
	const (
		title       = "recent"
		description = "Use recent commands"
	)
	return title, description
}
// GetCandidates returns the stored history slice as it is.
func (s *strategyRecent) GetCandidates() []string {
	candidates := s.history
	return candidates
}
// AddHistoryRecord moves record.CmdLine to the front of the history.
// It always returns nil.
func (s *strategyRecent) AddHistoryRecord(record *common.Record) error {
	// Build a fresh slice instead of deleting in place while ranging:
	// the new record goes first, followed by every other command in the
	// original order. This also leaves slices that were already handed out by
	// GetCandidates untouched.
	updated := make([]string, 0, len(s.history)+1)
	updated = append(updated, record.CmdLine)
	for _, cmd := range s.history {
		if cmd != record.CmdLine {
			updated = append(updated, cmd)
		}
	}
	s.history = updated
	return nil
}
// ResetHistory forgets all recorded commands by setting the history to nil.
// It always returns nil.
func (s *strategyRecent) ResetHistory() error {
	var empty []string
	s.history = empty
	return nil
}

@ -2,4 +2,11 @@ module github.com/curusarn/resh
go 1.12
require github.com/BurntSushi/toml v0.3.1
require (
github.com/BurntSushi/toml v0.3.1
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect
github.com/mattn/go-shellwords v1.0.6
github.com/wcharczuk/go-chart v2.0.1+incompatible
github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa
golang.org/x/image v0.0.0-20190902063713-cb417be4ba39 // indirect
)

@ -1,2 +1,13 @@
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
github.com/mattn/go-shellwords v1.0.6 h1:9Jok5pILi5S1MnDirGVTufYGtksUs/V2BWUP3ZkeUUI=
github.com/mattn/go-shellwords v1.0.6/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o=
github.com/wcharczuk/go-chart v2.0.1+incompatible h1:0pz39ZAycJFF7ju/1mepnk26RLVLBCWz1STcD3doU0A=
github.com/wcharczuk/go-chart v2.0.1+incompatible/go.mod h1:PF5tmL4EIx/7Wf+hEkpCqYi5He4u90sw+0+6FhrryuE=
github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa h1:rW+Lu6281ed/4XGuVIa4/YebTRNvoUJlfJ44ktEVwZk=
github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa/go.mod h1:2rx5KE5FLD0HRfkkpyn8JwbVLBdhgeiOb2D2D9LLKM4=
golang.org/x/image v0.0.0-20190902063713-cb417be4ba39 h1:4dQcAORh9oYBwVSBVIkP489LUPC+f1HBkTYXgmqfR+o=
golang.org/x/image v0.0.0-20190902063713-cb417be4ba39/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

@ -0,0 +1,424 @@
package main
import (
"bufio"
"crypto/sha1"
"encoding/binary"
"encoding/hex"
"encoding/json"
"errors"
"flag"
"fmt"
"log"
"net/url"
"os"
"os/user"
"path"
"path/filepath"
"strconv"
"strings"
"unicode"
"github.com/curusarn/resh/common"
giturls "github.com/whilp/git-urls"
)
// Build information; printed by the -version and -revision flags.
var (
	// Version from git set during build
	Version string
	// Revision from git set during build
	Revision string
)
// main sanitizes a resh history file.
//
// It reads the input file (default: ~/.resh_history.json) line by line,
// decodes each line as a record, sanitizes it using the whitelist found in
// ~/.resh/sanitizer_data and writes the result as one JSON record per line to
// the output file (or to stdout when no output file is given).
// Every error terminates the process with a non-zero exit status.
func main() {
	usr, err := user.Current()
	if err != nil {
		// usr is nil in this case - bail out instead of dereferencing it
		log.Fatal("Could not determine current user: ", err)
	}
	dir := usr.HomeDir
	historyPath := filepath.Join(dir, ".resh_history.json")
	sanitizerDataPath := filepath.Join(dir, ".resh", "sanitizer_data")

	showVersion := flag.Bool("version", false, "Show version and exit")
	showRevision := flag.Bool("revision", false, "Show git revision and exit")
	trimHashes := flag.Int("trim-hashes", 12, "Trim hashes to N characters, '0' turns off trimming")
	inputPath := flag.String("input", historyPath, "Input file")
	outputPath := flag.String("output", "", "Output file (default: use stdout)")
	flag.Parse()

	if *showVersion {
		fmt.Println(Version)
		os.Exit(0)
	}
	if *showRevision {
		fmt.Println(Revision)
		os.Exit(0)
	}

	san := sanitizer{hashLength: *trimHashes}
	err = san.init(sanitizerDataPath)
	if err != nil {
		log.Fatal("Sanitizer init() error:", err)
	}

	inputFile, err := os.Open(*inputPath)
	if err != nil {
		log.Fatal("Open() resh history file error:", err)
	}
	defer inputFile.Close()

	var writer *bufio.Writer
	if *outputPath == "" {
		writer = bufio.NewWriter(os.Stdout)
	} else {
		outputFile, err := os.Create(*outputPath)
		if err != nil {
			log.Fatal("Create() output file error:", err)
		}
		defer outputFile.Close()
		writer = bufio.NewWriter(outputFile)
	}

	scanner := bufio.NewScanner(inputFile)
	for scanner.Scan() {
		record := common.Record{}
		fallbackRecord := common.FallbackRecord{}
		line := scanner.Text()
		err = json.Unmarshal([]byte(line), &record)
		if err != nil {
			// the line is not in the current format => try the fallback one
			err = json.Unmarshal([]byte(line), &fallbackRecord)
			if err != nil {
				log.Println("Line:", line)
				log.Fatal("Decoding error:", err)
			}
			record = common.ConvertRecord(&fallbackRecord)
		}
		err = san.sanitizeRecord(&record)
		if err != nil {
			log.Println("Line:", line)
			log.Fatal("Sanitization error:", err)
		}
		outLine, err := json.Marshal(&record)
		if err != nil {
			log.Println("Line:", line)
			log.Fatal("Encoding error:", err)
		}
		n, err := writer.WriteString(string(outLine) + "\n")
		if err != nil {
			log.Fatal(err)
		}
		if n == 0 {
			log.Fatal("Nothing was written", n)
		}
	}
	// Scan() returning false does not distinguish EOF from a failure (I/O error
	// or a line that is too long for the scanner) - without this check the
	// sanitized output would be silently incomplete.
	if err := scanner.Err(); err != nil {
		log.Fatal("Scanner error while reading resh history file: ", err)
	}
	// flush explicitly (instead of a deferred call) so that a failed write of
	// the buffered tail is reported
	if err := writer.Flush(); err != nil {
		log.Fatal("Flush() output error: ", err)
	}
}
// sanitizer replaces tokens that are not whitelisted with their SHA1 hashes.
type sanitizer struct {
	// hashLength is the number of characters kept from each hash;
	// values <= 0 keep the whole hash
	hashLength int
	// whitelist is the set of tokens that are kept unsanitized
	whitelist map[string]bool
}
// init loads the whitelist from the "whitelist.txt" file located in dataPath.
// It always returns nil.
func (s *sanitizer) init(dataPath string) error {
	s.whitelist = loadData(path.Join(dataPath, "whitelist.txt"))
	return nil
}
func loadData(fname string) map[string]bool {
file, err := os.Open(fname)
if err != nil {
log.Fatal("Open() file error:", err)
}
defer file.Close()
scanner := bufio.NewScanner(file)
data := make(map[string]bool)
for scanner.Scan() {
line := scanner.Text()
data[line] = true
}
return data
}
// sanitizeRecord sanitizes the record in place.
//
// Path-like fields are sanitized per path element, Host, Login and MachineId
// are hashed as a whole, GitOriginRemote is sanitized as a git URL and CmdLine
// is sanitized token by token. The length of the original CmdLine is saved in
// CmdLength and the Sanitized flag is set.
// It returns an error when the git URL or the command line can't be
// sanitized; the record may be partially sanitized in that case.
func (s *sanitizer) sanitizeRecord(record *common.Record) error {
	// hash directories of the paths
	record.Pwd = s.sanitizePath(record.Pwd)
	record.RealPwd = s.sanitizePath(record.RealPwd)
	record.PwdAfter = s.sanitizePath(record.PwdAfter)
	record.RealPwdAfter = s.sanitizePath(record.RealPwdAfter)
	record.GitDir = s.sanitizePath(record.GitDir)
	record.GitRealDir = s.sanitizePath(record.GitRealDir)
	record.Home = s.sanitizePath(record.Home)
	record.ShellEnv = s.sanitizePath(record.ShellEnv)

	// hash the most sensitive info, do not tokenize
	record.Host = s.hashToken(record.Host)
	record.Login = s.hashToken(record.Login)
	record.MachineId = s.hashToken(record.MachineId)

	var err error
	// this changes git url a bit but I'm still happy with the result
	// e.g. "git@github.com:curusarn/resh" becomes "ssh://git@github.com/3385162f14d7/5a7b2909005c"
	// notice the "ssh://" prefix
	record.GitOriginRemote, err = s.sanitizeGitURL(record.GitOriginRemote)
	if err != nil {
		log.Println("Error while sanitizing GitOriginRemote url", record.GitOriginRemote, ":", err)
		return err
	}

	// sanitization destroys original CmdLine length -> save it
	record.CmdLength = len(record.CmdLine)

	// Report the failure to the caller the same way as for the git url above
	// instead of terminating the process from inside of a function that
	// returns an error. CmdLine is only overwritten on success so that the log
	// shows the command line that actually failed.
	sanCmdLine, err := s.sanitizeCmdLine(record.CmdLine)
	if err != nil {
		log.Println("Error while sanitizing CmdLine", record.CmdLine, ":", err)
		return err
	}
	record.CmdLine = sanCmdLine

	// add a flag to signify that the record has been sanitized
	record.Sanitized = true
	return nil
}
// sanitizeCmdLine sanitizes a command line token by token.
//
// The command line is split on every character that is neither a letter nor a
// digit; those characters are copied to the output as they are and the tokens
// in between are passed to sanitizeCmdToken.
// Simple options are kept unsanitized. An option is:
//  1) whitespace 2) "-" or "--" 3) letters, digits, "-", "_"
//  4) ending whitespace or any of the option ending characters
// Errors from sanitizeCmdToken are only logged; the returned error is always nil.
func (s *sanitizer) sanitizeCmdLine(cmdLine string) (string, error) {
	const optionEndingChars = "\"$'\\#[]!><|;{}()*,?~&=`:@^/+%." // all bash control characters, '=', ...
	const optionAllowedChars = "-_"                               // characters commonly found inside of options

	var out strings.Builder     // sanitized command line built so far
	var pending strings.Builder // token that is being collected

	// copy the collected token to the output unsanitized
	emitRaw := func() {
		out.WriteString(pending.String())
		pending.Reset()
	}
	// sanitize the collected token (if there is any) and add it to the output
	emitSanitized := func() {
		if pending.Len() == 0 {
			return
		}
		sanToken, err := s.sanitizeCmdToken(pending.String())
		if err != nil {
			log.Println("WARN: got error while sanitizing cmdLine:", cmdLine)
		}
		out.WriteString(sanToken)
		pending.Reset()
	}
	isLetterOrDigit := func(r rune) bool {
		return unicode.IsLetter(r) || unicode.IsDigit(r)
	}

	inOption := false
	// last three characters seen, last[0] is the most recent one
	last := [3]rune{' ', ' ', ' '}
	for _, r := range cmdLine {
		if inOption {
			switch {
			case unicode.IsSpace(r) || strings.ContainsRune(optionEndingChars, r):
				// whitespace or option ending character ends the option
				// => add option unsanitized
				inOption = false
				emitRaw()
				out.WriteRune(r)
			case !isLetterOrDigit(r) && !strings.ContainsRune(optionAllowedChars, r):
				// r is not any of allowed chars for an option: letter, digit, "-" or "_"
				// => sanitize what was collected so far
				emitSanitized()
				out.WriteRune(r)
			default:
				pending.WriteRune(r)
			}
		} else if !isLetterOrDigit(r) {
			// every non-letter and non-digit character splits the token
			emitSanitized()
			out.WriteRune(r)
		} else {
			// a token that directly follows " -" or " --" is an option
			afterDash := unicode.IsSpace(last[1]) && last[0] == '-'
			afterDoubleDash := unicode.IsSpace(last[2]) && last[1] == '-' && last[0] == '-'
			if afterDash || afterDoubleDash {
				inOption = true
			}
			pending.WriteRune(r)
		}
		last[2], last[1], last[0] = last[1], last[0], r
	}

	// handle the token left in the buffer (if there is any)
	if inOption {
		// option detected => dont sanitize
		emitRaw()
	} else {
		emitSanitized()
	}
	return out.String(), nil
}
// sanitizeGitURL parses rawURL using the git-urls parser and sanitizes the
// result. Empty input is returned as it is. When parsing fails the unchanged
// rawURL is returned together with the parsing error.
func (s *sanitizer) sanitizeGitURL(rawURL string) (string, error) {
	if rawURL == "" {
		return rawURL, nil
	}
	parsed, parseErr := giturls.Parse(rawURL)
	if parseErr == nil {
		return s.sanitizeParsedURL(parsed)
	}
	return rawURL, parseErr
}
// sanitizeURL parses rawURL using net/url and sanitizes the result.
// Empty input is returned as it is. When parsing fails the unchanged rawURL is
// returned together with the parsing error.
func (s *sanitizer) sanitizeURL(rawURL string) (string, error) {
	if rawURL == "" {
		return rawURL, nil
	}
	parsed, parseErr := url.Parse(rawURL)
	if parseErr == nil {
		return s.sanitizeParsedURL(parsed)
	}
	return rawURL, parseErr
}
// sanitizeParsedURL sanitizes the parts of parsedURL in place and returns the
// URL as a string.
//
// Opaque, RawQuery and Fragment are sanitized as single tokens, Path is
// sanitized per path element and Host is sanitized as "host:port".
// Only the username is carried over from the user info.
// It returns an error when Host has more than two ":"-separated parts; the
// URL is partially sanitized in that case.
func (s *sanitizer) sanitizeParsedURL(parsedURL *url.URL) (string, error) {
	parsedURL.Opaque = s.sanitizeToken(parsedURL.Opaque)

	// only get username => password won't even make it to the sanitized data
	// User stays nil when there is no username - we need to do this because
	// `gitUrls.Parse()` sets `User` to `url.User("")` instead of `nil`
	var sanUser *url.Userinfo
	if username := parsedURL.User.Username(); len(username) > 0 {
		sanUser = url.User(s.sanitizeToken(username))
	}
	parsedURL.User = sanUser

	sanHost, err := s.sanitizeTwoPartToken(parsedURL.Host, ":")
	parsedURL.Host = sanHost
	if err != nil {
		return parsedURL.String(), err
	}

	parsedURL.Path = s.sanitizePath(parsedURL.Path)
	// ForceQuery bool
	parsedURL.RawQuery = s.sanitizeToken(parsedURL.RawQuery)
	parsedURL.Fragment = s.sanitizeToken(parsedURL.Fragment)
	return parsedURL.String(), nil
}
// sanitizePath hashes every "/"-separated element of path that is not in the
// whitelist (exact, case-sensitive lookup) and joins the elements back
// together using "/".
func (s *sanitizer) sanitizePath(path string) string {
	elements := strings.Split(path, "/")
	for i, element := range elements {
		if !s.whitelist[element] {
			elements[i] = s.hashToken(element)
		}
	}
	return strings.Join(elements, "/")
}
// sanitizeTwoPartToken sanitizes a token made of at most two parts separated
// by delimeter; each part is sanitized on its own. A token with more than two
// parts is returned unchanged together with an error.
func (s *sanitizer) sanitizeTwoPartToken(token string, delimeter string) (string, error) {
	switch parts := strings.Split(token, delimeter); len(parts) {
	case 0, 1:
		return s.sanitizeToken(token), nil
	case 2:
		return s.sanitizeToken(parts[0]) + delimeter + s.sanitizeToken(parts[1]), nil
	default:
		return token, errors.New("Token has more than two parts")
	}
}
// sanitizeCmdToken sanitizes a single token of a command line.
//
// Tokens at most one byte long, whitelisted tokens and tokens without any
// letter or digit are returned unchanged. Tokens made only of letters and
// digits are hashed. A token that mixes letters or digits with other
// characters is hashed as well, but a warning is logged and an error is
// returned together with the hash.
func (s *sanitizer) sanitizeCmdToken(token string) (string, error) {
	// there shouldn't be tokens with letters or digits mixed together with symbols
	if len(token) <= 1 {
		// NOTE: do not sanitize single letter tokens
		return token, nil
	}
	if s.isInWhitelist(token) {
		return token, nil
	}

	sawLetterOrDigit := false
	sawOtherCharacter := false
	for _, r := range token {
		if unicode.IsDigit(r) || unicode.IsLetter(r) {
			sawLetterOrDigit = true
		} else {
			sawOtherCharacter = true
		}
	}

	// NOTE: I decided that I don't want a special sanitization for numbers
	switch {
	case !sawOtherCharacter:
		// letters and digits only
		return s.hashToken(token), nil
	case !sawLetterOrDigit:
		// other characters only
		return token, nil
	}
	log.Println("WARN: cmd token is made of mix of letters or digits and other characters; token:", token)
	return s.hashToken(token), errors.New("cmd token is made of mix of letters or digits and other characters")
}
// sanitizeToken returns the hash of the token unless the token is at most one
// byte long or whitelisted; such tokens are returned unchanged.
func (s *sanitizer) sanitizeToken(token string) string {
	// NOTE: do not sanitize single letter tokens
	if len(token) > 1 && !s.isInWhitelist(token) {
		return s.hashToken(token)
	}
	return token
}
// hashToken returns the hex-encoded SHA1 hash of the token trimmed by
// trimHash. An empty token is returned as it is.
func (s *sanitizer) hashToken(token string) string {
	if token == "" {
		return token
	}
	// hash with sha1
	digest := sha1.Sum([]byte(token))
	return s.trimHash(hex.EncodeToString(digest[:]))
}
// hashNumericToken returns a hash of the token that is made of decimal digits
// only. An empty token is returned as it is.
//
// The first 8 bytes of the SHA1 hash are read as a little-endian integer and
// its absolute value is printed in base 10 and trimmed by trimHash.
func (s *sanitizer) hashNumericToken(token string) string {
	if len(token) <= 0 {
		return token
	}
	h := sha1.New()
	h.Write([]byte(token))
	sum := h.Sum(nil)

	value := int64(binary.LittleEndian.Uint64(sum))
	magnitude := uint64(value)
	if value < 0 {
		// Negate in the unsigned domain: negating the minimal int64 value as a
		// signed number overflows and leaves a "-" in the output.
		magnitude = uint64(-value)
	}
	// trim in both cases - the negative branch used to skip the trimming
	return s.trimHash(strconv.FormatUint(magnitude, 10))
}
// trimHash returns the first hashLength characters of the hash. The whole hash
// is returned when hashLength is not positive or the hash is shorter than
// hashLength.
func (s *sanitizer) trimHash(hash string) string {
	if n := s.hashLength; n > 0 && n <= len(hash) {
		return hash[:n]
	}
	return hash
}
// isInWhitelist reports whether the lowercased token is in the whitelist.
func (s *sanitizer) isInWhitelist(token string) bool {
	lowered := strings.ToLower(token)
	return s.whitelist[lowered]
}

@ -0,0 +1,7 @@
# copyright information
The whitelist contains content from a variety of sources.
Part of the whitelist (`./whitelist.txt`) is made of copyrighted content from [FileInfo.com](https://fileinfo.com/filetypes/common).
This content was used with permission from FileInfo.com.

File diff suppressed because it is too large Load Diff

@ -153,10 +153,19 @@ __resh_precmd() {
__RESH_PWD_AFTER="$PWD"
if [ -n "${__RESH_COLLECT}" ]; then
if [ "$__RESH_VERSION" != $(resh-collect -version) ]; then
echo "resh WARNING: You probably just updated RESH - please restart or reload this terminal session (resh version: $(resh-collect -version); resh version of this terminal session: ${__RESH_VERSION})"
source ~/.resh/shellrc
if [ "$__RESH_VERSION" != $(resh-collect -version) ]; then
echo "RESH WARNING: You probably just updated RESH - PLEASE RESTART OR RELOAD THIS TERMINAL SESSION (resh version: $(resh-collect -version); resh version of this terminal session: ${__RESH_VERSION})"
else
echo "RESH INFO: New RESH shellrc script was loaded - if you encounter any issues please restart this terminal session."
fi
elif [ "$__RESH_REVISION" != $(resh-collect -revision) ]; then
echo "resh WARNING: You probably just updated RESH - please restart or reload this terminal session (resh version: $(resh-collect -revision); resh version of this terminal session: ${__RESH_REVISION})"
else
source ~/.resh/shellrc
if [ "$__RESH_REVISION" != $(resh-collect -revision) ]; then
echo "RESH WARNING: You probably just updated RESH - PLEASE RESTART OR RELOAD THIS TERMINAL SESSION (resh revision: $(resh-collect -revision); resh revision of this terminal session: ${__RESH_REVISION})"
fi
fi
if [ "$__RESH_VERSION" == $(resh-collect -version) ] && [ "$__RESH_REVISION" == $(resh-collect -revision) ]; then
resh-collect -requireVersion "$__RESH_VERSION" \
-requireRevision "$__RESH_REVISION" \
-cmdLine "$__RESH_CMDLINE" \

@ -1 +1 @@
1.1.1
1.1.3

Loading…
Cancel
Save