From 4b7f063d0b11f6213144e63c583987d170cfdcf5 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Sat, 10 Aug 2019 12:29:41 +0200 Subject: [PATCH 01/41] cosmetic changes --- collect/resh-collect.go | 3 +++ daemon/resh-daemon.go | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/collect/resh-collect.go b/collect/resh-collect.go index ae52923..b96cc72 100644 --- a/collect/resh-collect.go +++ b/collect/resh-collect.go @@ -20,7 +20,10 @@ import ( "strings" ) +// Version from git set during build var Version string + +// Revision from git set during build var Revision string func main() { diff --git a/daemon/resh-daemon.go b/daemon/resh-daemon.go index fb1b31a..c8bcb0e 100644 --- a/daemon/resh-daemon.go +++ b/daemon/resh-daemon.go @@ -3,8 +3,6 @@ package main import ( "encoding/json" //"flag" - "github.com/BurntSushi/toml" - common "github.com/curusarn/resh/common" "io/ioutil" "log" "net/http" @@ -14,9 +12,15 @@ import ( "path/filepath" "strconv" "strings" + + "github.com/BurntSushi/toml" + common "github.com/curusarn/resh/common" ) +// Version from git set during build var Version string + +// Revision from git set during build var Revision string func main() { From f08d35eab5168fc60dcae376d9520c8968ac5a3c Mon Sep 17 00:00:00 2001 From: Simon Let Date: Sat, 10 Aug 2019 23:03:21 +0200 Subject: [PATCH 02/41] add draft of sanitizer, sanitizer data --- Makefile | 5 +- go.mod | 6 +- go.sum | 4 + sanitize-history/resh-sanitize-history.go | 262 ++++++++++++++++++++++ sanitizer_data/path_whitelist.txt | 23 ++ sanitizer_data/whitelist.txt | 28 +++ 6 files changed, 326 insertions(+), 2 deletions(-) create mode 100644 sanitize-history/resh-sanitize-history.go create mode 100644 sanitizer_data/path_whitelist.txt create mode 100644 sanitizer_data/whitelist.txt diff --git a/Makefile b/Makefile index e63c1539..b59032d 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ autoinstall: ./install_helper.sh -build: submodules resh-collect resh-daemon +build: submodules 
resh-collect resh-daemon resh-sanitize-history rebuild: make clean @@ -23,6 +23,7 @@ install: build submodules/bash-preexec/bash-preexec.sh shellrc.sh config.toml uu cp -f shellrc.sh ~/.resh/shellrc cp -f uuid.sh ~/.resh/bin/resh-uuid cp -f resh-* ~/.resh/bin/ + cp -fr sanitizer_data ~/.resh/ # backward compatibility: We have a new location for resh history file [ ! -f ~/.resh/history.json ] || mv ~/.resh/history.json ~/.resh_history.json # Adding resh shellrc to .bashrc ... @@ -69,6 +70,8 @@ resh-daemon: daemon/resh-daemon.go common/resh-common.go version resh-collect: collect/resh-collect.go common/resh-common.go version go build ${GOFLAGS} -o $@ $< +resh-sanitize-history: collect/resh-sanitize-history.go common/resh-common.go version + go build ${GOFLAGS} -o $@ $< $(HOME)/.resh $(HOME)/.resh/bin $(HOME)/.config: # Creating dirs ... diff --git a/go.mod b/go.mod index 86da97e..9ff85f1 100644 --- a/go.mod +++ b/go.mod @@ -2,4 +2,8 @@ module github.com/curusarn/resh go 1.12 -require github.com/BurntSushi/toml v0.3.1 +require ( + github.com/BurntSushi/toml v0.3.1 + github.com/mattn/go-shellwords v1.0.5 + github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa +) diff --git a/go.sum b/go.sum index 9cb2df8..629918a 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,6 @@ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/mattn/go-shellwords v1.0.5 h1:JhhFTIOslh5ZsPrpa3Wdg8bF0WI3b44EMblmU9wIsXc= +github.com/mattn/go-shellwords v1.0.5/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o= +github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa h1:rW+Lu6281ed/4XGuVIa4/YebTRNvoUJlfJ44ktEVwZk= +github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa/go.mod h1:2rx5KE5FLD0HRfkkpyn8JwbVLBdhgeiOb2D2D9LLKM4= diff --git a/sanitize-history/resh-sanitize-history.go b/sanitize-history/resh-sanitize-history.go new file mode 100644 index 
0000000..cd5b8bb --- /dev/null +++ b/sanitize-history/resh-sanitize-history.go @@ -0,0 +1,262 @@ +package main + +import ( + "bufio" + "crypto/sha1" + "encoding/hex" + "encoding/json" + "errors" + "flag" + "fmt" + "log" + "net/url" + "os" + "os/user" + "path" + "path/filepath" + "strings" + + "github.com/curusarn/resh/common" + "github.com/mattn/go-shellwords" + giturls "github.com/whilp/git-urls" +) + +// Version from git set during build +var Version string + +// Revision from git set during build +var Revision string + +func main() { + usr, _ := user.Current() + dir := usr.HomeDir + historyPath := filepath.Join(dir, ".resh_history.json") + // outputPath := filepath.Join(dir, "resh_history_sanitized.json") + sanitizerDataPath := filepath.Join(dir, ".resh", "sanitizer_data") + + showVersion := flag.Bool("version", false, "Show version and exit") + showRevision := flag.Bool("revision", false, "Show git revision and exit") + // outputToStdout := flag.Bool("stdout", false, "Print output to stdout instead of file") + + flag.Parse() + + if *showVersion == true { + fmt.Println(Version) + os.Exit(0) + } + if *showRevision == true { + fmt.Println(Revision) + os.Exit(0) + } + sanitizer := sanitizer{} + err := sanitizer.init(sanitizerDataPath) + if err != nil { + log.Fatal("Sanitizer init() error:", err) + } + + file, err := os.Open(historyPath) + if err != nil { + log.Fatal("Open() resh history file error:", err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + record := common.Record{} + line := scanner.Text() + err = json.Unmarshal([]byte(line), &record) + if err != nil { + log.Println("Decoding error:", err) + log.Println("Line:", line) + return + } + err = sanitizer.sanitize(&record) + if err != nil { + log.Println("Sanitization error:", err) + log.Println("Line:", line) + return + } + outLine, err := json.Marshal(&record) + if err != nil { + log.Println("Encoding error:", err) + log.Println("Line:", line) + return + } + 
fmt.Println(string(outLine)) + } +} + +type sanitizer struct { + GlobalWhitelist map[string]bool + PathWhitelist map[string]bool + // CmdWhitelist []string +} + +func (s *sanitizer) init(dataPath string) error { + globalData := path.Join(dataPath, "whitelist.txt") + s.GlobalWhitelist = loadData(globalData) + pathData := path.Join(dataPath, "path_whitelist.txt") + s.PathWhitelist = loadData(pathData) + return nil +} + +func loadData(fname string) map[string]bool { + file, err := os.Open(fname) + if err != nil { + log.Fatal("Open() file error:", err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + data := make(map[string]bool) + for scanner.Scan() { + line := scanner.Text() + data[line] = true + } + return data +} + +func (s *sanitizer) sanitize(record *common.Record) error { + record.Pwd = s.sanitizePath(record.Pwd) + record.RealPwd = s.sanitizePath(record.RealPwd) + record.PwdAfter = s.sanitizePath(record.PwdAfter) + record.RealPwdAfter = s.sanitizePath(record.RealPwdAfter) + record.GitDir = s.sanitizePath(record.GitDir) + record.GitRealDir = s.sanitizePath(record.GitRealDir) + record.Home = s.sanitizePath(record.Home) + record.ShellEnv = s.sanitizePath(record.ShellEnv) + + record.Host = s.sanitizeTokenDontUseWhitelist(record.Host) + record.Uname = s.sanitizeTokenDontUseWhitelist(record.Uname) + record.Login = s.sanitizeTokenDontUseWhitelist(record.Login) + record.MachineId = s.sanitizeTokenDontUseWhitelist(record.MachineId) + + var err error + record.GitOriginRemote, err = s.sanitizeGitURL(record.GitOriginRemote) + if err != nil { + log.Println("Error while snitizing GitOriginRemote url", record.GitOriginRemote, ":", err) + return err + } + + fmt.Println("....") + parser := shellwords.NewParser() + + args, err := parser.Parse(record.CmdLine) + if err != nil { + log.Println("Parsing error @ position", parser.Position, ":", err) + log.Println("CmdLine:", record.CmdLine) + return err + } + fmt.Println(args) + + return nil + + // var tokens []string + 
// word := "" + // for _, char := range strings.Split(, "") { + // if unicode.IsSpace([]rune(char)[0]) { + // if len(word) > 0 { + // tokens = append(tokens, word) + // word = "" + // } + // tokens = append(tokens, char) + // } else { + // word += char + // } + // } + // if len(word) > 0 { + // tokens = append(tokens, word) + // } + // for _, token := range tokens { + // fmt.Println(token) + // } + // return nil +} + +func (s *sanitizer) sanitizeGitURL(rawURL string) (string, error) { + parsedURL, err := giturls.Parse(rawURL) + if err != nil { + return rawURL, err + } + return s.sanitizeParsedURL(parsedURL) +} + +func (s *sanitizer) sanitizeURL(rawURL string) (string, error) { + parsedURL, err := url.Parse(rawURL) + if err != nil { + return rawURL, err + } + return s.sanitizeParsedURL(parsedURL) +} + +func (s *sanitizer) sanitizeParsedURL(parsedURL *url.URL) (string, error) { + // Scheme string + parsedURL.Opaque = s.sanitizeToken(parsedURL.Opaque) + + userinfo := parsedURL.User.Username() // only get username => password won't even make it to the sanitized data + if len(userinfo) > 0 { + parsedURL.User = url.User(s.sanitizeToken(userinfo)) + } else { + // we need to do this because `gitUrls.Parse()` sets `User` to `url.User("")` instead of `nil` + parsedURL.User = nil + } + var err error + parsedURL.Host, err = s.sanitizeTwoPartToken(parsedURL.Host, ":") + if err != nil { + return parsedURL.String(), err + } + parsedURL.Path = s.sanitizePath(parsedURL.Path) + // ForceQuery bool + parsedURL.RawQuery = s.sanitizeToken(parsedURL.RawQuery) + parsedURL.Fragment = s.sanitizeToken(parsedURL.Fragment) + + return parsedURL.String(), nil +} + +func (s *sanitizer) sanitizePath(path string) string { + var sanPath string + for _, token := range strings.Split(path, "/") { + if s.PathWhitelist[token] != true { + token = s.sanitizeToken(token) + } + sanPath += token + "/" + } + if len(sanPath) > 0 { + sanPath = sanPath[:len(sanPath)-1] + } + return sanPath +} + +func (s 
*sanitizer) sanitizeTwoPartToken(token string, delimeter string) (string, error) { + tokenParts := strings.Split(token, delimeter) + if len(tokenParts) <= 1 { + return s.sanitizeToken(token), nil + } + if len(tokenParts) == 2 { + return s.sanitizeToken(tokenParts[0]) + delimeter + s.sanitizeToken(tokenParts[1]), nil + } + return token, errors.New("Token has more than two parts") +} + +func (s *sanitizer) sanitizeToken(token string) string { + return s._sanitizeToken(token, true) +} + +func (s *sanitizer) sanitizeTokenDontUseWhitelist(token string) string { + return s._sanitizeToken(token, false) +} + +func (s *sanitizer) _sanitizeToken(token string, useWhitelist bool) string { + if len(token) <= 0 { + return token + } + if useWhitelist == true && s.GlobalWhitelist[token] == true { + return token + } + // hash with sha1 + // trim to 12 characters + h := sha1.New() + h.Write([]byte(token)) + sum := h.Sum(nil) + return hex.EncodeToString(sum)[:12] +} diff --git a/sanitizer_data/path_whitelist.txt b/sanitizer_data/path_whitelist.txt new file mode 100644 index 0000000..75c1ac1 --- /dev/null +++ b/sanitizer_data/path_whitelist.txt @@ -0,0 +1,23 @@ + +. +.. +bin +boot +dev +etc +home +lib +lib64 +lost+found +media +mnt +opt +proc +root +run +sbin +srv +sys +tmp +usr +var diff --git a/sanitizer_data/whitelist.txt b/sanitizer_data/whitelist.txt new file mode 100644 index 0000000..fb1776c --- /dev/null +++ b/sanitizer_data/whitelist.txt @@ -0,0 +1,28 @@ + +. +.. 
+bin +boot +dev +etc +home +lib +lib64 +lost+found +media +mnt +opt +proc +root +run +sbin +srv +sys +tmp +usr +var +bash +zsh +fish +github.com +git \ No newline at end of file From b67b4eef13adb0f8a3e1af56eaf84acd38c13699 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Sun, 11 Aug 2019 16:21:13 +0200 Subject: [PATCH 03/41] sanitizer works --- common/resh-common.go | 3 + sanitize-history/resh-sanitize-history.go | 222 +- sanitizer_data/path_whitelist.txt | 23 - sanitizer_data/whitelist.txt | 3001 ++++++++++++++++++++- 4 files changed, 3173 insertions(+), 76 deletions(-) delete mode 100644 sanitizer_data/path_whitelist.txt diff --git a/common/resh-common.go b/common/resh-common.go index aa2bb92..0de34ea 100644 --- a/common/resh-common.go +++ b/common/resh-common.go @@ -60,6 +60,9 @@ type Record struct { ReshUuid string `json:"reshUuid"` ReshVersion string `json:"reshVersion"` ReshRevision string `json:"reshRevision"` + + // added by sanitizatizer + CmdLength int `json:"cmdLength"` } type Config struct { diff --git a/sanitize-history/resh-sanitize-history.go b/sanitize-history/resh-sanitize-history.go index cd5b8bb..92d3cbe 100644 --- a/sanitize-history/resh-sanitize-history.go +++ b/sanitize-history/resh-sanitize-history.go @@ -3,6 +3,7 @@ package main import ( "bufio" "crypto/sha1" + "encoding/binary" "encoding/hex" "encoding/json" "errors" @@ -14,10 +15,11 @@ import ( "os/user" "path" "path/filepath" + "strconv" "strings" + "unicode" "github.com/curusarn/resh/common" - "github.com/mattn/go-shellwords" giturls "github.com/whilp/git-urls" ) @@ -48,7 +50,7 @@ func main() { fmt.Println(Revision) os.Exit(0) } - sanitizer := sanitizer{} + sanitizer := sanitizer{hashLength: 4} err := sanitizer.init(sanitizerDataPath) if err != nil { log.Fatal("Sanitizer init() error:", err) @@ -70,7 +72,7 @@ func main() { log.Println("Line:", line) return } - err = sanitizer.sanitize(&record) + err = sanitizer.sanitizeRecord(&record) if err != nil { log.Println("Sanitization error:", 
err) log.Println("Line:", line) @@ -87,16 +89,13 @@ func main() { } type sanitizer struct { - GlobalWhitelist map[string]bool - PathWhitelist map[string]bool - // CmdWhitelist []string + hashLength int + whitelist map[string]bool } func (s *sanitizer) init(dataPath string) error { globalData := path.Join(dataPath, "whitelist.txt") - s.GlobalWhitelist = loadData(globalData) - pathData := path.Join(dataPath, "path_whitelist.txt") - s.PathWhitelist = loadData(pathData) + s.whitelist = loadData(globalData) return nil } @@ -116,7 +115,7 @@ func loadData(fname string) map[string]bool { return data } -func (s *sanitizer) sanitize(record *common.Record) error { +func (s *sanitizer) sanitizeRecord(record *common.Record) error { record.Pwd = s.sanitizePath(record.Pwd) record.RealPwd = s.sanitizePath(record.RealPwd) record.PwdAfter = s.sanitizePath(record.PwdAfter) @@ -126,51 +125,109 @@ func (s *sanitizer) sanitize(record *common.Record) error { record.Home = s.sanitizePath(record.Home) record.ShellEnv = s.sanitizePath(record.ShellEnv) - record.Host = s.sanitizeTokenDontUseWhitelist(record.Host) - record.Uname = s.sanitizeTokenDontUseWhitelist(record.Uname) - record.Login = s.sanitizeTokenDontUseWhitelist(record.Login) - record.MachineId = s.sanitizeTokenDontUseWhitelist(record.MachineId) + record.Host = s.hashToken(record.Host) + record.Login = s.hashToken(record.Login) + record.MachineId = s.hashToken(record.MachineId) var err error + // this changes git url a bit but I'm still happy with the result + // e.g. 
"git@github.com:curusarn/resh" becomes "ssh://git@github.com/3385162f14d7/5a7b2909005c" + // notice the "ssh://" prefix record.GitOriginRemote, err = s.sanitizeGitURL(record.GitOriginRemote) if err != nil { log.Println("Error while snitizing GitOriginRemote url", record.GitOriginRemote, ":", err) return err } - fmt.Println("....") - parser := shellwords.NewParser() + // sanitization destroys original CmdLine length -> save it + record.CmdLength = len(record.CmdLine) - args, err := parser.Parse(record.CmdLine) + record.CmdLine, err = s.sanitizeCmdLine(record.CmdLine) if err != nil { - log.Println("Parsing error @ position", parser.Position, ":", err) - log.Println("CmdLine:", record.CmdLine) - return err + log.Fatal("Cmd:", record.CmdLine, "; sanitization error:", err) } - fmt.Println(args) - return nil +} - // var tokens []string - // word := "" - // for _, char := range strings.Split(, "") { - // if unicode.IsSpace([]rune(char)[0]) { - // if len(word) > 0 { - // tokens = append(tokens, word) - // word = "" - // } - // tokens = append(tokens, char) - // } else { - // word += char - // } - // } - // if len(word) > 0 { - // tokens = append(tokens, word) - // } - // for _, token := range tokens { - // fmt.Println(token) - // } - // return nil +func (s *sanitizer) sanitizeCmdLine(cmdLine string) (string, error) { + sanCmdLine := "" + buff := "" + + // simple options shouldn't be sanitized + // 1) whitespace 2) "-" or "--" 3) letters, digits, "-", "_" 4) ending whitespace or "=" + var optionDetected bool + + prevR3 := ' ' + prevR2 := ' ' + prevR := ' ' + for _, r := range cmdLine { + switch optionDetected { + case true: + if unicode.IsSpace(r) || r == '=' || r == ';' { + // whitespace, "=" or ";" ends the option + // => add option unsanitized + optionDetected = false + if len(buff) > 0 { + sanCmdLine += buff + buff = "" + } + sanCmdLine += string(r) + } else if unicode.IsLetter(r) == false && unicode.IsDigit(r) == false && r != '-' && r != '_' { + // r is not any of 
allowed chars for an option: letter, digit, "-" or "_" + // => sanitize + if len(buff) > 0 { + sanToken, err := s.sanitizeCmdToken(buff) + if err != nil { + return cmdLine, err + } + sanCmdLine += sanToken + buff = "" + } + sanCmdLine += string(r) + } else { + buff += string(r) + } + case false: + // split command on all non-letter and non-digit characters + if unicode.IsLetter(r) == false && unicode.IsDigit(r) == false { + // split token + if len(buff) > 0 { + sanToken, err := s.sanitizeCmdToken(buff) + if err != nil { + return cmdLine, err + } + sanCmdLine += sanToken + buff = "" + } + sanCmdLine += string(r) + } else { + if (unicode.IsSpace(prevR2) && prevR == '-') || + (unicode.IsSpace(prevR3) && prevR2 == '-' && prevR == '-') { + optionDetected = true + } + buff += string(r) + } + } + prevR3 = prevR2 + prevR2 = prevR + prevR = r + } + if len(buff) <= 0 { + // nothing in the buffer => work is done + return sanCmdLine, nil + } + if optionDetected { + // option detected => dont sanitize + sanCmdLine += buff + return sanCmdLine, nil + } + // sanitize + sanToken, err := s.sanitizeCmdToken(buff) + if err != nil { + return cmdLine, err + } + sanCmdLine += sanToken + return sanCmdLine, nil } func (s *sanitizer) sanitizeGitURL(rawURL string) (string, error) { @@ -216,8 +273,8 @@ func (s *sanitizer) sanitizeParsedURL(parsedURL *url.URL) (string, error) { func (s *sanitizer) sanitizePath(path string) string { var sanPath string for _, token := range strings.Split(path, "/") { - if s.PathWhitelist[token] != true { - token = s.sanitizeToken(token) + if s.whitelist[token] != true { + token = s.hashToken(token) } sanPath += token + "/" } @@ -238,19 +295,55 @@ func (s *sanitizer) sanitizeTwoPartToken(token string, delimeter string) (string return token, errors.New("Token has more than two parts") } -func (s *sanitizer) sanitizeToken(token string) string { - return s._sanitizeToken(token, true) -} +func (s *sanitizer) sanitizeCmdToken(token string) (string, error) { + // there 
shouldn't be tokens with letters or digits mixed together with symbols + if len(token) <= 0 { + return token, nil + } + if s.whitelist[token] == true { + return token, nil + } -func (s *sanitizer) sanitizeTokenDontUseWhitelist(token string) string { - return s._sanitizeToken(token, false) + isLettersOrDigits := true + isDigits := true + isOtherCharacters := true + for _, r := range token { + if unicode.IsDigit(r) == false && unicode.IsLetter(r) == false { + isLettersOrDigits = false + isDigits = false + } + if unicode.IsDigit(r) == false { + isDigits = false + } + if unicode.IsDigit(r) || unicode.IsLetter(r) { + isOtherCharacters = false + } + } + if isDigits { + return s.hashNumericToken(token), nil + } + if isLettersOrDigits { + return s.hashToken(token), nil + } + if isOtherCharacters { + return token, nil + } + log.Println("token:", token) + return token, errors.New("cmd token is made of mix of letters or digits and other characters") } -func (s *sanitizer) _sanitizeToken(token string, useWhitelist bool) string { +func (s *sanitizer) sanitizeToken(token string) string { if len(token) <= 0 { return token } - if useWhitelist == true && s.GlobalWhitelist[token] == true { + if s.whitelist[token] { + return token + } + return s.hashToken(token) +} + +func (s *sanitizer) hashToken(token string) string { + if len(token) <= 0 { return token } // hash with sha1 @@ -258,5 +351,30 @@ func (s *sanitizer) _sanitizeToken(token string, useWhitelist bool) string { h := sha1.New() h.Write([]byte(token)) sum := h.Sum(nil) - return hex.EncodeToString(sum)[:12] + // TODO: extend hashes to 12 + return s.trimHash(hex.EncodeToString(sum)) +} + +func (s *sanitizer) hashNumericToken(token string) string { + if len(token) <= 0 { + return token + } + // hash with fnv + // trim to 12 characters + h := sha1.New() + h.Write([]byte(token)) + sum := h.Sum(nil) + sumInt := int(binary.LittleEndian.Uint64(sum)) + if sumInt < 0 { + return strconv.Itoa(sumInt * -1) + } + return 
s.trimHash(strconv.Itoa(sumInt)) +} + +func (s *sanitizer) trimHash(hash string) string { + length := s.hashLength + if length <= 0 || len(hash) < length { + length = len(hash) + } + return hash[:length] } diff --git a/sanitizer_data/path_whitelist.txt b/sanitizer_data/path_whitelist.txt deleted file mode 100644 index 75c1ac1..0000000 --- a/sanitizer_data/path_whitelist.txt +++ /dev/null @@ -1,23 +0,0 @@ - -. -.. -bin -boot -dev -etc -home -lib -lib64 -lost+found -media -mnt -opt -proc -root -run -sbin -srv -sys -tmp -usr -var diff --git a/sanitizer_data/whitelist.txt b/sanitizer_data/whitelist.txt index fb1776c..debb8fd 100644 --- a/sanitizer_data/whitelist.txt +++ b/sanitizer_data/whitelist.txt @@ -25,4 +25,3003 @@ bash zsh fish github.com -git \ No newline at end of file +git +bash +bunzip2 +bzcat +bzcmp +bzdiff +bzegrep +bzexe +bzfgrep +bzgrep +bzip2 +bzip2recover +bzless +bzmore +cat +chgrp +chmod +chown +cp +dash +date +dd +df +dir +dmesg +dnsdomainname +domainname +echo +egrep +false +fgrep +findmnt +grep +gunzip +gzexe +gzip +hostname +kill +ln +login +ls +lsblk +mkdir +mknod +mktemp +more +mount +mountpoint +mv +nisdomainname +pidof +ps +pwd +rbash +readlink +rm +rmdir +run-parts +sed +sh +sh.distrib +sleep +stty +su +sync +tar +tempfile +touch +true +umount +uname +uncompress +vdir +wdctl +which +ypdomainname +zcat +zcmp +zdiff +zegrep +zfgrep +zforce +zgrep +zless +zmore +znew +agetty +badblocks +blkdiscard +blkid +blockdev +cfdisk +chcpu +ctrlaltdel +debugfs +dumpe2fs +e2fsck +e2image +e2label +e2undo +fdisk +findfs +fsck +fsck.cramfs +fsck.ext2 +fsck.ext3 +fsck.ext4 +fsck.minix +fsfreeze +fstab-decode +fstrim +getty +hwclock +initctl +installkernel +isosize +killall5 +ldconfig +ldconfig.real +logsave +losetup +mke2fs +mkfs +mkfs.bfs +mkfs.cramfs +mkfs.ext2 +mkfs.ext3 +mkfs.ext4 +mkfs.minix +mkhomedir_helper +mkswap +pam_extrausers_chkpwd +pam_extrausers_update +pam_tally +pam_tally2 +pivot_root +raw +resize2fs +runuser +sfdisk +shadowconfig 
+start-stop-daemon +sulogin +swaplabel +swapoff +swapon +switch_root +sysctl +tune2fs +unix_chkpwd +unix_update +wipefs +zramctl +[ +addpart +apt +apt-cache +apt-cdrom +apt-config +apt-get +apt-key +apt-mark +arch +awk +b2sum +base32 +base64 +basename +bashbug +captoinfo +catchsegv +chage +chattr +chcon +chfn +chrt +chsh +cksum +clear +clear_console +cmp +comm +csplit +cut +deb-systemd-helper +deb-systemd-invoke +debconf +debconf-apt-progress +debconf-communicate +debconf-copydb +debconf-escape +debconf-set-selections +debconf-show +delpart +diff +diff3 +dircolors +dirname +dpkg +dpkg-deb +dpkg-divert +dpkg-maintscript-helper +dpkg-query +dpkg-split +dpkg-statoverride +dpkg-trigger +du +env +expand +expiry +expr +factor +faillog +fallocate +find +flock +fmt +fold +free +getconf +getent +getopt +gpasswd +gpgv +groups +head +hostid +i386 +iconv +id +infocmp +infotocap +install +ionice +ipcmk +ipcrm +ipcs +ischroot +join +last +lastb +lastlog +ldd +link +linux32 +linux64 +locale +locale-check +localedef +logger +logname +lsattr +lscpu +lsipc +lslocks +lslogins +lsmem +lsns +mawk +mcookie +md5sum +md5sum.textutils +mesg +mkfifo +namei +nawk +newgrp +nice +nl +nohup +nproc +nsenter +numfmt +od +pager +partx +passwd +paste +pathchk +perl +perl5.26.1 +pgrep +pinky +pkill +pldd +pmap +pr +printenv +printf +prlimit +ptx +pwdx +realpath +rename.ul +renice +reset +resizepart +rev +rgrep +runcon +savelog +script +scriptreplay +sdiff +select-editor +sensible-browser +sensible-editor +sensible-pager +seq +setarch +setsid +setterm +sg +sha1sum +sha224sum +sha256sum +sha384sum +sha512sum +shred +shuf +skill +slabtop +snice +sort +split +stat +stdbuf +sum +tabs +tac +tail +taskset +tee +test +tic +timeout +tload +toe +top +touch +tput +tr +truncate +tset +tsort +tty +tzselect +unexpand +uniq +unlink +unshare +update-alternatives +uptime +users +utmpdump +vmstat +w +w.procps +wall +watch +wc +whereis +which +who +whoami +x86_64 +xargs +yes +zdump +unminimize +add-shell +addgroup 
+adduser +chgpasswd +chmem +chpasswd +chroot +cpgr +cppw +delgroup +deluser +dpkg-preconfigure +dpkg-reconfigure +e2freefrag +e4crypt +e4defrag +fdformat +filefrag +groupadd +groupdel +groupmems +groupmod +grpck +grpconv +grpunconv +iconvconfig +invoke-rc.d +ldattach +mklost+found +newusers +nologin +pam-auth-update +pam_getenv +pam_timestamp_check +policy-rc.d +pwck +pwconv +pwunconv +readprofile +remove-shell +rmt +rmt-tar +rtcwake +service +tarcat +update-passwd +update-rc.d +useradd +userdel +usermod +vigr +vipw +zic +docker +go +ssh +cz +com +http +https +localhost +jq +code +make +ping +ffmpeg +add-shell +addgroup +adduser +arpd +chgpasswd +chmem +chpasswd +chroot +cpgr +cppw +delgroup +deluser +dpkg-preconfigure +dpkg-reconfigure +e2freefrag +e4crypt +e4defrag +fdformat +filefrag +genl +groupadd +groupdel +groupmems +groupmod +grpck +grpconv +grpunconv +iconvconfig +invoke-rc.d +ldattach +mklost+found +newusers +nologin +pam-auth-update +pam_getenv +pam_timestamp_check +policy-rc.d +pwck +pwconv +pwunconv +readprofile +remove-shell +rmt +rmt-tar +rtcwake +service +tarcat +tzconfig +update-passwd +update-rc.d +useradd +userdel +usermod +vigr +vipw +zic +[ +addpart +apt +apt-cache +apt-cdrom +apt-config +apt-get +apt-key +apt-mark +arch +awk +b2sum +base32 +base64 +basename +bashbug +captoinfo +catchsegv +chage +chattr +chcon +chfn +choom +chrt +chsh +cksum +clear +clear_console +cmp +comm +csplit +ctstat +cut +deb-systemd-helper +deb-systemd-invoke +debconf +debconf-apt-progress +debconf-communicate +debconf-copydb +debconf-escape +debconf-set-selections +debconf-show +delpart +diff +diff3 +dircolors +dirname +dpkg +dpkg-deb +dpkg-divert +dpkg-maintscript-helper +dpkg-query +dpkg-split +dpkg-statoverride +dpkg-trigger +du +env +expand +expiry +expr +factor +faillog +fallocate +fincore +find +flock +fmt +fold +getconf +getent +getopt +gpasswd +gpgv +groups +head +hostid +i386 +iconv +id +infocmp +infotocap +install +ionice +ipcmk +ipcrm +ipcs +ischroot +join 
+last +lastb +lastlog +ldd +link +linux32 +linux64 +lnstat +locale +localedef +logger +logname +lsattr +lscpu +lsipc +lslocks +lslogins +lsmem +lsns +mawk +mcookie +md5sum +md5sum.textutils +mesg +mkfifo +namei +nawk +newgrp +nice +nl +nohup +nproc +nsenter +nstat +numfmt +od +pager +partx +passwd +paste +pathchk +perl +perl5.28.1 +pinky +pldd +pr +printenv +printf +prlimit +ptx +rdma +realpath +rename.ul +renice +reset +resizepart +rev +rgrep +routef +routel +rtstat +runcon +savelog +script +scriptreplay +sdiff +seq +setarch +setpriv +setsid +setterm +sg +sha1sum +sha224sum +sha256sum +sha384sum +sha512sum +shred +shuf +sort +split +stat +stdbuf +sum +tabs +tac +tail +taskset +tee +test +tic +timeout +toe +touch +tput +tr +truncate +tset +tsort +tty +tzselect +unexpand +uniq +unlink +unshare +update-alternatives +users +utmpdump +wall +wc +whereis +which +who +whoami +x86_64 +xargs +yes +zdump +agetty +badblocks +blkdiscard +blkid +blkzone +blockdev +bridge +capsh +cfdisk +chcpu +ctrlaltdel +debugfs +devlink +dumpe2fs +e2fsck +e2image +e2label +e2mmpstatus +e2undo +fdisk +findfs +fsck +fsck.cramfs +fsck.ext2 +fsck.ext3 +fsck.ext4 +fsck.minix +fsfreeze +fstab-decode +fstrim +getcap +getpcaps +getty +hwclock +installkernel +ip +isosize +killall5 +ldconfig +logsave +losetup +mke2fs +mkfs +mkfs.bfs +mkfs.cramfs +mkfs.ext2 +mkfs.ext3 +mkfs.ext4 +mkfs.minix +mkhomedir_helper +mkswap +pam_tally +pam_tally2 +pivot_root +raw +resize2fs +rtacct +rtmon +runuser +setcap +sfdisk +shadowconfig +start-stop-daemon +sulogin +swaplabel +swapoff +swapon +switch_root +tc +tipc +tune2fs +unix_chkpwd +unix_update +wipefs +zramctl +bash +cat +chgrp +chmod +chown +cp +dash +date +dd +df +dir +dmesg +dnsdomainname +domainname +echo +egrep +false +fgrep +findmnt +grep +gunzip +gzexe +gzip +hostname +ip +ln +login +ls +lsblk +mkdir +mknod +mktemp +more +mount +mountpoint +mv +nisdomainname +pidof +ping +ping4 +ping6 +pwd +rbash +readlink +rm +rmdir +run-parts +sed +sh +sleep +ss +stty +su 
+sync +tar +tempfile +touch +true +umount +uname +uncompress +vdir +wdctl +which +ypdomainname +zcat +zcmp +zdiff +zegrep +zfgrep +zforce +zgrep +zless +zmore +znew +addgnupghome +addpart +adduser +agetty +alternatives +applygnupgdefaults +blkdeactivate +blkdiscard +blkid +blkzone +blockdev +build-locale-archive +capsh +cfdisk +chcpu +chgpasswd +chpasswd +chroot +clock +cracklib-check +cracklib-format +cracklib-packer +cracklib-unpacker +create-cracklib-dict +ctrlaltdel +delpart +dmfilemapd +dmsetup +dmstats +faillock +fdformat +fdisk +findfs +fsck +fsck.cramfs +fsck.minix +fsfreeze +fstrim +g13-syshelp +getcap +getpcaps +glibc_post_upgrade.x86_64 +groupadd +groupdel +groupmems +groupmod +grpck +grpconv +grpunconv +halt +hwclock +iconvconfig +init +ldattach +ldconfig +losetup +mkfs +mkfs.cramfs +mkfs.minix +mkhomedir_helper +mkswap +newusers +nologin +pam_console_apply +pam_timestamp_check +partx +pivot_root +poweroff +pwck +pwconv +pwhistory_helper +pwunconv +readprofile +reboot +resizepart +resolvconf +rfkill +rtcwake +runlevel +runuser +sasldblistusers2 +saslpasswd2 +setcap +sfdisk +shutdown +sulogin +swaplabel +swapoff +swapon +switch_root +telinit +unbound-anchor +unix_chkpwd +unix_update +update-alternatives +useradd +userdel +usermod +vigr +vipw +visudo +wipefs +zdump +zic +zramctl +[ +alias +applydeltarpm +arch +awk +b2sum +base32 +base64 +basename +basenc +bash +bashbug +bashbug-64 +bg +brotli +busctl +ca-legacy +cal +captoinfo +cat +catchsegv +cd +chacl +chage +chcon +chgrp +chmem +chmod +choom +chown +chrt +cksum +clear +cmp +col +colcrt +colrm +column +combinedeltarpm +comm +command +coredumpctl +cp +csplit +curl +cut +cvtsudoers +date +db_archive +db_checkpoint +db_deadlock +db_dump +db_dump185 +db_hotbackup +db_load +db_log_verify +db_printlog +db_recover +db_replicate +db_stat +db_tuner +db_upgrade +db_verify +dbus-broker +dbus-broker-launch +dd +df +diff +diff3 +dir +dircolors +dirmngr +dirmngr-client +dirname +dmesg +dnf +dnf-3 +du 
+easy_install-3.7 +echo +egrep +eject +env +evmctl +ex +expand +expr +factor +fallocate +false +fc +fg +fgrep +fincore +findmnt +fips-finish-install +fips-mode-setup +flock +fmt +fold +g13 +gapplication +gawk +gdbus +gencat +getconf +getent +getfacl +getopt +getopts +gio +gio-launch-desktop +gio-querymodules-64 +glib-compile-schemas +gpasswd +gpg +gpg-agent +gpg-connect-agent +gpg-error +gpg-wks-server +gpg-zip +gpg2 +gpgconf +gpgme-json +gpgparsemail +gpgsplit +gpgv +gpgv2 +grep +groups +gsettings +gtar +gunzip +gzexe +gzip +hash +head +hexdump +hostid +hostnamectl +i386 +iconv +id +infocmp +infotocap +install +ionice +ipcmk +ipcrm +ipcs +isosize +jobs +join +journalctl +kill +last +lastb +lastlog +ldd +link +linux32 +linux64 +ln +locale +localectl +localedef +logger +login +loginctl +logname +look +ls +lsblk +lscpu +lsipc +lslocks +lslogins +lsmem +lsns +make-dummy-cert +makedb +makedeltarpm +mcookie +md5sum +mesg +mkdir +mkfifo +mknod +mkpasswd +mktemp +modulemd-validator-v1 +more +mount +mountpoint +mv +namei +networkctl +newgidmap +newgrp +newuidmap +nice +nl +nohup +nproc +nsenter +numfmt +od +openssl +p11-kit +paste +pathchk +pinky +pip-3 +pip-3.7 +pip3 +pip3.7 +pldd +portablectl +pr +printenv +printf +prlimit +ptx +pwd +pwmake +pwscore +pydoc3 +pydoc3.7 +python3 +python3.7 +python3.7m +pyvenv +pyvenv-3.7 +raw +read +readlink +realpath +rename +renew-dummy-cert +renice +reset +resolvectl +rev +rm +rmdir +rpm +rpm2archive +rpm2cpio +rpmdb +rpmdumpheader +rpmkeys +rpmquery +rpmverify +runcon +rvi +rview +script +scriptreplay +sdiff +sed +seq +setarch +setfacl +setpriv +setsid +setterm +sg +sh +sha1sum +sha224sum +sha256sum +sha384sum +sha512sum +shred +shuf +sleep +sort +sotruss +split +sprof +stat +stdbuf +stty +su +sudo +sudoedit +sudoreplay +sum +sync +systemctl +systemd-analyze +systemd-ask-password +systemd-cat +systemd-cgls +systemd-cgtop +systemd-delta +systemd-detect-virt +systemd-escape +systemd-firstboot +systemd-id128 +systemd-inhibit 
+systemd-machine-id-setup +systemd-mount +systemd-notify +systemd-path +systemd-resolve +systemd-run +systemd-socket-activate +systemd-stdio-bridge +systemd-sysusers +systemd-tmpfiles +systemd-tty-ask-password-agent +systemd-umount +tabs +tac +tail +tar +taskset +tee +test +tic +timedatectl +timeout +toe +touch +tput +tr +true +truncate +trust +tset +tsort +tty +type +tzselect +ul +ulimit +umask +umount +unalias +uname +uname26 +unexpand +uniq +unlink +unshare +update-ca-trust +update-crypto-policies +users +utmpdump +uuidgen +uuidparse +vdir +vi +view +wait +wall +watchgnupg +wc +wdctl +whereis +who +whoami +write +x86_64 +xmlcatalog +xmllint +xmlwf +yes +yum +zcat +zcmp +zdiff +zegrep +zfgrep +zforce +zgrep +zless +zmore +znew +addgnupghome +addpart +adduser +agetty +alternatives +applygnupgdefaults +blkdeactivate +blkdiscard +blkid +blkzone +blockdev +build-locale-archive +capsh +cfdisk +chcpu +chgpasswd +chpasswd +chroot +clock +cracklib-check +cracklib-format +cracklib-packer +cracklib-unpacker +create-cracklib-dict +ctrlaltdel +delpart +dmfilemapd +dmsetup +dmstats +faillock +fdformat +fdisk +findfs +fsck +fsck.cramfs +fsck.minix +fsfreeze +fstrim +g13-syshelp +getcap +getpcaps +glibc_post_upgrade.x86_64 +groupadd +groupdel +groupmems +groupmod +grpck +grpconv +grpunconv +halt +hwclock +iconvconfig +init +ldattach +ldconfig +losetup +mkfs +mkfs.cramfs +mkfs.minix +mkhomedir_helper +mkswap +newusers +nologin +pam_console_apply +pam_timestamp_check +partx +pivot_root +poweroff +pwck +pwconv +pwhistory_helper +pwunconv +readprofile +reboot +resizepart +resolvconf +rfkill +rtcwake +runlevel +runuser +sasldblistusers2 +saslpasswd2 +setcap +sfdisk +shutdown +sulogin +swaplabel +swapoff +swapon +switch_root +telinit +unbound-anchor +unix_chkpwd +unix_update +update-alternatives +useradd +userdel +usermod +vigr +vipw +visudo +wipefs +zdump +zic +zramctl +[ +alias +applydeltarpm +arch +awk +b2sum +base32 +base64 +basename +basenc +bash +bashbug +bashbug-64 +bg +brotli 
+busctl +ca-legacy +cal +captoinfo +cat +catchsegv +cd +chacl +chage +chcon +chgrp +chmem +chmod +choom +chown +chrt +cksum +clear +cmp +col +colcrt +colrm +column +combinedeltarpm +comm +command +coredumpctl +cp +csplit +curl +cut +cvtsudoers +date +db_archive +db_checkpoint +db_deadlock +db_dump +db_dump185 +db_hotbackup +db_load +db_log_verify +db_printlog +db_recover +db_replicate +db_stat +db_tuner +db_upgrade +db_verify +dbus-broker +dbus-broker-launch +dd +df +diff +diff3 +dir +dircolors +dirmngr +dirmngr-client +dirname +dmesg +dnf +dnf-3 +du +easy_install-3.7 +echo +egrep +eject +env +evmctl +ex +expand +expr +factor +fallocate +false +fc +fg +fgrep +fincore +findmnt +fips-finish-install +fips-mode-setup +flock +fmt +fold +g13 +gapplication +gawk +gdbus +gencat +getconf +getent +getfacl +getopt +getopts +gio +gio-launch-desktop +gio-querymodules-64 +glib-compile-schemas +gpasswd +gpg +gpg-agent +gpg-connect-agent +gpg-error +gpg-wks-server +gpg-zip +gpg2 +gpgconf +gpgme-json +gpgparsemail +gpgsplit +gpgv +gpgv2 +grep +groups +gsettings +gtar +gunzip +gzexe +gzip +hash +head +hexdump +hostid +hostnamectl +i386 +iconv +id +infocmp +infotocap +install +ionice +ipcmk +ipcrm +ipcs +isosize +jobs +join +journalctl +kill +last +lastb +lastlog +ldd +link +linux32 +linux64 +ln +locale +localectl +localedef +logger +login +loginctl +logname +look +ls +lsblk +lscpu +lsipc +lslocks +lslogins +lsmem +lsns +make-dummy-cert +makedb +makedeltarpm +mcookie +md5sum +mesg +mkdir +mkfifo +mknod +mkpasswd +mktemp +modulemd-validator-v1 +more +mount +mountpoint +mv +namei +networkctl +newgidmap +newgrp +newuidmap +nice +nl +nohup +nproc +nsenter +numfmt +od +openssl +p11-kit +paste +pathchk +pinky +pip-3 +pip-3.7 +pip3 +pip3.7 +pldd +portablectl +pr +printenv +printf +prlimit +ptx +pwd +pwmake +pwscore +pydoc3 +pydoc3.7 +python3 +python3.7 +python3.7m +pyvenv +pyvenv-3.7 +raw +read +readlink +realpath +rename +renew-dummy-cert +renice +reset +resolvectl +rev +rm +rmdir +rpm 
+rpm2archive +rpm2cpio +rpmdb +rpmdumpheader +rpmkeys +rpmquery +rpmverify +runcon +rvi +rview +script +scriptreplay +sdiff +sed +seq +setarch +setfacl +setpriv +setsid +setterm +sg +sh +sha1sum +sha224sum +sha256sum +sha384sum +sha512sum +shred +shuf +sleep +sort +sotruss +split +sprof +stat +stdbuf +stty +su +sudo +sudoedit +sudoreplay +sum +sync +systemctl +systemd-analyze +systemd-ask-password +systemd-cat +systemd-cgls +systemd-cgtop +systemd-delta +systemd-detect-virt +systemd-escape +systemd-firstboot +systemd-id128 +systemd-inhibit +systemd-machine-id-setup +systemd-mount +systemd-notify +systemd-path +systemd-resolve +systemd-run +systemd-socket-activate +systemd-stdio-bridge +systemd-sysusers +systemd-tmpfiles +systemd-tty-ask-password-agent +systemd-umount +tabs +tac +tail +tar +taskset +tee +test +tic +timedatectl +timeout +toe +touch +tput +tr +true +truncate +trust +tset +tsort +tty +type +tzselect +ul +ulimit +umask +umount +unalias +uname +uname26 +unexpand +uniq +unlink +unshare +update-ca-trust +update-crypto-policies +users +utmpdump +uuidgen +uuidparse +vdir +vi +view +wait +wall +watchgnupg +wc +wdctl +whereis +who +whoami +write +x86_64 +xmlcatalog +xmllint +xmlwf +yes +yum +zcat +zcmp +zdiff +zegrep +zfgrep +zforce +zgrep +zless +zmore +znew +addgnupghome +addpart +adduser +agetty +alternatives +applygnupgdefaults +arping +blkdeactivate +blkdiscard +blkid +blockdev +build-locale-archive +capsh +cfdisk +chcpu +chkconfig +chpasswd +chroot +clock +clockdiff +cracklib-check +cracklib-format +cracklib-packer +cracklib-unpacker +create-cracklib-dict +ctrlaltdel +delpart +depmod +dmfilemapd +dmsetup +dmstats +dracut +faillock +fdformat +fdisk +findfs +fsck +fsck.cramfs +fsck.minix +fsfreeze +fstrim +getcap +getpcaps +glibc_post_upgrade.x86_64 +groupadd +groupdel +groupmems +groupmod +grpck +grpconv +grpunconv +halt +hardlink +hwclock +iconvconfig +iconvconfig.x86_64 +ifenslave +init +insmod +install-info +kpartx +lchage +ldattach +ldconfig 
+lgroupadd +lgroupdel +lgroupmod +lid +lnewusers +losetup +lpasswd +lsmod +luseradd +luserdel +lusermod +mkdict +mkfs +mkfs.cramfs +mkfs.minix +mkhomedir_helper +mkswap +modinfo +modprobe +newusers +nologin +packer +pam_console_apply +pam_tally2 +pam_timestamp_check +partx +ping6 +pivot_root +poweroff +pwck +pwconv +pwhistory_helper +pwunconv +rdisc +readprofile +reboot +resizepart +rmmod +rtcwake +runlevel +runuser +sasldblistusers2 +saslpasswd2 +sefcontext_compile +setcap +sfdisk +shutdown +sln +sulogin +swaplabel +swapoff +swapon +switch_root +sysctl +telinit +tracepath +tracepath6 +udevadm +unix_chkpwd +unix_update +update-alternatives +useradd +userdel +usermod +vigr +vipw +weak-modules +wipefs +yum-complete-transaction +yumdb +zdump +zic +zramctl +[ +addr2line +alias +ar +arch +as +awk +base64 +basename +bash +bashbug +bashbug-64 +bg +bootctl +busctl +c++filt +ca-legacy +cal +captoinfo +cat +catchsegv +cd +certutil +chacl +chage +chardetect +chcon +chfn +chgrp +chmem +chmod +chown +chrt +chsh +cksum +clear +cmp +cmsutil +col +colcrt +colrm +column +comm +command +coredumpctl +cp +cpio +crlutil +csplit +curl +cut +date +db_archive +db_checkpoint +db_deadlock +db_dump +db_dump185 +db_hotbackup +db_load +db_log_verify +db_printlog +db_recover +db_replicate +db_stat +db_tuner +db_upgrade +db_verify +dbus-binding-tool +dbus-cleanup-sockets +dbus-daemon +dbus-monitor +dbus-run-session +dbus-send +dbus-test-tool +dbus-update-activation-environment +dbus-uuidgen +dd +debuginfo-install +df +dgawk +diff +diff3 +dir +dircolors +dirname +dmesg +dnsdomainname +domainname +dracut +du +dwp +echo +egrep +eject +elfedit +env +ex +expand +expr +factor +fallocate +false +fc +fg +fgrep +find +find-repos-of-install +findmnt +flock +fmt +fold +free +gapplication +gawk +gdbus +gencat +getconf +getent +getfacl +getopt +getopts +gio +gio-querymodules-64 +glib-compile-schemas +gpasswd +gpg +gpg-agent +gpg-connect-agent +gpg-error +gpg-zip +gpg2 +gpgconf +gpgparsemail +gpgsplit +gpgv 
+gpgv2 +gprof +grep +groups +gsettings +gtar +gunzip +gzexe +gzip +head +hexdump +hostid +hostname +hostnamectl +i386 +iconv +id +idn +igawk +info +infocmp +infokey +infotocap +install +ionice +ipcmk +ipcrm +ipcs +isosize +jobs +join +journalctl +kernel-install +kill +kmod +lastlog +lchfn +lchsh +ld +ld.bfd +ld.gold +ldd +link +linux32 +linux64 +ln +locale +localectl +localedef +logger +login +loginctl +logname +look +ls +lsblk +lscpu +lsinitrd +lsipc +lslocks +lslogins +lsmem +lsns +lua +luac +lz4 +lz4c +lz4cat +machinectl +makedb +mcookie +md5sum +mkdir +mkfifo +mkinitrd +mknod +mktemp +modutil +more +mount +mountpoint +mv +namei +needs-restarting +newgrp +nice +nisdomainname +nl +nm +nohup +nproc +nsenter +numfmt +objcopy +objdump +od +oldfind +p11-kit +package-cleanup +passwd +paste +pathchk +pgawk +pgrep +pinentry +pinentry-curses +ping +ping6 +pinky +pk12util +pkg-config +pkill +pldd +pmap +pr +printenv +printf +prlimit +ps +ptx +pwd +pwdx +pwmake +pwscore +pydoc +python +python2 +python2.7 +ranlib +raw +read +readelf +readlink +realpath +rename +renice +repo-graph +repo-rss +repoclosure +repodiff +repomanage +repoquery +reposync +repotrack +reset +rev +rm +rmdir +rpcgen +rpm +rpm2cpio +rpmdb +rpmkeys +rpmquery +rpmverify +runcon +rvi +rview +script +scriptreplay +sdiff +sed +seq +setarch +setfacl +setpriv +setsid +setterm +setup-nsssysinit +setup-nsssysinit.sh +sg +sh +sha1sum +sha224sum +sha256sum +sha384sum +sha512sum +show-changed-rco +show-installed +shred +shuf +signtool +signver +size +skill +slabtop +sleep +snice +sort +sotruss +split +sprof +sqlite3 +ssltap +stat +stdbuf +strings +strip +stty +su +sum +sync +systemctl +systemd-analyze +systemd-ask-password +systemd-cat +systemd-cgls +systemd-cgtop +systemd-coredumpctl +systemd-delta +systemd-detect-virt +systemd-escape +systemd-firstboot +systemd-hwdb +systemd-inhibit +systemd-loginctl +systemd-machine-id-setup +systemd-notify +systemd-nspawn +systemd-path +systemd-run +systemd-stdio-bridge 
+systemd-tmpfiles +systemd-tty-ask-password-agent +tabs +tac +tail +tailf +tar +taskset +tee +test +testgdbm +tic +timedatectl +timeout +tload +toe +top +touch +tput +tr +tracepath +tracepath6 +true +truncate +trust +tset +tsort +tty +tzselect +udevadm +ul +umask +umount +unalias +uname +unexpand +uniq +unlink +unlz4 +unshare +unxz +update-ca-trust +update-mime-database +uptime +urlgrabber +users +utmpdump +uuidgen +vdir +verifytree +vi +view +vmstat +w +wait +watch +watchgnupg +wc +wdctl +whereis +who +whoami +write +x86_64 +xargs +xmlcatalog +xmllint +xmlwf +xz +xzcat +xzcmp +xzdec +xzdiff +xzegrep +xzfgrep +xzgrep +xzless +xzmore +yes +ypdomainname +yum +yum-builddep +yum-config-manager +yum-debug-dump +yum-debug-restore +yum-groups-manager +yumdownloader +zcat +zcmp +zdiff +zegrep +zfgrep +zforce +zgrep +zless +zmore +znew +addgnupghome +addpart +adduser +agetty +alternatives +applygnupgdefaults +arping +blkdeactivate +blkdiscard +blkid +blockdev +build-locale-archive +capsh +cfdisk +chcpu +chkconfig +chpasswd +chroot +clock +clockdiff +cracklib-check +cracklib-format +cracklib-packer +cracklib-unpacker +create-cracklib-dict +ctrlaltdel +delpart +depmod +dmfilemapd +dmsetup +dmstats +dracut +faillock +fdformat +fdisk +findfs +fsck +fsck.cramfs +fsck.minix +fsfreeze +fstrim +getcap +getpcaps +glibc_post_upgrade.x86_64 +groupadd +groupdel +groupmems +groupmod +grpck +grpconv +grpunconv +halt +hardlink +hwclock +iconvconfig +iconvconfig.x86_64 +ifenslave +init +insmod +install-info +kpartx +lchage +ldattach +ldconfig +lgroupadd +lgroupdel +lgroupmod +lid +lnewusers +losetup +lpasswd +lsmod +luseradd +luserdel +lusermod +mkdict +mkfs +mkfs.cramfs +mkfs.minix +mkhomedir_helper +mkswap +modinfo +modprobe +newusers +nologin +packer +pam_console_apply +pam_tally2 +pam_timestamp_check +partx +ping6 +pivot_root +poweroff +pwck +pwconv +pwhistory_helper +pwunconv +rdisc +readprofile +reboot +resizepart +rmmod +rtcwake +runlevel +runuser +sasldblistusers2 +saslpasswd2 
+sefcontext_compile +setcap +sfdisk +shutdown +sln +sulogin +swaplabel +swapoff +swapon +switch_root +sysctl +telinit +tracepath +tracepath6 +udevadm +unix_chkpwd +unix_update +update-alternatives +useradd +userdel +usermod +vigr +vipw +weak-modules +wipefs +yum-complete-transaction +yumdb +zdump +zic +zramctl +[ +addr2line +alias +ar +arch +as +awk +base64 +basename +bash +bashbug +bashbug-64 +bg +bootctl +busctl +c++filt +ca-legacy +cal +captoinfo +cat +catchsegv +cd +certutil +chacl +chage +chardetect +chcon +chfn +chgrp +chmem +chmod +chown +chrt +chsh +cksum +clear +cmp +cmsutil +col +colcrt +colrm +column +comm +command +coredumpctl +cp +cpio +crlutil +csplit +curl +cut +date +db_archive +db_checkpoint +db_deadlock +db_dump +db_dump185 +db_hotbackup +db_load +db_log_verify +db_printlog +db_recover +db_replicate +db_stat +db_tuner +db_upgrade +db_verify +dbus-binding-tool +dbus-cleanup-sockets +dbus-daemon +dbus-monitor +dbus-run-session +dbus-send +dbus-test-tool +dbus-update-activation-environment +dbus-uuidgen +dd +debuginfo-install +df +dgawk +diff +diff3 +dir +dircolors +dirname +dmesg +dnsdomainname +domainname +dracut +du +dwp +echo +egrep +eject +elfedit +env +ex +expand +expr +factor +fallocate +false +fc +fg +fgrep +find +find-repos-of-install +findmnt +flock +fmt +fold +free +gapplication +gawk +gdbus +gencat +getconf +getent +getfacl +getopt +getopts +gio +gio-querymodules-64 +glib-compile-schemas +gpasswd +gpg +gpg-agent +gpg-connect-agent +gpg-error +gpg-zip +gpg2 +gpgconf +gpgparsemail +gpgsplit +gpgv +gpgv2 +gprof +grep +groups +gsettings +gtar +gunzip +gzexe +gzip +head +hexdump +hostid +hostname +hostnamectl +i386 +iconv +id +idn +igawk +info +infocmp +infokey +infotocap +install +ionice +ipcmk +ipcrm +ipcs +isosize +jobs +join +journalctl +kernel-install +kill +kmod +lastlog +lchfn +lchsh +ld +ld.bfd +ld.gold +ldd +link +linux32 +linux64 +ln +locale +localectl +localedef +logger +login +loginctl +logname +look +ls +lsblk +lscpu +lsinitrd 
+lsipc +lslocks +lslogins +lsmem +lsns +lua +luac +lz4 +lz4c +lz4cat +machinectl +makedb +mcookie +md5sum +mkdir +mkfifo +mkinitrd +mknod +mktemp +modutil +more +mount +mountpoint +mv +namei +needs-restarting +newgrp +nice +nisdomainname +nl +nm +nohup +nproc +nsenter +numfmt +objcopy +objdump +od +oldfind +p11-kit +package-cleanup +passwd +paste +pathchk +pgawk +pgrep +pinentry +pinentry-curses +ping +ping6 +pinky +pk12util +pkg-config +pkill +pldd +pmap +pr +printenv +printf +prlimit +ps +ptx +pwd +pwdx +pwmake +pwscore +pydoc +python +python2 +python2.7 +ranlib +raw +read +readelf +readlink +realpath +rename +renice +repo-graph +repo-rss +repoclosure +repodiff +repomanage +repoquery +reposync +repotrack +reset +rev +rm +rmdir +rpcgen +rpm +rpm2cpio +rpmdb +rpmkeys +rpmquery +rpmverify +runcon +rvi +rview +script +scriptreplay +sdiff +sed +seq +setarch +setfacl +setpriv +setsid +setterm +setup-nsssysinit +setup-nsssysinit.sh +sg +sh +sha1sum +sha224sum +sha256sum +sha384sum +sha512sum +show-changed-rco +show-installed +shred +shuf +signtool +signver +size +skill +slabtop +sleep +snice +sort +sotruss +split +sprof +sqlite3 +ssltap +stat +stdbuf +strings +strip +stty +su +sum +sync +systemctl +systemd-analyze +systemd-ask-password +systemd-cat +systemd-cgls +systemd-cgtop +systemd-coredumpctl +systemd-delta +systemd-detect-virt +systemd-escape +systemd-firstboot +systemd-hwdb +systemd-inhibit +systemd-loginctl +systemd-machine-id-setup +systemd-notify +systemd-nspawn +systemd-path +systemd-run +systemd-stdio-bridge +systemd-tmpfiles +systemd-tty-ask-password-agent +tabs +tac +tail +tailf +tar +taskset +tee +test +testgdbm +tic +timedatectl +timeout +tload +toe +top +touch +tput +tr +tracepath +tracepath6 +true +truncate +trust +tset +tsort +tty +tzselect +udevadm +ul +umask +umount +unalias +uname +unexpand +uniq +unlink +unlz4 +unshare +unxz +update-ca-trust +update-mime-database +uptime +urlgrabber +users +utmpdump +uuidgen +vdir +verifytree +vi +view +vmstat +w 
+wait +watch +watchgnupg +wc +wdctl +whereis +who +whoami +write +x86_64 +xargs +xmlcatalog +xmllint +xmlwf +xz +xzcat +xzcmp +xzdec +xzdiff +xzegrep +xzfgrep +xzgrep +xzless +xzmore +yes +ypdomainname +yum +yum-builddep +yum-config-manager +yum-debug-dump +yum-debug-restore +yum-groups-manager +yumdownloader +zcat +zcmp +zdiff +zegrep +zfgrep +zforce +zgrep +zless +zmore +znew \ No newline at end of file From fcafddca29bb88a3448e6312becafc72bc5001ab Mon Sep 17 00:00:00 2001 From: Simon Let Date: Sun, 11 Aug 2019 16:25:11 +0200 Subject: [PATCH 04/41] uniq whitelist --- sanitizer_data/whitelist.txt | 2975 +++++----------------------------- 1 file changed, 393 insertions(+), 2582 deletions(-) diff --git a/sanitizer_data/whitelist.txt b/sanitizer_data/whitelist.txt index debb8fd..0ed66e8 100644 --- a/sanitizer_data/whitelist.txt +++ b/sanitizer_data/whitelist.txt @@ -1,2627 +1,128 @@ . .. -bin -boot -dev -etc -home -lib -lib64 -lost+found -media -mnt -opt -proc -root -run -sbin -srv -sys -tmp -usr -var -bash -zsh -fish -github.com -git -bash -bunzip2 -bzcat -bzcmp -bzdiff -bzegrep -bzexe -bzfgrep -bzgrep -bzip2 -bzip2recover -bzless -bzmore -cat -chgrp -chmod -chown -cp -dash -date -dd -df -dir -dmesg -dnsdomainname -domainname -echo -egrep -false -fgrep -findmnt -grep -gunzip -gzexe -gzip -hostname -kill -ln -login -ls -lsblk -mkdir -mknod -mktemp -more -mount -mountpoint -mv -nisdomainname -pidof -ps -pwd -rbash -readlink -rm -rmdir -run-parts -sed -sh -sh.distrib -sleep -stty -su -sync -tar -tempfile -touch -true -umount -uname -uncompress -vdir -wdctl -which -ypdomainname -zcat -zcmp -zdiff -zegrep -zfgrep -zforce -zgrep -zless -zmore -znew -agetty -badblocks -blkdiscard -blkid -blockdev -cfdisk -chcpu -ctrlaltdel -debugfs -dumpe2fs -e2fsck -e2image -e2label -e2undo -fdisk -findfs -fsck -fsck.cramfs -fsck.ext2 -fsck.ext3 -fsck.ext4 -fsck.minix -fsfreeze -fstab-decode -fstrim -getty -hwclock -initctl -installkernel -isosize -killall5 -ldconfig -ldconfig.real 
-logsave -losetup -mke2fs -mkfs -mkfs.bfs -mkfs.cramfs -mkfs.ext2 -mkfs.ext3 -mkfs.ext4 -mkfs.minix -mkhomedir_helper -mkswap -pam_extrausers_chkpwd -pam_extrausers_update -pam_tally -pam_tally2 -pivot_root -raw -resize2fs -runuser -sfdisk -shadowconfig -start-stop-daemon -sulogin -swaplabel -swapoff -swapon -switch_root -sysctl -tune2fs -unix_chkpwd -unix_update -wipefs -zramctl -[ -addpart -apt -apt-cache -apt-cdrom -apt-config -apt-get -apt-key -apt-mark -arch -awk -b2sum -base32 -base64 -basename -bashbug -captoinfo -catchsegv -chage -chattr -chcon -chfn -chrt -chsh -cksum -clear -clear_console -cmp -comm -csplit -cut -deb-systemd-helper -deb-systemd-invoke -debconf -debconf-apt-progress -debconf-communicate -debconf-copydb -debconf-escape -debconf-set-selections -debconf-show -delpart -diff -diff3 -dircolors -dirname -dpkg -dpkg-deb -dpkg-divert -dpkg-maintscript-helper -dpkg-query -dpkg-split -dpkg-statoverride -dpkg-trigger -du -env -expand -expiry -expr -factor -faillog -fallocate -find -flock -fmt -fold -free -getconf -getent -getopt -gpasswd -gpgv -groups -head -hostid -i386 -iconv -id -infocmp -infotocap -install -ionice -ipcmk -ipcrm -ipcs -ischroot -join -last -lastb -lastlog -ldd -link -linux32 -linux64 -locale -locale-check -localedef -logger -logname -lsattr -lscpu -lsipc -lslocks -lslogins -lsmem -lsns -mawk -mcookie -md5sum -md5sum.textutils -mesg -mkfifo -namei -nawk -newgrp -nice -nl -nohup -nproc -nsenter -numfmt -od -pager -partx -passwd -paste -pathchk -perl -perl5.26.1 -pgrep -pinky -pkill -pldd -pmap -pr -printenv -printf -prlimit -ptx -pwdx -realpath -rename.ul -renice -reset -resizepart -rev -rgrep -runcon -savelog -script -scriptreplay -sdiff -select-editor -sensible-browser -sensible-editor -sensible-pager -seq -setarch -setsid -setterm -sg -sha1sum -sha224sum -sha256sum -sha384sum -sha512sum -shred -shuf -skill -slabtop -snice -sort -split -stat -stdbuf -sum -tabs -tac -tail -taskset -tee -test -tic -timeout -tload -toe -top -touch 
-tput -tr -truncate -tset -tsort -tty -tzselect -unexpand -uniq -unlink -unshare -update-alternatives -uptime -users -utmpdump -vmstat -w -w.procps -wall -watch -wc -whereis -which -who -whoami -x86_64 -xargs -yes -zdump -unminimize -add-shell -addgroup -adduser -chgpasswd -chmem -chpasswd -chroot -cpgr -cppw -delgroup -deluser -dpkg-preconfigure -dpkg-reconfigure -e2freefrag -e4crypt -e4defrag -fdformat -filefrag -groupadd -groupdel -groupmems -groupmod -grpck -grpconv -grpunconv -iconvconfig -invoke-rc.d -ldattach -mklost+found -newusers -nologin -pam-auth-update -pam_getenv -pam_timestamp_check -policy-rc.d -pwck -pwconv -pwunconv -readprofile -remove-shell -rmt -rmt-tar -rtcwake -service -tarcat -update-passwd -update-rc.d -useradd -userdel -usermod -vigr -vipw -zic -docker -go -ssh -cz -com -http -https -localhost -jq -code -make -ping -ffmpeg -add-shell -addgroup -adduser -arpd -chgpasswd -chmem -chpasswd -chroot -cpgr -cppw -delgroup -deluser -dpkg-preconfigure -dpkg-reconfigure -e2freefrag -e4crypt -e4defrag -fdformat -filefrag -genl -groupadd -groupdel -groupmems -groupmod -grpck -grpconv -grpunconv -iconvconfig -invoke-rc.d -ldattach -mklost+found -newusers -nologin -pam-auth-update -pam_getenv -pam_timestamp_check -policy-rc.d -pwck -pwconv -pwunconv -readprofile -remove-shell -rmt -rmt-tar -rtcwake -service -tarcat -tzconfig -update-passwd -update-rc.d -useradd -userdel -usermod -vigr -vipw -zic -[ -addpart -apt -apt-cache -apt-cdrom -apt-config -apt-get -apt-key -apt-mark -arch -awk -b2sum -base32 -base64 -basename -bashbug -captoinfo -catchsegv -chage -chattr -chcon -chfn -choom -chrt -chsh -cksum -clear -clear_console -cmp -comm -csplit -ctstat -cut -deb-systemd-helper -deb-systemd-invoke -debconf -debconf-apt-progress -debconf-communicate -debconf-copydb -debconf-escape -debconf-set-selections -debconf-show -delpart -diff -diff3 -dircolors -dirname -dpkg -dpkg-deb -dpkg-divert -dpkg-maintscript-helper -dpkg-query -dpkg-split -dpkg-statoverride 
-dpkg-trigger -du -env -expand -expiry -expr -factor -faillog -fallocate -fincore -find -flock -fmt -fold -getconf -getent -getopt -gpasswd -gpgv -groups -head -hostid -i386 -iconv -id -infocmp -infotocap -install -ionice -ipcmk -ipcrm -ipcs -ischroot -join -last -lastb -lastlog -ldd -link -linux32 -linux64 -lnstat -locale -localedef -logger -logname -lsattr -lscpu -lsipc -lslocks -lslogins -lsmem -lsns -mawk -mcookie -md5sum -md5sum.textutils -mesg -mkfifo -namei -nawk -newgrp -nice -nl -nohup -nproc -nsenter -nstat -numfmt -od -pager -partx -passwd -paste -pathchk -perl -perl5.28.1 -pinky -pldd -pr -printenv -printf -prlimit -ptx -rdma -realpath -rename.ul -renice -reset -resizepart -rev -rgrep -routef -routel -rtstat -runcon -savelog -script -scriptreplay -sdiff -seq -setarch -setpriv -setsid -setterm -sg -sha1sum -sha224sum -sha256sum -sha384sum -sha512sum -shred -shuf -sort -split -stat -stdbuf -sum -tabs -tac -tail -taskset -tee -test -tic -timeout -toe -touch -tput -tr -truncate -tset -tsort -tty -tzselect -unexpand -uniq -unlink -unshare -update-alternatives -users -utmpdump -wall -wc -whereis -which -who -whoami -x86_64 -xargs -yes -zdump -agetty -badblocks -blkdiscard -blkid -blkzone -blockdev -bridge -capsh -cfdisk -chcpu -ctrlaltdel -debugfs -devlink -dumpe2fs -e2fsck -e2image -e2label -e2mmpstatus -e2undo -fdisk -findfs -fsck -fsck.cramfs -fsck.ext2 -fsck.ext3 -fsck.ext4 -fsck.minix -fsfreeze -fstab-decode -fstrim -getcap -getpcaps -getty -hwclock -installkernel -ip -isosize -killall5 -ldconfig -logsave -losetup -mke2fs -mkfs -mkfs.bfs -mkfs.cramfs -mkfs.ext2 -mkfs.ext3 -mkfs.ext4 -mkfs.minix -mkhomedir_helper -mkswap -pam_tally -pam_tally2 -pivot_root -raw -resize2fs -rtacct -rtmon -runuser -setcap -sfdisk -shadowconfig -start-stop-daemon -sulogin -swaplabel -swapoff -swapon -switch_root -tc -tipc -tune2fs -unix_chkpwd -unix_update -wipefs -zramctl -bash -cat -chgrp -chmod -chown -cp -dash -date -dd -df -dir -dmesg -dnsdomainname -domainname -echo 
-egrep -false -fgrep -findmnt -grep -gunzip -gzexe -gzip -hostname -ip -ln -login -ls -lsblk -mkdir -mknod -mktemp -more -mount -mountpoint -mv -nisdomainname -pidof -ping -ping4 -ping6 -pwd -rbash -readlink -rm -rmdir -run-parts -sed -sh -sleep -ss -stty -su -sync -tar -tempfile -touch -true -umount -uname -uncompress -vdir -wdctl -which -ypdomainname -zcat -zcmp -zdiff -zegrep -zfgrep -zforce -zgrep -zless -zmore -znew -addgnupghome -addpart -adduser -agetty -alternatives -applygnupgdefaults -blkdeactivate -blkdiscard -blkid -blkzone -blockdev -build-locale-archive -capsh -cfdisk -chcpu -chgpasswd -chpasswd -chroot -clock -cracklib-check -cracklib-format -cracklib-packer -cracklib-unpacker -create-cracklib-dict -ctrlaltdel -delpart -dmfilemapd -dmsetup -dmstats -faillock -fdformat -fdisk -findfs -fsck -fsck.cramfs -fsck.minix -fsfreeze -fstrim -g13-syshelp -getcap -getpcaps -glibc_post_upgrade.x86_64 -groupadd -groupdel -groupmems -groupmod -grpck -grpconv -grpunconv -halt -hwclock -iconvconfig -init -ldattach -ldconfig -losetup -mkfs -mkfs.cramfs -mkfs.minix -mkhomedir_helper -mkswap -newusers -nologin -pam_console_apply -pam_timestamp_check -partx -pivot_root -poweroff -pwck -pwconv -pwhistory_helper -pwunconv -readprofile -reboot -resizepart -resolvconf -rfkill -rtcwake -runlevel -runuser -sasldblistusers2 -saslpasswd2 -setcap -sfdisk -shutdown -sulogin -swaplabel -swapoff -swapon -switch_root -telinit -unbound-anchor -unix_chkpwd -unix_update -update-alternatives -useradd -userdel -usermod -vigr -vipw -visudo -wipefs -zdump -zic -zramctl -[ -alias -applydeltarpm -arch -awk -b2sum -base32 -base64 -basename -basenc -bash -bashbug -bashbug-64 -bg -brotli -busctl -ca-legacy -cal -captoinfo -cat -catchsegv -cd -chacl -chage -chcon -chgrp -chmem -chmod -choom -chown -chrt -cksum -clear -cmp -col -colcrt -colrm -column -combinedeltarpm -comm -command -coredumpctl -cp -csplit -curl -cut -cvtsudoers -date -db_archive -db_checkpoint -db_deadlock -db_dump -db_dump185 
-db_hotbackup -db_load -db_log_verify -db_printlog -db_recover -db_replicate -db_stat -db_tuner -db_upgrade -db_verify -dbus-broker -dbus-broker-launch -dd -df -diff -diff3 -dir -dircolors -dirmngr -dirmngr-client -dirname -dmesg -dnf -dnf-3 -du -easy_install-3.7 -echo -egrep -eject -env -evmctl -ex -expand -expr -factor -fallocate -false -fc -fg -fgrep -fincore -findmnt -fips-finish-install -fips-mode-setup -flock -fmt -fold -g13 -gapplication -gawk -gdbus -gencat -getconf -getent -getfacl -getopt -getopts -gio -gio-launch-desktop -gio-querymodules-64 -glib-compile-schemas -gpasswd -gpg -gpg-agent -gpg-connect-agent -gpg-error -gpg-wks-server -gpg-zip -gpg2 -gpgconf -gpgme-json -gpgparsemail -gpgsplit -gpgv -gpgv2 -grep -groups -gsettings -gtar -gunzip -gzexe -gzip -hash -head -hexdump -hostid -hostnamectl -i386 -iconv -id -infocmp -infotocap -install -ionice -ipcmk -ipcrm -ipcs -isosize -jobs -join -journalctl -kill -last -lastb -lastlog -ldd -link -linux32 -linux64 -ln -locale -localectl -localedef -logger -login -loginctl -logname -look -ls -lsblk -lscpu -lsipc -lslocks -lslogins -lsmem -lsns -make-dummy-cert -makedb -makedeltarpm -mcookie -md5sum -mesg -mkdir -mkfifo -mknod -mkpasswd -mktemp -modulemd-validator-v1 -more -mount -mountpoint -mv -namei -networkctl -newgidmap -newgrp -newuidmap -nice -nl -nohup -nproc -nsenter -numfmt -od -openssl -p11-kit -paste -pathchk -pinky -pip-3 -pip-3.7 -pip3 -pip3.7 -pldd -portablectl -pr -printenv -printf -prlimit -ptx -pwd -pwmake -pwscore -pydoc3 -pydoc3.7 -python3 -python3.7 -python3.7m -pyvenv -pyvenv-3.7 -raw -read -readlink -realpath -rename -renew-dummy-cert -renice -reset -resolvectl -rev -rm -rmdir -rpm -rpm2archive -rpm2cpio -rpmdb -rpmdumpheader -rpmkeys -rpmquery -rpmverify -runcon -rvi -rview -script -scriptreplay -sdiff -sed -seq -setarch -setfacl -setpriv -setsid -setterm -sg -sh -sha1sum -sha224sum -sha256sum -sha384sum -sha512sum -shred -shuf -sleep -sort -sotruss -split -sprof -stat -stdbuf -stty -su 
-sudo -sudoedit -sudoreplay -sum -sync -systemctl -systemd-analyze -systemd-ask-password -systemd-cat -systemd-cgls -systemd-cgtop -systemd-delta -systemd-detect-virt -systemd-escape -systemd-firstboot -systemd-id128 -systemd-inhibit -systemd-machine-id-setup -systemd-mount -systemd-notify -systemd-path -systemd-resolve -systemd-run -systemd-socket-activate -systemd-stdio-bridge -systemd-sysusers -systemd-tmpfiles -systemd-tty-ask-password-agent -systemd-umount -tabs -tac -tail -tar -taskset -tee -test -tic -timedatectl -timeout -toe -touch -tput -tr -true -truncate -trust -tset -tsort -tty -type -tzselect -ul -ulimit -umask -umount -unalias -uname -uname26 -unexpand -uniq -unlink -unshare -update-ca-trust -update-crypto-policies -users -utmpdump -uuidgen -uuidparse -vdir -vi -view -wait -wall -watchgnupg -wc -wdctl -whereis -who -whoami -write -x86_64 -xmlcatalog -xmllint -xmlwf -yes -yum -zcat -zcmp -zdiff -zegrep -zfgrep -zforce -zgrep -zless -zmore -znew -addgnupghome -addpart -adduser -agetty -alternatives -applygnupgdefaults -blkdeactivate -blkdiscard -blkid -blkzone -blockdev -build-locale-archive -capsh -cfdisk -chcpu -chgpasswd -chpasswd -chroot -clock -cracklib-check -cracklib-format -cracklib-packer -cracklib-unpacker -create-cracklib-dict -ctrlaltdel -delpart -dmfilemapd -dmsetup -dmstats -faillock -fdformat -fdisk -findfs -fsck -fsck.cramfs -fsck.minix -fsfreeze -fstrim -g13-syshelp -getcap -getpcaps -glibc_post_upgrade.x86_64 -groupadd -groupdel -groupmems -groupmod -grpck -grpconv -grpunconv -halt -hwclock -iconvconfig -init -ldattach -ldconfig -losetup -mkfs -mkfs.cramfs -mkfs.minix -mkhomedir_helper -mkswap -newusers -nologin -pam_console_apply -pam_timestamp_check -partx -pivot_root -poweroff -pwck -pwconv -pwhistory_helper -pwunconv -readprofile -reboot -resizepart -resolvconf -rfkill -rtcwake -runlevel -runuser -sasldblistusers2 -saslpasswd2 -setcap -sfdisk -shutdown -sulogin -swaplabel -swapoff -swapon -switch_root -telinit -unbound-anchor 
-unix_chkpwd -unix_update -update-alternatives -useradd -userdel -usermod -vigr -vipw -visudo -wipefs -zdump -zic -zramctl -[ -alias -applydeltarpm -arch -awk -b2sum -base32 -base64 -basename -basenc -bash -bashbug -bashbug-64 -bg -brotli -busctl -ca-legacy -cal -captoinfo -cat -catchsegv -cd -chacl -chage -chcon -chgrp -chmem -chmod -choom -chown -chrt -cksum -clear -cmp -col -colcrt -colrm -column -combinedeltarpm -comm -command -coredumpctl -cp -csplit -curl -cut -cvtsudoers -date -db_archive -db_checkpoint -db_deadlock -db_dump -db_dump185 -db_hotbackup -db_load -db_log_verify -db_printlog -db_recover -db_replicate -db_stat -db_tuner -db_upgrade -db_verify -dbus-broker -dbus-broker-launch -dd -df -diff -diff3 -dir -dircolors -dirmngr -dirmngr-client -dirname -dmesg -dnf -dnf-3 -du -easy_install-3.7 -echo -egrep -eject -env -evmctl -ex -expand -expr -factor -fallocate -false -fc -fg -fgrep -fincore -findmnt -fips-finish-install -fips-mode-setup -flock -fmt -fold -g13 -gapplication -gawk -gdbus -gencat -getconf -getent -getfacl -getopt -getopts -gio -gio-launch-desktop -gio-querymodules-64 -glib-compile-schemas -gpasswd -gpg -gpg-agent -gpg-connect-agent -gpg-error -gpg-wks-server -gpg-zip -gpg2 -gpgconf -gpgme-json -gpgparsemail -gpgsplit -gpgv -gpgv2 -grep -groups -gsettings -gtar -gunzip -gzexe -gzip -hash -head -hexdump -hostid -hostnamectl -i386 -iconv -id -infocmp -infotocap -install -ionice -ipcmk -ipcrm -ipcs -isosize -jobs -join -journalctl -kill -last -lastb -lastlog -ldd -link -linux32 -linux64 -ln -locale -localectl -localedef -logger -login -loginctl -logname -look -ls -lsblk -lscpu -lsipc -lslocks -lslogins -lsmem -lsns -make-dummy-cert -makedb -makedeltarpm -mcookie -md5sum -mesg -mkdir -mkfifo -mknod -mkpasswd -mktemp -modulemd-validator-v1 -more -mount -mountpoint -mv -namei -networkctl -newgidmap -newgrp -newuidmap -nice -nl -nohup -nproc -nsenter -numfmt -od -openssl -p11-kit -paste -pathchk -pinky -pip-3 -pip-3.7 -pip3 -pip3.7 -pldd 
-portablectl -pr -printenv -printf -prlimit -ptx -pwd -pwmake -pwscore -pydoc3 -pydoc3.7 -python3 -python3.7 -python3.7m -pyvenv -pyvenv-3.7 -raw -read -readlink -realpath -rename -renew-dummy-cert -renice -reset -resolvectl -rev -rm -rmdir -rpm -rpm2archive -rpm2cpio -rpmdb -rpmdumpheader -rpmkeys -rpmquery -rpmverify -runcon -rvi -rview -script -scriptreplay -sdiff -sed -seq -setarch -setfacl -setpriv -setsid -setterm -sg -sh -sha1sum -sha224sum -sha256sum -sha384sum -sha512sum -shred -shuf -sleep -sort -sotruss -split -sprof -stat -stdbuf -stty -su -sudo -sudoedit -sudoreplay -sum -sync -systemctl -systemd-analyze -systemd-ask-password -systemd-cat -systemd-cgls -systemd-cgtop -systemd-delta -systemd-detect-virt -systemd-escape -systemd-firstboot -systemd-id128 -systemd-inhibit -systemd-machine-id-setup -systemd-mount -systemd-notify -systemd-path -systemd-resolve -systemd-run -systemd-socket-activate -systemd-stdio-bridge -systemd-sysusers -systemd-tmpfiles -systemd-tty-ask-password-agent -systemd-umount -tabs -tac -tail -tar -taskset -tee -test -tic -timedatectl -timeout -toe -touch -tput -tr -true -truncate -trust -tset -tsort -tty -type -tzselect -ul -ulimit -umask -umount -unalias -uname -uname26 -unexpand -uniq -unlink -unshare -update-ca-trust -update-crypto-policies -users -utmpdump -uuidgen -uuidparse -vdir -vi -view -wait -wall -watchgnupg -wc -wdctl -whereis -who -whoami -write -x86_64 -xmlcatalog -xmllint -xmlwf -yes -yum -zcat -zcmp -zdiff -zegrep -zfgrep -zforce -zgrep -zless -zmore -znew -addgnupghome -addpart -adduser -agetty -alternatives -applygnupgdefaults -arping -blkdeactivate -blkdiscard -blkid -blockdev -build-locale-archive -capsh -cfdisk -chcpu -chkconfig -chpasswd -chroot -clock -clockdiff -cracklib-check -cracklib-format -cracklib-packer -cracklib-unpacker -create-cracklib-dict -ctrlaltdel -delpart -depmod -dmfilemapd -dmsetup -dmstats -dracut -faillock -fdformat -fdisk -findfs -fsck -fsck.cramfs -fsck.minix -fsfreeze -fstrim -getcap 
-getpcaps -glibc_post_upgrade.x86_64 -groupadd -groupdel -groupmems -groupmod -grpck -grpconv -grpunconv -halt -hardlink -hwclock -iconvconfig -iconvconfig.x86_64 -ifenslave -init -insmod -install-info -kpartx -lchage -ldattach -ldconfig -lgroupadd -lgroupdel -lgroupmod -lid -lnewusers -losetup -lpasswd -lsmod -luseradd -luserdel -lusermod -mkdict -mkfs -mkfs.cramfs -mkfs.minix -mkhomedir_helper -mkswap -modinfo -modprobe -newusers -nologin -packer -pam_console_apply -pam_tally2 -pam_timestamp_check -partx -ping6 -pivot_root -poweroff -pwck -pwconv -pwhistory_helper -pwunconv -rdisc -readprofile -reboot -resizepart -rmmod -rtcwake -runlevel -runuser -sasldblistusers2 -saslpasswd2 -sefcontext_compile -setcap -sfdisk -shutdown -sln -sulogin -swaplabel -swapoff -swapon -switch_root -sysctl -telinit -tracepath -tracepath6 -udevadm -unix_chkpwd -unix_update -update-alternatives -useradd -userdel -usermod -vigr -vipw -weak-modules -wipefs -yum-complete-transaction -yumdb -zdump -zic -zramctl -[ -addr2line -alias -ar -arch -as -awk -base64 -basename -bash -bashbug -bashbug-64 -bg -bootctl -busctl -c++filt -ca-legacy -cal -captoinfo -cat -catchsegv -cd -certutil -chacl -chage -chardetect -chcon -chfn -chgrp -chmem -chmod -chown -chrt -chsh -cksum -clear -cmp -cmsutil -col -colcrt -colrm -column -comm -command -coredumpctl -cp -cpio -crlutil -csplit -curl -cut -date -db_archive -db_checkpoint -db_deadlock -db_dump -db_dump185 -db_hotbackup -db_load -db_log_verify -db_printlog -db_recover -db_replicate -db_stat -db_tuner -db_upgrade -db_verify -dbus-binding-tool -dbus-cleanup-sockets -dbus-daemon -dbus-monitor -dbus-run-session -dbus-send -dbus-test-tool -dbus-update-activation-environment -dbus-uuidgen -dd -debuginfo-install -df -dgawk -diff -diff3 -dir -dircolors -dirname -dmesg -dnsdomainname -domainname -dracut -du -dwp -echo -egrep -eject -elfedit -env -ex -expand -expr -factor -fallocate -false -fc -fg -fgrep -find -find-repos-of-install -findmnt -flock -fmt -fold 
-free -gapplication -gawk -gdbus -gencat -getconf -getent -getfacl -getopt -getopts -gio -gio-querymodules-64 -glib-compile-schemas -gpasswd -gpg -gpg-agent -gpg-connect-agent -gpg-error -gpg-zip -gpg2 -gpgconf -gpgparsemail -gpgsplit -gpgv -gpgv2 -gprof -grep -groups -gsettings -gtar -gunzip -gzexe -gzip -head -hexdump -hostid -hostname -hostnamectl -i386 -iconv -id -idn -igawk -info -infocmp -infokey -infotocap -install -ionice -ipcmk -ipcrm -ipcs -isosize -jobs -join -journalctl -kernel-install -kill -kmod -lastlog -lchfn -lchsh -ld -ld.bfd -ld.gold -ldd -link -linux32 -linux64 -ln -locale -localectl -localedef -logger -login -loginctl -logname -look -ls -lsblk -lscpu -lsinitrd -lsipc -lslocks -lslogins -lsmem -lsns -lua -luac -lz4 -lz4c -lz4cat -machinectl -makedb -mcookie -md5sum -mkdir -mkfifo -mkinitrd -mknod -mktemp -modutil -more -mount -mountpoint -mv -namei -needs-restarting -newgrp -nice -nisdomainname -nl -nm -nohup -nproc -nsenter -numfmt -objcopy -objdump -od -oldfind -p11-kit -package-cleanup -passwd -paste -pathchk -pgawk -pgrep -pinentry -pinentry-curses -ping -ping6 -pinky -pk12util -pkg-config -pkill -pldd -pmap -pr -printenv -printf -prlimit -ps -ptx -pwd -pwdx -pwmake -pwscore -pydoc -python -python2 -python2.7 -ranlib -raw -read -readelf -readlink -realpath -rename -renice -repo-graph -repo-rss -repoclosure -repodiff -repomanage -repoquery -reposync -repotrack -reset -rev -rm -rmdir -rpcgen -rpm -rpm2cpio -rpmdb -rpmkeys -rpmquery -rpmverify -runcon -rvi -rview -script -scriptreplay -sdiff -sed -seq -setarch -setfacl -setpriv -setsid -setterm -setup-nsssysinit -setup-nsssysinit.sh -sg -sh -sha1sum -sha224sum -sha256sum -sha384sum -sha512sum -show-changed-rco -show-installed -shred -shuf -signtool -signver -size -skill -slabtop -sleep -snice -sort -sotruss -split -sprof -sqlite3 -ssltap -stat -stdbuf -strings -strip -stty -su -sum -sync -systemctl -systemd-analyze -systemd-ask-password -systemd-cat -systemd-cgls -systemd-cgtop 
-systemd-coredumpctl -systemd-delta -systemd-detect-virt -systemd-escape -systemd-firstboot -systemd-hwdb -systemd-inhibit -systemd-loginctl -systemd-machine-id-setup -systemd-notify -systemd-nspawn -systemd-path -systemd-run -systemd-stdio-bridge -systemd-tmpfiles -systemd-tty-ask-password-agent -tabs -tac -tail -tailf -tar -taskset -tee -test -testgdbm -tic -timedatectl -timeout -tload -toe -top -touch -tput -tr -tracepath -tracepath6 -true -truncate -trust -tset -tsort -tty -tzselect -udevadm -ul -umask -umount -unalias -uname -unexpand -uniq -unlink -unlz4 -unshare -unxz -update-ca-trust -update-mime-database -uptime -urlgrabber -users -utmpdump -uuidgen -vdir -verifytree -vi -view -vmstat -w -wait -watch -watchgnupg -wc -wdctl -whereis -who -whoami -write -x86_64 -xargs -xmlcatalog -xmllint -xmlwf -xz -xzcat -xzcmp -xzdec -xzdiff -xzegrep -xzfgrep -xzgrep -xzless -xzmore -yes -ypdomainname -yum -yum-builddep -yum-config-manager -yum-debug-dump -yum-debug-restore -yum-groups-manager -yumdownloader -zcat -zcmp -zdiff -zegrep -zfgrep -zforce -zgrep -zless -zmore -znew -addgnupghome -addpart -adduser -agetty -alternatives -applygnupgdefaults -arping -blkdeactivate -blkdiscard -blkid -blockdev -build-locale-archive -capsh -cfdisk -chcpu -chkconfig -chpasswd -chroot -clock -clockdiff -cracklib-check -cracklib-format -cracklib-packer -cracklib-unpacker -create-cracklib-dict -ctrlaltdel -delpart -depmod -dmfilemapd -dmsetup -dmstats -dracut -faillock -fdformat -fdisk -findfs -fsck -fsck.cramfs -fsck.minix -fsfreeze -fstrim -getcap -getpcaps -glibc_post_upgrade.x86_64 -groupadd -groupdel -groupmems -groupmod -grpck -grpconv -grpunconv -halt -hardlink -hwclock -iconvconfig -iconvconfig.x86_64 -ifenslave -init -insmod -install-info -kpartx -lchage -ldattach -ldconfig -lgroupadd -lgroupdel -lgroupmod -lid -lnewusers -losetup -lpasswd -lsmod -luseradd -luserdel -lusermod -mkdict -mkfs -mkfs.cramfs -mkfs.minix -mkhomedir_helper -mkswap -modinfo -modprobe -newusers -nologin 
-packer -pam_console_apply -pam_tally2 -pam_timestamp_check -partx -ping6 -pivot_root -poweroff -pwck -pwconv -pwhistory_helper -pwunconv -rdisc -readprofile -reboot -resizepart -rmmod -rtcwake -runlevel -runuser -sasldblistusers2 -saslpasswd2 -sefcontext_compile -setcap -sfdisk -shutdown -sln -sulogin -swaplabel -swapoff -swapon -switch_root -sysctl -telinit -tracepath -tracepath6 -udevadm -unix_chkpwd -unix_update -update-alternatives -useradd -userdel -usermod -vigr -vipw -weak-modules -wipefs -yum-complete-transaction -yumdb -zdump -zic -zramctl [ +addgnupghome +addgroup +addpart addr2line +add-shell +adduser +agetty alias +alternatives +applydeltarpm +applygnupgdefaults +apt +apt-cache +apt-cdrom +apt-config +apt-get +apt-key +apt-mark ar arch +arpd +arping as awk +b2sum +badblocks +base32 base64 basename +basenc bash bashbug bashbug-64 bg +bin +blkdeactivate +blkdiscard +blkid +blkzone +blockdev +boot bootctl +bridge +brotli +build-locale-archive +bunzip2 busctl -c++filt -ca-legacy +bzcat +bzcmp +bzdiff +bzegrep +bzexe +bzfgrep +bzgrep +bzip2 +bzip2recover +bzless +bzmore cal +ca-legacy +capsh captoinfo cat catchsegv cd certutil +cfdisk +c++filt chacl chage chardetect +chattr chcon +chcpu chfn +chgpasswd chgrp +chkconfig chmem chmod +choom chown +chpasswd +chroot chrt chsh cksum clear +clear_console +clock +clockdiff cmp cmsutil +code col colcrt colrm column +com +combinedeltarpm comm command coredumpctl cp +cpgr cpio +cppw +cracklib-check +cracklib-format +cracklib-packer +cracklib-unpacker +create-cracklib-dict crlutil csplit +ctrlaltdel +ctstat curl cut +cvtsudoers +cz +dash date db_archive db_checkpoint @@ -2637,8 +138,9 @@ db_replicate db_stat db_tuner db_upgrade -db_verify dbus-binding-tool +dbus-broker +dbus-broker-launch dbus-cleanup-sockets dbus-daemon dbus-monitor @@ -2647,120 +149,250 @@ dbus-send dbus-test-tool dbus-update-activation-environment dbus-uuidgen +db_verify dd +debconf +debconf-apt-progress +debconf-communicate +debconf-copydb 
+debconf-escape +debconf-set-selections +debconf-show +deb-systemd-helper +deb-systemd-invoke +debugfs debuginfo-install +delgroup +delpart +deluser +depmod +dev +devlink df dgawk diff diff3 dir dircolors +dirmngr +dirmngr-client dirname dmesg +dmfilemapd +dmsetup +dmstats +dnf +dnf-3 dnsdomainname +docker domainname +dpkg +dpkg-deb +dpkg-divert +dpkg-maintscript-helper +dpkg-preconfigure +dpkg-query +dpkg-reconfigure +dpkg-split +dpkg-statoverride +dpkg-trigger dracut du +dumpe2fs dwp +e2freefrag +e2fsck +e2image +e2label +e2mmpstatus +e2undo +e4crypt +e4defrag +easy_install-3.7 echo egrep eject elfedit env +etc +evmctl ex expand +expiry expr factor +faillock +faillog fallocate false fc +fdformat +fdisk +ffmpeg fg fgrep +filefrag +fincore find -find-repos-of-install +findfs findmnt +find-repos-of-install +fips-finish-install +fips-mode-setup +fish flock fmt fold free +fsck +fsck.cramfs +fsck.ext2 +fsck.ext3 +fsck.ext4 +fsck.minix +fsfreeze +fstab-decode +fstrim +g13 +g13-syshelp gapplication gawk gdbus gencat +genl +getcap getconf getent getfacl getopt getopts +getpcaps +getty gio +gio-launch-desktop gio-querymodules-64 +git +github.com glib-compile-schemas +glibc_post_upgrade.x86_64 +go gpasswd gpg +gpg2 gpg-agent +gpgconf gpg-connect-agent gpg-error -gpg-zip -gpg2 -gpgconf +gpgme-json gpgparsemail gpgsplit gpgv gpgv2 +gpg-wks-server +gpg-zip gprof grep +groupadd +groupdel +groupmems +groupmod groups +grpck +grpconv +grpunconv gsettings gtar gunzip gzexe gzip +halt +hardlink +hash head hexdump +home hostid hostname hostnamectl +http +https +hwclock i386 iconv +iconvconfig +iconvconfig.x86_64 id idn +ifenslave igawk info infocmp infokey infotocap +init +initctl +insmod install +install-info +installkernel +invoke-rc.d ionice +ip ipcmk ipcrm ipcs +ischroot isosize jobs join journalctl +jq kernel-install kill +killall5 kmod +kpartx +last +lastb lastlog +lchage lchfn lchsh ld +ldattach ld.bfd -ld.gold +ldconfig +ldconfig.real ldd +ld.gold +lgroupadd +lgroupdel 
+lgroupmod +lib +lib64 +lid link linux32 linux64 ln +lnewusers +lnstat locale +locale-check localectl localedef +localhost logger login loginctl logname +logsave look +losetup +lost+found +lpasswd ls +lsattr lsblk lscpu lsinitrd @@ -2768,114 +400,229 @@ lsipc lslocks lslogins lsmem +lsmod lsns lua luac +luseradd +luserdel +lusermod lz4 lz4c lz4cat machinectl +make makedb +makedeltarpm +make-dummy-cert +mawk mcookie md5sum +md5sum.textutils +media +mesg +mkdict mkdir +mke2fs mkfifo +mkfs +mkfs.bfs +mkfs.cramfs +mkfs.ext2 +mkfs.ext3 +mkfs.ext4 +mkfs.minix +mkhomedir_helper mkinitrd +mklost+found mknod +mkpasswd +mkswap mktemp +mnt +modinfo +modprobe +modulemd-validator-v1 modutil more mount mountpoint mv namei +nawk needs-restarting +networkctl +newgidmap newgrp +newuidmap +newusers nice nisdomainname nl nm nohup +nologin nproc nsenter +nstat numfmt objcopy objdump od oldfind +openssl +opt p11-kit package-cleanup +packer +pager +pam-auth-update +pam_console_apply +pam_extrausers_chkpwd +pam_extrausers_update +pam_getenv +pam_tally +pam_tally2 +pam_timestamp_check +partx passwd paste pathchk +perl +perl5.26.1 +perl5.28.1 pgawk pgrep +pidof pinentry pinentry-curses ping +ping4 ping6 pinky +pip-3 +pip3 +pip-3.7 +pip3.7 +pivot_root pk12util pkg-config pkill pldd pmap +policy-rc.d +portablectl +poweroff pr printenv printf prlimit +proc ps ptx +pwck +pwconv pwd pwdx +pwhistory_helper pwmake pwscore +pwunconv pydoc +pydoc3 +pydoc3.7 python python2 python2.7 +python3 +python3.7 +python3.7m +pyvenv +pyvenv-3.7 ranlib raw +rbash +rdisc +rdma read readelf readlink +readprofile realpath +reboot +remove-shell rename +rename.ul +renew-dummy-cert renice -repo-graph -repo-rss repoclosure repodiff +repo-graph repomanage repoquery +repo-rss reposync repotrack reset +resize2fs +resizepart +resolvconf +resolvectl rev +rfkill +rgrep rm rmdir +rmmod +rmt +rmt-tar +root +routef +routel rpcgen rpm +rpm2archive rpm2cpio rpmdb +rpmdumpheader rpmkeys rpmquery rpmverify +rtacct +rtcwake +rtmon 
+rtstat +run runcon +runlevel +run-parts +runuser rvi rview +sasldblistusers2 +saslpasswd2 +savelog +sbin script scriptreplay sdiff sed +sefcontext_compile +select-editor +sensible-browser +sensible-editor +sensible-pager seq +service setarch +setcap setfacl setpriv setsid setterm setup-nsssysinit setup-nsssysinit.sh +sfdisk sg sh sha1sum @@ -2883,31 +630,49 @@ sha224sum sha256sum sha384sum sha512sum +shadowconfig +sh.distrib show-changed-rco show-installed shred shuf +shutdown signtool signver size skill slabtop sleep +sln snice sort sotruss split sprof sqlite3 +srv +ss +ssh ssltap +start-stop-daemon stat stdbuf strings strip stty su +sudo +sudoedit +sudoreplay +sulogin sum +swaplabel +swapoff +swapon +switch_root sync +sys +sysctl systemctl systemd-analyze systemd-ask-password @@ -2920,29 +685,41 @@ systemd-detect-virt systemd-escape systemd-firstboot systemd-hwdb +systemd-id128 systemd-inhibit systemd-loginctl systemd-machine-id-setup +systemd-mount systemd-notify systemd-nspawn systemd-path +systemd-resolve systemd-run +systemd-socket-activate systemd-stdio-bridge +systemd-sysusers systemd-tmpfiles systemd-tty-ask-password-agent +systemd-umount tabs tac tail tailf tar +tarcat taskset +tc tee +telinit +tempfile test testgdbm tic timedatectl timeout +tipc tload +tmp toe top touch @@ -2956,40 +733,68 @@ trust tset tsort tty +tune2fs +type +tzconfig tzselect udevadm ul +ulimit umask umount unalias uname +uname26 +unbound-anchor +uncompress unexpand uniq +unix_chkpwd +unix_update unlink unlz4 +unminimize unshare unxz +update-alternatives update-ca-trust +update-crypto-policies update-mime-database +update-passwd +update-rc.d uptime urlgrabber +useradd +userdel +usermod users +usr utmpdump uuidgen +uuidparse +var vdir verifytree vi view +vigr +vipw +visudo vmstat w wait +wall watch watchgnupg wc wdctl +weak-modules whereis +which who whoami +wipefs +w.procps write x86_64 xargs @@ -3010,18 +815,24 @@ yes ypdomainname yum yum-builddep +yum-complete-transaction 
yum-config-manager +yumdb yum-debug-dump yum-debug-restore -yum-groups-manager yumdownloader +yum-groups-manager zcat zcmp zdiff +zdump zegrep zfgrep zforce zgrep +zic zless zmore -znew \ No newline at end of file +znew +zramctl +zsh From 55f8800a67f786c051017e971943258fad804cd1 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Sun, 11 Aug 2019 17:33:31 +0200 Subject: [PATCH 05/41] write sanitized history to file, add sanitizer to install/make --- .gitignore | 1 + Makefile | 44 ++++++++++++++++++++++- sanitize-history/resh-sanitize-history.go | 43 ++++++++++++++++------ sanitizer_data/whitelist.txt | 1 + 4 files changed, 78 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index afd111b..38a9c83 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ resh-collect resh-daemon +resh-sanitize-history diff --git a/Makefile b/Makefile index b59032d..1f6631f 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,41 @@ GOFLAGS=-ldflags "-X main.Version=${VERSION} -X main.Revision=${REVISION}" autoinstall: ./install_helper.sh +sanitize: + # + # + # I'm going to create a sanitized version of your resh history. + # Everything is done locally - your history won't leave this machine. + # The way this works is that any sensitive information in your history is going to be replaced with its SHA1 hash. + # There is also going to be a second version with hashes trimed to 12 characters for readability + # + # + # > full hashes: ~/resh_history_sanitized.json + # > 12 char hashes: ~/resh_history_sanitized_trim12.json + # + # + # Encountered any issues? Got questions? -> Hit me up at https://github.com/curusarn/resh/issues + # + # + # Running history sanitization ... + resh-sanitize-history -trim-hashes 0 --output ~/resh_history_sanitized.json + resh-sanitize-history -trim-hashes 12 --output ~/resh_history_sanitized_trim12.json + # + # + # SUCCESS - ALL DONE! 
+ # + # + # PLEASE HAVE A LOOK AT THE RESULT USING THESE COMMANDS: + # + # > pretty print JSON: + @echo 'cat ~/resh_history_sanitized_trim12.json | jq' + # + # > only show executed commands, don't show metadata: + @echo "cat ~/resh_history_sanitized_trim12.json | jq '.[\"cmdLine\"]'" + # + # + # + build: submodules resh-collect resh-daemon resh-sanitize-history @@ -41,24 +76,31 @@ install: build submodules/bash-preexec/bash-preexec.sh shellrc.sh config.toml uu # Final touch touch ~/.resh_history.json # + # + # ########################################################## # # # SUCCESS - thank you for trying out this project! # # # ########################################################## # + # # WHAT'S NEXT # Please RESTART ALL OPEN TERMINAL WINDOWS (or reload your rc files) # Your resh history is located in `~/.resh_history.json` # You can look at it using e.g. `tail -f ~/.resh_history.json | jq` # + # # ISSUES # If anything looks broken create an issue: https://github.com/curusarn/resh/issues # You can uninstall this at any time by running `rm -rf ~/.resh/` # You won't lose any collected history by removing `~/.resh` directory # + # # Please give me some contact info using this form: https://forms.gle/227SoyJ5c2iteKt98 # + # + # uninstall: # Uninstalling ... 
@@ -70,7 +112,7 @@ resh-daemon: daemon/resh-daemon.go common/resh-common.go version resh-collect: collect/resh-collect.go common/resh-common.go version go build ${GOFLAGS} -o $@ $< -resh-sanitize-history: collect/resh-sanitize-history.go common/resh-common.go version +resh-sanitize-history: sanitize-history/resh-sanitize-history.go common/resh-common.go version go build ${GOFLAGS} -o $@ $< $(HOME)/.resh $(HOME)/.resh/bin $(HOME)/.config: diff --git a/sanitize-history/resh-sanitize-history.go b/sanitize-history/resh-sanitize-history.go index 92d3cbe..9dbe9ea 100644 --- a/sanitize-history/resh-sanitize-history.go +++ b/sanitize-history/resh-sanitize-history.go @@ -38,7 +38,8 @@ func main() { showVersion := flag.Bool("version", false, "Show version and exit") showRevision := flag.Bool("revision", false, "Show git revision and exit") - // outputToStdout := flag.Bool("stdout", false, "Print output to stdout instead of file") + trimHashes := flag.Int("trim-hashes", 12, "Trim hashes to N characters (default: 12), 0 turns off trimming") + outputPath := flag.String("output", "", "Output file") flag.Parse() @@ -50,19 +51,31 @@ func main() { fmt.Println(Revision) os.Exit(0) } - sanitizer := sanitizer{hashLength: 4} + sanitizer := sanitizer{hashLength: *trimHashes} err := sanitizer.init(sanitizerDataPath) if err != nil { log.Fatal("Sanitizer init() error:", err) } - file, err := os.Open(historyPath) + inputFile, err := os.Open(historyPath) if err != nil { log.Fatal("Open() resh history file error:", err) } - defer file.Close() + defer inputFile.Close() - scanner := bufio.NewScanner(file) + var writer *bufio.Writer + useStdout := true + if len(*outputPath) > 0 { + useStdout = false + outputFile, err := os.Create(*outputPath) + if err != nil { + log.Fatal("Create() output file error:", err) + } + defer outputFile.Close() + writer = bufio.NewWriter(outputFile) + } + + scanner := bufio.NewScanner(inputFile) for scanner.Scan() { record := common.Record{} line := scanner.Text() @@ 
-84,7 +97,21 @@ func main() { log.Println("Line:", line) return } - fmt.Println(string(outLine)) + if useStdout { + fmt.Println(string(outLine)) + } else { + // fmt.Println(string(outLine)) + n, err := writer.WriteString(string(outLine) + "\n") + if err != nil { + log.Fatal(err) + } + if n == 0 { + log.Fatal("Nothing was written", n) + } + } + } + if useStdout == false { + writer.Flush() } } @@ -347,11 +374,9 @@ func (s *sanitizer) hashToken(token string) string { return token } // hash with sha1 - // trim to 12 characters h := sha1.New() h.Write([]byte(token)) sum := h.Sum(nil) - // TODO: extend hashes to 12 return s.trimHash(hex.EncodeToString(sum)) } @@ -359,8 +384,6 @@ func (s *sanitizer) hashNumericToken(token string) string { if len(token) <= 0 { return token } - // hash with fnv - // trim to 12 characters h := sha1.New() h.Write([]byte(token)) sum := h.Sum(nil) diff --git a/sanitizer_data/whitelist.txt b/sanitizer_data/whitelist.txt index 0ed66e8..e765f43 100644 --- a/sanitizer_data/whitelist.txt +++ b/sanitizer_data/whitelist.txt @@ -568,6 +568,7 @@ resize2fs resizepart resolvconf resolvectl +resh rev rfkill rgrep From c1f6a3a3b050de905aee4537b925a736ebd584a8 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Sun, 11 Aug 2019 18:08:34 +0200 Subject: [PATCH 06/41] minor changes, cleanup --- go.mod | 1 - go.sum | 2 -- sanitize-history/resh-sanitize-history.go | 14 +++++++++----- sanitizer_data/whitelist.txt | 3 +++ 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/go.mod b/go.mod index 9ff85f1..28d5ece 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,5 @@ go 1.12 require ( github.com/BurntSushi/toml v0.3.1 - github.com/mattn/go-shellwords v1.0.5 github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa ) diff --git a/go.sum b/go.sum index 629918a..72fef1f 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,4 @@ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod 
h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/mattn/go-shellwords v1.0.5 h1:JhhFTIOslh5ZsPrpa3Wdg8bF0WI3b44EMblmU9wIsXc= -github.com/mattn/go-shellwords v1.0.5/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o= github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa h1:rW+Lu6281ed/4XGuVIa4/YebTRNvoUJlfJ44ktEVwZk= github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa/go.mod h1:2rx5KE5FLD0HRfkkpyn8JwbVLBdhgeiOb2D2D9LLKM4= diff --git a/sanitize-history/resh-sanitize-history.go b/sanitize-history/resh-sanitize-history.go index 9dbe9ea..c2bb51a 100644 --- a/sanitize-history/resh-sanitize-history.go +++ b/sanitize-history/resh-sanitize-history.go @@ -38,8 +38,8 @@ func main() { showVersion := flag.Bool("version", false, "Show version and exit") showRevision := flag.Bool("revision", false, "Show git revision and exit") - trimHashes := flag.Int("trim-hashes", 12, "Trim hashes to N characters (default: 12), 0 turns off trimming") - outputPath := flag.String("output", "", "Output file") + trimHashes := flag.Int("trim-hashes", 12, "Trim hashes to N characters, '0' turns off trimming") + outputPath := flag.String("output", "", "Output file (default: use stdout)") flag.Parse() @@ -177,11 +177,13 @@ func (s *sanitizer) sanitizeRecord(record *common.Record) error { } func (s *sanitizer) sanitizeCmdLine(cmdLine string) (string, error) { + const optionEndingChars = "=;)" + const optionAllowedChars = "-_" sanCmdLine := "" buff := "" // simple options shouldn't be sanitized - // 1) whitespace 2) "-" or "--" 3) letters, digits, "-", "_" 4) ending whitespace or "=" + // 1) whitespace 2) "-" or "--" 3) letters, digits, "-", "_" 4) ending whitespace or any of "=;)" var optionDetected bool prevR3 := ' ' @@ -190,7 +192,7 @@ func (s *sanitizer) sanitizeCmdLine(cmdLine string) (string, error) { for _, r := range cmdLine { switch optionDetected { case true: - if unicode.IsSpace(r) || r == '=' || r == ';' { + if unicode.IsSpace(r) || 
strings.ContainsRune(optionEndingChars, r) { // whitespace, "=" or ";" ends the option // => add option unsanitized optionDetected = false @@ -199,7 +201,8 @@ func (s *sanitizer) sanitizeCmdLine(cmdLine string) (string, error) { buff = "" } sanCmdLine += string(r) - } else if unicode.IsLetter(r) == false && unicode.IsDigit(r) == false && r != '-' && r != '_' { + } else if unicode.IsLetter(r) == false && unicode.IsDigit(r) == false && + strings.ContainsRune(optionAllowedChars, r) == false { // r is not any of allowed chars for an option: letter, digit, "-" or "_" // => sanitize if len(buff) > 0 { @@ -217,6 +220,7 @@ func (s *sanitizer) sanitizeCmdLine(cmdLine string) (string, error) { case false: // split command on all non-letter and non-digit characters if unicode.IsLetter(r) == false && unicode.IsDigit(r) == false { + // TODO: decide if we want to split on tokens // split token if len(buff) > 0 { sanToken, err := s.sanitizeCmdToken(buff) diff --git a/sanitizer_data/whitelist.txt b/sanitizer_data/whitelist.txt index e765f43..74b4625 100644 --- a/sanitizer_data/whitelist.txt +++ b/sanitizer_data/whitelist.txt @@ -104,6 +104,7 @@ com combinedeltarpm comm command +convert coredumpctl cp cpgr @@ -781,6 +782,7 @@ view vigr vipw visudo +vlc vmstat w wait @@ -812,6 +814,7 @@ xzfgrep xzgrep xzless xzmore +yaourt yes ypdomainname yum From bb82c73a50740597290f478fdc0f8d020fb1b152 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Sun, 11 Aug 2019 18:55:28 +0200 Subject: [PATCH 07/41] add fallback record for old resh history records, minor changes --- common/resh-common.go | 138 ++++++++++++++++++++++ sanitize-history/resh-sanitize-history.go | 18 +-- 2 files changed, 148 insertions(+), 8 deletions(-) diff --git a/common/resh-common.go b/common/resh-common.go index 0de34ea..ccfc21c 100644 --- a/common/resh-common.go +++ b/common/resh-common.go @@ -1,5 +1,8 @@ package common +import "strconv" + +// Record representing single executed command with its metadata type Record struct { 
// core CmdLine string `json:"cmdLine"` @@ -65,6 +68,141 @@ type Record struct { CmdLength int `json:"cmdLength"` } +// FallbackRecord when record is too old and can't be parsed into regular Record +type FallbackRecord struct { + // older version of the record where cols and lines are int + + // core + CmdLine string `json:"cmdLine"` + ExitCode int `json:"exitCode"` + Shell string `json:"shell"` + Uname string `json:"uname"` + SessionId string `json:"sessionId"` + + // posix + Cols int `json:"cols"` // notice the in type + Lines int `json:"lines"` // notice the in type + Home string `json:"home"` + Lang string `json:"lang"` + LcAll string `json:"lcAll"` + Login string `json:"login"` + //Path string `json:"path"` + Pwd string `json:"pwd"` + PwdAfter string `json:"pwdAfter"` + ShellEnv string `json:"shellEnv"` + Term string `json:"term"` + + // non-posix"` + RealPwd string `json:"realPwd"` + RealPwdAfter string `json:"realPwdAfter"` + Pid int `json:"pid"` + SessionPid int `json:"sessionPid"` + Host string `json:"host"` + Hosttype string `json:"hosttype"` + Ostype string `json:"ostype"` + Machtype string `json:"machtype"` + Shlvl int `json:"shlvl"` + + // before after + TimezoneBefore string `json:"timezoneBefore"` + TimezoneAfter string `json:"timezoneAfter"` + + RealtimeBefore float64 `json:"realtimeBefore"` + RealtimeAfter float64 `json:"realtimeAfter"` + RealtimeBeforeLocal float64 `json:"realtimeBeforeLocal"` + RealtimeAfterLocal float64 `json:"realtimeAfterLocal"` + + RealtimeDuration float64 `json:"realtimeDuration"` + RealtimeSinceSessionStart float64 `json:"realtimeSinceSessionStart"` + RealtimeSinceBoot float64 `json:"realtimeSinceBoot"` + //Logs []string `json: "logs"` + + GitDir string `json:"gitDir"` + GitRealDir string `json:"gitRealDir"` + GitOriginRemote string `json:"gitOriginRemote"` + MachineId string `json:"machineId"` + + OsReleaseId string `json:"osReleaseId"` + OsReleaseVersionId string `json:"osReleaseVersionId"` + OsReleaseIdLike string 
`json:"osReleaseIdLike"` + OsReleaseName string `json:"osReleaseName"` + OsReleasePrettyName string `json:"osReleasePrettyName"` + + ReshUuid string `json:"reshUuid"` + ReshVersion string `json:"reshVersion"` + ReshRevision string `json:"reshRevision"` + + // added by sanitizatizer + CmdLength int `json:"cmdLength"` +} + +// ConvertRecord from FallbackRecord to Record +func ConvertRecord(r *FallbackRecord) Record { + return Record{ + // core + CmdLine: r.CmdLine, + ExitCode: r.ExitCode, + Shell: r.Shell, + Uname: r.Uname, + SessionId: r.SessionId, + + // posix + // these two lines are the only reason we are doing this + Cols: strconv.Itoa(r.Cols), + Lines: strconv.Itoa(r.Lines), + + Home: r.Home, + Lang: r.Lang, + LcAll: r.LcAll, + Login: r.Login, + // Path: r.path, + Pwd: r.Pwd, + PwdAfter: r.PwdAfter, + ShellEnv: r.ShellEnv, + Term: r.Term, + + // non-posix + RealPwd: r.RealPwd, + RealPwdAfter: r.RealPwdAfter, + Pid: r.Pid, + SessionPid: r.SessionPid, + Host: r.Host, + Hosttype: r.Hosttype, + Ostype: r.Ostype, + Machtype: r.Machtype, + Shlvl: r.Shlvl, + + // before after + TimezoneBefore: r.TimezoneBefore, + TimezoneAfter: r.TimezoneAfter, + + RealtimeBefore: r.RealtimeBefore, + RealtimeAfter: r.RealtimeAfter, + RealtimeBeforeLocal: r.RealtimeBeforeLocal, + RealtimeAfterLocal: r.RealtimeAfterLocal, + + RealtimeDuration: r.RealtimeDuration, + RealtimeSinceSessionStart: r.RealtimeSinceSessionStart, + RealtimeSinceBoot: r.RealtimeSinceBoot, + + GitDir: r.GitDir, + GitRealDir: r.GitRealDir, + GitOriginRemote: r.GitOriginRemote, + MachineId: r.MachineId, + + OsReleaseId: r.OsReleaseId, + OsReleaseVersionId: r.OsReleaseVersionId, + OsReleaseIdLike: r.OsReleaseIdLike, + OsReleaseName: r.OsReleaseName, + OsReleasePrettyName: r.OsReleasePrettyName, + + ReshUuid: r.ReshUuid, + ReshVersion: r.ReshVersion, + ReshRevision: r.ReshRevision, + } +} + +// Config struct type Config struct { Port int } diff --git a/sanitize-history/resh-sanitize-history.go 
b/sanitize-history/resh-sanitize-history.go index c2bb51a..890f48f 100644 --- a/sanitize-history/resh-sanitize-history.go +++ b/sanitize-history/resh-sanitize-history.go @@ -78,24 +78,26 @@ func main() { scanner := bufio.NewScanner(inputFile) for scanner.Scan() { record := common.Record{} + fallbackRecord := common.FallbackRecord{} line := scanner.Text() err = json.Unmarshal([]byte(line), &record) if err != nil { - log.Println("Decoding error:", err) - log.Println("Line:", line) - return + err = json.Unmarshal([]byte(line), &fallbackRecord) + if err != nil { + log.Println("Line:", line) + log.Fatal("Decoding error:", err) + } + record = common.ConvertRecord(&fallbackRecord) } err = sanitizer.sanitizeRecord(&record) if err != nil { - log.Println("Sanitization error:", err) log.Println("Line:", line) - return + log.Fatal("Sanitization error:", err) } outLine, err := json.Marshal(&record) if err != nil { - log.Println("Encoding error:", err) log.Println("Line:", line) - return + log.Fatal("Encoding error:", err) } if useStdout { fmt.Println(string(outLine)) @@ -220,7 +222,7 @@ func (s *sanitizer) sanitizeCmdLine(cmdLine string) (string, error) { case false: // split command on all non-letter and non-digit characters if unicode.IsLetter(r) == false && unicode.IsDigit(r) == false { - // TODO: decide if we want to split on tokens + // TODO: decide if we want to split on "-" and "_" // split token if len(buff) > 0 { sanToken, err := s.sanitizeCmdToken(buff) From 0900bc444167297df3f02cb12a02c228d546c9e0 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Sun, 11 Aug 2019 20:11:11 +0200 Subject: [PATCH 08/41] change allowed characters for options, add bash keywords and builtins to whitelist --- sanitize-history/resh-sanitize-history.go | 4 +- sanitizer_data/whitelist.txt | 61 ++++++++++++++++++++++- 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/sanitize-history/resh-sanitize-history.go b/sanitize-history/resh-sanitize-history.go index 890f48f..5122d99 100644 --- 
a/sanitize-history/resh-sanitize-history.go +++ b/sanitize-history/resh-sanitize-history.go @@ -179,8 +179,8 @@ func (s *sanitizer) sanitizeRecord(record *common.Record) error { } func (s *sanitizer) sanitizeCmdLine(cmdLine string) (string, error) { - const optionEndingChars = "=;)" - const optionAllowedChars = "-_" + const optionEndingChars = "\"$'\\#[]!><|;{}()*,?~&=" // all bash control characters and '=' which commonly ends options w/ values + const optionAllowedChars = "-_" // characters commonly found inside of options sanCmdLine := "" buff := "" diff --git a/sanitizer_data/whitelist.txt b/sanitizer_data/whitelist.txt index 74b4625..4b7da1c 100644 --- a/sanitizer_data/whitelist.txt +++ b/sanitizer_data/whitelist.txt @@ -1,7 +1,13 @@ +! . .. +: [ +[[ +]] +{ +} addgnupghome addgroup addpart @@ -37,6 +43,7 @@ bashbug bashbug-64 bg bin +bind blkdeactivate blkdiscard blkid @@ -44,9 +51,11 @@ blkzone blockdev boot bootctl +break bridge brotli build-locale-archive +builtin bunzip2 busctl bzcat @@ -62,8 +71,10 @@ bzless bzmore cal ca-legacy +caller capsh captoinfo +case cat catchsegv cd @@ -104,7 +115,12 @@ com combinedeltarpm comm command +compgen +complete +compopt +continue convert +coproc coredumpctl cp cpgr @@ -163,6 +179,7 @@ deb-systemd-helper deb-systemd-invoke debugfs debuginfo-install +declare delgroup delpart deluser @@ -178,6 +195,8 @@ dircolors dirmngr dirmngr-client dirname +dirs +disown dmesg dmfilemapd dmsetup @@ -185,8 +204,10 @@ dmstats dnf dnf-3 dnsdomainname +do docker domainname +done dpkg dpkg-deb dpkg-divert @@ -214,12 +235,20 @@ echo egrep eject elfedit +elif +else +enable env +esac etc +eval evmctl ex +exec +exit expand expiry +export expr factor faillock @@ -232,6 +261,7 @@ fdisk ffmpeg fg fgrep +fi filefrag fincore find @@ -244,6 +274,7 @@ fish flock fmt fold +for free fsck fsck.cramfs @@ -254,6 +285,7 @@ fsck.minix fsfreeze fstab-decode fstrim +function g13 g13-syshelp gapplication @@ -310,7 +342,9 @@ halt hardlink hash head +help hexdump 
+history home hostid hostname @@ -324,8 +358,10 @@ iconvconfig iconvconfig.x86_64 id idn +if ifenslave igawk +in info infocmp infokey @@ -366,6 +402,7 @@ ldconfig ldconfig.real ldd ld.gold +let lgroupadd lgroupdel lgroupmod @@ -378,6 +415,7 @@ linux64 ln lnewusers lnstat +local locale locale-check localectl @@ -387,6 +425,7 @@ logger login loginctl logname +logout logsave look losetup @@ -416,6 +455,7 @@ make makedb makedeltarpm make-dummy-cert +mapfile mawk mcookie md5sum @@ -512,6 +552,7 @@ pkill pldd pmap policy-rc.d +popd portablectl poweroff pr @@ -521,6 +562,7 @@ prlimit proc ps ptx +pushd pwck pwconv pwd @@ -546,8 +588,10 @@ rbash rdisc rdma read +readarray readelf readlink +readonly readprofile realpath reboot @@ -565,11 +609,12 @@ repo-rss reposync repotrack reset +resh resize2fs resizepart resolvconf resolvectl -resh +return rev rfkill rgrep @@ -610,12 +655,14 @@ scriptreplay sdiff sed sefcontext_compile +select select-editor sensible-browser sensible-editor sensible-pager seq service +set setarch setcap setfacl @@ -634,6 +681,8 @@ sha384sum sha512sum shadowconfig sh.distrib +shift +shopt show-changed-rco show-installed shred @@ -649,6 +698,7 @@ sln snice sort sotruss +source split sprof sqlite3 @@ -668,6 +718,7 @@ sudoedit sudoreplay sulogin sum +suspend swaplabel swapoff swapon @@ -716,9 +767,12 @@ telinit tempfile test testgdbm +then tic +time timedatectl timeout +times tipc tload tmp @@ -729,6 +783,7 @@ tput tr tracepath tracepath6 +trap true truncate trust @@ -737,6 +792,7 @@ tsort tty tune2fs type +typeset tzconfig tzselect udevadm @@ -756,7 +812,9 @@ unix_update unlink unlz4 unminimize +unset unshare +until unxz update-alternatives update-ca-trust @@ -794,6 +852,7 @@ wdctl weak-modules whereis which +while who whoami wipefs From cad811dfbaa5f52519cdb426920468e94f4d2182 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Sun, 11 Aug 2019 20:21:08 +0200 Subject: [PATCH 09/41] add zsh keywords and builtins to whitelist --- sanitizer_data/whitelist.txt | 
136 +++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) diff --git a/sanitizer_data/whitelist.txt b/sanitizer_data/whitelist.txt index 4b7da1c..ac47bba 100644 --- a/sanitizer_data/whitelist.txt +++ b/sanitizer_data/whitelist.txt @@ -899,3 +899,139 @@ zmore znew zramctl zsh +run-help +which-command +! +[[ +case +coproc +declare +do +done +elif +else +end +esac +export +fi +float +for +foreach +function +if +integer +local +nocorrect +readonly +repeat +select +then +time +typeset +until +while +{ +} +- +. +: +[ +alias +autoload +bg +bindkey +break +builtin +bye +cd +chdir +command +compadd +comparguments +compcall +compctl +compdescribe +compfiles +compgroups +compquote +compset +comptags +comptry +compvalues +continue +declare +dirs +disable +disown +echo +echotc +echoti +emulate +enable +eval +exec +exit +export +false +fc +fg +float +functions +getln +getopts +hash +history +integer +jobs +kill +let +limit +local +log +logout +noglob +popd +print +printf +private +pushd +pushln +pwd +r +read +readonly +rehash +return +sched +set +setopt +shift +source +suspend +test +times +trap +true +ttyctl +type +typeset +ulimit +umask +unalias +unfunction +unhash +unlimit +unset +unsetopt +vared +wait +whence +where +which +zcompile +zformat +zle +zmodload +zparseopts +zregexparse +zstyle From c9f703bc7f33755c2b7e69fb347bcb5fe0c8649a Mon Sep 17 00:00:00 2001 From: Simon Let Date: Sun, 11 Aug 2019 20:24:27 +0200 Subject: [PATCH 10/41] add git subcommands to whitelist and sort --- sanitizer_data/whitelist.txt | 204 ++++++++++++----------------------- 1 file changed, 69 insertions(+), 135 deletions(-) diff --git a/sanitizer_data/whitelist.txt b/sanitizer_data/whitelist.txt index ac47bba..08210e6 100644 --- a/sanitizer_data/whitelist.txt +++ b/sanitizer_data/whitelist.txt @@ -1,5 +1,6 @@ ! +- . .. 
: @@ -8,6 +9,7 @@ ]] { } +add addgnupghome addgroup addpart @@ -31,6 +33,7 @@ arch arpd arping as +autoload awk b2sum badblocks @@ -44,6 +47,8 @@ bashbug-64 bg bin bind +bindkey +bisect blkdeactivate blkdiscard blkid @@ -51,6 +56,7 @@ blkzone blockdev boot bootctl +branch break bridge brotli @@ -58,6 +64,7 @@ build-locale-archive builtin bunzip2 busctl +bye bzcat bzcmp bzdiff @@ -87,6 +94,8 @@ chardetect chattr chcon chcpu +chdir +checkout chfn chgpasswd chgrp @@ -104,6 +113,7 @@ clear clear_console clock clockdiff +clone cmp cmsutil code @@ -115,9 +125,22 @@ com combinedeltarpm comm command +commit +compadd +comparguments +compcall +compctl +compdescribe +compfiles compgen +compgroups complete compopt +compquote +compset +comptags +comptry +compvalues continue convert coproc @@ -196,6 +219,7 @@ dirmngr dirmngr-client dirname dirs +disable disown dmesg dmfilemapd @@ -232,12 +256,16 @@ e4crypt e4defrag easy_install-3.7 echo +echotc +echoti egrep eject elfedit elif else +emulate enable +end env esac etc @@ -258,6 +286,7 @@ false fc fdformat fdisk +fetch ffmpeg fg fgrep @@ -271,10 +300,12 @@ find-repos-of-install fips-finish-install fips-mode-setup fish +float flock fmt fold for +foreach free fsck fsck.cramfs @@ -286,6 +317,7 @@ fsfreeze fstab-decode fstrim function +functions g13 g13-syshelp gapplication @@ -297,6 +329,7 @@ getcap getconf getent getfacl +getln getopt getopts getpcaps @@ -372,6 +405,7 @@ insmod install install-info installkernel +integer invoke-rc.d ionice ip @@ -409,6 +443,7 @@ lgroupmod lib lib64 lid +limit link linux32 linux64 @@ -421,6 +456,7 @@ locale-check localectl localedef localhost +log logger login loginctl @@ -461,6 +497,7 @@ mcookie md5sum md5sum.textutils media +merge mesg mkdict mkdir @@ -501,6 +538,8 @@ nice nisdomainname nl nm +nocorrect +noglob nohup nologin nproc @@ -556,13 +595,18 @@ popd portablectl poweroff pr +print printenv printf +private prlimit proc ps ptx +pull +push pushd +pushln pwck pwconv pwd @@ -582,6 +626,7 @@ 
python3.7 python3.7m pyvenv pyvenv-3.7 +r ranlib raw rbash @@ -594,12 +639,15 @@ readlink readonly readprofile realpath +rebase reboot +rehash remove-shell rename rename.ul renew-dummy-cert renice +repeat repoclosure repodiff repo-graph @@ -641,6 +689,7 @@ rtmon rtstat run runcon +run-help runlevel run-parts runuser @@ -650,6 +699,7 @@ sasldblistusers2 saslpasswd2 savelog sbin +sched script scriptreplay sdiff @@ -666,6 +716,7 @@ set setarch setcap setfacl +setopt setpriv setsid setterm @@ -683,6 +734,7 @@ shadowconfig sh.distrib shift shopt +show show-changed-rco show-installed shred @@ -708,6 +760,7 @@ ssh ssltap start-stop-daemon stat +status stdbuf strings strip @@ -756,6 +809,7 @@ systemd-tty-ask-password-agent systemd-umount tabs tac +tag tail tailf tar @@ -790,6 +844,7 @@ trust tset tsort tty +ttyctl tune2fs type typeset @@ -806,13 +861,17 @@ uname26 unbound-anchor uncompress unexpand +unfunction +unhash uniq unix_chkpwd unix_update +unlimit unlink unlz4 unminimize unset +unsetopt unshare until unxz @@ -833,6 +892,7 @@ utmpdump uuidgen uuidparse var +vared vdir verifytree vi @@ -850,8 +910,11 @@ watchgnupg wc wdctl weak-modules +whence +where whereis which +which-command while who whoami @@ -887,151 +950,22 @@ yumdownloader yum-groups-manager zcat zcmp +zcompile zdiff zdump zegrep zfgrep zforce +zformat zgrep zic +zle zless +zmodload zmore znew -zramctl -zsh -run-help -which-command -! -[[ -case -coproc -declare -do -done -elif -else -end -esac -export -fi -float -for -foreach -function -if -integer -local -nocorrect -readonly -repeat -select -then -time -typeset -until -while -{ -} -- -. 
-: -[ -alias -autoload -bg -bindkey -break -builtin -bye -cd -chdir -command -compadd -comparguments -compcall -compctl -compdescribe -compfiles -compgroups -compquote -compset -comptags -comptry -compvalues -continue -declare -dirs -disable -disown -echo -echotc -echoti -emulate -enable -eval -exec -exit -export -false -fc -fg -float -functions -getln -getopts -hash -history -integer -jobs -kill -let -limit -local -log -logout -noglob -popd -print -printf -private -pushd -pushln -pwd -r -read -readonly -rehash -return -sched -set -setopt -shift -source -suspend -test -times -trap -true -ttyctl -type -typeset -ulimit -umask -unalias -unfunction -unhash -unlimit -unset -unsetopt -vared -wait -whence -where -which -zcompile -zformat -zle -zmodload zparseopts +zramctl zregexparse +zsh zstyle From c48b81958902b38337fb66a5f682f689114870d5 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Sun, 11 Aug 2019 20:47:44 +0200 Subject: [PATCH 11/41] dont sanitize single char tokens, add file extensions to whitelist, minor giturl fix --- sanitize-history/resh-sanitize-history.go | 13 ++++- sanitizer_data/whitelist.txt | 69 +++++++++++++++++++++++ 2 files changed, 79 insertions(+), 3 deletions(-) diff --git a/sanitize-history/resh-sanitize-history.go b/sanitize-history/resh-sanitize-history.go index 5122d99..c90812c 100644 --- a/sanitize-history/resh-sanitize-history.go +++ b/sanitize-history/resh-sanitize-history.go @@ -264,6 +264,9 @@ func (s *sanitizer) sanitizeCmdLine(cmdLine string) (string, error) { } func (s *sanitizer) sanitizeGitURL(rawURL string) (string, error) { + if len(rawURL) <= 0 { + return rawURL, nil + } parsedURL, err := giturls.Parse(rawURL) if err != nil { return rawURL, err @@ -272,6 +275,9 @@ func (s *sanitizer) sanitizeGitURL(rawURL string) (string, error) { } func (s *sanitizer) sanitizeURL(rawURL string) (string, error) { + if len(rawURL) <= 0 { + return rawURL, nil + } parsedURL, err := url.Parse(rawURL) if err != nil { return rawURL, err @@ -280,7 +286,6 
@@ func (s *sanitizer) sanitizeURL(rawURL string) (string, error) { } func (s *sanitizer) sanitizeParsedURL(parsedURL *url.URL) (string, error) { - // Scheme string parsedURL.Opaque = s.sanitizeToken(parsedURL.Opaque) userinfo := parsedURL.User.Username() // only get username => password won't even make it to the sanitized data @@ -330,7 +335,8 @@ func (s *sanitizer) sanitizeTwoPartToken(token string, delimeter string) (string func (s *sanitizer) sanitizeCmdToken(token string) (string, error) { // there shouldn't be tokens with letters or digits mixed together with symbols - if len(token) <= 0 { + if len(token) <= 1 { + // NOTE: do not sanitize single letter tokens return token, nil } if s.whitelist[token] == true { @@ -366,7 +372,8 @@ func (s *sanitizer) sanitizeCmdToken(token string) (string, error) { } func (s *sanitizer) sanitizeToken(token string) string { - if len(token) <= 0 { + if len(token) <= 1 { + // NOTE: do not sanitize single letter tokens return token } if s.whitelist[token] { diff --git a/sanitizer_data/whitelist.txt b/sanitizer_data/whitelist.txt index 08210e6..036b72a 100644 --- a/sanitizer_data/whitelist.txt +++ b/sanitizer_data/whitelist.txt @@ -33,6 +33,8 @@ arch arpd arping as +asm +au autoload awk b2sum @@ -49,11 +51,13 @@ bin bind bindkey bisect +blend blkdeactivate blkdiscard blkid blkzone blockdev +bmp boot bootctl branch @@ -65,6 +69,7 @@ builtin bunzip2 busctl bye +bz2 bzcat bzcmp bzdiff @@ -76,6 +81,7 @@ bzip2 bzip2recover bzless bzmore +c cal ca-legacy caller @@ -84,9 +90,11 @@ captoinfo case cat catchsegv +cc cd certutil cfdisk +cfg c++filt chacl chage @@ -109,6 +117,7 @@ chroot chrt chsh cksum +class clear clear_console clock @@ -141,6 +150,7 @@ compset comptags comptry compvalues +conf continue convert coproc @@ -148,6 +158,7 @@ coredumpctl cp cpgr cpio +cpp cppw cracklib-check cracklib-format @@ -156,6 +167,8 @@ cracklib-unpacker create-cracklib-dict crlutil csplit +css +csv ctrlaltdel ctstat curl @@ -164,6 +177,7 @@ cvtsudoers cz 
dash date +db db_archive db_checkpoint db_deadlock @@ -191,6 +205,7 @@ dbus-update-activation-environment dbus-uuidgen db_verify dd +deb debconf debconf-apt-progress debconf-communicate @@ -207,6 +222,7 @@ delgroup delpart deluser depmod +desktop dev devlink df @@ -243,6 +259,7 @@ dpkg-split dpkg-statoverride dpkg-trigger dracut +dtd du dumpe2fs dwp @@ -334,6 +351,7 @@ getopt getopts getpcaps getty +gif gio gio-launch-desktop gio-querymodules-64 @@ -369,8 +387,10 @@ grpunconv gsettings gtar gunzip +gz gzexe gzip +h halt hardlink hash @@ -382,6 +402,8 @@ home hostid hostname hostnamectl +htm +html http https hwclock @@ -414,6 +436,8 @@ ipcrm ipcs ischroot isosize +jar +java jobs join journalctl @@ -423,6 +447,9 @@ kill killall5 kmod kpartx +ksp +kss +kwd last lastb lastlog @@ -486,15 +513,21 @@ lusermod lz4 lz4c lz4cat +m3u +m4a +m4p machinectl make makedb makedeltarpm make-dummy-cert +man mapfile mawk mcookie +md5 md5sum +md5sums md5sum.textutils media merge @@ -518,6 +551,7 @@ mkpasswd mkswap mktemp mnt +mo modinfo modprobe modulemd-validator-v1 @@ -525,6 +559,8 @@ modutil more mount mountpoint +mp3 +mpg mv namei nawk @@ -546,9 +582,11 @@ nproc nsenter nstat numfmt +o objcopy objdump od +ogg oldfind openssl opt @@ -567,12 +605,17 @@ pam_timestamp_check partx passwd paste +patch pathchk +pdf perl perl5.26.1 perl5.28.1 pgawk pgrep +php +phps +phtml pidof pinentry pinentry-curses @@ -588,11 +631,15 @@ pivot_root pk12util pkg-config pkill +pl pldd +pls pmap +png policy-rc.d popd portablectl +pov poweroff pr print @@ -601,6 +648,7 @@ printf private prlimit proc +properties ps ptx pull @@ -615,9 +663,12 @@ pwhistory_helper pwmake pwscore pwunconv +py +pyc pydoc pydoc3 pydoc3.7 +pyo python python2 python2.7 @@ -630,6 +681,8 @@ r ranlib raw rbash +rc +rdf rdisc rdma read @@ -685,6 +738,7 @@ rpmquery rpmverify rtacct rtcwake +rtf rtmon rtstat run @@ -695,6 +749,7 @@ run-parts runuser rvi rview +s sasldblistusers2 saslpasswd2 savelog @@ -731,6 +786,7 @@ sha256sum sha384sum 
sha512sum shadowconfig +share sh.distrib shift shopt @@ -748,6 +804,7 @@ slabtop sleep sln snice +so sort sotruss source @@ -821,6 +878,8 @@ telinit tempfile test testgdbm +tga +tgz then tic time @@ -843,9 +902,11 @@ truncate trust tset tsort +ttf tty ttyctl tune2fs +txt type typeset tzconfig @@ -907,6 +968,7 @@ wait wall watch watchgnupg +wav wc wdctl weak-modules @@ -923,9 +985,14 @@ w.procps write x86_64 xargs +xbel +xml xmlcatalog xmllint xmlwf +xpm +xsd +xsl xz xzcat xzcmp @@ -948,6 +1015,7 @@ yum-debug-dump yum-debug-restore yumdownloader yum-groups-manager +Z zcat zcmp zcompile @@ -959,6 +1027,7 @@ zforce zformat zgrep zic +zip zle zless zmodload From 6b499ee11ddd4fe5a6b48f2077a0deee93f27774 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Sun, 11 Aug 2019 21:16:40 +0200 Subject: [PATCH 12/41] lowercase before whitelist lookup, add stuf to whitelist --- sanitize-history/resh-sanitize-history.go | 2 +- sanitizer_data/whitelist.txt | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/sanitize-history/resh-sanitize-history.go b/sanitize-history/resh-sanitize-history.go index c90812c..61af1f2 100644 --- a/sanitize-history/resh-sanitize-history.go +++ b/sanitize-history/resh-sanitize-history.go @@ -339,7 +339,7 @@ func (s *sanitizer) sanitizeCmdToken(token string) (string, error) { // NOTE: do not sanitize single letter tokens return token, nil } - if s.whitelist[token] == true { + if s.whitelist[strings.ToLower(token)] == true { return token, nil } diff --git a/sanitizer_data/whitelist.txt b/sanitizer_data/whitelist.txt index 036b72a..1adb416 100644 --- a/sanitizer_data/whitelist.txt +++ b/sanitizer_data/whitelist.txt @@ -246,6 +246,7 @@ dnf-3 dnsdomainname do docker +Dockerfile domainname done dpkg @@ -280,6 +281,7 @@ eject elfedit elif else +emacs emulate enable end @@ -442,6 +444,7 @@ jobs join journalctl jq +json kernel-install kill killall5 @@ -521,8 +524,10 @@ make makedb makedeltarpm make-dummy-cert +Makefile man mapfile +master 
mawk mcookie md5 @@ -574,6 +579,7 @@ nice nisdomainname nl nm +no nocorrect noglob nohup @@ -590,6 +596,7 @@ ogg oldfind openssl opt +origin p11-kit package-cleanup packer @@ -631,6 +638,7 @@ pivot_root pk12util pkg-config pkill +pkl pl pldd pls @@ -952,6 +960,7 @@ usr utmpdump uuidgen uuidparse +Vagrantfile var vared vdir @@ -959,6 +968,7 @@ verifytree vi view vigr +vim vipw visudo vlc From d5e2d2764b7dbcbe5098d3945550e7d3e5ada5f2 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Mon, 12 Aug 2019 15:35:11 +0200 Subject: [PATCH 13/41] improve whitelisting --- sanitize-history/resh-sanitize-history.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sanitize-history/resh-sanitize-history.go b/sanitize-history/resh-sanitize-history.go index 61af1f2..6884bc7 100644 --- a/sanitize-history/resh-sanitize-history.go +++ b/sanitize-history/resh-sanitize-history.go @@ -339,7 +339,7 @@ func (s *sanitizer) sanitizeCmdToken(token string) (string, error) { // NOTE: do not sanitize single letter tokens return token, nil } - if s.whitelist[strings.ToLower(token)] == true { + if s.isInWhitelist(token) == true { return token, nil } @@ -376,7 +376,7 @@ func (s *sanitizer) sanitizeToken(token string) string { // NOTE: do not sanitize single letter tokens return token } - if s.whitelist[token] { + if s.isInWhitelist(token) { return token } return s.hashToken(token) @@ -414,3 +414,7 @@ func (s *sanitizer) trimHash(hash string) string { } return hash[:length] } + +func (s *sanitizer) isInWhitelist(token string) bool { + return s.whitelist[strings.ToLower(token)] == true +} \ No newline at end of file From 7836346a47222f70cedaab912b01102506b12415 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Fri, 23 Aug 2019 11:11:57 +0200 Subject: [PATCH 14/41] sanitize changes --- sanitize-history/resh-sanitize-history.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sanitize-history/resh-sanitize-history.go b/sanitize-history/resh-sanitize-history.go 
index 6884bc7..3d4c646 100644 --- a/sanitize-history/resh-sanitize-history.go +++ b/sanitize-history/resh-sanitize-history.go @@ -179,8 +179,8 @@ func (s *sanitizer) sanitizeRecord(record *common.Record) error { } func (s *sanitizer) sanitizeCmdLine(cmdLine string) (string, error) { - const optionEndingChars = "\"$'\\#[]!><|;{}()*,?~&=" // all bash control characters and '=' which commonly ends options w/ values - const optionAllowedChars = "-_" // characters commonly found inside of options + const optionEndingChars = "\"$'\\#[]!><|;{}()*,?~&=`" // all bash control characters and '=' which commonly ends options w/ values + const optionAllowedChars = "-_" // characters commonly found inside of options sanCmdLine := "" buff := "" From 737bc0a4df38f53134aa39a0a07c2047e3fa88b0 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Tue, 3 Sep 2019 00:49:01 +0200 Subject: [PATCH 15/41] add common extensions to whitelist (fileinfo.com) --- sanitizer_data/whitelist.txt | 135 +++++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) diff --git a/sanitizer_data/whitelist.txt b/sanitizer_data/whitelist.txt index 1adb416..a6db1b8 100644 --- a/sanitizer_data/whitelist.txt +++ b/sanitizer_data/whitelist.txt @@ -9,6 +9,12 @@ ]] { } +3dm +3ds +3g2 +3gp +7z +accdb add addgnupghome addgroup @@ -17,8 +23,12 @@ addr2line add-shell adduser agetty +ai +aif alias alternatives +apk +app applydeltarpm applygnupgdefaults apt @@ -33,12 +43,18 @@ arch arpd arping as +asf asm +asp +aspx au autoload +avi awk +b b2sum badblocks +bak base32 base64 basename @@ -46,6 +62,7 @@ basenc bash bashbug bashbug-64 +bat bg bin bind @@ -82,6 +99,7 @@ bzip2recover bzless bzmore c +cab cal ca-legacy caller @@ -90,12 +108,16 @@ captoinfo case cat catchsegv +cbr cc cd +cer certutil cfdisk cfg c++filt +cfm +cgi chacl chage chardetect @@ -158,24 +180,32 @@ coredumpctl cp cpgr cpio +cpl cpp cppw cracklib-check cracklib-format cracklib-packer cracklib-unpacker +crdownload create-cracklib-dict crlutil +crx +cs 
csplit +csr css csv ctrlaltdel ctstat +cue +cur curl cut cvtsudoers cz dash +dat date db db_archive @@ -183,6 +213,7 @@ db_checkpoint db_deadlock db_dump db_dump185 +dbf db_hotbackup db_load db_log_verify @@ -204,7 +235,9 @@ dbus-test-tool dbus-update-activation-environment dbus-uuidgen db_verify +dcr dd +dds deb debconf debconf-apt-progress @@ -221,7 +254,9 @@ declare delgroup delpart deluser +dem depmod +deskthemepack desktop dev devlink @@ -237,16 +272,21 @@ dirname dirs disable disown +dll dmesg dmfilemapd +dmg +dmp dmsetup dmstats dnf dnf-3 dnsdomainname do +doc docker Dockerfile +docx domainname done dpkg @@ -260,10 +300,13 @@ dpkg-split dpkg-statoverride dpkg-trigger dracut +drv dtd du dumpe2fs +dwg dwp +dxf e2freefrag e2fsck e2image @@ -286,11 +329,13 @@ emulate enable end env +eps esac etc eval evmctl ex +exe exec exit expand @@ -319,10 +364,14 @@ find-repos-of-install fips-finish-install fips-mode-setup fish +fla float flock +flv fmt +fnt fold +fon for foreach free @@ -339,9 +388,12 @@ function functions g13 g13-syshelp +gadget +gam gapplication gawk gdbus +ged gencat genl getcap @@ -377,6 +429,7 @@ gpgv2 gpg-wks-server gpg-zip gprof +gpx grep groupadd groupdel @@ -397,6 +450,7 @@ halt hardlink hash head +heic help hexdump history @@ -404,25 +458,32 @@ home hostid hostname hostnamectl +hqx htm html http https hwclock i386 +icns +ico iconv iconvconfig iconvconfig.x86_64 +ics id idn if ifenslave +iff igawk in +indd info infocmp infokey infotocap +ini init initctl insmod @@ -437,18 +498,26 @@ ipcmk ipcrm ipcs ischroot +iso isosize jar java jobs join journalctl +jpg jq +js json +jsp kernel-install +key +keychain kill killall5 +kml kmod +kmz kpartx ksp kss @@ -479,6 +548,7 @@ linux32 linux64 ln lnewusers +lnk lnstat local locale @@ -516,9 +586,11 @@ lusermod lz4 lz4c lz4cat +m m3u m4a m4p +m4v machinectl make makedb @@ -529,14 +601,19 @@ man mapfile master mawk +max mcookie md5 md5sum md5sums md5sum.textutils +mdb +mdf media merge mesg +mid +mim mkdict mkdir 
mke2fs @@ -564,12 +641,18 @@ modutil more mount mountpoint +mov mp3 +mp4 +mpa mpg +msg +msi mv namei nawk needs-restarting +nes networkctl newgidmap newgrp @@ -589,18 +672,22 @@ nsenter nstat numfmt o +obj objcopy objdump od +odt ogg oldfind openssl opt origin +otf p11-kit package-cleanup packer pager +pages pam-auth-update pam_console_apply pam_extrausers_chkpwd @@ -609,11 +696,14 @@ pam_getenv pam_tally pam_tally2 pam_timestamp_check +part partx passwd paste patch pathchk +pct +pdb pdf perl perl5.26.1 @@ -636,12 +726,14 @@ pip-3.7 pip3.7 pivot_root pk12util +pkg pkg-config pkill pkl pl pldd pls +plugin pmap png policy-rc.d @@ -649,7 +741,11 @@ popd portablectl pov poweroff +pps +ppt +pptx pr +prf print printenv printf @@ -658,6 +754,8 @@ prlimit proc properties ps +psd +pspimage ptx pull push @@ -687,6 +785,7 @@ pyvenv pyvenv-3.7 r ranlib +rar raw rbash rc @@ -732,6 +831,7 @@ rmdir rmmod rmt rmt-tar +rom root routef routel @@ -744,6 +844,7 @@ rpmdumpheader rpmkeys rpmquery rpmverify +rss rtacct rtcwake rtf @@ -760,11 +861,13 @@ rview s sasldblistusers2 saslpasswd2 +sav savelog sbin sched script scriptreplay +sdf sdiff sed sefcontext_compile @@ -806,6 +909,7 @@ shuf shutdown signtool signver +sitx size skill slabtop @@ -818,7 +922,9 @@ sotruss source split sprof +sql sqlite3 +srt srv ss ssh @@ -837,9 +943,12 @@ sudoreplay sulogin sum suspend +svg swaplabel swapoff swapon +swf +swift switch_root sync sys @@ -880,16 +989,22 @@ tailf tar tarcat taskset +tax2016 +tax2018 tc tee telinit tempfile test testgdbm +tex tga tgz then +thm tic +tif +tiff time timedatectl timeout @@ -897,8 +1012,10 @@ times tipc tload tmp +toast toe top +torrent touch tput tr @@ -958,11 +1075,16 @@ usermod users usr utmpdump +uue uuidgen uuidparse Vagrantfile var vared +vb +vcd +vcf +vcxproj vdir verifytree vi @@ -973,6 +1095,7 @@ vipw visudo vlc vmstat +vob w wait wall @@ -991,11 +1114,21 @@ while who whoami wipefs +wma +wmv +wpd w.procps +wps write +wsf x86_64 xargs xbel +xcodeproj +xhtml 
+xlr +xls +xlsx xml xmlcatalog xmllint @@ -1025,6 +1158,7 @@ yum-debug-dump yum-debug-restore yumdownloader yum-groups-manager +yuv Z zcat zcmp @@ -1038,6 +1172,7 @@ zformat zgrep zic zip +zipx zle zless zmodload From 0d1c3ea1fc08b946dcf61d0bf254028a7bab51c5 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Wed, 4 Sep 2019 00:02:22 +0200 Subject: [PATCH 16/41] add a few common TLDs (https://www.hayksaakian.com/most-popular-tlds/) --- sanitizer_data/whitelist.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sanitizer_data/whitelist.txt b/sanitizer_data/whitelist.txt index a6db1b8..87502b0 100644 --- a/sanitizer_data/whitelist.txt +++ b/sanitizer_data/whitelist.txt @@ -77,6 +77,7 @@ blockdev bmp boot bootctl +br branch break bridge @@ -147,6 +148,7 @@ clockdiff clone cmp cmsutil +co code col colcrt @@ -238,6 +240,7 @@ db_verify dcr dd dds +de deb debconf debconf-apt-progress @@ -497,9 +500,11 @@ ip ipcmk ipcrm ipcs +ir ischroot iso isosize +it jar java jobs @@ -653,6 +658,7 @@ namei nawk needs-restarting nes +net networkctl newgidmap newgrp @@ -681,6 +687,7 @@ ogg oldfind openssl opt +org origin otf p11-kit @@ -850,6 +857,7 @@ rtcwake rtf rtmon rtstat +ru run runcon run-help @@ -1037,6 +1045,7 @@ typeset tzconfig tzselect udevadm +uk ul ulimit umask From 784938415109cd1fe4e0e6b8e669365060f9f9fb Mon Sep 17 00:00:00 2001 From: Simon Let Date: Sun, 8 Sep 2019 01:06:45 +0200 Subject: [PATCH 17/41] add copyright notice for sanitizer_data, minor changes --- sanitize-history/resh-sanitize-history.go | 3 ++- sanitizer_data/copyright_information.md | 7 +++++++ sanitizer_data/whitelist.txt | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 sanitizer_data/copyright_information.md diff --git a/sanitize-history/resh-sanitize-history.go b/sanitize-history/resh-sanitize-history.go index 3d4c646..03926b1 100644 --- a/sanitize-history/resh-sanitize-history.go +++ b/sanitize-history/resh-sanitize-history.go @@ -39,6 +39,7 @@ func main() { showVersion 
:= flag.Bool("version", false, "Show version and exit") showRevision := flag.Bool("revision", false, "Show git revision and exit") trimHashes := flag.Int("trim-hashes", 12, "Trim hashes to N characters, '0' turns off trimming") + inputPath := flag.String("input", historyPath, "Input file") outputPath := flag.String("output", "", "Output file (default: use stdout)") flag.Parse() @@ -57,7 +58,7 @@ func main() { log.Fatal("Sanitizer init() error:", err) } - inputFile, err := os.Open(historyPath) + inputFile, err := os.Open(*inputPath) if err != nil { log.Fatal("Open() resh history file error:", err) } diff --git a/sanitizer_data/copyright_information.md b/sanitizer_data/copyright_information.md new file mode 100644 index 0000000..a1b1308 --- /dev/null +++ b/sanitizer_data/copyright_information.md @@ -0,0 +1,7 @@ +# copyright information + +Whitelist contains content from variety of sources. + +Part of the whitelist (`./whitelist.txt`) is made of copyrighted content from [FileInfo.com](https://fileinfo.com/filetypes/common). + +This content was used with permission from FileInfo.com. 
\ No newline at end of file diff --git a/sanitizer_data/whitelist.txt b/sanitizer_data/whitelist.txt index 87502b0..180e9c3 100644 --- a/sanitizer_data/whitelist.txt +++ b/sanitizer_data/whitelist.txt @@ -1013,6 +1013,7 @@ thm tic tif tiff +tig time timedatectl timeout From ccaab4c4c7b847bb1290ddbe0d8741c844972dda Mon Sep 17 00:00:00 2001 From: Simon Let Date: Sun, 8 Sep 2019 01:08:12 +0200 Subject: [PATCH 18/41] draft of prediction/recommendation evaluation --- .gitignore | 1 + Makefile | 5 +- evaluate/resh-evaluate.go | 168 +++++++++++++++++++++++++++++++++++++ evaluate/strategy-dummy.go | 24 ++++++ 4 files changed, 197 insertions(+), 1 deletion(-) create mode 100644 evaluate/resh-evaluate.go create mode 100644 evaluate/strategy-dummy.go diff --git a/.gitignore b/.gitignore index 38a9c83..602e54d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ resh-collect resh-daemon resh-sanitize-history +resh-evaluate diff --git a/Makefile b/Makefile index 1f6631f..d5b4bb0 100644 --- a/Makefile +++ b/Makefile @@ -42,7 +42,7 @@ sanitize: # -build: submodules resh-collect resh-daemon resh-sanitize-history +build: submodules resh-collect resh-daemon resh-sanitize-history resh-evaluate rebuild: make clean @@ -115,6 +115,9 @@ resh-collect: collect/resh-collect.go common/resh-common.go version resh-sanitize-history: sanitize-history/resh-sanitize-history.go common/resh-common.go version go build ${GOFLAGS} -o $@ $< +resh-evaluate: evaluate/resh-evaluate.go evaluate/strategy-*.go common/resh-common.go version + go build ${GOFLAGS} -o $@ $< evaluate/strategy-*.go + $(HOME)/.resh $(HOME)/.resh/bin $(HOME)/.config: # Creating dirs ... 
mkdir -p $@ diff --git a/evaluate/resh-evaluate.go b/evaluate/resh-evaluate.go new file mode 100644 index 0000000..e6d667b --- /dev/null +++ b/evaluate/resh-evaluate.go @@ -0,0 +1,168 @@ +package main + +import ( + "bufio" + "encoding/json" + "flag" + "fmt" + "log" + "os" + "os/user" + "path/filepath" + + "github.com/curusarn/resh/common" +) + +// Version from git set during build +var Version string + +// Revision from git set during build +var Revision string + +func main() { + usr, _ := user.Current() + dir := usr.HomeDir + historyPath := filepath.Join(dir, ".resh_history.json") + sanitizedHistoryPath := filepath.Join(dir, "resh_history_sanitized.json") + + showVersion := flag.Bool("version", false, "Show version and exit") + showRevision := flag.Bool("revision", false, "Show git revision and exit") + inputPath := flag.String("input", "", + "Input file (default: "+historyPath+"OR"+sanitizedHistoryPath+ + " depending on --sanitized-input option)") + outputPath := flag.String("output", "", "Output file (default: use stdout)") + sanitizedInput := flag.Bool("sanitized-input", false, + "Handle input as sanitized (also changes default value for input argument)") + + flag.Parse() + + // set default input + if *inputPath == "" { + if *sanitizedInput { + *inputPath = sanitizedHistoryPath + } else { + *inputPath = historyPath + } + } + + if *showVersion == true { + fmt.Println(Version) + os.Exit(0) + } + if *showRevision == true { + fmt.Println(Revision) + os.Exit(0) + } + + var writer *bufio.Writer + if *outputPath != "" { + outputFile, err := os.Create(*outputPath) + if err != nil { + log.Fatal("Create() output file error:", err) + } + defer outputFile.Close() + writer = bufio.NewWriter(outputFile) + } else { + writer = bufio.NewWriter(os.Stdout) + } + defer writer.Flush() + + evaluator := evaluator{sanitizedInput: *sanitizedInput, writer: writer} + err := evaluator.init(*inputPath) + if err != nil { + log.Fatal("Evaluator init() error:", err) + } + + var strategies 
[]strategy + + dummy := strategyDummy{} + + strategies = append(strategies, &dummy) + + for _, strat := range strategies { + err = evaluator.evaluate(strat) + if err != nil { + log.Println("Evaluator evaluate() error:", err) + } + } +} + +type strategy interface { + GetTitleAndDescription() (string, string) + GetCandidates() []string + AddHistoryRecord(record *common.Record) error + ResetHistory() error +} + +type evaluator struct { + sanitizedInput bool + writer *bufio.Writer + historyRecords []common.Record +} + +func (e *evaluator) init(inputPath string) error { + e.historyRecords = e.loadHistoryRecords(inputPath) + return nil +} + +func (e *evaluator) evaluate(strat strategy) error { + // init dist buckets ? + // map dist int -> matches int + // map dist int -> charactersRecalled int + for _, record := range e.historyRecords { + _ = strat.GetCandidates() + // evaluate distance and characters recalled + err := strat.AddHistoryRecord(&record) + if err != nil { + log.Println("Error while evauating", err) + return err + } + } + // print results + outLine := "testing testing 123 testing ..." 
+ n, err := e.writer.WriteString(string(outLine) + "\n") + if err != nil { + log.Fatal(err) + } + if n == 0 { + log.Fatal("Nothing was written", n) + } + e.writer.Flush() + return nil +} + +func (e *evaluator) loadHistoryRecords(fname string) []common.Record { + file, err := os.Open(fname) + if err != nil { + log.Fatal("Open() resh history file error:", err) + } + defer file.Close() + + var records []common.Record + scanner := bufio.NewScanner(file) + for scanner.Scan() { + record := common.Record{} + fallbackRecord := common.FallbackRecord{} + line := scanner.Text() + err = json.Unmarshal([]byte(line), &record) + if err != nil { + err = json.Unmarshal([]byte(line), &fallbackRecord) + if err != nil { + log.Println("Line:", line) + log.Fatal("Decoding error:", err) + } + record = common.ConvertRecord(&fallbackRecord) + } + if e.sanitizedInput == false { + if record.CmdLength != 0 { + log.Fatal("Assert failed - 'cmdLength' is set in raw data. Maybe you want to use '--sanitized-input' option?") + } + record.CmdLength = len(record.CmdLine) + } + if record.CmdLength == 0 { + log.Fatal("Assert failed - 'cmdLength' is unset in the data. 
This should not happen.") + } + records = append(records, record) + } + return records +} diff --git a/evaluate/strategy-dummy.go b/evaluate/strategy-dummy.go new file mode 100644 index 0000000..c8e1dc8 --- /dev/null +++ b/evaluate/strategy-dummy.go @@ -0,0 +1,24 @@ +package main + +import "github.com/curusarn/resh/common" + +type strategyDummy struct { + history []string +} + +func (s *strategyDummy) GetTitleAndDescription() (string, string) { + return "recent", "Use recent commands" +} + +func (s *strategyDummy) GetCandidates() []string { + return nil +} + +func (s *strategyDummy) AddHistoryRecord(record *common.Record) error { + s.history = append(s.history, record.CmdLine) + return nil +} + +func (s *strategyDummy) ResetHistory() error { + return nil +} From 9d0a641381b79826e7137dafc1a63d81f5a6e4db Mon Sep 17 00:00:00 2001 From: Simon Let Date: Mon, 9 Sep 2019 00:00:50 +0200 Subject: [PATCH 19/41] add strategy-recent --- Makefile | 4 +- evaluate/resh-evaluate.go | 42 ++++++++++----- evaluate/statistics.go | 72 +++++++++++++++++++++++++ evaluate/strategy-dummy.go | 2 +- evaluate/strategy-recent.go | 31 +++++++++++ sanitizer_data/copyright_information.md | 2 +- 6 files changed, 137 insertions(+), 16 deletions(-) create mode 100644 evaluate/statistics.go create mode 100644 evaluate/strategy-recent.go diff --git a/Makefile b/Makefile index d5b4bb0..d7abc39 100644 --- a/Makefile +++ b/Makefile @@ -115,8 +115,8 @@ resh-collect: collect/resh-collect.go common/resh-common.go version resh-sanitize-history: sanitize-history/resh-sanitize-history.go common/resh-common.go version go build ${GOFLAGS} -o $@ $< -resh-evaluate: evaluate/resh-evaluate.go evaluate/strategy-*.go common/resh-common.go version - go build ${GOFLAGS} -o $@ $< evaluate/strategy-*.go +resh-evaluate: evaluate/resh-evaluate.go evaluate/statistics.go evaluate/strategy-*.go common/resh-common.go version + go build ${GOFLAGS} -o $@ $< evaluate/statistics.go evaluate/strategy-*.go $(HOME)/.resh 
$(HOME)/.resh/bin $(HOME)/.config: # Creating dirs ... diff --git a/evaluate/resh-evaluate.go b/evaluate/resh-evaluate.go index e6d667b..78532ef 100644 --- a/evaluate/resh-evaluate.go +++ b/evaluate/resh-evaluate.go @@ -67,7 +67,7 @@ func main() { } defer writer.Flush() - evaluator := evaluator{sanitizedInput: *sanitizedInput, writer: writer} + evaluator := evaluator{sanitizedInput: *sanitizedInput, writer: writer, maxCandidates: 42} err := evaluator.init(*inputPath) if err != nil { log.Fatal("Evaluator init() error:", err) @@ -75,9 +75,11 @@ func main() { var strategies []strategy - dummy := strategyDummy{} + // dummy := strategyDummy{} + // strategies = append(strategies, &dummy) - strategies = append(strategies, &dummy) + recent := strategyRecent{} + strategies = append(strategies, &recent) for _, strat := range strategies { err = evaluator.evaluate(strat) @@ -97,6 +99,7 @@ type strategy interface { type evaluator struct { sanitizedInput bool writer *bufio.Writer + maxCandidates int historyRecords []common.Record } @@ -106,28 +109,43 @@ func (e *evaluator) init(inputPath string) error { } func (e *evaluator) evaluate(strat strategy) error { - // init dist buckets ? - // map dist int -> matches int - // map dist int -> charactersRecalled int + stats := statistics{writer: e.writer, size: e.maxCandidates + 1} + stats.init() + for _, record := range e.historyRecords { - _ = strat.GetCandidates() - // evaluate distance and characters recalled + candidates := strat.GetCandidates() + + match := false + for i, candidate := range candidates { + // make an option (--calculate-total) to turn this on/off ? 
+ // if i >= e.maxCandidates { + // break + // } + if candidate == record.CmdLine { + stats.addMatch(i+1, record.CmdLength) + match = true + break + } + } + if match == false { + stats.addMiss() + } err := strat.AddHistoryRecord(&record) if err != nil { log.Println("Error while evauating", err) return err } } - // print results - outLine := "testing testing 123 testing ..." - n, err := e.writer.WriteString(string(outLine) + "\n") + title, description := strat.GetTitleAndDescription() + n, err := e.writer.WriteString(title + " - " + description + "\n") if err != nil { log.Fatal(err) } if n == 0 { log.Fatal("Nothing was written", n) } - e.writer.Flush() + // print results + stats.printCumulative() return nil } diff --git a/evaluate/statistics.go b/evaluate/statistics.go new file mode 100644 index 0000000..98bd2dd --- /dev/null +++ b/evaluate/statistics.go @@ -0,0 +1,72 @@ +package main + +import ( + "bufio" + "fmt" + "log" + "math" + "strconv" +) + +type statistics struct { + writer *bufio.Writer + size int + matches []int + matchesTotal int + charactersRecalled []int + charactersRecalledTotal int + dataPointCount int +} + +func (s *statistics) init() { + s.matches = make([]int, s.size) + s.charactersRecalled = make([]int, s.size) +} + +func (s *statistics) addMatch(distance int, cmdLength int) { + if distance >= s.size { + // --calculate-total + // log.Fatal("Match distance is greater than size of statistics") + s.matchesTotal++ + s.charactersRecalledTotal += cmdLength + return + } + s.matches[distance]++ + s.matchesTotal++ + s.charactersRecalled[distance] += cmdLength + s.charactersRecalledTotal += cmdLength + s.dataPointCount++ +} + +func (s *statistics) addMiss() { + s.dataPointCount++ +} + +func (s *statistics) printCumulative() { + matchesPercent := 0.0 + out := "### Matches ###\n" + for i := 0; i < s.size; i++ { + matchesPercent += 100 * float64(s.matches[i]) / float64(s.dataPointCount) + out += strconv.Itoa(i) + " ->" + out += fmt.Sprintf(" (%.1f %%)\n", 
matchesPercent) + for j := 0; j < int(math.Round(matchesPercent)); j++ { + out += "#" + } + out += "\n" + } + matchesPercent = 100 * float64(s.matchesTotal) / float64(s.dataPointCount) + out += "TOTAL ->" + out += fmt.Sprintf(" (%.1f %%)\n", matchesPercent) + for j := 0; j < int(math.Round(matchesPercent)); j++ { + out += "#" + } + out += "\n" + + n, err := s.writer.WriteString(string(out) + "\n\n") + if err != nil { + log.Fatal(err) + } + if n == 0 { + log.Fatal("Nothing was written", n) + } +} diff --git a/evaluate/strategy-dummy.go b/evaluate/strategy-dummy.go index c8e1dc8..28ed8ec 100644 --- a/evaluate/strategy-dummy.go +++ b/evaluate/strategy-dummy.go @@ -7,7 +7,7 @@ type strategyDummy struct { } func (s *strategyDummy) GetTitleAndDescription() (string, string) { - return "recent", "Use recent commands" + return "dummy", "Return empty candidate list" } func (s *strategyDummy) GetCandidates() []string { diff --git a/evaluate/strategy-recent.go b/evaluate/strategy-recent.go new file mode 100644 index 0000000..b75adc2 --- /dev/null +++ b/evaluate/strategy-recent.go @@ -0,0 +1,31 @@ +package main + +import "github.com/curusarn/resh/common" + +type strategyRecent struct { + history []string +} + +func (s *strategyRecent) GetTitleAndDescription() (string, string) { + return "recent", "Use recent commands" +} + +func (s *strategyRecent) GetCandidates() []string { + return s.history +} + +func (s *strategyRecent) AddHistoryRecord(record *common.Record) error { + // remove previous occurance of record + for i, cmd := range s.history { + if cmd == record.CmdLine { + s.history = append(s.history[:i], s.history[i+1:]...) + } + } + // append new record + s.history = append([]string{record.CmdLine}, s.history...) 
+ return nil +} + +func (s *strategyRecent) ResetHistory() error { + return nil +} diff --git a/sanitizer_data/copyright_information.md b/sanitizer_data/copyright_information.md index a1b1308..abdbf33 100644 --- a/sanitizer_data/copyright_information.md +++ b/sanitizer_data/copyright_information.md @@ -4,4 +4,4 @@ Whitelist contains content from variety of sources. Part of the whitelist (`./whitelist.txt`) is made of copyrighted content from [FileInfo.com](https://fileinfo.com/filetypes/common). -This content was used with permission from FileInfo.com. \ No newline at end of file +This content was used with permission from FileInfo.com. From 7d968147c122d82e10fbbf9f8ae9ff448ed7f59a Mon Sep 17 00:00:00 2001 From: Simon Let Date: Mon, 9 Sep 2019 01:44:43 +0200 Subject: [PATCH 20/41] add characters saved per submission to statistics --- evaluate/resh-evaluate.go | 2 +- evaluate/statistics.go | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/evaluate/resh-evaluate.go b/evaluate/resh-evaluate.go index 78532ef..abca32e 100644 --- a/evaluate/resh-evaluate.go +++ b/evaluate/resh-evaluate.go @@ -67,7 +67,7 @@ func main() { } defer writer.Flush() - evaluator := evaluator{sanitizedInput: *sanitizedInput, writer: writer, maxCandidates: 42} + evaluator := evaluator{sanitizedInput: *sanitizedInput, writer: writer, maxCandidates: 50} err := evaluator.init(*inputPath) if err != nil { log.Fatal("Evaluator init() error:", err) diff --git a/evaluate/statistics.go b/evaluate/statistics.go index 98bd2dd..7a42c9d 100644 --- a/evaluate/statistics.go +++ b/evaluate/statistics.go @@ -69,4 +69,31 @@ func (s *statistics) printCumulative() { if n == 0 { log.Fatal("Nothing was written", n) } + + charsRecall := 0.0 + out = "### Characters recalled per submission ###\n" + for i := 0; i < s.size; i++ { + charsRecall += float64(s.charactersRecalled[i]) / float64(s.dataPointCount) + out += strconv.Itoa(i) + " ->" + out += fmt.Sprintf(" (%.2f)\n", 
charsRecall) + for j := 0; j < int(math.Round(charsRecall)); j++ { + out += "#" + } + out += "\n" + } + charsRecall = float64(s.charactersRecalledTotal) / float64(s.dataPointCount) + out += "TOTAL ->" + out += fmt.Sprintf(" (%.2f)\n", charsRecall) + for j := 0; j < int(math.Round(charsRecall)); j++ { + out += "#" + } + out += "\n" + + n, err = s.writer.WriteString(string(out) + "\n\n") + if err != nil { + log.Fatal(err) + } + if n == 0 { + log.Fatal("Nothing was written", n) + } } From 939fb2f8471ab5f265c98210f0e64ad2286b63d1 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Mon, 9 Sep 2019 23:19:36 +0200 Subject: [PATCH 21/41] add sanitized flag to record, add Enrich() to record --- common/resh-common.go | 34 +++++++++++++++++++---- sanitize-history/resh-sanitize-history.go | 4 ++- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/common/resh-common.go b/common/resh-common.go index ccfc21c..1bd3f4a 100644 --- a/common/resh-common.go +++ b/common/resh-common.go @@ -1,6 +1,11 @@ package common -import "strconv" +import ( + "log" + "strconv" + + "github.com/mattn/go-shellwords" +) // Record representing single executed command with its metadata type Record struct { @@ -65,7 +70,11 @@ type Record struct { ReshRevision string `json:"reshRevision"` // added by sanitizatizer - CmdLength int `json:"cmdLength"` + Sanitized bool `json:"sanitized"` + CmdLength int `json:"cmdLength"` + + // enriching fields - added "later" + FirstWord string `json:"firstWord"` } // FallbackRecord when record is too old and can't be parsed into regular Record @@ -131,9 +140,6 @@ type FallbackRecord struct { ReshUuid string `json:"reshUuid"` ReshVersion string `json:"reshVersion"` ReshRevision string `json:"reshRevision"` - - // added by sanitizatizer - CmdLength int `json:"cmdLength"` } // ConvertRecord from FallbackRecord to Record @@ -202,6 +208,24 @@ func ConvertRecord(r *FallbackRecord) Record { } } +// Enrich - adds additional fields to the record +func (r *Record) Enrich() { + 
// Get command/first word from commandline + r.FirstWord = GetCommandFromCommandLine(r.CmdLine) +} + +// GetCommandFromCommandLine func +func GetCommandFromCommandLine(cmdLine string) string { + args, err := shellwords.Parse(cmdLine) + if err != nil { + log.Fatal("shellwords Error:", err) + } + if len(args) > 0 { + return args[0] + } + return "" +} + // Config struct type Config struct { Port int diff --git a/sanitize-history/resh-sanitize-history.go b/sanitize-history/resh-sanitize-history.go index 03926b1..c9fc057 100644 --- a/sanitize-history/resh-sanitize-history.go +++ b/sanitize-history/resh-sanitize-history.go @@ -176,6 +176,8 @@ func (s *sanitizer) sanitizeRecord(record *common.Record) error { if err != nil { log.Fatal("Cmd:", record.CmdLine, "; sanitization error:", err) } + + record.Sanitized = true return nil } @@ -418,4 +420,4 @@ func (s *sanitizer) trimHash(hash string) string { func (s *sanitizer) isInWhitelist(token string) bool { return s.whitelist[strings.ToLower(token)] == true -} \ No newline at end of file +} From 6f7f50542066ddd9196ecb56c257e70d3fcfea53 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Mon, 9 Sep 2019 23:21:12 +0200 Subject: [PATCH 22/41] checkpoint before separating plotting into python because matplotlib is just too good and trying to plot in golang is not worth it --- Makefile | 4 +- evaluate/resh-evaluate.go | 20 +++-- evaluate/results.go | 99 +++++++++++++++++++++++ evaluate/statistics.go | 166 +++++++++++++++++++++----------------- go.mod | 4 + go.sum | 9 +++ 6 files changed, 218 insertions(+), 84 deletions(-) create mode 100644 evaluate/results.go diff --git a/Makefile b/Makefile index d7abc39..918a124 100644 --- a/Makefile +++ b/Makefile @@ -115,8 +115,8 @@ resh-collect: collect/resh-collect.go common/resh-common.go version resh-sanitize-history: sanitize-history/resh-sanitize-history.go common/resh-common.go version go build ${GOFLAGS} -o $@ $< -resh-evaluate: evaluate/resh-evaluate.go evaluate/statistics.go 
evaluate/strategy-*.go common/resh-common.go version - go build ${GOFLAGS} -o $@ $< evaluate/statistics.go evaluate/strategy-*.go +resh-evaluate: evaluate/resh-evaluate.go evaluate/results.go evaluate/statistics.go evaluate/strategy-*.go common/resh-common.go version + go build ${GOFLAGS} -o $@ $< evaluate/results.go evaluate/statistics.go evaluate/strategy-*.go $(HOME)/.resh $(HOME)/.resh/bin $(HOME)/.config: # Creating dirs ... diff --git a/evaluate/resh-evaluate.go b/evaluate/resh-evaluate.go index abca32e..930a694 100644 --- a/evaluate/resh-evaluate.go +++ b/evaluate/resh-evaluate.go @@ -108,12 +108,15 @@ func (e *evaluator) init(inputPath string) error { return nil } -func (e *evaluator) evaluate(strat strategy) error { - stats := statistics{writer: e.writer, size: e.maxCandidates + 1} +func (e *evaluator) evaluate(strategy strategy) error { + res := results{writer: e.writer, size: e.maxCandidates + 1} + stats := statistics{} + res.init() stats.init() for _, record := range e.historyRecords { - candidates := strat.GetCandidates() + stats.addCmdLine(record.CmdLine, record.CmdLength) + candidates := strategy.GetCandidates() match := false for i, candidate := range candidates { @@ -122,21 +125,21 @@ func (e *evaluator) evaluate(strat strategy) error { // break // } if candidate == record.CmdLine { - stats.addMatch(i+1, record.CmdLength) + res.addMatch(i+1, record.CmdLength) match = true break } } if match == false { - stats.addMiss() + res.addMiss() } - err := strat.AddHistoryRecord(&record) + err := strategy.AddHistoryRecord(&record) if err != nil { log.Println("Error while evauating", err) return err } } - title, description := strat.GetTitleAndDescription() + title, description := strategy.GetTitleAndDescription() n, err := e.writer.WriteString(title + " - " + description + "\n") if err != nil { log.Fatal(err) @@ -145,7 +148,8 @@ func (e *evaluator) evaluate(strat strategy) error { log.Fatal("Nothing was written", n) } // print results - 
stats.printCumulative() + res.printCumulative() + stats.graphCmdFrequencyAsFuncOfRank() return nil } diff --git a/evaluate/results.go b/evaluate/results.go new file mode 100644 index 0000000..5a82aba --- /dev/null +++ b/evaluate/results.go @@ -0,0 +1,99 @@ +package main + +import ( + "bufio" + "fmt" + "log" + "math" + "strconv" +) + +type results struct { + writer *bufio.Writer + size int + matches []int // matches[N] -> # of matches at distance N + matchesTotal int + charactersRecalled []int + charactersRecalledTotal int + dataPointCount int +} + +func (r *results) init() { + r.matches = make([]int, r.size) + r.charactersRecalled = make([]int, r.size) +} + +func (r *results) addMatch(distance int, cmdLength int) { + if distance >= r.size { + // --calculate-total + // log.Fatal("Match distance is greater than size of statistics") + r.matchesTotal++ + r.charactersRecalledTotal += cmdLength + return + } + r.matches[distance]++ + r.matchesTotal++ + r.charactersRecalled[distance] += cmdLength + r.charactersRecalledTotal += cmdLength + r.dataPointCount++ +} + +func (r *results) addMiss() { + r.dataPointCount++ +} + +func (r *results) printCumulative() { + matchesPercent := 0.0 + out := "### Matches ###\n" + for i := 0; i < r.size; i++ { + matchesPercent += 100 * float64(r.matches[i]) / float64(r.dataPointCount) + out += strconv.Itoa(i) + " ->" + out += fmt.Sprintf(" (%.1f %%)\n", matchesPercent) + for j := 0; j < int(math.Round(matchesPercent)); j++ { + out += "#" + } + out += "\n" + } + matchesPercent = 100 * float64(r.matchesTotal) / float64(r.dataPointCount) + out += "TOTAL ->" + out += fmt.Sprintf(" (%.1f %%)\n", matchesPercent) + for j := 0; j < int(math.Round(matchesPercent)); j++ { + out += "#" + } + out += "\n" + + n, err := r.writer.WriteString(string(out) + "\n\n") + if err != nil { + log.Fatal(err) + } + if n == 0 { + log.Fatal("Nothing was written", n) + } + + charsRecall := 0.0 + out = "### Characters recalled per submission ###\n" + for i := 0; i < r.size; 
i++ { + charsRecall += float64(r.charactersRecalled[i]) / float64(r.dataPointCount) + out += strconv.Itoa(i) + " ->" + out += fmt.Sprintf(" (%.2f)\n", charsRecall) + for j := 0; j < int(math.Round(charsRecall)); j++ { + out += "#" + } + out += "\n" + } + charsRecall = float64(r.charactersRecalledTotal) / float64(r.dataPointCount) + out += "TOTAL ->" + out += fmt.Sprintf(" (%.2f)\n", charsRecall) + for j := 0; j < int(math.Round(charsRecall)); j++ { + out += "#" + } + out += "\n" + + n, err = r.writer.WriteString(string(out) + "\n\n") + if err != nil { + log.Fatal(err) + } + if n == 0 { + log.Fatal("Nothing was written", n) + } +} diff --git a/evaluate/statistics.go b/evaluate/statistics.go index 7a42c9d..0a71857 100644 --- a/evaluate/statistics.go +++ b/evaluate/statistics.go @@ -1,99 +1,117 @@ package main import ( - "bufio" - "fmt" + "bytes" + "io/ioutil" "log" - "math" - "strconv" + "sort" + + "github.com/wcharczuk/go-chart" ) type statistics struct { - writer *bufio.Writer - size int - matches []int - matchesTotal int - charactersRecalled []int - charactersRecalledTotal int - dataPointCount int + //size int + dataPointCount int + cmdLineCount map[string]int } func (s *statistics) init() { - s.matches = make([]int, s.size) - s.charactersRecalled = make([]int, s.size) + s.cmdLineCount = make(map[string]int) } -func (s *statistics) addMatch(distance int, cmdLength int) { - if distance >= s.size { - // --calculate-total - // log.Fatal("Match distance is greater than size of statistics") - s.matchesTotal++ - s.charactersRecalledTotal += cmdLength - return - } - s.matches[distance]++ - s.matchesTotal++ - s.charactersRecalled[distance] += cmdLength - s.charactersRecalledTotal += cmdLength +func (s *statistics) addCmdLine(cmdLine string, cmdLength int) { + s.cmdLineCount[cmdLine]++ s.dataPointCount++ } -func (s *statistics) addMiss() { - s.dataPointCount++ -} +func (s *statistics) graphCmdFrequencyAsFuncOfRank() { -func (s *statistics) printCumulative() { - 
matchesPercent := 0.0 - out := "### Matches ###\n" - for i := 0; i < s.size; i++ { - matchesPercent += 100 * float64(s.matches[i]) / float64(s.dataPointCount) - out += strconv.Itoa(i) + " ->" - out += fmt.Sprintf(" (%.1f %%)\n", matchesPercent) - for j := 0; j < int(math.Round(matchesPercent)); j++ { - out += "#" - } - out += "\n" - } - matchesPercent = 100 * float64(s.matchesTotal) / float64(s.dataPointCount) - out += "TOTAL ->" - out += fmt.Sprintf(" (%.1f %%)\n", matchesPercent) - for j := 0; j < int(math.Round(matchesPercent)); j++ { - out += "#" - } - out += "\n" + var xValues []float64 + var yValues []float64 - n, err := s.writer.WriteString(string(out) + "\n\n") - if err != nil { - log.Fatal(err) - } - if n == 0 { - log.Fatal("Nothing was written", n) - } + sortedValues := sortMapByvalue(s.cmdLineCount) + sortedValues = sortedValues[:100] // cut off at rank 100 - charsRecall := 0.0 - out = "### Characters recalled per submission ###\n" - for i := 0; i < s.size; i++ { - charsRecall += float64(s.charactersRecalled[i]) / float64(s.dataPointCount) - out += strconv.Itoa(i) + " ->" - out += fmt.Sprintf(" (%.2f)\n", charsRecall) - for j := 0; j < int(math.Round(charsRecall)); j++ { - out += "#" - } - out += "\n" + normalizeCoeficient := float64(s.dataPointCount) / float64(sortedValues[0].Value) + for i, pair := range sortedValues { + rank := i + 1 + frequency := float64(pair.Value) / float64(s.dataPointCount) + normalizeFrequency := frequency * normalizeCoeficient + + xValues = append(xValues, float64(rank)) + yValues = append(yValues, normalizeFrequency) } - charsRecall = float64(s.charactersRecalledTotal) / float64(s.dataPointCount) - out += "TOTAL ->" - out += fmt.Sprintf(" (%.2f)\n", charsRecall) - for j := 0; j < int(math.Round(charsRecall)); j++ { - out += "#" + + graphName := "cmdFrqAsFuncOfRank" + graph := chart.Chart{ + XAxis: chart.XAxis{ + Style: chart.StyleShow(), //enables / displays the x-axis + Ticks: []chart.Tick{ + {0.0, "0"}, + {1.0, "1"}, + {2.0, 
"2"}, + {3.0, "3"}, + {4.0, "4"}, + {5.0, "5"}, + {10.0, "10"}, + {15.0, "15"}, + {20.0, "20"}, + {25.0, "25"}, + {30.0, "30"}, + {35.0, "35"}, + {40.0, "40"}, + {45.0, "45"}, + {50.0, "50"}, + }, + }, + YAxis: chart.YAxis{ + AxisType: chart.YAxisSecondary, + Style: chart.StyleShow(), //enables / displays the y-axis + }, + Series: []chart.Series{ + chart.ContinuousSeries{ + Style: chart.Style{ + Show: true, + StrokeColor: chart.GetDefaultColor(0).WithAlpha(64), + FillColor: chart.GetDefaultColor(0).WithAlpha(64), + DotColor: chart.GetDefaultColor(0), + DotWidth: 3.0, + }, + XValues: xValues, + YValues: yValues, + }, + }, } - out += "\n" - n, err = s.writer.WriteString(string(out) + "\n\n") + buffer := bytes.NewBuffer([]byte{}) + err := graph.Render(chart.PNG, buffer) if err != nil { - log.Fatal(err) + log.Fatal("chart.Render error:", err) } - if n == 0 { - log.Fatal("Nothing was written", n) + ioutil.WriteFile("/tmp/resh-graph_"+graphName+".png", buffer.Bytes(), 0644) +} + +func sortMapByvalue(input map[string]int) []Pair { + p := make(PairList, len(input)) + + i := 0 + for k, v := range input { + p[i] = Pair{k, v} + i++ } + sort.Sort(sort.Reverse(p)) + return p } + +// Pair - A data structure to hold key/value pairs +type Pair struct { + Key string + Value int +} + +// PairList - A slice of pairs that implements sort.Interface to sort by values +type PairList []Pair + +func (p PairList) Len() int { return len(p) } +func (p PairList) Swap(i, j int) { p[i], p[j] = p[j], p[i] } +func (p PairList) Less(i, j int) bool { return p[i].Value < p[j].Value } diff --git a/go.mod b/go.mod index 28d5ece..9c901e1 100644 --- a/go.mod +++ b/go.mod @@ -4,5 +4,9 @@ go 1.12 require ( github.com/BurntSushi/toml v0.3.1 + github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect + github.com/mattn/go-shellwords v1.0.6 + github.com/wcharczuk/go-chart v2.0.1+incompatible github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa + golang.org/x/image 
v0.0.0-20190902063713-cb417be4ba39 // indirect ) diff --git a/go.sum b/go.sum index 72fef1f..92beac2 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,13 @@ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g= +github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= +github.com/mattn/go-shellwords v1.0.6 h1:9Jok5pILi5S1MnDirGVTufYGtksUs/V2BWUP3ZkeUUI= +github.com/mattn/go-shellwords v1.0.6/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o= +github.com/wcharczuk/go-chart v2.0.1+incompatible h1:0pz39ZAycJFF7ju/1mepnk26RLVLBCWz1STcD3doU0A= +github.com/wcharczuk/go-chart v2.0.1+incompatible/go.mod h1:PF5tmL4EIx/7Wf+hEkpCqYi5He4u90sw+0+6FhrryuE= github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa h1:rW+Lu6281ed/4XGuVIa4/YebTRNvoUJlfJ44ktEVwZk= github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa/go.mod h1:2rx5KE5FLD0HRfkkpyn8JwbVLBdhgeiOb2D2D9LLKM4= +golang.org/x/image v0.0.0-20190902063713-cb417be4ba39 h1:4dQcAORh9oYBwVSBVIkP489LUPC+f1HBkTYXgmqfR+o= +golang.org/x/image v0.0.0-20190902063713-cb417be4ba39/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= From 039573e240332a70b1696faf34dade588d6fefb8 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Tue, 10 Sep 2019 02:34:31 +0200 Subject: [PATCH 23/41] precess data in golang then analyze and plot in python --- Makefile | 4 +- common/resh-common.go | 3 +- evaluate/resh-evaluate-plot.py | 41 +++++++++++ evaluate/resh-evaluate.go | 120 ++++++++++++++++++++++----------- evaluate/results.go | 99 --------------------------- evaluate/statistics.go | 117 -------------------------------- version | 2 +- 7 files changed, 128 insertions(+), 258 
deletions(-) create mode 100755 evaluate/resh-evaluate-plot.py delete mode 100644 evaluate/results.go delete mode 100644 evaluate/statistics.go diff --git a/Makefile b/Makefile index 918a124..d5b4bb0 100644 --- a/Makefile +++ b/Makefile @@ -115,8 +115,8 @@ resh-collect: collect/resh-collect.go common/resh-common.go version resh-sanitize-history: sanitize-history/resh-sanitize-history.go common/resh-common.go version go build ${GOFLAGS} -o $@ $< -resh-evaluate: evaluate/resh-evaluate.go evaluate/results.go evaluate/statistics.go evaluate/strategy-*.go common/resh-common.go version - go build ${GOFLAGS} -o $@ $< evaluate/results.go evaluate/statistics.go evaluate/strategy-*.go +resh-evaluate: evaluate/resh-evaluate.go evaluate/strategy-*.go common/resh-common.go version + go build ${GOFLAGS} -o $@ $< evaluate/strategy-*.go $(HOME)/.resh $(HOME)/.resh/bin $(HOME)/.config: # Creating dirs ... diff --git a/common/resh-common.go b/common/resh-common.go index 1bd3f4a..481486d 100644 --- a/common/resh-common.go +++ b/common/resh-common.go @@ -218,7 +218,8 @@ func (r *Record) Enrich() { func GetCommandFromCommandLine(cmdLine string) string { args, err := shellwords.Parse(cmdLine) if err != nil { - log.Fatal("shellwords Error:", err) + log.Println("shellwords Error:", err, " (cmdLine: <", cmdLine, "> )") + return "" } if len(args) > 0 { return args[0] diff --git a/evaluate/resh-evaluate-plot.py b/evaluate/resh-evaluate-plot.py new file mode 100755 index 0000000..8999e0b --- /dev/null +++ b/evaluate/resh-evaluate-plot.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 + +import sys +import json +from collections import defaultdict +import matplotlib.pyplot as plt +import matplotlib.path as mpath +import numpy as np + + +def addRank(data): + return list(enumerate(data, start=1)) + + +data = json.load(sys.stdin) +# for strategy in data["Strategies"]: +# print(json.dumps(strategy)) + +cmd_count = defaultdict(int) +cmdLine_count = defaultdict(int) + +for record in data["Records"]: + 
cmd_count[record["firstWord"]] += 1 + cmdLine_count[record["cmdLine"]] += 1 + + +cmdFrq = list(map(lambda x: x[1] / len(data["Records"]), sorted(cmd_count.items(), key=lambda x: x[1], reverse=True))) +cmdLineFrq = list(map(lambda x: x[1] / len(data["Records"]), sorted(cmdLine_count.items(), key=lambda x: x[1], reverse=True))) + +print(cmdFrq) +print("#################") +#print(cmdLineFrq_rank) + +plt.plot(range(1, len(cmdFrq)+1), cmdFrq) +plt.title("Command frequency") +#plt.xticks(range(1, len(cmdFrq)+1)) +plt.show() + +plt.plot(range(1, len(cmdLineFrq)+1), cmdLineFrq) +plt.title("Commandline frequency") +plt.show() \ No newline at end of file diff --git a/evaluate/resh-evaluate.go b/evaluate/resh-evaluate.go index 930a694..30c5a9f 100644 --- a/evaluate/resh-evaluate.go +++ b/evaluate/resh-evaluate.go @@ -2,11 +2,13 @@ package main import ( "bufio" + "bytes" "encoding/json" "flag" "fmt" "log" "os" + "os/exec" "os/user" "path/filepath" @@ -24,15 +26,17 @@ func main() { dir := usr.HomeDir historyPath := filepath.Join(dir, ".resh_history.json") sanitizedHistoryPath := filepath.Join(dir, "resh_history_sanitized.json") + // tmpPath := "/tmp/resh-evaluate-tmp.json" showVersion := flag.Bool("version", false, "Show version and exit") showRevision := flag.Bool("revision", false, "Show git revision and exit") inputPath := flag.String("input", "", "Input file (default: "+historyPath+"OR"+sanitizedHistoryPath+ " depending on --sanitized-input option)") - outputPath := flag.String("output", "", "Output file (default: use stdout)") + outputDir := flag.String("output", "/tmp/resh-evaluate", "Output directory") sanitizedInput := flag.Bool("sanitized-input", false, "Handle input as sanitized (also changes default value for input argument)") + plottingScript := flag.String("plotting-script", "resh-evaluate-plot.py", "Script to use for plotting") flag.Parse() @@ -54,20 +58,7 @@ func main() { os.Exit(0) } - var writer *bufio.Writer - if *outputPath != "" { - outputFile, err := 
os.Create(*outputPath) - if err != nil { - log.Fatal("Create() output file error:", err) - } - defer outputFile.Close() - writer = bufio.NewWriter(outputFile) - } else { - writer = bufio.NewWriter(os.Stdout) - } - defer writer.Flush() - - evaluator := evaluator{sanitizedInput: *sanitizedInput, writer: writer, maxCandidates: 50} + evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: 50} err := evaluator.init(*inputPath) if err != nil { log.Fatal("Evaluator init() error:", err) @@ -87,6 +78,18 @@ func main() { log.Println("Evaluator evaluate() error:", err) } } + // evaluator.dumpJSON(tmpPath) + + // run python script to stat and plot/ + cmd := exec.Command("echo", *outputDir) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + log.Printf("") + err = cmd.Run() + if err != nil { + log.Printf("Command finished with error: %v", err) + } + evaluator.calculateStatsAndPlot(*plottingScript) } type strategy interface { @@ -96,42 +99,93 @@ type strategy interface { ResetHistory() error } +type matchJSON struct { + Match bool + Distance int + CharsRecalled int +} + +type strategyJSON struct { + Title string + Description string + Matches []matchJSON +} + +type evaluateJSON struct { + Strategies []strategyJSON + Records []common.Record +} + type evaluator struct { sanitizedInput bool - writer *bufio.Writer maxCandidates int historyRecords []common.Record + data evaluateJSON } func (e *evaluator) init(inputPath string) error { e.historyRecords = e.loadHistoryRecords(inputPath) + e.processRecords() return nil } -func (e *evaluator) evaluate(strategy strategy) error { - res := results{writer: e.writer, size: e.maxCandidates + 1} - stats := statistics{} - res.init() - stats.init() +func (e *evaluator) calculateStatsAndPlot(scriptName string) { + evalJSON, err := json.Marshal(e.data) + if err != nil { + log.Fatal("json marshal error", err) + } + buffer := bytes.Buffer{} + buffer.Write(evalJSON) + // run python script to stat and plot/ + cmd := 
exec.Command(scriptName) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Stdin = &buffer + log.Printf("...") + err = cmd.Run() + if err != nil { + log.Printf("Command finished with error: %v", err) + } +} + +// enrich records and add them to serializable structure +func (e *evaluator) processRecords() { + for _, record := range e.historyRecords { + + // assert + if record.Sanitized != e.sanitizedInput { + if e.sanitizedInput { + log.Fatal("ASSERT failed: '--sanitized-input' is present but data is not sanitized") + } + log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present") + } + + record.Enrich() + e.data.Records = append(e.data.Records, record) + } +} +func (e *evaluator) evaluate(strategy strategy) error { + title, description := strategy.GetTitleAndDescription() + strategyData := strategyJSON{Title: title, Description: description} for _, record := range e.historyRecords { - stats.addCmdLine(record.CmdLine, record.CmdLength) candidates := strategy.GetCandidates() - match := false + matchFound := false for i, candidate := range candidates { // make an option (--calculate-total) to turn this on/off ? 
// if i >= e.maxCandidates { // break // } if candidate == record.CmdLine { - res.addMatch(i+1, record.CmdLength) - match = true + match := matchJSON{Match: true, Distance: i + 1, CharsRecalled: record.CmdLength} + strategyData.Matches = append(strategyData.Matches, match) + matchFound = true break } } - if match == false { - res.addMiss() + if matchFound == false { + strategyData.Matches = append(strategyData.Matches, matchJSON{}) } err := strategy.AddHistoryRecord(&record) if err != nil { @@ -139,17 +193,7 @@ func (e *evaluator) evaluate(strategy strategy) error { return err } } - title, description := strategy.GetTitleAndDescription() - n, err := e.writer.WriteString(title + " - " + description + "\n") - if err != nil { - log.Fatal(err) - } - if n == 0 { - log.Fatal("Nothing was written", n) - } - // print results - res.printCumulative() - stats.graphCmdFrequencyAsFuncOfRank() + e.data.Strategies = append(e.data.Strategies, strategyData) return nil } diff --git a/evaluate/results.go b/evaluate/results.go deleted file mode 100644 index 5a82aba..0000000 --- a/evaluate/results.go +++ /dev/null @@ -1,99 +0,0 @@ -package main - -import ( - "bufio" - "fmt" - "log" - "math" - "strconv" -) - -type results struct { - writer *bufio.Writer - size int - matches []int // matches[N] -> # of matches at distance N - matchesTotal int - charactersRecalled []int - charactersRecalledTotal int - dataPointCount int -} - -func (r *results) init() { - r.matches = make([]int, r.size) - r.charactersRecalled = make([]int, r.size) -} - -func (r *results) addMatch(distance int, cmdLength int) { - if distance >= r.size { - // --calculate-total - // log.Fatal("Match distance is greater than size of statistics") - r.matchesTotal++ - r.charactersRecalledTotal += cmdLength - return - } - r.matches[distance]++ - r.matchesTotal++ - r.charactersRecalled[distance] += cmdLength - r.charactersRecalledTotal += cmdLength - r.dataPointCount++ -} - -func (r *results) addMiss() { - r.dataPointCount++ -} - 
-func (r *results) printCumulative() { - matchesPercent := 0.0 - out := "### Matches ###\n" - for i := 0; i < r.size; i++ { - matchesPercent += 100 * float64(r.matches[i]) / float64(r.dataPointCount) - out += strconv.Itoa(i) + " ->" - out += fmt.Sprintf(" (%.1f %%)\n", matchesPercent) - for j := 0; j < int(math.Round(matchesPercent)); j++ { - out += "#" - } - out += "\n" - } - matchesPercent = 100 * float64(r.matchesTotal) / float64(r.dataPointCount) - out += "TOTAL ->" - out += fmt.Sprintf(" (%.1f %%)\n", matchesPercent) - for j := 0; j < int(math.Round(matchesPercent)); j++ { - out += "#" - } - out += "\n" - - n, err := r.writer.WriteString(string(out) + "\n\n") - if err != nil { - log.Fatal(err) - } - if n == 0 { - log.Fatal("Nothing was written", n) - } - - charsRecall := 0.0 - out = "### Characters recalled per submission ###\n" - for i := 0; i < r.size; i++ { - charsRecall += float64(r.charactersRecalled[i]) / float64(r.dataPointCount) - out += strconv.Itoa(i) + " ->" - out += fmt.Sprintf(" (%.2f)\n", charsRecall) - for j := 0; j < int(math.Round(charsRecall)); j++ { - out += "#" - } - out += "\n" - } - charsRecall = float64(r.charactersRecalledTotal) / float64(r.dataPointCount) - out += "TOTAL ->" - out += fmt.Sprintf(" (%.2f)\n", charsRecall) - for j := 0; j < int(math.Round(charsRecall)); j++ { - out += "#" - } - out += "\n" - - n, err = r.writer.WriteString(string(out) + "\n\n") - if err != nil { - log.Fatal(err) - } - if n == 0 { - log.Fatal("Nothing was written", n) - } -} diff --git a/evaluate/statistics.go b/evaluate/statistics.go deleted file mode 100644 index 0a71857..0000000 --- a/evaluate/statistics.go +++ /dev/null @@ -1,117 +0,0 @@ -package main - -import ( - "bytes" - "io/ioutil" - "log" - "sort" - - "github.com/wcharczuk/go-chart" -) - -type statistics struct { - //size int - dataPointCount int - cmdLineCount map[string]int -} - -func (s *statistics) init() { - s.cmdLineCount = make(map[string]int) -} - -func (s *statistics) addCmdLine(cmdLine 
string, cmdLength int) { - s.cmdLineCount[cmdLine]++ - s.dataPointCount++ -} - -func (s *statistics) graphCmdFrequencyAsFuncOfRank() { - - var xValues []float64 - var yValues []float64 - - sortedValues := sortMapByvalue(s.cmdLineCount) - sortedValues = sortedValues[:100] // cut off at rank 100 - - normalizeCoeficient := float64(s.dataPointCount) / float64(sortedValues[0].Value) - for i, pair := range sortedValues { - rank := i + 1 - frequency := float64(pair.Value) / float64(s.dataPointCount) - normalizeFrequency := frequency * normalizeCoeficient - - xValues = append(xValues, float64(rank)) - yValues = append(yValues, normalizeFrequency) - } - - graphName := "cmdFrqAsFuncOfRank" - graph := chart.Chart{ - XAxis: chart.XAxis{ - Style: chart.StyleShow(), //enables / displays the x-axis - Ticks: []chart.Tick{ - {0.0, "0"}, - {1.0, "1"}, - {2.0, "2"}, - {3.0, "3"}, - {4.0, "4"}, - {5.0, "5"}, - {10.0, "10"}, - {15.0, "15"}, - {20.0, "20"}, - {25.0, "25"}, - {30.0, "30"}, - {35.0, "35"}, - {40.0, "40"}, - {45.0, "45"}, - {50.0, "50"}, - }, - }, - YAxis: chart.YAxis{ - AxisType: chart.YAxisSecondary, - Style: chart.StyleShow(), //enables / displays the y-axis - }, - Series: []chart.Series{ - chart.ContinuousSeries{ - Style: chart.Style{ - Show: true, - StrokeColor: chart.GetDefaultColor(0).WithAlpha(64), - FillColor: chart.GetDefaultColor(0).WithAlpha(64), - DotColor: chart.GetDefaultColor(0), - DotWidth: 3.0, - }, - XValues: xValues, - YValues: yValues, - }, - }, - } - - buffer := bytes.NewBuffer([]byte{}) - err := graph.Render(chart.PNG, buffer) - if err != nil { - log.Fatal("chart.Render error:", err) - } - ioutil.WriteFile("/tmp/resh-graph_"+graphName+".png", buffer.Bytes(), 0644) -} - -func sortMapByvalue(input map[string]int) []Pair { - p := make(PairList, len(input)) - - i := 0 - for k, v := range input { - p[i] = Pair{k, v} - i++ - } - sort.Sort(sort.Reverse(p)) - return p -} - -// Pair - A data structure to hold key/value pairs -type Pair struct { - Key string - 
Value int -} - -// PairList - A slice of pairs that implements sort.Interface to sort by values -type PairList []Pair - -func (p PairList) Len() int { return len(p) } -func (p PairList) Swap(i, j int) { p[i], p[j] = p[j], p[i] } -func (p PairList) Less(i, j int) bool { return p[i].Value < p[j].Value } diff --git a/version b/version index 524cb55..45a1b3f 100644 --- a/version +++ b/version @@ -1 +1 @@ -1.1.1 +1.1.2 From 29b449a9e3f71e957213938324da301225382760 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Tue, 10 Sep 2019 02:49:13 +0200 Subject: [PATCH 24/41] minor tweeks because I hate sleep and because I hate future myself --- evaluate/resh-evaluate-plot.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/evaluate/resh-evaluate-plot.py b/evaluate/resh-evaluate-plot.py index 8999e0b..9197906 100755 --- a/evaluate/resh-evaluate-plot.py +++ b/evaluate/resh-evaluate-plot.py @@ -23,19 +23,22 @@ for record in data["Records"]: cmd_count[record["firstWord"]] += 1 cmdLine_count[record["cmdLine"]] += 1 +cmdTmp = sorted(cmd_count.items(), key=lambda x: x[1], reverse=True)[:50] +cmdFrq = list(map(lambda x: x[1] / cmdTmp[0][1], cmdTmp)) -cmdFrq = list(map(lambda x: x[1] / len(data["Records"]), sorted(cmd_count.items(), key=lambda x: x[1], reverse=True))) -cmdLineFrq = list(map(lambda x: x[1] / len(data["Records"]), sorted(cmdLine_count.items(), key=lambda x: x[1], reverse=True))) +cmdLineTmp = sorted(cmdLine_count.items(), key=lambda x: x[1], reverse=True)[:50] +cmdLineFrq = list(map(lambda x: x[1] / cmdLineTmp[0][1], cmdLineTmp)) print(cmdFrq) print("#################") #print(cmdLineFrq_rank) -plt.plot(range(1, len(cmdFrq)+1), cmdFrq) +plt.plot(range(1, len(cmdFrq)+1), cmdFrq, 'o-') plt.title("Command frequency") +plt.yticks() #plt.xticks(range(1, len(cmdFrq)+1)) plt.show() -plt.plot(range(1, len(cmdLineFrq)+1), cmdLineFrq) +plt.plot(range(1, len(cmdLineFrq)+1), cmdLineFrq, 'o-') plt.title("Commandline frequency") plt.show() \ No newline at end 
of file From 7ca44f917739f81bb42ea0f6ff5424ef02d71977 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Tue, 10 Sep 2019 22:58:25 +0200 Subject: [PATCH 25/41] command sequence graph draft --- common/resh-common.go | 14 +++ evaluate/resh-evaluate-plot.py | 199 +++++++++++++++++++++++++++++---- evaluate/resh-evaluate.go | 12 +- 3 files changed, 191 insertions(+), 34 deletions(-) diff --git a/common/resh-common.go b/common/resh-common.go index 481486d..e23fe52 100644 --- a/common/resh-common.go +++ b/common/resh-common.go @@ -75,6 +75,7 @@ type Record struct { // enriching fields - added "later" FirstWord string `json:"firstWord"` + Invalid bool `json:"invalid"` } // FallbackRecord when record is too old and can't be parsed into regular Record @@ -212,6 +213,19 @@ func ConvertRecord(r *FallbackRecord) Record { func (r *Record) Enrich() { // Get command/first word from commandline r.FirstWord = GetCommandFromCommandLine(r.CmdLine) + err := r.Validate() + if err != nil { + log.Println("Invalid command:", r.CmdLine) + r.Invalid = true + } + r.Invalid = false + // TODO: Detect and mark simple commands r.Simple +} + +// Validate - returns error if the record is invalid +func (r *Record) Validate() error { + + return nil } // GetCommandFromCommandLine func diff --git a/evaluate/resh-evaluate-plot.py b/evaluate/resh-evaluate-plot.py index 9197906..5b8b92e 100755 --- a/evaluate/resh-evaluate-plot.py +++ b/evaluate/resh-evaluate-plot.py @@ -6,39 +6,192 @@ from collections import defaultdict import matplotlib.pyplot as plt import matplotlib.path as mpath import numpy as np +from graphviz import Digraph +PLOT_WIDTH = 10 # inches +PLOT_HEIGHT = 7 # inches -def addRank(data): - return list(enumerate(data, start=1)) - +PLOT_SIZE_zipf = 20 data = json.load(sys.stdin) # for strategy in data["Strategies"]: # print(json.dumps(strategy)) -cmd_count = defaultdict(int) -cmdLine_count = defaultdict(int) -for record in data["Records"]: - cmd_count[record["firstWord"]] += 1 - 
cmdLine_count[record["cmdLine"]] += 1 +def zipf(length): + return list(map(lambda x: 1/2**x, range(0, length))) + + +def trim(text, length, add_elipse=True): + if add_elipse and len(text) > length: + return text[:length-1] + "…" + return text[:length] + + +# Figure 3.1. The normalized command frequency, compared with Zipf. +def plot_cmdLineFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False): + cmdLine_count = defaultdict(int) + for record in data["Records"]: + if record["invalid"]: + continue + + cmdLine_count[record["cmdLine"]] += 1 + + tmp = sorted(cmdLine_count.items(), key=lambda x: x[1], reverse=True)[:plotSize] + cmdLineFrq = list(map(lambda x: x[1] / tmp[0][1], tmp)) + labels = list(map(lambda x: trim(x[0], 7), tmp)) + + ranks = range(1, len(cmdLineFrq)+1) + plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) + plt.plot(ranks, zipf(len(ranks)), '-') + plt.plot(ranks, cmdLineFrq, 'o-') + plt.title("Commandline frequency / rank") + plt.ylabel("Normalized commandline frequency") + plt.xlabel("Commandline rank") + plt.legend(("Zipf", "Commandline"), loc="best") + if show_labels: + plt.xticks(ranks, labels, rotation=-60) + # TODO: make xticks integral + plt.show() + + +# similar to ~ Figure 3.1. The normalized command frequency, compared with Zipf. 
+def plot_cmdFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False): + cmd_count = defaultdict(int) + for record in data["Records"]: + if record["invalid"]: + continue + + cmd = record["firstWord"] + if cmd == "": + continue + cmd_count[cmd] += 1 + + tmp = sorted(cmd_count.items(), key=lambda x: x[1], reverse=True)[:plotSize] + cmdFrq = list(map(lambda x: x[1] / tmp[0][1], tmp)) + labels = list(map(lambda x: trim(x[0], 7), tmp)) + + ranks = range(1, len(cmdFrq)+1) + plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) + plt.plot(ranks, zipf(len(ranks)), 'o-') + plt.plot(ranks, cmdFrq, 'o-') + plt.title("Command frequency / rank") + plt.ylabel("Normalized command frequency") + plt.xlabel("Command rank") + plt.legend(("Zipf", "Command"), loc="best") + if show_labels: + plt.xticks(ranks, labels, rotation=-60) + # TODO: make xticks integral + plt.show() + +# Figure 3.2. Command vocabulary size vs. the number of command lines entered for four individuals. +def plot_cmdVocabularySize_cmdLinesEntered(): + cmd_vocabulary = set() + y_cmd_count = [0] + for record in data["Records"]: + if record["invalid"]: + continue + + cmd = record["firstWord"] + if cmd in cmd_vocabulary: + # repeat last value + y_cmd_count.append(y_cmd_count[-1]) + else: + cmd_vocabulary.add(cmd) + # append last value +1 + y_cmd_count.append(y_cmd_count[-1] + 1) + + print(cmd_vocabulary) + x_cmds_entered = range(0, len(y_cmd_count)) + + plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) + plt.plot(x_cmds_entered, y_cmd_count, '-') + plt.title("Command vocabulary size vs. the number of command lines entered") + plt.ylabel("Command vocabulary size") + plt.xlabel("# of command lines entered") + plt.show() + +# Figure 3.3. Sequential structure of UNIX command usage, from Figure 4 in Hanson et al. (1984). +# Ball diameters are proportional to stationary probability. Lines indicate significant dependencies, +# solid ones being more probable (p < .0001) and dashed ones less probable (.005 < p < .0001). 
+def graphviz_cmdSequences(cmd_displayTreshold=20, edge_displayTreshold=0.03): + cmd_count = defaultdict(int) + cmdSeq_count = defaultdict(lambda: defaultdict(int)) + cmd_id = dict() + prev_cmd = "_SESSION_INIT_" # XXX: not actually session init yet + cmd_id[prev_cmd] = str(-1) + for x, record in enumerate(data["Records"]): + if record["invalid"]: + continue + + cmd = record["firstWord"] + cmdSeq_count[prev_cmd][cmd] += 1 + cmd_count[cmd] += 1 + cmd_id[cmd] = str(x) + prev_cmd = cmd + + dot = Digraph(comment="Command sequences", graph_attr={'overlap':'scale', 'splines':'true'}) + + # for cmd_entry in cmdSeq_count.items(): + # cmd, seq = cmd_entry + + # if cmd_count[cmd] < cmd_displayTreshold: + # continue + # + # dot.node(cmd_id[cmd], cmd) + + for cmd_entry in cmdSeq_count.items(): + cmd, seq = cmd_entry + + count = cmd_count[cmd] + if count < cmd_displayTreshold: + continue + + for seq_entry in seq.items(): + cmd2, seq_count = seq_entry + relative_seq_count = seq_count / count + + if cmd_count[cmd2] < cmd_displayTreshold: + continue + if relative_seq_count < edge_displayTreshold: + continue + + for id_, cmd_ in ((cmd_id[cmd], cmd), (cmd_id[cmd2], cmd2)): + count_ = cmd_count[cmd_] + scale_ = count_ / (cmd_displayTreshold) + width_ = str(0.08*scale_) + fontsize_ = str(1*scale_) + if scale_ < 12: + dot.node(id_, '', shape='circle', fixedsize='true', fontname='bold', + width=width_, fontsize='12', forcelabels='true', xlabel=cmd_) + else: + dot.node(id_, cmd_, shape='circle', fixedsize='true', fontname='bold', + width=width_, fontsize=fontsize_, forcelabels='true') -cmdTmp = sorted(cmd_count.items(), key=lambda x: x[1], reverse=True)[:50] -cmdFrq = list(map(lambda x: x[1] / cmdTmp[0][1], cmdTmp)) + + # 1.0 is max + scale_ = seq_count / cmd_count[cmd] + penwidth_ = str(0.5 + 4.5 * scale_) + #penwidth_bold_ = str(8 * scale_) + if scale_ > 0.5: + dot.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', splines='curved', + penwidth=penwidth_, style='bold') + elif scale_ > 
0.2: + dot.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', splines='curved', + penwidth=penwidth_, arrowhead='open') + elif scale_ > 0.1: + dot.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', splines='curved', + penwidth=penwidth_, style='dashed', arrowhead='open') + else: + dot.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', splines='curved', + penwidth=penwidth_, style='dotted', arrowhead='empty') -cmdLineTmp = sorted(cmdLine_count.items(), key=lambda x: x[1], reverse=True)[:50] -cmdLineFrq = list(map(lambda x: x[1] / cmdLineTmp[0][1], cmdLineTmp)) + dot.render('/tmp/resh-graphviz-cmdSeq.gv', view=False) -print(cmdFrq) -print("#################") -#print(cmdLineFrq_rank) +graphviz_cmdSequences() +# plot_cmdVocabularySize_cmdLinesEntered() +# plot_cmdLineFrq_rank() +# plot_cmdFrq_rank() -plt.plot(range(1, len(cmdFrq)+1), cmdFrq, 'o-') -plt.title("Command frequency") -plt.yticks() -#plt.xticks(range(1, len(cmdFrq)+1)) -plt.show() -plt.plot(range(1, len(cmdLineFrq)+1), cmdLineFrq, 'o-') -plt.title("Commandline frequency") -plt.show() \ No newline at end of file +# be careful and check if labels fit the display \ No newline at end of file diff --git a/evaluate/resh-evaluate.go b/evaluate/resh-evaluate.go index 30c5a9f..3917a8b 100644 --- a/evaluate/resh-evaluate.go +++ b/evaluate/resh-evaluate.go @@ -33,7 +33,7 @@ func main() { inputPath := flag.String("input", "", "Input file (default: "+historyPath+"OR"+sanitizedHistoryPath+ " depending on --sanitized-input option)") - outputDir := flag.String("output", "/tmp/resh-evaluate", "Output directory") + // outputDir := flag.String("output", "/tmp/resh-evaluate", "Output directory") sanitizedInput := flag.Bool("sanitized-input", false, "Handle input as sanitized (also changes default value for input argument)") plottingScript := flag.String("plotting-script", "resh-evaluate-plot.py", "Script to use for plotting") @@ -80,15 +80,6 @@ func main() { } // evaluator.dumpJSON(tmpPath) - // run python script to stat 
and plot/ - cmd := exec.Command("echo", *outputDir) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - log.Printf("") - err = cmd.Run() - if err != nil { - log.Printf("Command finished with error: %v", err) - } evaluator.calculateStatsAndPlot(*plottingScript) } @@ -141,7 +132,6 @@ func (e *evaluator) calculateStatsAndPlot(scriptName string) { cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr cmd.Stdin = &buffer - log.Printf("...") err = cmd.Run() if err != nil { log.Printf("Command finished with error: %v", err) From 00ff511915a43930c1102e1e3e25567e19d24024 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Tue, 10 Sep 2019 23:29:56 +0200 Subject: [PATCH 26/41] minor changes --- evaluate/resh-evaluate-plot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evaluate/resh-evaluate-plot.py b/evaluate/resh-evaluate-plot.py index 5b8b92e..b6a01e6 100755 --- a/evaluate/resh-evaluate-plot.py +++ b/evaluate/resh-evaluate-plot.py @@ -114,7 +114,7 @@ def plot_cmdVocabularySize_cmdLinesEntered(): # Figure 3.3. Sequential structure of UNIX command usage, from Figure 4 in Hanson et al. (1984). # Ball diameters are proportional to stationary probability. Lines indicate significant dependencies, # solid ones being more probable (p < .0001) and dashed ones less probable (.005 < p < .0001). 
-def graphviz_cmdSequences(cmd_displayTreshold=20, edge_displayTreshold=0.03): +def graphviz_cmdSequences(cmd_displayTreshold=28, edge_displayTreshold=0.05): cmd_count = defaultdict(int) cmdSeq_count = defaultdict(lambda: defaultdict(int)) cmd_id = dict() @@ -130,7 +130,7 @@ def graphviz_cmdSequences(cmd_displayTreshold=20, edge_displayTreshold=0.03): cmd_id[cmd] = str(x) prev_cmd = cmd - dot = Digraph(comment="Command sequences", graph_attr={'overlap':'scale', 'splines':'true'}) + dot = Digraph(comment="Command sequences", graph_attr={'overlap':'scale', 'splines':'true', 'sep':'0.25'}) # for cmd_entry in cmdSeq_count.items(): # cmd, seq = cmd_entry From 29d8a5b5c00e7a5a424bf4e5311a8f9c2bd9d2b8 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Wed, 11 Sep 2019 00:42:54 +0200 Subject: [PATCH 27/41] plots for cumulative recurrence rate and characters saved --- evaluate/resh-evaluate-plot.py | 80 +++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/evaluate/resh-evaluate-plot.py b/evaluate/resh-evaluate-plot.py index b6a01e6..d5ae737 100755 --- a/evaluate/resh-evaluate-plot.py +++ b/evaluate/resh-evaluate-plot.py @@ -188,7 +188,85 @@ def graphviz_cmdSequences(cmd_displayTreshold=28, edge_displayTreshold=0.05): dot.render('/tmp/resh-graphviz-cmdSeq.gv', view=False) -graphviz_cmdSequences() +def plot_strategy_recency(): + recent = None + for strategy in data["Strategies"]: + if strategy["Title"] != "recent": + continue + recent = strategy + break + + assert(recent is not None) + + size = 50 + + dataPoint_count = 0 + matches = [0] * size + matches_total = 0 + charsRecalled = [0] * size + charsRecalled_total = 0 + + for match in recent["Matches"]: + dataPoint_count += 1 + + if not match["Match"]: + continue + + chars = match["CharsRecalled"] + charsRecalled_total += chars + matches_total += 1 + + dist = match["Distance"] + if dist > size: + continue + + matches[dist-1] += 1 + charsRecalled[dist-1] += chars + + x_values = range(1, 
size+2) + x_ticks = list(range(1, size+1, 2)) + x_labels = x_ticks[:] + x_ticks.append(size+1) + x_labels.append("total") + + acc = 0 + matches_cumulative = [] + for x in matches: + acc += x + matches_cumulative.append(acc) + matches_cumulative.append(matches_total) + matches_percent = list(map(lambda x: 100 * x / dataPoint_count, matches_cumulative)) + plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) + plt.plot(x_values, matches_percent, 'o-') + plt.title("Matches at distance") + plt.ylabel('%' + " of matches") + plt.xlabel("Distance") + plt.xticks(x_ticks, x_labels) + #plt.legend(("Zipf", "Command"), loc="best") + plt.show() + + acc = 0 + charsRecalled_cumulative = [] + for x in charsRecalled: + acc += x + charsRecalled_cumulative.append(acc) + charsRecalled_cumulative.append(charsRecalled_total) + charsRecalled_average = list(map(lambda x: x / dataPoint_count, charsRecalled_cumulative)) + plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) + plt.plot(x_values, charsRecalled_average, 'o-') + plt.title("Average characters recalled at distance") + plt.ylabel("Average characters recalled") + plt.xlabel("Distance") + plt.xticks(x_ticks, x_labels) + #plt.legend(("Zipf", "Command"), loc="best") + plt.show() + + + + +plot_strategy_recency() + +# graphviz_cmdSequences() # plot_cmdVocabularySize_cmdLinesEntered() # plot_cmdLineFrq_rank() # plot_cmdFrq_rank() From 7cfc4f579a31ec920192f3b38a7b151e924e6b9e Mon Sep 17 00:00:00 2001 From: Simon Let Date: Wed, 11 Sep 2019 16:53:08 +0200 Subject: [PATCH 28/41] switch graphviz from dot to neato --- evaluate/resh-evaluate-plot.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/evaluate/resh-evaluate-plot.py b/evaluate/resh-evaluate-plot.py index d5ae737..ef834b0 100755 --- a/evaluate/resh-evaluate-plot.py +++ b/evaluate/resh-evaluate-plot.py @@ -130,7 +130,7 @@ def graphviz_cmdSequences(cmd_displayTreshold=28, edge_displayTreshold=0.05): cmd_id[cmd] = str(x) prev_cmd = cmd - dot = 
Digraph(comment="Command sequences", graph_attr={'overlap':'scale', 'splines':'true', 'sep':'0.25'}) + graph = Digraph(engine='neato', graph_attr={'overlap':'scale', 'overlap_shrink':'true', 'splines':'true', 'sep':'0.25'}) # for cmd_entry in cmdSeq_count.items(): # cmd, seq = cmd_entry @@ -138,7 +138,7 @@ def graphviz_cmdSequences(cmd_displayTreshold=28, edge_displayTreshold=0.05): # if cmd_count[cmd] < cmd_displayTreshold: # continue # - # dot.node(cmd_id[cmd], cmd) + # graph.node(cmd_id[cmd], cmd) for cmd_entry in cmdSeq_count.items(): cmd, seq = cmd_entry @@ -162,10 +162,10 @@ def graphviz_cmdSequences(cmd_displayTreshold=28, edge_displayTreshold=0.05): width_ = str(0.08*scale_) fontsize_ = str(1*scale_) if scale_ < 12: - dot.node(id_, '', shape='circle', fixedsize='true', fontname='bold', + graph.node(id_, '', shape='circle', fixedsize='true', fontname='bold', width=width_, fontsize='12', forcelabels='true', xlabel=cmd_) else: - dot.node(id_, cmd_, shape='circle', fixedsize='true', fontname='bold', + graph.node(id_, cmd_, shape='circle', fixedsize='true', fontname='bold', width=width_, fontsize=fontsize_, forcelabels='true') @@ -174,19 +174,20 @@ def graphviz_cmdSequences(cmd_displayTreshold=28, edge_displayTreshold=0.05): penwidth_ = str(0.5 + 4.5 * scale_) #penwidth_bold_ = str(8 * scale_) if scale_ > 0.5: - dot.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', splines='curved', + graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', splines='curved', penwidth=penwidth_, style='bold') elif scale_ > 0.2: - dot.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', splines='curved', + graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', splines='curved', penwidth=penwidth_, arrowhead='open') elif scale_ > 0.1: - dot.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', splines='curved', + graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', splines='curved', penwidth=penwidth_, style='dashed', arrowhead='open') else: - dot.edge(cmd_id[cmd], 
cmd_id[cmd2], constraint='false', splines='curved', + graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', splines='curved', penwidth=penwidth_, style='dotted', arrowhead='empty') - dot.render('/tmp/resh-graphviz-cmdSeq.gv', view=False) + graph.view() + # graph.render('/tmp/resh-graphviz-cmdSeq.gv', view=True) def plot_strategy_recency(): recent = None @@ -264,9 +265,9 @@ def plot_strategy_recency(): -plot_strategy_recency() +# plot_strategy_recency() -# graphviz_cmdSequences() +graphviz_cmdSequences() # plot_cmdVocabularySize_cmdLinesEntered() # plot_cmdLineFrq_rank() # plot_cmdFrq_rank() From 050af919dc6d6f36be2e0b60ae047b5a3da6d595 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Wed, 11 Sep 2019 19:37:46 +0200 Subject: [PATCH 29/41] polish graphviz command sequences --- evaluate/resh-evaluate-plot.py | 152 ++++++++++++++++++++------------- 1 file changed, 93 insertions(+), 59 deletions(-) diff --git a/evaluate/resh-evaluate-plot.py b/evaluate/resh-evaluate-plot.py index ef834b0..cfb7624 100755 --- a/evaluate/resh-evaluate-plot.py +++ b/evaluate/resh-evaluate-plot.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 + +import traceback import sys import json from collections import defaultdict @@ -111,14 +113,15 @@ def plot_cmdVocabularySize_cmdLinesEntered(): plt.xlabel("# of command lines entered") plt.show() + # Figure 3.3. Sequential structure of UNIX command usage, from Figure 4 in Hanson et al. (1984). # Ball diameters are proportional to stationary probability. Lines indicate significant dependencies, # solid ones being more probable (p < .0001) and dashed ones less probable (.005 < p < .0001). 
-def graphviz_cmdSequences(cmd_displayTreshold=28, edge_displayTreshold=0.05): +def graph_cmdSequences(node_count=33, edge_minValue=0.05): cmd_count = defaultdict(int) cmdSeq_count = defaultdict(lambda: defaultdict(int)) cmd_id = dict() - prev_cmd = "_SESSION_INIT_" # XXX: not actually session init yet + prev_cmd = "" # XXX: not actually session init yet cmd_id[prev_cmd] = str(-1) for x, record in enumerate(data["Records"]): if record["invalid"]: @@ -130,64 +133,95 @@ def graphviz_cmdSequences(cmd_displayTreshold=28, edge_displayTreshold=0.05): cmd_id[cmd] = str(x) prev_cmd = cmd - graph = Digraph(engine='neato', graph_attr={'overlap':'scale', 'overlap_shrink':'true', 'splines':'true', 'sep':'0.25'}) - - # for cmd_entry in cmdSeq_count.items(): - # cmd, seq = cmd_entry - - # if cmd_count[cmd] < cmd_displayTreshold: - # continue - # - # graph.node(cmd_id[cmd], cmd) - - for cmd_entry in cmdSeq_count.items(): - cmd, seq = cmd_entry + # get `node_count` of largest nodes + sorted_cmd_count = sorted(cmd_count.items(), key=lambda x: x[1], reverse=True) + cmds_to_graph = list(map(lambda x: x[0], sorted_cmd_count))[:node_count] + + # use 3 biggest nodes as a reference point for scaling + biggest_node = cmd_count[cmds_to_graph[0]] + nd_biggest_node = cmd_count[cmds_to_graph[1]] + rd_biggest_node = cmd_count[cmds_to_graph[1]] + count2scale_coef = 3 / (biggest_node + nd_biggest_node + rd_biggest_node) + + # scaling constant + # affects node size and node label + base_scaling_factor = 21 + # extra scaling for experiments - not really useful imho + # affects everything nodes, edges, node labels, treshold for turning label into xlabel, xlabel size, ... 
+ extra_scaling_factor = 1.0 + for x in range(0, 10): + # graphviz is not the most reliable piece of software + # -> retry on fail but scale nodes down by 1% + scaling_factor = base_scaling_factor * (1 - x * 0.01) + + # overlap: scale -> solve overlap by scaling the graph + # overlap_shrink -> try to shrink the graph a bit after you are done + # splines -> don't draw edges over nodes + # sep: 2.5 -> assume that nodes are 2.5 inches larger + graph_attr={'overlap':'scale', 'overlap_shrink':'true', + 'splines':'true', 'sep':'0.25'} + graph = Digraph(name='command_sequentiality', engine='neato', graph_attr=graph_attr) + + # iterate over all nodes + for cmd in cmds_to_graph: + seq = cmdSeq_count[cmd] + count = cmd_count[cmd] + + # iterate over all "following" commands (for each node) + for seq_entry in seq.items(): + cmd2, seq_count = seq_entry + relative_seq_count = seq_count / count + + # check if "follow" command is supposed to be in the graph + if cmd2 not in cmds_to_graph: + continue + # check if the edge value is high enough + if relative_seq_count < edge_minValue: + continue + + # create starting node and end node for the edge + # duplicates don't matter + for id_, cmd_ in ((cmd_id[cmd], cmd), (cmd_id[cmd2], cmd2)): + count_ = cmd_count[cmd_] + scale_ = count_ * count2scale_coef * scaling_factor * extra_scaling_factor + width_ = 0.08 * scale_ + fontsize_ = 8.5 * scale_ / (len(cmd_) + 3) + + width_ = str(width_) + if fontsize_ < 12 * extra_scaling_factor: + graph.node(id_, ' ', shape='circle', fixedsize='true', fontname='monospace bold', + width=width_, fontsize=str(12 * extra_scaling_factor), forcelabels='true', xlabel=cmd_) + else: + fontsize_ = str(fontsize_) + graph.node(id_, cmd_, shape='circle', fixedsize='true', fontname='monospace bold', + width=width_, fontsize=fontsize_, forcelabels='true', labelloc='c') + + # value of the edge (percentage) 1.0 is max + scale_ = seq_count / cmd_count[cmd] + penwidth_ = str((0.5 + 4.5 * scale_) * extra_scaling_factor) + 
#penwidth_bold_ = str(8 * scale_) + if scale_ > 0.5: + graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='true', splines='curved', + penwidth=penwidth_, style='bold') + elif scale_ > 0.2: + graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='true', splines='curved', + penwidth=penwidth_, arrowhead='open') + elif scale_ > 0.1: + graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='true', splines='curved', + penwidth=penwidth_, style='dashed', arrowhead='open') + else: + graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', splines='curved', + penwidth=penwidth_, style='dotted', arrowhead='empty') - count = cmd_count[cmd] - if count < cmd_displayTreshold: - continue + # graphviz sometimes fails - see above + try: + graph.view() + # graph.render('/tmp/resh-graphviz-cmdSeq.gv', view=True) + break + except Exception as e: + trace = traceback.format_exc() + print("GRAPHVIZ EXCEPTION: <{}>\nGRAPHVIZ TRACE: <{}>".format(str(e), trace)) - for seq_entry in seq.items(): - cmd2, seq_count = seq_entry - relative_seq_count = seq_count / count - - if cmd_count[cmd2] < cmd_displayTreshold: - continue - if relative_seq_count < edge_displayTreshold: - continue - - for id_, cmd_ in ((cmd_id[cmd], cmd), (cmd_id[cmd2], cmd2)): - count_ = cmd_count[cmd_] - scale_ = count_ / (cmd_displayTreshold) - width_ = str(0.08*scale_) - fontsize_ = str(1*scale_) - if scale_ < 12: - graph.node(id_, '', shape='circle', fixedsize='true', fontname='bold', - width=width_, fontsize='12', forcelabels='true', xlabel=cmd_) - else: - graph.node(id_, cmd_, shape='circle', fixedsize='true', fontname='bold', - width=width_, fontsize=fontsize_, forcelabels='true') - - - # 1.0 is max - scale_ = seq_count / cmd_count[cmd] - penwidth_ = str(0.5 + 4.5 * scale_) - #penwidth_bold_ = str(8 * scale_) - if scale_ > 0.5: - graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', splines='curved', - penwidth=penwidth_, style='bold') - elif scale_ > 0.2: - graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', 
splines='curved', - penwidth=penwidth_, arrowhead='open') - elif scale_ > 0.1: - graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', splines='curved', - penwidth=penwidth_, style='dashed', arrowhead='open') - else: - graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', splines='curved', - penwidth=penwidth_, style='dotted', arrowhead='empty') - - graph.view() - # graph.render('/tmp/resh-graphviz-cmdSeq.gv', view=True) def plot_strategy_recency(): recent = None @@ -267,7 +301,7 @@ def plot_strategy_recency(): # plot_strategy_recency() -graphviz_cmdSequences() +graph_cmdSequences(node_count=28, edge_minValue=0.06) # plot_cmdVocabularySize_cmdLinesEntered() # plot_cmdLineFrq_rank() # plot_cmdFrq_rank() From abb786c4788102c45fbd799f03517da9b6d11d74 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Wed, 11 Sep 2019 22:50:21 +0200 Subject: [PATCH 30/41] add grouping by session (pid), add batch mode --- common/resh-common.go | 1 - evaluate/resh-evaluate-plot.py | 71 +++++++++------ evaluate/resh-evaluate.go | 162 ++++++++++++++++++++++++++------- 3 files changed, 171 insertions(+), 63 deletions(-) diff --git a/common/resh-common.go b/common/resh-common.go index e23fe52..69fb7cf 100644 --- a/common/resh-common.go +++ b/common/resh-common.go @@ -224,7 +224,6 @@ func (r *Record) Enrich() { // Validate - returns error if the record is invalid func (r *Record) Validate() error { - return nil } diff --git a/evaluate/resh-evaluate-plot.py b/evaluate/resh-evaluate-plot.py index cfb7624..c82db98 100755 --- a/evaluate/resh-evaluate-plot.py +++ b/evaluate/resh-evaluate-plot.py @@ -16,6 +16,24 @@ PLOT_HEIGHT = 7 # inches PLOT_SIZE_zipf = 20 data = json.load(sys.stdin) + +DATA_records = [] +DATA_records_by_session = defaultdict(list) +for user in data["UsersRecords"]: + for device in user["Devices"]: + for record in device["Records"]: + if record["invalid"]: + continue + + DATA_records.append(record) + DATA_records_by_session[record["sessionPid"]].append(record) + 
+DATA_records = list(sorted(DATA_records, key=lambda x: x["realtimeBeforeLocal"])) + +for pid, session in DATA_records_by_session.items(): + session = list(sorted(session, key=lambda x: x["realtimeBeforeLocal"])) + + # for strategy in data["Strategies"]: # print(json.dumps(strategy)) @@ -33,10 +51,7 @@ def trim(text, length, add_elipse=True): # Figure 3.1. The normalized command frequency, compared with Zipf. def plot_cmdLineFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False): cmdLine_count = defaultdict(int) - for record in data["Records"]: - if record["invalid"]: - continue - + for record in DATA_records: cmdLine_count[record["cmdLine"]] += 1 tmp = sorted(cmdLine_count.items(), key=lambda x: x[1], reverse=True)[:plotSize] @@ -60,10 +75,7 @@ def plot_cmdLineFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False): # similar to ~ Figure 3.1. The normalized command frequency, compared with Zipf. def plot_cmdFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False): cmd_count = defaultdict(int) - for record in data["Records"]: - if record["invalid"]: - continue - + for record in DATA_records: cmd = record["firstWord"] if cmd == "": continue @@ -90,10 +102,7 @@ def plot_cmdFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False): def plot_cmdVocabularySize_cmdLinesEntered(): cmd_vocabulary = set() y_cmd_count = [0] - for record in data["Records"]: - if record["invalid"]: - continue - + for record in DATA_records: cmd = record["firstWord"] if cmd in cmd_vocabulary: # repeat last value @@ -103,7 +112,7 @@ def plot_cmdVocabularySize_cmdLinesEntered(): # append last value +1 y_cmd_count.append(y_cmd_count[-1] + 1) - print(cmd_vocabulary) + # print(cmd_vocabulary) x_cmds_entered = range(0, len(y_cmd_count)) plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) @@ -118,23 +127,27 @@ def plot_cmdVocabularySize_cmdLinesEntered(): # Ball diameters are proportional to stationary probability. 
Lines indicate significant dependencies, # solid ones being more probable (p < .0001) and dashed ones less probable (.005 < p < .0001). def graph_cmdSequences(node_count=33, edge_minValue=0.05): + START_CMD = "_start_" cmd_count = defaultdict(int) cmdSeq_count = defaultdict(lambda: defaultdict(int)) cmd_id = dict() - prev_cmd = "" # XXX: not actually session init yet - cmd_id[prev_cmd] = str(-1) - for x, record in enumerate(data["Records"]): - if record["invalid"]: - continue - - cmd = record["firstWord"] - cmdSeq_count[prev_cmd][cmd] += 1 - cmd_count[cmd] += 1 - cmd_id[cmd] = str(x) - prev_cmd = cmd + x = 0 + cmd_id[START_CMD] = str(x) + for pid, session in DATA_records_by_session.items(): + cmd_count[START_CMD] += 1 + prev_cmd = START_CMD + for record in session: + cmd = record["firstWord"] + cmdSeq_count[prev_cmd][cmd] += 1 + cmd_count[cmd] += 1 + if cmd not in cmd_id: + x += 1 + cmd_id[cmd] = str(x) + prev_cmd = cmd # get `node_count` of largest nodes sorted_cmd_count = sorted(cmd_count.items(), key=lambda x: x[1], reverse=True) + print(sorted_cmd_count) cmds_to_graph = list(map(lambda x: x[0], sorted_cmd_count))[:node_count] # use 3 biggest nodes as a reference point for scaling @@ -298,13 +311,15 @@ def plot_strategy_recency(): +# plot_cmdLineFrq_rank() +# plot_cmdFrq_rank() +# plot_cmdVocabularySize_cmdLinesEntered() + # plot_strategy_recency() -graph_cmdSequences(node_count=28, edge_minValue=0.06) -# plot_cmdVocabularySize_cmdLinesEntered() -# plot_cmdLineFrq_rank() -# plot_cmdFrq_rank() +graph_cmdSequences() +# graph_cmdSequences(node_count=28, edge_minValue=0.06) # be careful and check if labels fit the display \ No newline at end of file diff --git a/evaluate/resh-evaluate.go b/evaluate/resh-evaluate.go index 3917a8b..07bb34c 100644 --- a/evaluate/resh-evaluate.go +++ b/evaluate/resh-evaluate.go @@ -6,6 +6,7 @@ import ( "encoding/json" "flag" "fmt" + "io/ioutil" "log" "os" "os/exec" @@ -25,30 +26,25 @@ func main() { usr, _ := user.Current() dir := 
usr.HomeDir historyPath := filepath.Join(dir, ".resh_history.json") + historyPathBatchMode := filepath.Join(dir, "resh_history.json") sanitizedHistoryPath := filepath.Join(dir, "resh_history_sanitized.json") // tmpPath := "/tmp/resh-evaluate-tmp.json" showVersion := flag.Bool("version", false, "Show version and exit") showRevision := flag.Bool("revision", false, "Show git revision and exit") - inputPath := flag.String("input", "", + input := flag.String("input", "", "Input file (default: "+historyPath+"OR"+sanitizedHistoryPath+ " depending on --sanitized-input option)") // outputDir := flag.String("output", "/tmp/resh-evaluate", "Output directory") sanitizedInput := flag.Bool("sanitized-input", false, "Handle input as sanitized (also changes default value for input argument)") plottingScript := flag.String("plotting-script", "resh-evaluate-plot.py", "Script to use for plotting") + inputDataRoot := flag.String("input-data-root", "", + "Input data root, enables batch mode, looks for files matching --input option") flag.Parse() - // set default input - if *inputPath == "" { - if *sanitizedInput { - *inputPath = sanitizedHistoryPath - } else { - *inputPath = historyPath - } - } - + // handle show{Version,Revision} options if *showVersion == true { fmt.Println(Version) os.Exit(0) @@ -58,10 +54,33 @@ func main() { os.Exit(0) } - evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: 50} - err := evaluator.init(*inputPath) - if err != nil { - log.Fatal("Evaluator init() error:", err) + // handle batch mode + batchMode := false + if *inputDataRoot != "" { + batchMode = true + } + // set default input + if *input == "" { + if *sanitizedInput { + *input = sanitizedHistoryPath + } else if batchMode { + *input = historyPathBatchMode + } else { + *input = historyPath + } + } + + evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: 50, BatchMode: batchMode} + if batchMode { + err := evaluator.initBatchMode(*input, *inputDataRoot) + if err != nil 
{ + log.Fatal("Evaluator initBatchMode() error:", err) + } + } else { + err := evaluator.init(*input) + if err != nil { + log.Fatal("Evaluator init() error:", err) + } } var strategies []strategy @@ -73,12 +92,11 @@ func main() { strategies = append(strategies, &recent) for _, strat := range strategies { - err = evaluator.evaluate(strat) + err := evaluator.evaluate(strat) if err != nil { log.Println("Evaluator evaluate() error:", err) } } - // evaluator.dumpJSON(tmpPath) evaluator.calculateStatsAndPlot(*plottingScript) } @@ -102,26 +120,42 @@ type strategyJSON struct { Matches []matchJSON } -type evaluateJSON struct { - Strategies []strategyJSON - Records []common.Record +type deviceRecords struct { + Name string + Records []common.Record +} + +type userRecords struct { + Name string + Devices []deviceRecords } type evaluator struct { sanitizedInput bool + BatchMode bool maxCandidates int - historyRecords []common.Record - data evaluateJSON + UsersRecords []userRecords + Strategies []strategyJSON +} + +func (e *evaluator) initBatchMode(input string, inputDataRoot string) error { + e.UsersRecords = e.loadHistoryRecordsBatchMode(input, inputDataRoot) + e.processRecords() + return nil } func (e *evaluator) init(inputPath string) error { - e.historyRecords = e.loadHistoryRecords(inputPath) + records := e.loadHistoryRecords(inputPath) + device := deviceRecords{Records: records} + user := userRecords{} + user.Devices = append(user.Devices, device) + e.UsersRecords = append(e.UsersRecords, user) e.processRecords() return nil } func (e *evaluator) calculateStatsAndPlot(scriptName string) { - evalJSON, err := json.Marshal(e.data) + evalJSON, err := json.Marshal(e) if err != nil { log.Fatal("json marshal error", err) } @@ -140,25 +174,28 @@ func (e *evaluator) calculateStatsAndPlot(scriptName string) { // enrich records and add them to serializable structure func (e *evaluator) processRecords() { - for _, record := range e.historyRecords { + for i := range e.UsersRecords { + 
for j := range e.UsersRecords[i].Devices { + for k, record := range e.UsersRecords[i].Devices[j].Records { + // assert + if record.Sanitized != e.sanitizedInput { + if e.sanitizedInput { + log.Fatal("ASSERT failed: '--sanitized-input' is present but data is not sanitized") + } + log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present") + } - // assert - if record.Sanitized != e.sanitizedInput { - if e.sanitizedInput { - log.Fatal("ASSERT failed: '--sanitized-input' is present but data is not sanitized") + e.UsersRecords[i].Devices[j].Records[k].Enrich() + // device.Records = append(device.Records, record) } - log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present") } - - record.Enrich() - e.data.Records = append(e.data.Records, record) } } func (e *evaluator) evaluate(strategy strategy) error { title, description := strategy.GetTitleAndDescription() strategyData := strategyJSON{Title: title, Description: description} - for _, record := range e.historyRecords { + for _, record := range e.UsersRecords[0].Devices[0].Records { candidates := strategy.GetCandidates() matchFound := false @@ -183,10 +220,67 @@ func (e *evaluator) evaluate(strategy strategy) error { return err } } - e.data.Strategies = append(e.data.Strategies, strategyData) + e.Strategies = append(e.Strategies, strategyData) return nil } +func (e *evaluator) loadHistoryRecordsBatchMode(fname string, dataRootPath string) []userRecords { + var records []userRecords + info, err := os.Stat(dataRootPath) + if err != nil { + log.Fatal("Error: Directory", dataRootPath, "does not exist - exiting! 
(", err, ")") + } + if info.IsDir() == false { + log.Fatal("Error:", dataRootPath, "is not a directory - exiting!") + } + users, err := ioutil.ReadDir(dataRootPath) + if err != nil { + log.Fatal("Could not read directory:", dataRootPath) + } + fmt.Println("Listing users in <", dataRootPath, ">...") + for _, user := range users { + userRecords := userRecords{Name: user.Name()} + userFullPath := filepath.Join(dataRootPath, user.Name()) + if user.IsDir() == false { + log.Println("Warn: Unexpected file (not a directory) <", userFullPath, "> - skipping.") + continue + } + fmt.Println() + fmt.Printf("*- %s\n", user.Name()) + devices, err := ioutil.ReadDir(userFullPath) + if err != nil { + log.Fatal("Could not read directory:", userFullPath) + } + for _, device := range devices { + deviceRecords := deviceRecords{Name: device.Name()} + deviceFullPath := filepath.Join(userFullPath, device.Name()) + if device.IsDir() == false { + log.Println("Warn: Unexpected file (not a directory) <", deviceFullPath, "> - skipping.") + continue + } + fmt.Printf(" \\- %s\n", device.Name()) + files, err := ioutil.ReadDir(deviceFullPath) + if err != nil { + log.Fatal("Could not read directory:", deviceFullPath) + } + for _, file := range files { + fileFullPath := filepath.Join(deviceFullPath, file.Name()) + if file.Name() == fname { + fmt.Printf(" \\- %s - loading ...", file.Name()) + // load the data + deviceRecords.Records = e.loadHistoryRecords(fileFullPath) + fmt.Println(" OK ✓") + } else { + fmt.Printf(" \\- %s - skipped\n", file.Name()) + } + } + userRecords.Devices = append(userRecords.Devices, deviceRecords) + } + records = append(records, userRecords) + } + return records +} + func (e *evaluator) loadHistoryRecords(fname string) []common.Record { file, err := os.Open(fname) if err != nil { From ee606b675e5e1cadb88805a9e0e16c12531f3d58 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Wed, 11 Sep 2019 22:53:05 +0200 Subject: [PATCH 31/41] minor fix, enable graphs and plots --- Makefile | 
1 + evaluate/resh-evaluate-plot.py | 12 +++++------- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index d5b4bb0..687ad36 100644 --- a/Makefile +++ b/Makefile @@ -58,6 +58,7 @@ install: build submodules/bash-preexec/bash-preexec.sh shellrc.sh config.toml uu cp -f shellrc.sh ~/.resh/shellrc cp -f uuid.sh ~/.resh/bin/resh-uuid cp -f resh-* ~/.resh/bin/ + cp -f evaluate/resh-evaluate-plot.py ~/.resh/bin/ cp -fr sanitizer_data ~/.resh/ # backward compatibility: We have a new location for resh history file [ ! -f ~/.resh/history.json ] || mv ~/.resh/history.json ~/.resh_history.json diff --git a/evaluate/resh-evaluate-plot.py b/evaluate/resh-evaluate-plot.py index c82db98..ce476db 100755 --- a/evaluate/resh-evaluate-plot.py +++ b/evaluate/resh-evaluate-plot.py @@ -310,16 +310,14 @@ def plot_strategy_recency(): plt.show() +graph_cmdSequences() +graph_cmdSequences(node_count=28, edge_minValue=0.06) -# plot_cmdLineFrq_rank() +plot_cmdLineFrq_rank() # plot_cmdFrq_rank() -# plot_cmdVocabularySize_cmdLinesEntered() - -# plot_strategy_recency() - -graph_cmdSequences() -# graph_cmdSequences(node_count=28, edge_minValue=0.06) +plot_cmdVocabularySize_cmdLinesEntered() +plot_strategy_recency() # be careful and check if labels fit the display \ No newline at end of file From 367263b28cd0b0a4d4f9b282cca9ab32f836b9bf Mon Sep 17 00:00:00 2001 From: Simon Let Date: Thu, 12 Sep 2019 00:59:16 +0200 Subject: [PATCH 32/41] improve plotting --- evaluate/resh-evaluate-plot.py | 179 +++++++++++++++++++++++---------- 1 file changed, 125 insertions(+), 54 deletions(-) diff --git a/evaluate/resh-evaluate-plot.py b/evaluate/resh-evaluate-plot.py index ce476db..5ec215b 100755 --- a/evaluate/resh-evaluate-plot.py +++ b/evaluate/resh-evaluate-plot.py @@ -122,6 +122,29 @@ def plot_cmdVocabularySize_cmdLinesEntered(): plt.xlabel("# of command lines entered") plt.show() +# Figure 5.6. Command line vocabulary size vs. 
the number of commands entered for four typical individuals. +def plot_cmdLineVocabularySize_cmdLinesEntered(): + cmdLine_vocabulary = set() + y_cmdLine_count = [0] + for record in DATA_records: + cmdLine = record["cmdLine"] + if cmdLine in cmdLine_vocabulary: + # repeat last value + y_cmdLine_count.append(y_cmdLine_count[-1]) + else: + cmdLine_vocabulary.add(cmdLine) + # append last value +1 + y_cmdLine_count.append(y_cmdLine_count[-1] + 1) + + # print(cmdLine_vocabulary) + x_cmdLines_entered = range(0, len(y_cmdLine_count)) + + plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) + plt.plot(x_cmdLines_entered, y_cmdLine_count, '-') + plt.title("Command line vocabulary size vs. the number of command lines entered") + plt.ylabel("Command line vocabulary size") + plt.xlabel("# of command lines entered") + plt.show() # Figure 3.3. Sequential structure of UNIX command usage, from Figure 4 in Hanson et al. (1984). # Ball diameters are proportional to stationary probability. Lines indicate significant dependencies, @@ -236,79 +259,125 @@ def graph_cmdSequences(node_count=33, edge_minValue=0.05): print("GRAPHVIZ EXCEPTION: <{}>\nGRAPHVIZ TRACE: <{}>".format(str(e), trace)) -def plot_strategy_recency(): - recent = None +def plot_strategies_matches(plot_size=50, selected_strategies=[]): + plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) + plt.title("Matches at distance") + plt.ylabel('%' + " of matches") + plt.xlabel("Distance") + legend = [] for strategy in data["Strategies"]: - if strategy["Title"] != "recent": + strategy_title = strategy["Title"] + # strategy_description = strategy["Description"] + + if len(selected_strategies) and strategy_title not in selected_strategies: continue - recent = strategy - break - assert(recent is not None) + dataPoint_count = 0 + matches = [0] * plot_size + matches_total = 0 + charsRecalled = [0] * plot_size + charsRecalled_total = 0 + + for match in strategy["Matches"]: + dataPoint_count += 1 + + if not match["Match"]: + continue - size = 50 + 
chars = match["CharsRecalled"] + charsRecalled_total += chars + matches_total += 1 - dataPoint_count = 0 - matches = [0] * size - matches_total = 0 - charsRecalled = [0] * size - charsRecalled_total = 0 - - for match in recent["Matches"]: - dataPoint_count += 1 + dist = match["Distance"] + if dist > plot_size: + continue - if not match["Match"]: - continue + matches[dist-1] += 1 + charsRecalled[dist-1] += chars + - chars = match["CharsRecalled"] - charsRecalled_total += chars - matches_total += 1 + acc = 0 + matches_cumulative = [] + for x in matches: + acc += x + matches_cumulative.append(acc) + matches_cumulative.append(matches_total) + matches_percent = list(map(lambda x: 100 * x / dataPoint_count, matches_cumulative)) - dist = match["Distance"] - if dist > size: - continue + x_values = range(1, plot_size+2) + plt.plot(x_values, matches_percent, 'o-') + legend.append(strategy_title) - matches[dist-1] += 1 - charsRecalled[dist-1] += chars - - x_values = range(1, size+2) - x_ticks = list(range(1, size+1, 2)) + + x_ticks = list(range(1, plot_size+1, 2)) x_labels = x_ticks[:] - x_ticks.append(size+1) + x_ticks.append(plot_size+1) x_labels.append("total") - - acc = 0 - matches_cumulative = [] - for x in matches: - acc += x - matches_cumulative.append(acc) - matches_cumulative.append(matches_total) - matches_percent = list(map(lambda x: 100 * x / dataPoint_count, matches_cumulative)) - plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) - plt.plot(x_values, matches_percent, 'o-') - plt.title("Matches at distance") - plt.ylabel('%' + " of matches") - plt.xlabel("Distance") plt.xticks(x_ticks, x_labels) - #plt.legend(("Zipf", "Command"), loc="best") + plt.legend(legend, loc="best") plt.show() - acc = 0 - charsRecalled_cumulative = [] - for x in charsRecalled: - acc += x - charsRecalled_cumulative.append(acc) - charsRecalled_cumulative.append(charsRecalled_total) - charsRecalled_average = list(map(lambda x: x / dataPoint_count, charsRecalled_cumulative)) + + +def 
plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]): plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) - plt.plot(x_values, charsRecalled_average, 'o-') plt.title("Average characters recalled at distance") plt.ylabel("Average characters recalled") plt.xlabel("Distance") + legend = [] + for strategy in data["Strategies"]: + strategy_title = strategy["Title"] + # strategy_description = strategy["Description"] + + if len(selected_strategies) and strategy_title not in selected_strategies: + continue + + dataPoint_count = 0 + matches = [0] * plot_size + matches_total = 0 + charsRecalled = [0] * plot_size + charsRecalled_total = 0 + + for match in strategy["Matches"]: + dataPoint_count += 1 + + if not match["Match"]: + continue + + chars = match["CharsRecalled"] + charsRecalled_total += chars + matches_total += 1 + + dist = match["Distance"] + if dist > plot_size: + continue + + matches[dist-1] += 1 + charsRecalled[dist-1] += chars + + + acc = 0 + charsRecalled_cumulative = [] + for x in charsRecalled: + acc += x + charsRecalled_cumulative.append(acc) + charsRecalled_cumulative.append(charsRecalled_total) + charsRecalled_average = list(map(lambda x: x / dataPoint_count, charsRecalled_cumulative)) + + x_values = range(1, plot_size+2) + plt.plot(x_values, charsRecalled_average, 'o-') + legend.append(strategy_title) + + + x_ticks = list(range(1, plot_size+1, 2)) + x_labels = x_ticks[:] + x_ticks.append(plot_size+1) + x_labels.append("total") plt.xticks(x_ticks, x_labels) - #plt.legend(("Zipf", "Command"), loc="best") + plt.legend(legend, loc="best") plt.show() + graph_cmdSequences() graph_cmdSequences(node_count=28, edge_minValue=0.06) @@ -316,8 +385,10 @@ graph_cmdSequences(node_count=28, edge_minValue=0.06) plot_cmdLineFrq_rank() # plot_cmdFrq_rank() -plot_cmdVocabularySize_cmdLinesEntered() +plot_cmdLineVocabularySize_cmdLinesEntered() +# plot_cmdVocabularySize_cmdLinesEntered() -plot_strategy_recency() +plot_strategies_matches() 
+plot_strategies_charsRecalled() # be careful and check if labels fit the display \ No newline at end of file From 67ab2ffaef2046988a28cba6fb51d7d62b1444c0 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Thu, 12 Sep 2019 02:29:50 +0200 Subject: [PATCH 33/41] add strategies: frequent, directory sensitive; sort records by session and time --- common/resh-common.go | 5 +- evaluate/resh-evaluate-plot.py | 62 ++++++++++++++++-------- evaluate/resh-evaluate.go | 26 +++++++++- evaluate/strategy-directory-sensitive.go | 42 ++++++++++++++++ evaluate/strategy-frequent.go | 47 ++++++++++++++++++ evaluate/strategy-recent.go | 1 + 6 files changed, 159 insertions(+), 24 deletions(-) create mode 100644 evaluate/strategy-directory-sensitive.go create mode 100644 evaluate/strategy-frequent.go diff --git a/common/resh-common.go b/common/resh-common.go index 69fb7cf..1499595 100644 --- a/common/resh-common.go +++ b/common/resh-common.go @@ -74,8 +74,9 @@ type Record struct { CmdLength int `json:"cmdLength"` // enriching fields - added "later" - FirstWord string `json:"firstWord"` - Invalid bool `json:"invalid"` + FirstWord string `json:"firstWord"` + Invalid bool `json:"invalid"` + SeqSessionID uint64 `json:"seqSessionID"` } // FallbackRecord when record is too old and can't be parsed into regular Record diff --git a/evaluate/resh-evaluate-plot.py b/evaluate/resh-evaluate-plot.py index 5ec215b..9f27347 100755 --- a/evaluate/resh-evaluate-plot.py +++ b/evaluate/resh-evaluate-plot.py @@ -26,7 +26,7 @@ for user in data["UsersRecords"]: continue DATA_records.append(record) - DATA_records_by_session[record["sessionPid"]].append(record) + DATA_records_by_session[record["sessionId"]].append(record) DATA_records = list(sorted(DATA_records, key=lambda x: x["realtimeBeforeLocal"])) @@ -265,6 +265,9 @@ def plot_strategies_matches(plot_size=50, selected_strategies=[]): plt.ylabel('%' + " of matches") plt.xlabel("Distance") legend = [] + x_values = range(1, plot_size+1) + saved_matches_total = 
None + saved_dataPoint_count = None for strategy in data["Strategies"]: strategy_title = strategy["Title"] # strategy_description = strategy["Description"] @@ -295,24 +298,34 @@ def plot_strategies_matches(plot_size=50, selected_strategies=[]): matches[dist-1] += 1 charsRecalled[dist-1] += chars + # recent is very simple strategy so we will believe + # that there is no bug in it and we can use it to determine total + if strategy_title == "recent": + saved_matches_total = matches_total + saved_dataPoint_count = dataPoint_count + + if len(selected_strategies) and strategy_title not in selected_strategies: + continue acc = 0 matches_cumulative = [] for x in matches: acc += x matches_cumulative.append(acc) - matches_cumulative.append(matches_total) + # matches_cumulative.append(matches_total) matches_percent = list(map(lambda x: 100 * x / dataPoint_count, matches_cumulative)) - x_values = range(1, plot_size+2) plt.plot(x_values, matches_percent, 'o-') legend.append(strategy_title) + assert(saved_matches_total is not None) + assert(saved_dataPoint_count is not None) + max_values = [100 * saved_matches_total / saved_dataPoint_count] * len(x_values) + plt.plot(x_values, max_values, 'r-') + legend.append("maximum possible") x_ticks = list(range(1, plot_size+1, 2)) x_labels = x_ticks[:] - x_ticks.append(plot_size+1) - x_labels.append("total") plt.xticks(x_ticks, x_labels) plt.legend(legend, loc="best") plt.show() @@ -324,14 +337,14 @@ def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]): plt.title("Average characters recalled at distance") plt.ylabel("Average characters recalled") plt.xlabel("Distance") + x_values = range(1, plot_size+1) legend = [] + saved_charsRecalled_total = None + saved_dataPoint_count = None for strategy in data["Strategies"]: strategy_title = strategy["Title"] # strategy_description = strategy["Description"] - if len(selected_strategies) and strategy_title not in selected_strategies: - continue - dataPoint_count = 0 matches = [0] * 
plot_size matches_total = 0 @@ -355,38 +368,47 @@ def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]): matches[dist-1] += 1 charsRecalled[dist-1] += chars + # recent is very simple strategy so we will believe + # that there is no bug in it and we can use it to determine total + if strategy_title == "recent": + saved_charsRecalled_total = charsRecalled_total + saved_dataPoint_count = dataPoint_count + + if len(selected_strategies) and strategy_title not in selected_strategies: + continue acc = 0 charsRecalled_cumulative = [] for x in charsRecalled: acc += x charsRecalled_cumulative.append(acc) - charsRecalled_cumulative.append(charsRecalled_total) charsRecalled_average = list(map(lambda x: x / dataPoint_count, charsRecalled_cumulative)) - x_values = range(1, plot_size+2) plt.plot(x_values, charsRecalled_average, 'o-') legend.append(strategy_title) + assert(saved_charsRecalled_total is not None) + assert(saved_dataPoint_count is not None) + max_values = [saved_charsRecalled_total / saved_dataPoint_count] * len(x_values) + plt.plot(x_values, max_values, 'r-') + legend.append("maximum possible") x_ticks = list(range(1, plot_size+1, 2)) x_labels = x_ticks[:] - x_ticks.append(plot_size+1) - x_labels.append("total") plt.xticks(x_ticks, x_labels) plt.legend(legend, loc="best") plt.show() -graph_cmdSequences() -graph_cmdSequences(node_count=28, edge_minValue=0.06) - -plot_cmdLineFrq_rank() -# plot_cmdFrq_rank() - -plot_cmdLineVocabularySize_cmdLinesEntered() -# plot_cmdVocabularySize_cmdLinesEntered() +# graph_cmdSequences() +# graph_cmdSequences(node_count=28, edge_minValue=0.06) +# +# plot_cmdLineFrq_rank() +# # plot_cmdFrq_rank() +# +# plot_cmdLineVocabularySize_cmdLinesEntered() +# # plot_cmdVocabularySize_cmdLinesEntered() plot_strategies_matches() plot_strategies_charsRecalled() diff --git a/evaluate/resh-evaluate.go b/evaluate/resh-evaluate.go index 07bb34c..bef0b24 100644 --- a/evaluate/resh-evaluate.go +++ b/evaluate/resh-evaluate.go @@ -12,6 
+12,7 @@ import ( "os/exec" "os/user" "path/filepath" + "sort" "github.com/curusarn/resh/common" ) @@ -89,7 +90,12 @@ func main() { // strategies = append(strategies, &dummy) recent := strategyRecent{} - strategies = append(strategies, &recent) + frequent := strategyFrequent{} + frequent.init() + directory := strategyDirectorySensitive{} + directory.init() + + strategies = append(strategies, &recent, &frequent, &directory) for _, strat := range strategies { err := evaluator.evaluate(strat) @@ -175,8 +181,18 @@ func (e *evaluator) calculateStatsAndPlot(scriptName string) { // enrich records and add them to serializable structure func (e *evaluator) processRecords() { for i := range e.UsersRecords { - for j := range e.UsersRecords[i].Devices { + for j, device := range e.UsersRecords[i].Devices { + sessionIDs := map[string]uint64{} + var nextID uint64 + nextID = 0 for k, record := range e.UsersRecords[i].Devices[j].Records { + id, found := sessionIDs[record.SessionId] + if found == false { + id = nextID + sessionIDs[record.SessionId] = id + nextID++ + } + record.SeqSessionID = id // assert if record.Sanitized != e.sanitizedInput { if e.sanitizedInput { @@ -188,6 +204,12 @@ func (e *evaluator) processRecords() { e.UsersRecords[i].Devices[j].Records[k].Enrich() // device.Records = append(device.Records, record) } + sort.SliceStable(e.UsersRecords[i].Devices[j].Records, func(x, y int) bool { + if device.Records[x].SeqSessionID == device.Records[y].SeqSessionID { + return device.Records[x].RealtimeAfterLocal < device.Records[y].RealtimeAfterLocal + } + return device.Records[x].SeqSessionID < device.Records[y].SeqSessionID + }) } } } diff --git a/evaluate/strategy-directory-sensitive.go b/evaluate/strategy-directory-sensitive.go new file mode 100644 index 0000000..0c00bc4 --- /dev/null +++ b/evaluate/strategy-directory-sensitive.go @@ -0,0 +1,42 @@ +package main + +import ( + "github.com/curusarn/resh/common" +) + +type strategyDirectorySensitive struct { + history 
map[string][]string + lastPwd string +} + +func (s *strategyDirectorySensitive) init() { + s.history = map[string][]string{} +} + +func (s *strategyDirectorySensitive) GetTitleAndDescription() (string, string) { + return "directory sensitive (recent)", "Use recent commands executed is the same directory" +} + +func (s *strategyDirectorySensitive) GetCandidates() []string { + return s.history[s.lastPwd] +} + +func (s *strategyDirectorySensitive) AddHistoryRecord(record *common.Record) error { + // work on history for PWD + pwd := record.Pwd + // remove previous occurance of record + for i, cmd := range s.history[pwd] { + if cmd == record.CmdLine { + s.history[pwd] = append(s.history[pwd][:i], s.history[pwd][i+1:]...) + } + } + // append new record + s.history[pwd] = append([]string{record.CmdLine}, s.history[pwd]...) + s.lastPwd = record.PwdAfter + return nil +} + +func (s *strategyDirectorySensitive) ResetHistory() error { + s.history = map[string][]string{} + return nil +} diff --git a/evaluate/strategy-frequent.go b/evaluate/strategy-frequent.go new file mode 100644 index 0000000..c41f852 --- /dev/null +++ b/evaluate/strategy-frequent.go @@ -0,0 +1,47 @@ +package main + +import ( + "sort" + + "github.com/curusarn/resh/common" +) + +type strategyFrequent struct { + history map[string]int +} + +type strFrqEntry struct { + cmdLine string + count int +} + +func (s *strategyFrequent) init() { + s.history = map[string]int{} +} + +func (s *strategyFrequent) GetTitleAndDescription() (string, string) { + return "frequent", "Use frequent commands" +} + +func (s *strategyFrequent) GetCandidates() []string { + var mapItems []strFrqEntry + for cmdLine, count := range s.history { + mapItems = append(mapItems, strFrqEntry{cmdLine, count}) + } + sort.Slice(mapItems, func(i int, j int) bool { return mapItems[i].count > mapItems[j].count }) + var hist []string + for _, item := range mapItems { + hist = append(hist, item.cmdLine) + } + return hist +} + +func (s *strategyFrequent) 
AddHistoryRecord(record *common.Record) error { + s.history[record.CmdLine]++ + return nil +} + +func (s *strategyFrequent) ResetHistory() error { + s.history = map[string]int{} + return nil +} diff --git a/evaluate/strategy-recent.go b/evaluate/strategy-recent.go index b75adc2..7d24d23 100644 --- a/evaluate/strategy-recent.go +++ b/evaluate/strategy-recent.go @@ -27,5 +27,6 @@ func (s *strategyRecent) AddHistoryRecord(record *common.Record) error { } func (s *strategyRecent) ResetHistory() error { + s.history = nil return nil } From fc0636010372f4f48401c9ea7b004871f2f0d8a1 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Thu, 12 Sep 2019 02:34:10 +0200 Subject: [PATCH 34/41] enable all the plots and graphs --- evaluate/resh-evaluate-plot.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/evaluate/resh-evaluate-plot.py b/evaluate/resh-evaluate-plot.py index 9f27347..0bb74e5 100755 --- a/evaluate/resh-evaluate-plot.py +++ b/evaluate/resh-evaluate-plot.py @@ -401,16 +401,16 @@ def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]): -# graph_cmdSequences() -# graph_cmdSequences(node_count=28, edge_minValue=0.06) -# -# plot_cmdLineFrq_rank() -# # plot_cmdFrq_rank() -# -# plot_cmdLineVocabularySize_cmdLinesEntered() -# # plot_cmdVocabularySize_cmdLinesEntered() - -plot_strategies_matches() -plot_strategies_charsRecalled() +graph_cmdSequences() +graph_cmdSequences(node_count=28, edge_minValue=0.06) + +plot_cmdLineFrq_rank() +plot_cmdFrq_rank() + +plot_cmdLineVocabularySize_cmdLinesEntered() +plot_cmdVocabularySize_cmdLinesEntered() + +plot_strategies_matches(20) +plot_strategies_charsRecalled(20) # be careful and check if labels fit the display \ No newline at end of file From ad3440f4df57c4950bce49c8bcf0376191c1d510 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Thu, 12 Sep 2019 02:44:45 +0200 Subject: [PATCH 35/41] make pyplot draw async --- evaluate/resh-evaluate-plot.py | 36 +++++++++++++++++++++++++++------- 
1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/evaluate/resh-evaluate-plot.py b/evaluate/resh-evaluate-plot.py index 0bb74e5..ee68877 100755 --- a/evaluate/resh-evaluate-plot.py +++ b/evaluate/resh-evaluate-plot.py @@ -33,6 +33,8 @@ DATA_records = list(sorted(DATA_records, key=lambda x: x["realtimeBeforeLocal"]) for pid, session in DATA_records_by_session.items(): session = list(sorted(session, key=lambda x: x["realtimeBeforeLocal"])) +# TODO: this should be a cmdline option +async_draw = True # for strategy in data["Strategies"]: # print(json.dumps(strategy)) @@ -69,7 +71,10 @@ def plot_cmdLineFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False): if show_labels: plt.xticks(ranks, labels, rotation=-60) # TODO: make xticks integral - plt.show() + if async_draw: + plt.draw() + else: + plt.show() # similar to ~ Figure 3.1. The normalized command frequency, compared with Zipf. @@ -96,7 +101,10 @@ def plot_cmdFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False): if show_labels: plt.xticks(ranks, labels, rotation=-60) # TODO: make xticks integral - plt.show() + if async_draw: + plt.draw() + else: + plt.show() # Figure 3.2. Command vocabulary size vs. the number of command lines entered for four individuals. def plot_cmdVocabularySize_cmdLinesEntered(): @@ -120,7 +128,10 @@ def plot_cmdVocabularySize_cmdLinesEntered(): plt.title("Command vocabulary size vs. the number of command lines entered") plt.ylabel("Command vocabulary size") plt.xlabel("# of command lines entered") - plt.show() + if async_draw: + plt.draw() + else: + plt.show() # Figure 5.6. Command line vocabulary size vs. the number of commands entered for four typical individuals. def plot_cmdLineVocabularySize_cmdLinesEntered(): @@ -144,7 +155,10 @@ def plot_cmdLineVocabularySize_cmdLinesEntered(): plt.title("Command line vocabulary size vs. 
the number of command lines entered") plt.ylabel("Command line vocabulary size") plt.xlabel("# of command lines entered") - plt.show() + if async_draw: + plt.draw() + else: + plt.show() # Figure 3.3. Sequential structure of UNIX command usage, from Figure 4 in Hanson et al. (1984). # Ball diameters are proportional to stationary probability. Lines indicate significant dependencies, @@ -328,7 +342,10 @@ def plot_strategies_matches(plot_size=50, selected_strategies=[]): x_labels = x_ticks[:] plt.xticks(x_ticks, x_labels) plt.legend(legend, loc="best") - plt.show() + if async_draw: + plt.draw() + else: + plt.show() @@ -397,11 +414,14 @@ def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]): x_labels = x_ticks[:] plt.xticks(x_ticks, x_labels) plt.legend(legend, loc="best") - plt.show() + if async_draw: + plt.draw() + else: + plt.show() -graph_cmdSequences() +# graph_cmdSequences(node_count=33, edge_minValue=0.05) graph_cmdSequences(node_count=28, edge_minValue=0.06) plot_cmdLineFrq_rank() @@ -413,4 +433,6 @@ plot_cmdVocabularySize_cmdLinesEntered() plot_strategies_matches(20) plot_strategies_charsRecalled(20) +if async_draw: + plt.show() # be careful and check if labels fit the display \ No newline at end of file From a7f1555dafc44b2a88f93a1edfef0cb9a9cb84f1 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Thu, 12 Sep 2019 23:16:28 +0200 Subject: [PATCH 36/41] minor change --- evaluate/resh-evaluate-plot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evaluate/resh-evaluate-plot.py b/evaluate/resh-evaluate-plot.py index ee68877..45d9322 100755 --- a/evaluate/resh-evaluate-plot.py +++ b/evaluate/resh-evaluate-plot.py @@ -28,10 +28,10 @@ for user in data["UsersRecords"]: DATA_records.append(record) DATA_records_by_session[record["sessionId"]].append(record) -DATA_records = list(sorted(DATA_records, key=lambda x: x["realtimeBeforeLocal"])) +DATA_records = list(sorted(DATA_records, key=lambda x: x["realtimeAfterLocal"])) for pid, 
session in DATA_records_by_session.items(): - session = list(sorted(session, key=lambda x: x["realtimeBeforeLocal"])) + session = list(sorted(session, key=lambda x: x["realtimeAfterLocal"])) # TODO: this should be a cmdline option async_draw = True From 0fb4d46174029d0fb36fbef2b7368c66406b1530 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Fri, 13 Sep 2019 00:57:54 +0200 Subject: [PATCH 37/41] simplify output writing, minor changes --- sanitize-history/resh-sanitize-history.go | 48 +++++++++++------------ 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/sanitize-history/resh-sanitize-history.go b/sanitize-history/resh-sanitize-history.go index c9fc057..32a0262 100644 --- a/sanitize-history/resh-sanitize-history.go +++ b/sanitize-history/resh-sanitize-history.go @@ -65,9 +65,9 @@ func main() { defer inputFile.Close() var writer *bufio.Writer - useStdout := true - if len(*outputPath) > 0 { - useStdout = false + if *outputPath == "" { + writer = bufio.NewWriter(os.Stdout) + } else { outputFile, err := os.Create(*outputPath) if err != nil { log.Fatal("Create() output file error:", err) @@ -75,6 +75,7 @@ func main() { defer outputFile.Close() writer = bufio.NewWriter(outputFile) } + defer writer.Flush() scanner := bufio.NewScanner(inputFile) for scanner.Scan() { @@ -100,21 +101,14 @@ func main() { log.Println("Line:", line) log.Fatal("Encoding error:", err) } - if useStdout { - fmt.Println(string(outLine)) - } else { - // fmt.Println(string(outLine)) - n, err := writer.WriteString(string(outLine) + "\n") - if err != nil { - log.Fatal(err) - } - if n == 0 { - log.Fatal("Nothing was written", n) - } + // fmt.Println(string(outLine)) + n, err := writer.WriteString(string(outLine) + "\n") + if err != nil { + log.Fatal(err) + } + if n == 0 { + log.Fatal("Nothing was written", n) } - } - if useStdout == false { - writer.Flush() } } @@ -146,6 +140,7 @@ func loadData(fname string) map[string]bool { } func (s *sanitizer) sanitizeRecord(record *common.Record) error 
{ + // hash directories of the paths record.Pwd = s.sanitizePath(record.Pwd) record.RealPwd = s.sanitizePath(record.RealPwd) record.PwdAfter = s.sanitizePath(record.PwdAfter) @@ -155,6 +150,7 @@ func (s *sanitizer) sanitizeRecord(record *common.Record) error { record.Home = s.sanitizePath(record.Home) record.ShellEnv = s.sanitizePath(record.ShellEnv) + // hash the most sensitive info, do not tokenize record.Host = s.hashToken(record.Host) record.Login = s.hashToken(record.Login) record.MachineId = s.hashToken(record.MachineId) @@ -177,6 +173,7 @@ func (s *sanitizer) sanitizeRecord(record *common.Record) error { log.Fatal("Cmd:", record.CmdLine, "; sanitization error:", err) } + // add a flag to signify that the record has been sanitized record.Sanitized = true return nil } @@ -347,23 +344,24 @@ func (s *sanitizer) sanitizeCmdToken(token string) (string, error) { } isLettersOrDigits := true - isDigits := true + // isDigits := true isOtherCharacters := true for _, r := range token { if unicode.IsDigit(r) == false && unicode.IsLetter(r) == false { isLettersOrDigits = false - isDigits = false - } - if unicode.IsDigit(r) == false { - isDigits = false + // isDigits = false } + // if unicode.IsDigit(r) == false { + // isDigits = false + // } if unicode.IsDigit(r) || unicode.IsLetter(r) { isOtherCharacters = false } } - if isDigits { - return s.hashNumericToken(token), nil - } + // I decided that I don't want a special sanitization for numbers + // if isDigits { + // return s.hashNumericToken(token), nil + // } if isLettersOrDigits { return s.hashToken(token), nil } From 9f6fe6d60667683802ff74d1fa8e34257b6dbd51 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Fri, 13 Sep 2019 02:23:05 +0200 Subject: [PATCH 38/41] polish sanitization for the release turn some errors into warnings add some option ending characters --- sanitize-history/resh-sanitize-history.go | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git 
a/sanitize-history/resh-sanitize-history.go b/sanitize-history/resh-sanitize-history.go index 32a0262..5ce0581 100644 --- a/sanitize-history/resh-sanitize-history.go +++ b/sanitize-history/resh-sanitize-history.go @@ -179,8 +179,8 @@ func (s *sanitizer) sanitizeRecord(record *common.Record) error { } func (s *sanitizer) sanitizeCmdLine(cmdLine string) (string, error) { - const optionEndingChars = "\"$'\\#[]!><|;{}()*,?~&=`" // all bash control characters and '=' which commonly ends options w/ values - const optionAllowedChars = "-_" // characters commonly found inside of options + const optionEndingChars = "\"$'\\#[]!><|;{}()*,?~&=`:@^/+%." // all bash control characters, '=', ... + const optionAllowedChars = "-_" // characters commonly found inside of options sanCmdLine := "" buff := "" @@ -195,7 +195,7 @@ func (s *sanitizer) sanitizeCmdLine(cmdLine string) (string, error) { switch optionDetected { case true: if unicode.IsSpace(r) || strings.ContainsRune(optionEndingChars, r) { - // whitespace, "=" or ";" ends the option + // whitespace or option ends the option // => add option unsanitized optionDetected = false if len(buff) > 0 { @@ -210,7 +210,8 @@ func (s *sanitizer) sanitizeCmdLine(cmdLine string) (string, error) { if len(buff) > 0 { sanToken, err := s.sanitizeCmdToken(buff) if err != nil { - return cmdLine, err + log.Println("WARN: got error while sanitizing cmdLine:", cmdLine) + // return cmdLine, err } sanCmdLine += sanToken buff = "" @@ -222,12 +223,12 @@ func (s *sanitizer) sanitizeCmdLine(cmdLine string) (string, error) { case false: // split command on all non-letter and non-digit characters if unicode.IsLetter(r) == false && unicode.IsDigit(r) == false { - // TODO: decide if we want to split on "-" and "_" // split token if len(buff) > 0 { sanToken, err := s.sanitizeCmdToken(buff) if err != nil { - return cmdLine, err + log.Println("WARN: got error while sanitizing cmdLine:", cmdLine) + // return cmdLine, err } sanCmdLine += sanToken buff = "" @@ 
-257,7 +258,8 @@ func (s *sanitizer) sanitizeCmdLine(cmdLine string) (string, error) { // sanitize sanToken, err := s.sanitizeCmdToken(buff) if err != nil { - return cmdLine, err + log.Println("WARN: got error while sanitizing cmdLine:", cmdLine) + // return cmdLine, err } sanCmdLine += sanToken return sanCmdLine, nil @@ -358,7 +360,7 @@ func (s *sanitizer) sanitizeCmdToken(token string) (string, error) { isOtherCharacters = false } } - // I decided that I don't want a special sanitization for numbers + // NOTE: I decided that I don't want a special sanitization for numbers // if isDigits { // return s.hashNumericToken(token), nil // } @@ -368,8 +370,9 @@ func (s *sanitizer) sanitizeCmdToken(token string) (string, error) { if isOtherCharacters { return token, nil } - log.Println("token:", token) - return token, errors.New("cmd token is made of mix of letters or digits and other characters") + log.Println("WARN: cmd token is made of mix of letters or digits and other characters; token:", token) + // return token, errors.New("cmd token is made of mix of letters or digits and other characters") + return s.hashToken(token), errors.New("cmd token is made of mix of letters or digits and other characters") } func (s *sanitizer) sanitizeToken(token string) string { From 91296f4dd20759c3213a51af5a6262d2a749deee Mon Sep 17 00:00:00 2001 From: Simon Let Date: Fri, 13 Sep 2019 02:31:19 +0200 Subject: [PATCH 39/41] hide postprocessing fields from record, bump version --- common/resh-common.go | 8 ++++---- version | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/common/resh-common.go b/common/resh-common.go index 1499595..7e91094 100644 --- a/common/resh-common.go +++ b/common/resh-common.go @@ -71,12 +71,12 @@ type Record struct { // added by sanitizatizer Sanitized bool `json:"sanitized"` - CmdLength int `json:"cmdLength"` + CmdLength int `json:"cmdLength,omitempty"` // enriching fields - added "later" - FirstWord string `json:"firstWord"` - Invalid bool 
`json:"invalid"` - SeqSessionID uint64 `json:"seqSessionID"` + FirstWord string `json:"firstWord,omitempty"` + Invalid bool `json:"invalid,omitempty"` + SeqSessionID uint64 `json:"seqSessionID,omitempty"` } // FallbackRecord when record is too old and can't be parsed into regular Record diff --git a/version b/version index 45a1b3f..781dcb0 100644 --- a/version +++ b/version @@ -1 +1 @@ -1.1.2 +1.1.3 From 7de9c50ff086178c77c2a028a614f50678878d4c Mon Sep 17 00:00:00 2001 From: Simon Let Date: Fri, 13 Sep 2019 02:57:22 +0200 Subject: [PATCH 40/41] improve reinstall handling --- shellrc.sh | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/shellrc.sh b/shellrc.sh index 671e8eb..cfb1666 100644 --- a/shellrc.sh +++ b/shellrc.sh @@ -153,10 +153,19 @@ __resh_precmd() { __RESH_PWD_AFTER="$PWD" if [ -n "${__RESH_COLLECT}" ]; then if [ "$__RESH_VERSION" != $(resh-collect -version) ]; then - echo "resh WARNING: You probably just updated RESH - please restart or reload this terminal session (resh version: $(resh-collect -version); resh version of this terminal session: ${__RESH_VERSION})" + source ~/.resh/shellrc + if [ "$__RESH_VERSION" != $(resh-collect -version) ]; then + echo "RESH WARNING: You probably just updated RESH - PLEASE RESTART OR RELOAD THIS TERMINAL SESSION (resh version: $(resh-collect -version); resh version of this terminal session: ${__RESH_VERSION})" + else + echo "RESH INFO: New RESH shellrc script was loaded - if you encounter any issues please restart this terminal session." 
+ fi elif [ "$__RESH_REVISION" != $(resh-collect -revision) ]; then - echo "resh WARNING: You probably just updated RESH - please restart or reload this terminal session (resh version: $(resh-collect -revision); resh version of this terminal session: ${__RESH_REVISION})" - else + source ~/.resh/shellrc + if [ "$__RESH_REVISION" != $(resh-collect -revision) ]; then + echo "RESH WARNING: You probably just updated RESH - PLEASE RESTART OR RELOAD THIS TERMINAL SESSION (resh revision: $(resh-collect -revision); resh revision of this terminal session: ${__RESH_REVISION})" + fi + fi + if [ "$__RESH_VERSION" == $(resh-collect -version) ] && [ "$__RESH_REVISION" == $(resh-collect -revision) ]; then resh-collect -requireVersion "$__RESH_VERSION" \ -requireRevision "$__RESH_REVISION" \ -cmdLine "$__RESH_CMDLINE" \ From 188d8b420493454c11bdcc02599c11ceb7133670 Mon Sep 17 00:00:00 2001 From: Simon Let Date: Fri, 13 Sep 2019 03:07:21 +0200 Subject: [PATCH 41/41] improve readme --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7ddd5ed..74f86d3 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This project is the first phase of my Master project. -It records shell history with rich set of metadata and saves it locally. (device, dir, git, ... see example below) +It records shell history with rich set of metadata and saves it locally. (device, directory, git, time, terminal session pid, ... see example below) It doesn't change the way your shell and your shell history behaves. @@ -17,8 +17,9 @@ If you are not happy with it you can uninstall it with a single command (`rm -rf The ultimate point of my thesis is to provide a context-based replacement/enhancement for bash and zsh shell history. The idea is to: -- Save each command with metadata (device, dir, gitdir, ...) -- Recommend history based on saved metadata (e.g. 
it will be easier to get to commands specific to your project) +- Save each command with metadata (device, directory, git, time, terminal session pid, ... see example below) +- Recommend history based on saved metadata + - e.g. it will be easier to get to commands specific to the project you are currently working on (based on directory, git repository URL, ...) - Provide a simple way to search whole history by command itself and/or metadata (e.g. imagine searching by project, directory, device, ...) - Synchronize history across devices - Provide an API (to make the project easily extensible)