package main import ( "bufio" "crypto/sha256" "encoding/hex" "encoding/json" "fmt" "io" "io/ioutil" "log" "os" "path/filepath" "sort" "strings" "sync" "time" "code.cloudfoundry.org/bytefmt" flag "github.com/spf13/pflag" "wkla.no-ip.biz/gogs/Willie/GoHash/pkg/hash" ) var hashes map[string]hash.Fdhashes var ignoreLines []string var mu sync.RWMutex var rewrite bool var prune bool var outputJson bool var report string var ignores string func init() { flag.BoolVarP(&rewrite, "rewrite", "r", false, "rewrite all fhhashes files.") flag.StringVarP(&report, "equals", "e", "", "compare all file hashes and writing a equlatity report.") flag.BoolVarP(&prune, "prune", "p", false, "checking all fdhashes files.") flag.BoolVarP(&outputJson, "json", "j", false, "output as json.") flag.StringVarP(&ignores, "ignores", "i", "", "list of files to ignore in report.") } func main() { log.Println("starting GoHash") ignoreLines = make([]string, 0) hashes = make(map[string]hash.Fdhashes) flag.Parse() myFile := flag.Arg(0) if rewrite { log.Println("rewrite active") } if prune { log.Println("prune active") } if outputJson { log.Println("output json format active") } if report != "" { log.Println("report active, file: ", report) } if ignores != "" { log.Println("ignores file: ", ignores) } file, err := os.Stat(myFile) if os.IsNotExist(err) { log.Fatalln("File does not exists:", myFile) } if file.IsDir() { log.Println("start with folder:", myFile) if report != "" { compareFolder(myFile) } else { processFolder(myFile) saveAllHashFiles() } } else { log.Printf("file %s has hash %s\n", myFile, getSha256Hash(myFile)) } log.Println("done") } func getSha256Hash(fileStr string) string { f, err := os.Open(fileStr) if err != nil { log.Fatal(err) } defer f.Close() h := sha256.New() if _, err := io.Copy(h, f); err != nil { log.Fatal(err) } return hex.EncodeToString(h.Sum(nil)) } var lock1 = sync.RWMutex{} var lock2 = sync.RWMutex{} func calculateHash(fileStr string) { var hashFile hash.Fdhashes doHash := true dir, fileName := filepath.Split(fileStr) if fileName == ".fdhashes3" { return } // checking if hash is present mu.Lock() hashFile, ok := hashes[dir] if !ok { _, err := os.Stat(dir + ".fdhashes3") if os.IsNotExist(err) || rewrite { hashFile = hash.Fdhashes{Path: dir, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: true} } else { hf, err := hash.LoadHashfile(dir + ".fdhashes3") check(err) hashFile = *hf } hashes[dir] = hashFile } lock1.RLock() _, ok = hashFile.Hashes[fileName] lock1.RUnlock() mu.Unlock() doHash = !ok // checking if dattime is identically file, _ := os.Stat(fileStr) time := file.ModTime() lock2.RLock() savedTime, ok := hashFile.Times[fileName] lock2.RUnlock() if !time.Equal(savedTime) || !ok { doHash = true } if doHash { log.Printf("starting %s\n", fileStr) hash := getSha256Hash(fileStr) log.Printf("ready %s\n", fileStr) mu.Lock() lock1.Lock() hashFile.Hashes[fileName] = hash lock1.Unlock() lock2.Lock() hashFile.Times[fileName] = time lock2.Unlock() dirtyHashfile(&hashFile) hashes[dir] = hashFile mu.Unlock() log.Printf("file \"%s\" has hash \"%s\"\n", fileStr, hash) } } var count int var addWork int var startTime time.Time func processFolder(folder string) { startTime = time.Now() count = 0 addWork = 0 err := filepath.Walk(folder, func(path string, info os.FileInfo, err error) error { count++ if (count % 100) == 0 { fmt.Print(".") } if (count % 10000) == 0 { fmt.Println() } filename := info.Name() if filename[0:1] != "." { if info.IsDir() { fmt.Println(path) if prune { pruneHash(path) } } if !info.IsDir() { addWork++ calculateHash(path) if time.Since(startTime).Seconds() > 10.0 { startTime = time.Now() saveAllHashFiles() addWork = 0 } } } return nil }) if err != nil { panic(err) } } /* delete unused hash values from the hash file */ func pruneHash(dir string) { _, err := os.Stat(dir + "/.fdhashes3") if !os.IsNotExist(err) { hashFile, err := hash.LoadHashfile(dir + "/.fdhashes3") check(err) for filename := range hashFile.Hashes { _, err := os.Stat(dir + "/" + filename) if os.IsNotExist(err) { delete(hashFile.Hashes, filename) delete(hashFile.Times, filename) hashFile.Dirty = true } } for filename := range hashFile.Times { _, err := os.Stat(dir + "/" + filename) if os.IsNotExist(err) { delete(hashFile.Hashes, filename) delete(hashFile.Times, filename) hashFile.Dirty = true } } saveHashfile(hashFile) } } func dirtyHashfile(hashFile *hash.Fdhashes) { hashFile.Dirty = true } func saveAllHashFiles() { hashList := make([]hash.Fdhashes, 0) for _, hashFile := range hashes { if hashFile.Dirty { saveHashfile(&hashFile) hashList = append(hashList, hashFile) } } hashes = make(map[string]hash.Fdhashes) for _, hashFile := range hashList { hashes[hashFile.Path] = hashFile } } func saveHashfile(hashFile *hash.Fdhashes) { if hashFile.Dirty { hashFile.Dirty = false b, err := json.Marshal(hashFile) if err != nil { fmt.Println(err) return } err = ioutil.WriteFile(hashFile.Path+"/.fdhashes3", b, 0644) if err != nil { panic(err) } } } func compareFolder(folder string) { loadIgnoreFile(ignores) loadAllHashFiles(folder) // putting all hashes into one big map key = hash, value list of files with that hash size := len(hashes) index := make(map[string][]string) count = 0 for _, hashFile := range hashes { count++ if count%100 == 0 { fmt.Printf("%d (%d) merging\n", count, size) } for filename, hash := range hashFile.Hashes { values := index[hash] if values == nil { values = make([]string, 0) } filepath := fmt.Sprintf("%s/%s", hashFile.Path, filename) pos := sort.SearchStrings(ignoreLines, filepath) if pos == len(ignoreLines) { _, err := os.Stat(filepath) if err == nil { values = append(values, filepath) index[hash] = values } } } } // sorting list of files for every hash and deleting hashes with only 1 entry size = len(index) myHashes := make([]string, 0) count = 0 for hash, values := range index { count++ if count%100 == 0 { fmt.Printf("%d (%d) sorting\n", count, size) } if len(values) > 1 { sort.Strings(values) index[hash] = values myHashes = append(myHashes, hash) // for _, filename := range values { // fmt.Printf(" %s\n", filename) // } } else { delete(index, hash) } } sort.Slice(myHashes, func(i, j int) bool { return index[myHashes[i]][0] < index[myHashes[j]][0] }) if outputJson { size = len(index) var filesize int64 fileCount := 0 for _, hash := range myHashes { values := index[hash] count++ if count%100 == 0 { fmt.Printf("%d (%d) checking\n", count, size) } if len(values) > 1 { info, err := os.Stat(values[0]) if err == nil { fmt.Printf("found identically hash: %s size: %d\n", hash, info.Size()) filesize += int64(len(values)-1) * info.Size() } fileCount += len(values) - 1 for _, filename := range values { fmt.Printf(" %s\n", filename) } } else { delete(index, hash) } } b, err := json.Marshal(index) if err != nil { fmt.Println(err) return } err = ioutil.WriteFile(report, b, 0644) if err != nil { panic(err) } } else { size := len(index) f, err := os.Create(report) check(err) w := bufio.NewWriter(f) count := 0 var filesize int64 fileCount := 0 for _, hash := range myHashes { values := index[hash] count++ if count%100 == 0 { fmt.Printf("%d (%d) checking\n", count, size) } if len(values) > 1 { info, err := os.Stat(values[0]) if err == nil { w.WriteString(fmt.Sprintf("found identically hash: size: %d\n", info.Size())) filesize += int64(len(values)-1) * info.Size() } fileCount += len(values) - 1 for _, filename := range values { w.WriteString(fmt.Sprintf(" %s\n", filename)) } w.Flush() } } w.WriteString(fmt.Sprintf("can save up to %s on %d files\n", bytefmt.ByteSize(uint64(filesize)), fileCount)) w.Flush() } } func loadIgnoreFile(filename string) { content, err := ioutil.ReadFile(filename) if err == nil { ignoreLines = strings.Split(string(content), "\n") sort.Strings(ignoreLines) } } func search(srcHash string, exFilename string, exFilepath string) (value string, found bool) { for _, hashFile := range hashes { for filename, hash := range hashFile.Hashes { if (filename != exFilename) && (hashFile.Path != exFilepath) { if hash == srcHash { value += fmt.Sprintf("%s/%s;", hashFile.Path, filename) found = true } } } } return } func loadAllHashFiles(folder string) { count = 0 addWork = 0 err := filepath.Walk(folder, func(path string, info os.FileInfo, err error) error { if info != nil { if info.IsDir() { count++ fmt.Print(".") if (count % 100) == 0 { fmt.Println() } hashFile, ok := hashes[path] if !ok { _, err := os.Stat(path + "/.fdhashes3") if os.IsNotExist(err) { hashFile = hash.Fdhashes{Path: path, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: true} } else { hf, err := hash.LoadHashfile(path + "/.fdhashes3") check(err) hashFile = *hf } hashes[path] = hashFile } } } return nil }) check(err) fmt.Printf("\nfound %d hash files.\n", len(hashes)) } func check(e error) { if e != nil { panic(e) } }