| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399 | package mainimport (	"bufio"	"crypto/sha256"	"encoding/hex"	"encoding/json"	"fmt"	"io"	"io/ioutil"	"log"	"os"	"path/filepath"	"runtime"	"sync"	"time"	"code.cloudfoundry.org/bytefmt"	flag "github.com/spf13/pflag")// Fdhashes struct for holding all informations about one folder.type Fdhashes struct {	Path   string	Hashes map[string]string	Times  map[string]time.Time	Dirty  bool}var hashes map[string]Fdhashesvar mu sync.RWMutexvar driveLetter stringvar rewrite boolvar prune boolvar outputJson boolvar report stringfunc init() {	flag.BoolVarP(&rewrite, "rewrite", "r", false, "rewrite all fhhashes files.")	flag.StringVarP(&report, "equals", "e", "", "compare all file hashes and writing a equlatity report.")	flag.BoolVarP(&prune, "prune", "p", false, "checking all fdhashes files.")	flag.BoolVarP(&outputJson, "json", "j", false, "output as json.")}func main() {	log.Println("starting GoHash")	hashes = make(map[string]Fdhashes)	flag.Parse()	myFile := flag.Arg(0)	file, err := os.Stat(myFile)	if os.IsNotExist(err) {		log.Fatalln("File does not exists:", myFile)	}	if file.IsDir() {		log.Println("start with folder:", myFile)		driveLetter = ""		if runtime.GOOS == "windows" {			driveLetter = filepath.VolumeName(myFile) + "/"		}		if report != "" {			compareFolder(myFile)		} else {			processFolder(myFile)			saveAllHashFiles()		}	} else {		log.Printf("file %s has hash %s\n", myFile, getSha256Hash(myFile))	}	log.Println("done")}func getSha256Hash(fileStr string) string {	f, err := os.Open(fileStr)	if err != nil {		log.Fatal(err)	}	defer f.Close()	h := sha256.New()	if _, err := io.Copy(h, f); err != nil {		log.Fatal(err)	}	return hex.EncodeToString(h.Sum(nil))}var lock1 = sync.RWMutex{}var lock2 = sync.RWMutex{}func calculateHash(fileStr string) {	var hashFile Fdhashes	doHash := true	dir, fileName := filepath.Split(fileStr)	if fileName == ".fdhashes3" {		return	}	// checking if hash is present	mu.Lock()	hashFile, ok := hashes[dir]	if !ok {		_, err := os.Stat(dir + ".fdhashes3")		if os.IsNotExist(err) {			hashFile = Fdhashes{Path: dir, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: true}		} else {			hashFile = loadHashfile(dir + ".fdhashes3")		}		hashes[dir] = hashFile	}	lock1.RLock()	_, ok = hashFile.Hashes[fileName]	lock1.RUnlock()	mu.Unlock()	doHash = !ok	// checking if dattime is identically	file, _ := os.Stat(fileStr)	time := file.ModTime()	lock2.RLock()	savedTime, ok := hashFile.Times[fileName]	lock2.RUnlock()	if !time.Equal(savedTime) || !ok {		doHash = true	}	if doHash {		log.Printf("starting %s\n", fileStr)		hash := getSha256Hash(fileStr)		log.Printf("ready %s\n", fileStr)		mu.Lock()		lock1.Lock()		hashFile.Hashes[fileName] = hash		lock1.Unlock()		lock2.Lock()		hashFile.Times[fileName] = time		lock2.Unlock()		dirtyHashfile(&hashFile)		hashes[dir] = hashFile		mu.Unlock()		log.Printf("file \"%s\" has hash \"%s\"\n", fileStr, hash)	}}var count intvar addWork intvar startTime time.Timefunc processFolder(folder string) {	startTime = time.Now()	count = 0	addWork = 0	err := filepath.Walk(folder, func(path string, info os.FileInfo, err error) error {		count++		if (count % 100) == 0 {			fmt.Print(".")		}		if (count % 10000) == 0 {			fmt.Println()		}		filename := info.Name()		if filename[0:1] != "." {			if info.IsDir() {				fmt.Println(path)				if prune {					pruneHash(path)				}			}			if !info.IsDir() {				addWork++				calculateHash(path)				if time.Since(startTime).Seconds() > 10.0 {					startTime = time.Now()					saveAllHashFiles()					addWork = 0				}			}		}		return nil	})	if err != nil {		panic(err)	}}/* delete unused hash values from the hash file */func pruneHash(dir string) {	_, err := os.Stat(dir + "/.fdhashes3")	if !os.IsNotExist(err) {		hashFile := loadHashfile(dir + "/.fdhashes3")		for filename := range hashFile.Hashes {			_, err := os.Stat(dir + "/" + filename)			if os.IsNotExist(err) {				delete(hashFile.Hashes, filename)				delete(hashFile.Times, filename)				hashFile.Dirty = true			}		}		for filename := range hashFile.Times {			_, err := os.Stat(dir + "/" + filename)			if os.IsNotExist(err) {				delete(hashFile.Hashes, filename)				delete(hashFile.Times, filename)				hashFile.Dirty = true			}		}		saveHashfile(&hashFile)	}}func dirtyHashfile(hashFile *Fdhashes) {	hashFile.Dirty = true}func saveAllHashFiles() {	hashList := make([]Fdhashes, 0)	for _, hashFile := range hashes {		if hashFile.Dirty {			saveHashfile(&hashFile)			hashList = append(hashList, hashFile)		}	}	hashes = make(map[string]Fdhashes)	for _, hashFile := range hashList {		hashes[hashFile.Path] = hashFile	}}func saveHashfile(hashFile *Fdhashes) {	if hashFile.Dirty {		hashFile.Dirty = false		b, err := json.Marshal(hashFile)		if err != nil {			fmt.Println(err)			return		}		err = ioutil.WriteFile(hashFile.Path+"/.fdhashes3", b, 0644)		if err != nil {			panic(err)		}	}}func loadHashfile(fileStr string) Fdhashes {	dir, _ := filepath.Split(fileStr)	dir = filepath.ToSlash(filepath.Clean(dir))	data := Fdhashes{Path: dir, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: false}	if !rewrite {		file, err := ioutil.ReadFile(fileStr)		if err != nil {			panic(err)		}		err = json.Unmarshal([]byte(file), &data)		if err != nil {			log.Printf("can't read file %s", fileStr)		}	}	if data.Path != dir {		data.Path = dir		data.Dirty = true	}	return data}func compareFolder(folder string) {	loadAllHashFiles(folder)	index := make(map[string][]string)	for _, hashFile := range hashes {		for filename, hash := range hashFile.Hashes {			values := index[hash]			if values == nil {				values = make([]string, 0)			}			values = append(values, fmt.Sprintf("%s/%s", hashFile.Path, filename))			index[hash] = values		}	}	if outputJson {		size := len(index)		var filesize int64		fileCount := 0		for hash, values := range index {			count++			if count%100 == 0 {				fmt.Printf("%d (%d) checking\n", count, size)			}			if len(values) > 1 {				info, err := os.Stat(values[0])				if err == nil {					fmt.Printf("found identically hash: %s size: %d\n", hash, info.Size())					filesize += int64(len(values)-1) * info.Size()				}				fileCount += len(values) - 1				for _, filename := range values {					fmt.Printf("  %s\n", filename)				}			} else {				delete(index, hash)			}		}		b, err := json.Marshal(index)		if err != nil {			fmt.Println(err)			return		}		err = ioutil.WriteFile(report, b, 0644)		if err != nil {			panic(err)		}	} else {		size := len(index)		f, err := os.Create(report)		check(err)		w := bufio.NewWriter(f)		count := 0		var filesize int64		fileCount := 0		for _, values := range index {			count++			if count%100 == 0 {				fmt.Printf("%d (%d) checking\n", count, size)			}			if len(values) > 1 {				info, err := os.Stat(values[0])				if err == nil {					w.WriteString(fmt.Sprintf("found identically hash: size: %d\n", info.Size()))					filesize += int64(len(values)-1) * info.Size()				}				fileCount += len(values) - 1				for _, filename := range values {					w.WriteString(fmt.Sprintf("  %s\n", filename))				}				w.Flush()			}		}		w.WriteString(fmt.Sprintf("can save up to %s on %d files\n", bytefmt.ByteSize(uint64(filesize)), fileCount))		w.Flush()	}}func search(srcHash string, exFilename string, exFilepath string) (value string, found bool) {	for _, hashFile := range hashes {		for filename, hash := range hashFile.Hashes {			if (filename != exFilename) && (hashFile.Path != exFilepath) {				if hash == srcHash {					value += fmt.Sprintf("%s/%s;", hashFile.Path, filename)					found = true				}			}		}	}	return}func loadAllHashFiles(folder string) {	count = 0	addWork = 0	err := filepath.Walk(folder, func(path string, info os.FileInfo, err error) error {		if info != nil {			if info.IsDir() {				count++				fmt.Print(".")				if (count % 100) == 0 {					fmt.Println()				}				hashFile, ok := hashes[path]				if !ok {					_, err := os.Stat(path + "/.fdhashes3")					if os.IsNotExist(err) {						hashFile = Fdhashes{Path: path, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: true}					} else {						hashFile = loadHashfile(path + "/.fdhashes3")					}					hashes[path] = hashFile				}			}		}		return nil	})	if err != nil {		panic(err)	}	fmt.Printf("\nfound %d hash files.\n", len(hashes))}func check(e error) {	if e != nil {		panic(e)	}}
 |