GoHash.go 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. package main
  2. import (
  3. "bufio"
  4. "crypto/sha256"
  5. "encoding/hex"
  6. "encoding/json"
  7. "fmt"
  8. "io"
  9. "io/ioutil"
  10. "log"
  11. "os"
  12. "path/filepath"
  13. "runtime"
  14. "sync"
  15. "time"
  16. flag "github.com/spf13/pflag"
  17. )
  // Fdhashes is the hash record kept for a single folder. It is the value
  // that gets JSON-encoded into the folder's ".fdhashes3" file.
  type Fdhashes struct {
  	// Path is the folder this record belongs to.
  	Path string
  	// Hashes maps a file name to its hex-encoded SHA-256 hash.
  	Hashes map[string]string
  	// Times maps a file name to the modification time recorded when the
  	// file was hashed.
  	Times map[string]time.Time
  	// Dirty is true while the record has changes that are not yet saved.
  	Dirty bool
  }
  25. var hashes map[string]Fdhashes
  26. var wg sync.WaitGroup
  27. var mu sync.RWMutex
  28. var driveLetter string
  29. var rewrite bool
  30. var compare bool
  31. func init() {
  32. flag.BoolVarP(&rewrite, "rewrite", "r", false, "rewrite all fhhashes files.")
  33. flag.BoolVarP(&compare, "compare", "c", false, "compare all file hashes and writing a compartion report.")
  34. }
  35. func main() {
  36. log.Println("starting GoHash")
  37. runtime.GOMAXPROCS(5)
  38. hashes = make(map[string]Fdhashes)
  39. flag.Parse()
  40. myFile := flag.Arg(0)
  41. file, err := os.Stat(myFile)
  42. if os.IsNotExist(err) {
  43. log.Fatalln("File does not exists:", myFile)
  44. }
  45. if file.IsDir() {
  46. log.Println("start with folder:", myFile)
  47. driveLetter = ""
  48. if runtime.GOOS == "windows" {
  49. driveLetter = filepath.VolumeName(myFile) + "/"
  50. }
  51. if compare {
  52. compareFolder(myFile)
  53. } else {
  54. processFolder(myFile)
  55. fmt.Println("waiting")
  56. wg.Wait()
  57. saveAllHashFiles()
  58. }
  59. } else {
  60. log.Printf("file %s has hash %s\n", myFile, getSha256Hash(myFile))
  61. }
  62. log.Println("done")
  63. }
  // getSha256Hash streams the file at fileStr through SHA-256 and returns the
  // digest as a lowercase hex string. If the file cannot be opened or read,
  // the program is terminated via log.Fatal.
  func getSha256Hash(fileStr string) string {
  	src, openErr := os.Open(fileStr)
  	if openErr != nil {
  		log.Fatal(openErr)
  	}
  	defer src.Close()

  	digest := sha256.New()
  	_, copyErr := io.Copy(digest, src)
  	if copyErr != nil {
  		log.Fatal(copyErr)
  	}

  	sum := digest.Sum(nil)
  	return hex.EncodeToString(sum)
  }
  // lock1 is taken around accesses to a record's Hashes map and lock2 around
  // accesses to its Times map in outputHash.
  var (
  	lock1 sync.RWMutex
  	lock2 sync.RWMutex
  )
  78. func outputHash(fileStr string) {
  79. var hashFile Fdhashes
  80. doHash := true
  81. defer wg.Done()
  82. dir, fileName := filepath.Split(fileStr)
  83. if fileName == ".fdhashes3" {
  84. return
  85. }
  86. // checking if hash is present
  87. mu.Lock()
  88. hashFile, ok := hashes[dir]
  89. if !ok {
  90. _, err := os.Stat(dir + ".fdhashes3")
  91. if os.IsNotExist(err) {
  92. hashFile = Fdhashes{Path: dir, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: true}
  93. } else {
  94. hashFile = loadHashfile(dir + ".fdhashes3")
  95. }
  96. hashes[dir] = hashFile
  97. }
  98. lock1.RLock()
  99. _, ok = hashFile.Hashes[fileName]
  100. lock1.RUnlock()
  101. mu.Unlock()
  102. doHash = !ok
  103. // checking if dattime is identically
  104. file, _ := os.Stat(fileStr)
  105. time := file.ModTime()
  106. lock2.RLock()
  107. savedTime, ok := hashFile.Times[fileName]
  108. lock2.RUnlock()
  109. if !time.Equal(savedTime) || !ok {
  110. doHash = true
  111. }
  112. if doHash {
  113. hash := getSha256Hash(fileStr)
  114. mu.Lock()
  115. lock1.Lock()
  116. hashFile.Hashes[fileName] = hash
  117. lock1.Unlock()
  118. lock2.Lock()
  119. hashFile.Times[fileName] = time
  120. lock2.Unlock()
  121. saveHashfile(&hashFile)
  122. hashes[dir] = hashFile
  123. mu.Unlock()
  124. log.Printf("file \"%s\" has hash \"%s\"\n", fileStr, hash)
  125. }
  126. }
  // Counters that the folder walks reset when they start.
  var (
  	// count is the number of entries processFolder has visited.
  	count int
  	// addWork is the number of hashing goroutines processFolder has started
  	// since it last saved the hash files.
  	addWork int
  )
  129. func processFolder(folder string) {
  130. count = 0
  131. addWork = 0
  132. err := filepath.Walk(folder, func(path string, info os.FileInfo, err error) error {
  133. count++
  134. if (count % 100) == 0 {
  135. fmt.Print(".")
  136. }
  137. if (count % 10000) == 0 {
  138. fmt.Println()
  139. }
  140. filename := info.Name()
  141. if filename[0:1] != "." {
  142. if info.IsDir() {
  143. fmt.Println(path)
  144. }
  145. if !info.IsDir() {
  146. addWork++
  147. wg.Add(1)
  148. go outputHash(path)
  149. if addWork > 1000 {
  150. fmt.Println("x")
  151. wg.Wait()
  152. saveAllHashFiles()
  153. addWork = 0
  154. }
  155. }
  156. }
  157. return nil
  158. })
  159. if err != nil {
  160. panic(err)
  161. }
  162. }
  163. func saveHashfile(hashFile *Fdhashes) {
  164. hashFile.Dirty = true
  165. }
  166. func saveAllHashFiles() {
  167. hashList := make([]Fdhashes, 0)
  168. for _, hashFile := range hashes {
  169. if hashFile.Dirty {
  170. hashFile.Dirty = false
  171. b, err := json.Marshal(hashFile)
  172. if err != nil {
  173. fmt.Println(err)
  174. return
  175. }
  176. err = ioutil.WriteFile(hashFile.Path+".fdhashes3", b, 0644)
  177. if err != nil {
  178. panic(err)
  179. }
  180. hashList = append(hashList, hashFile)
  181. }
  182. }
  183. hashes = make(map[string]Fdhashes)
  184. for _, hashFile := range hashList {
  185. hashes[hashFile.Path] = hashFile
  186. }
  187. }
  188. func loadHashfile(fileStr string) Fdhashes {
  189. dir, _ := filepath.Split(fileStr)
  190. data := Fdhashes{Path: dir, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: false}
  191. if !rewrite {
  192. file, err := ioutil.ReadFile(fileStr)
  193. if err != nil {
  194. panic(err)
  195. }
  196. err = json.Unmarshal([]byte(file), &data)
  197. if err != nil {
  198. log.Printf("can't read file %s", fileStr)
  199. }
  200. }
  201. if data.Path != dir {
  202. data.Path = dir
  203. data.Dirty = true
  204. }
  205. return data
  206. }
  207. func compareFolder(folder string) {
  208. loadAllHashFiles(folder)
  209. size := len(hashes)
  210. f, err := os.Create("report.txt")
  211. check(err)
  212. w := bufio.NewWriter(f)
  213. count := 0
  214. for _, hashFile := range hashes {
  215. count++
  216. fmt.Printf("%d (%d) checking: %s\n", count, size, hashFile.Path)
  217. // fmt.Printf("checking: %s\n", hashFile.Path)
  218. for filename, hash := range hashFile.Hashes {
  219. if value, found := search(hash, filename, hashFile.Path); found {
  220. w.WriteString("found identically hash\n")
  221. w.WriteString(fmt.Sprintf(" src: %s/%s\n", hashFile.Path, filename))
  222. w.WriteString(fmt.Sprintf(" dest: %s\n", value))
  223. w.Flush()
  224. }
  225. }
  226. }
  227. }
  228. func search(srcHash string, exFilename string, exFilepath string) (value string, found bool) {
  229. for _, hashFile := range hashes {
  230. for filename, hash := range hashFile.Hashes {
  231. if (filename != exFilename) && (hashFile.Path != exFilepath) {
  232. if hash == srcHash {
  233. value += fmt.Sprintf("%s/%s;", hashFile.Path, filename)
  234. found = true
  235. }
  236. }
  237. }
  238. }
  239. return
  240. }
  241. func loadAllHashFiles(folder string) {
  242. count = 0
  243. addWork = 0
  244. err := filepath.Walk(folder, func(path string, info os.FileInfo, err error) error {
  245. //filename := info.Name()
  246. if info.IsDir() {
  247. fmt.Print(".")
  248. hashFile, ok := hashes[path]
  249. if !ok {
  250. _, err := os.Stat(path + "/.fdhashes3")
  251. if os.IsNotExist(err) {
  252. hashFile = Fdhashes{Path: path, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: true}
  253. } else {
  254. hashFile = loadHashfile(path + "/.fdhashes3")
  255. }
  256. hashes[path] = hashFile
  257. }
  258. }
  259. return nil
  260. })
  261. if err != nil {
  262. panic(err)
  263. }
  264. fmt.Printf("\nfound %d hash files.\n", len(hashes))
  265. }
  // check panics with e when e is non-nil and does nothing otherwise.
  func check(e error) {
  	if e == nil {
  		return
  	}
  	panic(e)
  }