GoHash.go

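// GoHash walks a directory tree, computes SHA-256 hashes for all files and
// caches them in a .fdhashes3 file per folder; with -c/--compare it writes a
// report.txt listing files that share the same hash.
//
// Example invocations (assuming the binary is built as "gohash"; the name is
// only illustrative):
//
//	gohash /data/photos      hash all files below /data/photos
//	gohash -r /data/photos   rewrite all .fdhashes3 cache files
//	gohash -c /data/photos   compare cached hashes and write report.txt
//	gohash some/file.jpg     print the SHA-256 hash of a single file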
package main

import (
	"bufio"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"runtime"
	"sync"
	"time"

	"code.cloudfoundry.org/bytefmt"
	flag "github.com/spf13/pflag"
)
// Fdhashes holds all hash information about one folder.
type Fdhashes struct {
	Path   string
	Hashes map[string]string
	Times  map[string]time.Time
	Dirty  bool
}
var hashes map[string]Fdhashes // per-folder hash data, keyed by directory path
var wg sync.WaitGroup
var mu sync.RWMutex // guards the hashes map
var driveLetter string
var rewrite bool
var compare bool
func init() {
	flag.BoolVarP(&rewrite, "rewrite", "r", false, "rewrite all fdhashes files.")
	flag.BoolVarP(&compare, "compare", "c", false, "compare all file hashes and write a comparison report.")
}
func main() {
	log.Println("starting GoHash")
	runtime.GOMAXPROCS(5)
	hashes = make(map[string]Fdhashes)
	flag.Parse()
	myFile := flag.Arg(0)
	file, err := os.Stat(myFile)
	if os.IsNotExist(err) {
		log.Fatalln("file does not exist:", myFile)
	}
	if err != nil {
		log.Fatalln("can't access file:", myFile, err)
	}
	if file.IsDir() {
		log.Println("start with folder:", myFile)
		driveLetter = ""
		if runtime.GOOS == "windows" {
			driveLetter = filepath.VolumeName(myFile) + "/"
		}
		if compare {
			compareFolder(myFile)
		} else {
			processFolder(myFile)
			fmt.Println("waiting")
			wg.Wait()
			saveAllHashFiles()
		}
	} else {
		log.Printf("file %s has hash %s\n", myFile, getSha256Hash(myFile))
	}
	log.Println("done")
}
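// getSha256Hash returns the hex-encoded SHA-256 digest of the file's contents.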
func getSha256Hash(fileStr string) string {
	f, err := os.Open(fileStr)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()
	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		log.Fatal(err)
	}
	return hex.EncodeToString(h.Sum(nil))
}
var lock1 = sync.RWMutex{} // guards the Hashes maps
var lock2 = sync.RWMutex{} // guards the Times maps
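// outputHash computes the SHA-256 hash of one file and records it in the
// folder's Fdhashes entry. The hash is skipped when it is already cached and
// the file's modification time has not changed.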
func outputHash(fileStr string) {
	var hashFile Fdhashes
	doHash := true
	defer wg.Done()
	dir, fileName := filepath.Split(fileStr)
	if fileName == ".fdhashes3" {
		return
	}
	// check if a hash for this file is already present
	mu.Lock()
	hashFile, ok := hashes[dir]
	if !ok {
		_, err := os.Stat(dir + ".fdhashes3")
		if os.IsNotExist(err) {
			hashFile = Fdhashes{Path: dir, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: true}
		} else {
			hashFile = loadHashfile(dir + ".fdhashes3")
		}
		hashes[dir] = hashFile
	}
	lock1.RLock()
	_, ok = hashFile.Hashes[fileName]
	lock1.RUnlock()
	mu.Unlock()
	doHash = !ok
	// check whether the modification time is unchanged
	file, err := os.Stat(fileStr)
	if err != nil {
		log.Printf("can't stat file %s: %v", fileStr, err)
		return
	}
	modTime := file.ModTime()
	lock2.RLock()
	savedTime, ok := hashFile.Times[fileName]
	lock2.RUnlock()
	if !ok || !modTime.Equal(savedTime) {
		doHash = true
	}
	if doHash {
		hash := getSha256Hash(fileStr)
		mu.Lock()
		lock1.Lock()
		hashFile.Hashes[fileName] = hash
		lock1.Unlock()
		lock2.Lock()
		hashFile.Times[fileName] = modTime
		lock2.Unlock()
		saveHashfile(&hashFile)
		hashes[dir] = hashFile
		mu.Unlock()
		log.Printf("file %q has hash %q\n", fileStr, hash)
	}
}
var count int   // number of entries visited during a walk
var addWork int // hash jobs queued since the last flush to disk
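// processFolder walks the folder tree and starts a goroutine per file to
// compute its hash. After roughly 1000 queued files it waits for the workers
// and flushes the dirty hash files to disk.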
func processFolder(folder string) {
	count = 0
	addWork = 0
	err := filepath.Walk(folder, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		count++
		if (count % 100) == 0 {
			fmt.Print(".")
		}
		if (count % 10000) == 0 {
			fmt.Println()
		}
		filename := info.Name()
		if filename[0:1] != "." {
			if info.IsDir() {
				fmt.Println(path)
			}
			if !info.IsDir() {
				addWork++
				wg.Add(1)
				go outputHash(path)
				if addWork > 1000 {
					fmt.Println("x")
					wg.Wait()
					saveAllHashFiles()
					addWork = 0
				}
			}
		}
		return nil
	})
	if err != nil {
		panic(err)
	}
}
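// saveHashfile only marks the hash file as dirty; the actual write to disk
// happens in saveAllHashFiles.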
func saveHashfile(hashFile *Fdhashes) {
	hashFile.Dirty = true
}
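// saveAllHashFiles writes every dirty Fdhashes entry as JSON to the .fdhashes3
// file in its folder and rebuilds the in-memory map from the entries that were
// just written.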
func saveAllHashFiles() {
	hashList := make([]Fdhashes, 0)
	for _, hashFile := range hashes {
		if hashFile.Dirty {
			hashFile.Dirty = false
			b, err := json.Marshal(hashFile)
			if err != nil {
				fmt.Println(err)
				return
			}
			err = ioutil.WriteFile(hashFile.Path+".fdhashes3", b, 0644)
			if err != nil {
				panic(err)
			}
			hashList = append(hashList, hashFile)
		}
	}
	hashes = make(map[string]Fdhashes)
	for _, hashFile := range hashList {
		hashes[hashFile.Path] = hashFile
	}
}
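// loadHashfile reads a .fdhashes3 JSON file into an Fdhashes struct. With the
// rewrite flag set, the existing contents are ignored and an empty entry is
// returned instead.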
func loadHashfile(fileStr string) Fdhashes {
	dir, _ := filepath.Split(fileStr)
	data := Fdhashes{Path: dir, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: false}
	if !rewrite {
		file, err := ioutil.ReadFile(fileStr)
		if err != nil {
			panic(err)
		}
		err = json.Unmarshal(file, &data)
		if err != nil {
			log.Printf("can't parse file %s", fileStr)
		}
	}
	if data.Path != dir {
		data.Path = dir
		data.Dirty = true
	}
	return data
}
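// compareFolder loads all hash files under folder, builds an index from hash
// to file paths, and writes report.txt listing files with identical hashes
// together with the disk space the duplicates occupy.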
func compareFolder(folder string) {
	loadAllHashFiles(folder)

	index := make(map[string][]string)
	for _, hashFile := range hashes {
		for filename, hash := range hashFile.Hashes {
			values := index[hash]
			if values == nil {
				values = make([]string, 0)
			}
			values = append(values, fmt.Sprintf("%s/%s", hashFile.Path, filename))
			index[hash] = values
		}
	}

	size := len(index)
	f, err := os.Create("report.txt")
	check(err)
	defer f.Close()
	w := bufio.NewWriter(f)
	count := 0
	var filesize int64
	fileCount := 0
	for _, values := range index {
		count++
		if count%100 == 0 {
			fmt.Printf("%d (%d) checking\n", count, size)
		}
		if len(values) > 1 {
			info, err := os.Stat(values[0])
			if err == nil {
				w.WriteString(fmt.Sprintf("found identical hash: size: %d\n", info.Size()))
				filesize += int64(len(values)-1) * info.Size()
			}
			fileCount += len(values) - 1
			for _, filename := range values {
				w.WriteString(fmt.Sprintf(" %s\n", filename))
			}
			w.Flush()
		}
	}
	w.WriteString(fmt.Sprintf("can save up to %s on %d files\n", bytefmt.ByteSize(uint64(filesize)), fileCount))
	w.Flush()
}
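// compareFolder2 is an alternative comparison (not called from main in this
// file) that, for every file, searches all other hash files for the same hash
// and reports the matches.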
func compareFolder2(folder string) {
	loadAllHashFiles(folder)
	size := len(hashes)
	f, err := os.Create("report.txt")
	check(err)
	defer f.Close()
	w := bufio.NewWriter(f)
	count := 0
	for _, hashFile := range hashes {
		count++
		fmt.Printf("%d (%d) checking: %s\n", count, size, hashFile.Path)
		for filename, hash := range hashFile.Hashes {
			if value, found := search(hash, filename, hashFile.Path); found {
				w.WriteString("found identical hash\n")
				w.WriteString(fmt.Sprintf(" src: %s/%s\n", hashFile.Path, filename))
				w.WriteString(fmt.Sprintf(" dest: %s\n", value))
				w.Flush()
			}
		}
	}
}
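// search returns, as a semicolon-separated list, all files whose hash equals
// srcHash but whose name and folder both differ from the given file.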
func search(srcHash string, exFilename string, exFilepath string) (value string, found bool) {
	for _, hashFile := range hashes {
		for filename, hash := range hashFile.Hashes {
			if (filename != exFilename) && (hashFile.Path != exFilepath) {
				if hash == srcHash {
					value += fmt.Sprintf("%s/%s;", hashFile.Path, filename)
					found = true
				}
			}
		}
	}
	return
}
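// loadAllHashFiles walks the folder tree and loads (or creates) the .fdhashes3
// entry for every directory into the global hashes map.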
func loadAllHashFiles(folder string) {
	count = 0
	addWork = 0
	err := filepath.Walk(folder, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			fmt.Print(".")
			hashFile, ok := hashes[path]
			if !ok {
				_, err := os.Stat(path + "/.fdhashes3")
				if os.IsNotExist(err) {
					hashFile = Fdhashes{Path: path, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: true}
				} else {
					hashFile = loadHashfile(path + "/.fdhashes3")
				}
				hashes[path] = hashFile
			}
		}
		return nil
	})
	if err != nil {
		panic(err)
	}
	fmt.Printf("\nfound %d hash files.\n", len(hashes))
}
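// check panics on any non-nil error.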
func check(e error) {
	if e != nil {
		panic(e)
	}
}