图片Hash计算:区别于MD5、SHA-1,其特点是对于只有细微差别的图片会得到相似的Hash值。摘自 realpython.com
# Filename: index.py
#
# Build a shelve database that maps the difference hash (dHash) of every
# image in a dataset directory to the list of file names producing that hash.
#
# Usage: python index.py --dataset <image directory> --shelve <database path>

# import the necessary packages
from PIL import Image
import imagehash
import argparse
import glob
import os
import shelve

# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True,
                help="path to input dataset of images")
ap.add_argument("-s", "--shelve", required=True,
                help="output shelve database")
args = vars(ap.parse_args())

# open the shelve database; every entry is stored by explicit assignment
# below, so the in-memory writeback cache is not needed
db = shelve.open(args["shelve"])

try:
    # loop over the image dataset (only *.jpg files directly inside it)
    for image_path in glob.glob(os.path.join(args["dataset"], "*.jpg")):
        # load the image and compute the difference hash; the context
        # manager releases the file handle before the next image is opened
        with Image.open(image_path) as image:
            h = str(imagehash.dhash(image))

        # extract the filename from the path; os.path.basename honours the
        # platform's path separator, unlike searching for a literal "/"
        filename = os.path.basename(image_path)

        # update the database using the hash as the key and appending the
        # filename to the list of values; names already recorded are skipped
        # so that re-running the indexer does not create duplicate entries
        filenames = db.get(h, [])
        if filename not in filenames:
            db[h] = filenames + [filename]
finally:
    # close the shelf database even if an image fails to load
    db.close()
# Filename: search.py
#
# Look up a query image in the shelve database built by index.py and show
# every dataset image whose difference hash (dHash) equals the query's hash.
#
# Usage: python search.py --dataset <image directory> --shelve <database path>
#                         --query <query image path>

# import the necessary packages
from PIL import Image
import imagehash
import argparse
import os
import shelve

# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True,
                help="path to dataset of images")
ap.add_argument("-s", "--shelve", required=True,
                help="output shelve database")
ap.add_argument("-q", "--query", required=True,
                help="path to the query image")
args = vars(ap.parse_args())

# open the shelve database
db = shelve.open(args["shelve"])

try:
    # load the query image and compute its difference hash; the context
    # manager releases the file handle once the hash has been computed
    with Image.open(args["query"]) as query:
        h = str(imagehash.dhash(query))

    # grab the images from the database that have the same hash value;
    # a hash that was never indexed yields an empty list instead of raising
    # KeyError, so the script reports "Found 0 images"
    filenames = db.get(h, [])
    print("Found %d images" % (len(filenames)))

    # loop over the images and display each one
    for filename in filenames:
        with Image.open(os.path.join(args["dataset"], filename)) as image:
            image.show()
finally:
    # close the shelve database even if an image fails to open
    db.close()
$ python index.py --dataset images --shelve db.shelve  # 构建images中图片的Hash库
$ python search.py --dataset images --shelve db.shelve --query images/xxx.jpg  # 搜索xxx.jpg
https://realpython.com/blog/python/fingerprinting-images-for-near-duplicate-detection 图像Hash
http://www.pyimagesearch.com 图像搜索引擎Blog