# Convert images to grayscale, compute the Hamming distance between their hashes, and decide whether they are similar.

from os import listdir
from PIL import Image, ImageFile
import pymysql

# Let Pillow load image files whose data is truncated instead of raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Settings for the MySQL connection that the rest of the module shares.
_DB_SETTINGS = {
    'host': '127.0.0.1',
    'user': 'root',
    'password': 'xxxxxx',
    'database': 'media',
}

# The connection is opened once, as soon as the module is loaded.
conn = pymysql.connect(**_DB_SETTINGS)


class Pic:
    """Helpers for hashing images and comparing the resulting hashes."""

    @classmethod
    def get_client(cls):
        """Return a new cursor created on the module-level ``conn``."""
        return conn.cursor()

    @classmethod
    def hamming(cls, diff1=None, diff2=None):
        """Count the positions at which ``diff1`` and ``diff2`` differ.

        Args:
            diff1: First sequence. Its length decides how many positions
                are compared. ``None`` is treated as an empty sequence.
            diff2: Second sequence. ``None`` is treated as an empty
                sequence. Elements beyond ``len(diff1)`` are ignored.

        Returns:
            The number of indexes ``i < len(diff1)`` for which
            ``diff1[i] != diff2[i]``.

        Raises:
            IndexError: If ``diff2`` is shorter than ``diff1``.

        Note:
            The comparison is element by element. When the arguments are
            the hex strings produced by ``dhash`` this counts differing
            hex digits, not differing bits.
        """
        if diff1 is None:
            diff1 = []
        if diff2 is None:
            diff2 = []

        # Indexing diff2 with every index of diff1 fails when diff2 is the
        # shorter one; keep that contract explicit instead of letting zip()
        # silently truncate.
        if len(diff2) < len(diff1):
            raise IndexError('diff2 is shorter than diff1')

        return sum(1 for left, right in zip(diff1, diff2) if left != right)

    @classmethod
    def dhash(cls, image, hash_size=8):
        """Compute a difference hash of ``image`` and return it as hex.

        The image is converted to grayscale and resized to
        ``(hash_size + 1) x hash_size`` pixels. Every pixel is compared with
        its right-hand neighbour, row by row, which yields
        ``hash_size * hash_size`` bits. The bits are packed eight to a byte
        (the first bit of each group is the least significant one) and each
        byte is written as two lowercase hex digits.

        Args:
            image: A PIL image.
            hash_size: Number of comparisons per row and number of rows.

        Returns:
            The hash as a hex string; 16 characters for the default
            ``hash_size`` of 8. If ``hash_size * hash_size`` is not a
            multiple of 8, the remaining bits form one final byte instead of
            being dropped.
        """
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the current
        # name of the same resampling filter.
        gray = image.convert('L').resize(
            (hash_size + 1, hash_size),
            Image.LANCZOS,
        )

        # One bit per horizontally adjacent pixel pair: True when the left
        # pixel is brighter than the right one.
        bits = [
            gray.getpixel((col, row)) > gray.getpixel((col + 1, row))
            for row in range(hash_size)
            for col in range(hash_size)
        ]

        # Pack the bits into bytes, least significant bit first.
        hex_bytes = []
        for start in range(0, len(bits), 8):
            byte_value = sum(
                1 << offset
                for offset, bit in enumerate(bits[start:start + 8])
                if bit
            )
            hex_bytes.append(format(byte_value, '02x'))

        return ''.join(hex_bytes)


def _collect_hashes(pic, dir_name):
    """Return ``(file name, dhash)`` for each image in ``dir_name`` that can be hashed.

    ``dir_name`` is concatenated directly with each file name, so it must end
    with a path separator. Files that cannot be opened or hashed are skipped
    and reported on stderr.
    """
    import sys  # local import: only needed to report skipped files

    hashes = []
    for file_name in listdir(dir_name):
        if file_name == ".DS_Store":
            continue
        try:
            # The context manager releases the file handle as soon as the
            # hash has been computed.
            with Image.open(dir_name + file_name) as im:
                hashes.append((file_name, pic.dhash(im)))
        except Exception as exc:
            # Best effort: one unreadable file must not abort the scan, but
            # the failure is reported instead of being silently swallowed.
            print("skipped %s: %r" % (file_name, exc), file=sys.stderr)
    return hashes


def _find_similar_pairs(pic, hashes, max_distance=5):
    """Return and print every pair of file names whose hashes are close.

    Two files count as similar when ``pic.hamming`` of their hashes is at
    most ``max_distance``. Each unordered pair is compared once.
    """
    pairs = []
    for index, (name_a, hash_a) in enumerate(hashes):
        for name_b, hash_b in hashes[index + 1:]:
            if pic.hamming(hash_a, hash_b) <= max_distance:
                # A list keeps every match; a dict keyed by name_a would keep
                # only the last match of each image.
                pairs.append((name_a, name_b))
                print(name_a + " => " + name_b)
    return pairs


def _store_pairs(pic, pairs):
    """Insert the similar pairs into the ``same`` table and commit."""
    if not pairs:
        return
    client = pic.get_client()
    try:
        sql = """insert into same(a,b) values(%s,%s);"""
        client.executemany(sql, pairs)
        conn.commit()
    finally:
        client.close()


def main():
    """Hash every image in the album directory and record similar pairs."""
    pic = Pic()
    dir_name = "/media/"  # album directory
    hashes = _collect_hashes(pic, dir_name)
    pairs = _find_similar_pairs(pic, hashes)
    _store_pairs(pic, pairs)


if __name__ == '__main__':
    main()
# Last modified: 2021-08-13
# Site tagline: "Let everything take its course."