Python实现的哈夫曼编码

2015/01/11 47480
#coding:utf-8
import struct
codeDict={}#全局字典key=字符，value=数字
encodeDict={}
filename=None
listForEveryByte=[]

class Node:
    def __init__(self,right=None,left=None,parent=None,weight=0,charcode=None):
        self.right=right
        self.left=left
        self.parent=parent
        self.weight=weight
        self.charcode=charcode

#按权值排序
def sort(list):
    return sorted(list,key=lambda node:node.weight)

#构建哈夫曼树
def Huffman(listOfNode):
    listOfNode=sort(listOfNode)
    while len(listOfNode)!=1:
        a,b = listOfNode[0],listOfNode[1]
        new=Node()
        new.weight, new.left, new.right = a.weight + b.weight, a, b
        a.parent, b.parent = new, new
        listOfNode.remove(a), listOfNode.remove(b)
        listOfNode.append(new)
        listOfNode=sort(listOfNode)
    return listOfNode

def inPutFile():
    global filename
    global  listForEveryByte
    filename=raw_input("请输入要压缩的文件：")
    global  codeDict
    with open(filename,'rb') as f:
        data=f.read()
        for Byte in data:
            codeDict.setdefault(Byte,0) #每个字节出现的次数默认为0
            codeDict[Byte]+=1
            listForEveryByte.append(Byte)

def outputCompressedFile():
    global  listForEveryByte
    fileString=""
    with open(filename.split(".")[0]+".jbj","wb") as f:
        for Byte in listForEveryByte:
            fileString+=encodeDict[Byte]  #构成一个长字符序列
        leng=len(fileString)
        more=16-leng%16
        fileString=fileString+"0"*more          #空位用0补齐
        #print(fileString)

        leng=len(fileString)
        i,j=0,16
        while j<=leng:
            k=fileString[i:j]
            a=int(k,2)
            #print(a)
           # print(repr(struct.pack(">H",a)))
            f.write(struct.pack(">H",a))
           # f.write(str(a))
            i=i+16
            j=j+16


def encode(head,listOfNode):
    global  encodeDict
    for e in listOfNode:
        ep=e
        encodeDict.setdefault(e.charcode,"")
        while ep!=head:

            if ep.parent.left==ep:
                encodeDict[e.charcode]="1"+encodeDict[e.charcode]
            else:
                encodeDict[e.charcode]="0"+encodeDict[e.charcode]
            ep=ep.parent


if __name__ == '__main__':
    inPutFile()
    listOfNode=[]
    for e in codeDict.keys():
        listOfNode.append(Node(weight=codeDict[e],charcode=e))
    head=Huffman(listOfNode)[0]    #构建哈夫曼树，head称为树的根节点
    encode(head,listOfNode)

    for i in encodeDict.keys():
         print(i,encodeDict[i])
    #outputCompressedFile()
代码片段