您现在的位置： Linux教程網 >> UnixLinux > >> Linux編程 >> Linux編程

哈夫曼樹的數組實現

題目來源：SOJ 1000. Huffman Coding V1,V3

題目描述

V3：

Description
對輸入的英文大寫字母序列進行統計概率，然後構建Huffman樹，得出每個字母的Huffman編碼，輸出字母序列的總編碼長度。
Input
第一行是大寫字母個數n(0<n<=100)
第二行為n個字母，中間以一個空格分隔。
Output
輸出字母序列的總編碼長度。
Sample Input
10
S S U U U S U L U U
Sample Output
14

V1:

Description
對輸入的英文大寫字母序列統計各字母的出現概率，然後構建Huffman樹，並按照概率降序輸出每個字母的Huffman編碼。
Input
第一行是大寫字母個數n(0<n<=100)
第二行為n個字母，中間以一個空格分隔。
建樹過程把權值較小的子樹作為左孩子，數據保證建樹過程不會出現左右子樹權值一樣的情況。
Output
假設輸入中共有m個不同的字母，按照出現概率降序輸出，每個字母單獨一行輸出。格式如下：

　　字母1 出現次數 Huffman編碼
　　字母2 出現次數 Huffman編碼
　　字母3 出現次數 Huffman編碼
　　…
　　字母m 出現次數 Huffman編碼

Sample Input
Copy sample input to clipboard
10
S S U U U S U L U U
Sample Output
U 6 1
S 3 01
L 1 00

算法描述

其實哈夫曼樹的建樹規則網上已有不少資料可以參考，此處不再贅述，只講講個人的一些收獲和想法。數組這種實現方法也是我在網上學來的，簡單講就是先統計輸入數據中每個字符出現的次數作為權重並記錄下來。主要的結構體是哈夫曼樹的節點，存儲的是該節點的權重、左右孩子在數組中的下標，還有一個很有用的數據：父節點的下標。這樣就可以用數組下標代替指針域進行上下尋址，減少由於指針使用不當帶來的內存問題。寫代碼的過程中糾纏最久的一個bug出在建立非字符節點時用來查找無父節點節點的函數select()中：已經標記為有父節點的節點仍未被正確識別，後來才發現問題出在查找權重最小節點的過程中，for循環的邊界寫錯了。
不多說了，直接上代碼吧

個人代碼實現

#include <iostream>
#include <cstdio>
#include <cstring>
using namespace std;

// Alphabet size: letters are mapped to slots via (letter - 'A'), i.e. 'A'..'Z'.
const int N = 26;
// Upper bound on Huffman tree nodes for N leaves: N leaves plus N - 1 internal nodes.
const int M = 2 * N - 1;

// One slot of the array-backed Huffman tree.
// Links are array indices rather than pointers. Slots are used from index 1,
// and select()/hfmCoding() treat a parent value of 0 as "no parent yet".
struct huffmanTreeNode
{
    int weight;  // occurrence count of a leaf, or the sum of both children for an internal node
    int left;    // index of the left child (0 when unset)
    int right;   // index of the right child (0 when unset)
    int parent;  // index of the parent node (0 while the node is still a root)
};

// Per-letter result record: the letter, how often it occurred, and its code.
struct huffmanCode
{
    char data;      // the letter itself; initialize() stores 'A' + slot here
    int weight;     // number of occurrences of the letter in the input
    char code[N];   // NUL-terminated string of '0'/'1' characters, filled in by hfmCoding()
};

// Reads up to n whitespace-separated tokens from stdin and counts how often
// each uppercase letter appears as the first character of a token.
//
// hfmCodeSet: array with at least N + 1 slots. Every slot is cleared first,
//             then slots 1..k receive the k distinct letters in alphabetical
//             order together with their occurrence counts (slot 0 is unused).
// n:          number of tokens announced by the input.
// Returns k, the number of distinct letters seen.
//
// Reading stops early on EOF or a read error, and tokens that do not start
// with 'A'..'Z' are ignored, so malformed input can no longer index outside
// the counting table.
int initialize(huffmanCode hfmCodeSet[], int n)
{
    int counts[N] = {0};

    // Reset every slot so stale stack contents never look like a real entry;
    // totalLength() relies on unused slots having weight 0.
    huffmanCode blank;
    blank.data = 0;
    blank.weight = 0;
    memset(blank.code, 0, sizeof(blank.code));
    for (int i = 0; i <= N; i++)
        hfmCodeSet[i] = blank;

    for (int i = 0; i < n; i++) {
        char token[5] = {0};
        // The width limit keeps an over-long token from overflowing the buffer.
        if (scanf("%4s", token) != 1)
            break;
        // Drop whatever is left of an over-long token so it is not counted
        // again as a separate token. A zero-length match is harmless here.
        scanf("%*[^ \t\n\v\f\r]");

        if (token[0] >= 'A' && token[0] <= 'Z')
            counts[token[0] - 'A']++;
    }

    int used = 0;
    for (int i = 0; i < N; i++) {
        if (counts[i] > 0) {
            ++used;
            hfmCodeSet[used].data = static_cast<char>('A' + i);
            hfmCodeSet[used].weight = counts[i];
        }
    }

    return used;
}

// Finds the two lightest parentless nodes among hfmTree[1..idx].
//
// *i1 receives the index of the lightest such node and *i2 the index of the
// lightest one among the rest. On equal weights the lower index wins in both
// cases. An output is left untouched when no matching node exists (fewer than
// one or two parentless nodes respectively).
void select(huffmanTreeNode hfmTree[],int idx, int* i1, int* i2)
{
    // 0 doubles as "not found yet" because valid node indices start at 1.
    int lightest = 0;
    int runnerUp = 0;

    for (int k = 1; k <= idx; ++k) {
        if (hfmTree[k].parent != 0)
            continue;  // already merged into a subtree

        if (lightest == 0) {
            lightest = k;
        } else if (hfmTree[k].weight < hfmTree[lightest].weight) {
            // The previous minimum is, by construction, the best of the rest.
            runnerUp = lightest;
            lightest = k;
        } else if (runnerUp == 0 ||
                   hfmTree[k].weight < hfmTree[runnerUp].weight) {
            runnerUp = k;
        }
    }

    if (lightest != 0)
        *i1 = lightest;
    if (runnerUp != 0)
        *i2 = runnerUp;
}

// Builds the Huffman tree for the n letters in hfmCodeSet[1..n] and stores
// each letter's code string in hfmCodeSet[i].code.
//
// hfmCodeSet: slots 1..n hold the letters and their weights; .code is written.
// hfmTree:    scratch array with at least 2 * n slots; slots 1..n become the
//             leaves and slots n+1..2n-1 the internal nodes (the last is the root).
// n:          number of distinct letters; values outside 1..N do nothing.
//
// At every merge the lighter of the two selected nodes becomes the left
// child. A left edge contributes '0' and a right edge '1'. With n == 1 the
// lone leaf is the root and its code is the empty string.
void hfmCoding(huffmanCode hfmCodeSet[], huffmanTreeNode hfmTree[], int n)
{
    if (n <= 0 || n > N)
        return;

    const int nodeCount = 2 * n - 1;

    // Leaves take their weight from the code set; internal nodes start empty.
    for (int i = 1; i <= nodeCount; i++) {
        hfmTree[i].weight = (i <= n ? hfmCodeSet[i].weight : 0);
        hfmTree[i].parent = hfmTree[i].left = hfmTree[i].right = 0;
    }

    // Repeatedly merge the two lightest roots into a new internal node.
    for (int i = n + 1; i <= nodeCount; i++) {
        int minIdx1 = 0;
        int minIdx2 = 0;
        select(hfmTree, i - 1, &minIdx1, &minIdx2);
        hfmTree[i].weight = hfmTree[minIdx1].weight + hfmTree[minIdx2].weight;
        hfmTree[i].left = minIdx1;
        hfmTree[i].right = minIdx2;
        hfmTree[minIdx1].parent = i;
        hfmTree[minIdx2].parent = i;
    }

    // A code has at most n - 1 bits, so bits are written right-to-left ending
    // at index n - 2 and the terminator sits at n - 1, directly after the last
    // bit. Placing it any later would leave an uninitialised byte inside the
    // copied string.
    char tempCode[N + 1];
    tempCode[n - 1] = '\0';

    for (int i = 1; i <= n; i++) {
        int start = n - 1;
        int childIdx = i;
        int parentIdx = hfmTree[childIdx].parent;

        // Walk from the leaf up to the root, prepending one bit per edge.
        while (parentIdx) {
            if (hfmTree[parentIdx].left == childIdx) {
                tempCode[--start] = '0';
            } else if (hfmTree[parentIdx].right == childIdx) {
                tempCode[--start] = '1';
            }
            childIdx = parentIdx;
            parentIdx = hfmTree[childIdx].parent;
        }

        strcpy(hfmCodeSet[i].code, &tempCode[start]);
    }
}

// Returns the encoded length of the whole input: the sum over slots 1..N of
// (occurrence count * code length). Slots with a non-positive weight are
// skipped.
int totalLength(huffmanCode hfmCodeSet[])
{
    int bits = 0;
    for (int slot = 1; slot <= N; ++slot) {
        const huffmanCode& entry = hfmCodeSet[slot];
        if (entry.weight <= 0)
            continue;
        bits += entry.weight * strlen(entry.code);
    }
    return bits;
}

// Sorts hfmCodeSet[1..n] by descending weight and prints one line per letter
// in the form "<letter> <count> <code>".
//
// The sort is a stable bubble sort: entries with equal weight keep their
// relative order. hfmCodeSet is reordered in place.
void sortDisplayCode(huffmanCode hfmCodeSet[], int n)
{
    for (int pass = 1; pass <= n - 1; pass++) {
        bool swapped = false;
        // After each pass the lightest remaining entry has sunk to the end,
        // so the tail beyond n - pass is already in its final position.
        for (int j = 1; j <= n - pass; j++) {
            if (hfmCodeSet[j].weight < hfmCodeSet[j + 1].weight) {
                huffmanCode tmp = hfmCodeSet[j];
                hfmCodeSet[j] = hfmCodeSet[j + 1];
                hfmCodeSet[j + 1] = tmp;
                swapped = true;
            }
        }
        if (!swapped)
            break;  // already sorted
    }

    // Fields are space-separated, as the expected output format requires.
    for (int i = 1; i <= n; i++) {
        printf("%c %d %s\n", hfmCodeSet[i].data, hfmCodeSet[i].weight, hfmCodeSet[i].code);
    }
}

// Entry point: reads the letter count and the letters from stdin, builds the
// Huffman codes, then prints the total encoded length followed by the
// per-letter table. When only one distinct letter occurs, only the length of
// its code is printed. Returns 1 if the letter count cannot be read.
int main(int argc, char const *argv[])
{
    (void)argc;
    (void)argv;

    huffmanCode hfmCodeSet[N + 1];
    huffmanTreeNode hfmTree[M + 1];
    int n = 0;

    // input the number of letters
    if (scanf("%d", &n) != 1)
        return 1;

    // input the letters; from here on n is the number of DISTINCT letters
    n = initialize(hfmCodeSet, n);

    // build a huffman tree using arrays and calculate the huffman codes
    hfmCoding(hfmCodeSet, hfmTree, n);

    if (n == 1) {
        // strlen() returns size_t, so convert explicitly to match "%d"
        printf("%d\n", static_cast<int>(strlen(hfmCodeSet[1].code)));
        return 0;
    }
    // output the total length of huffman Code
    printf("%d\n", totalLength(hfmCodeSet));
    // output the weight and huffman Code of corresponding char
    sortDisplayCode(hfmCodeSet, n);
    return 0;
}

上一篇文章：關於js閉包的經典實例的學習筆記
下一篇文章： Android Studio非法字符: \ufeff解決

Linux編程

C++實現輸入多行數字到數組

哈夫曼樹的實現

使用Java數組實現順序表

使用Java數組實現順序棧

哈夫曼樹與編碼譯碼實現

Python字典數組排序實現

C語言中使用struct實現數組復制

JAVA實現二維數組的轉置

相關文章

棧的解析及C++實現

JavaScript數組詳解

用C++實現的貪吃蛇游戲

用C++實現的八皇後問題

Struts2綁定對象數組

OpenCV實現人臉檢測例程

Python線程池實現

map實現之紅黑樹

線性實現最大子序列和

循環數組 C 語言實現中一個不易發現的 bug

linux awk 數組排序多種實現方法

awk 數組排序多種實現方法

Linux編程

SHELL編程

PERL編程