Dagum Gini Decomposition，Dagum基尼系数分解的python实例

最新推荐文章于 2024-08-30 12:09:11 发布

wsxyh1071652438

最新推荐文章于 2024-08-30 12:09:11 发布

阅读量3.4k

点赞数 8

分类专栏：计量金融 文章标签：python 算法 人工智能

本文链接： https://blog.csdn.net/wsxyh1071652438/article/details/128219887

版权

计量金融专栏收录该内容

1 篇文章 3 订阅

订阅专栏

背景

基尼系数（Gini index or Gini Coefficient），是国际上通用的、用以衡量一个国家内或地区内总体居民收入差距的常用指标之一。

但是其全局性的特征也意味着它缺少局部的收入（或发展等）相对区域化的信息。1997年，Camilo Dagum 提出了一种对基尼系数进行区域化分解的方法，将基尼系数这一相对宏观的数值分解为子区域内差距、子区域间差距以及超变密度等更细致的描述。这种“总体的结果由局部的影响综合而来”的过程也是符合直觉的。

因本文作者非经济学学生，故不对超变密度的意义做深究。

分解过程

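基尼系数（以下公式依据下文代码整理；设总体共 $n$ 个样本，划分为 $k$ 个子群，第 $j$ 个子群含 $n_j$ 个样本 $y_{ji}$，子群均值为 $\bar{Y}_j$，总体均值为 $\bar{Y}$）：

$$G = \frac{\sum_{j=1}^{k}\sum_{h=1}^{k}\sum_{i=1}^{n_j}\sum_{r=1}^{n_h}\lvert y_{ji}-y_{hr}\rvert}{2n^{2}\bar{Y}}$$

子群 $j$ 与子群 $h$ 之间的基尼系数（$j=h$ 时即子群内基尼系数）：

$$G_{jh} = \frac{\sum_{i=1}^{n_j}\sum_{r=1}^{n_h}\lvert y_{ji}-y_{hr}\rvert}{n_j n_h\,(\bar{Y}_j+\bar{Y}_h)}$$

子群的样本份额与收入份额：

$$p_j = \frac{n_j}{n},\qquad s_j = \frac{n_j\bar{Y}_j}{n\bar{Y}}$$

相对影响 $D_{jh}$，其中 $d_{jh}$ 为所有样本对上 $\max(y_{ji}-y_{hr},0)$ 的均值，$p_{jh}$ 为 $\max(y_{hr}-y_{ji},0)$ 的均值：

$$D_{jh} = \frac{d_{jh}-p_{jh}}{d_{jh}+p_{jh}}$$

将子群按均值从大到小排序后（下式中 $j<h$ 表示 $\bar{Y}_j \ge \bar{Y}_h$），分解为子群内差距 $G_w$、子群间净差距 $G_{nb}$ 与超变密度 $G_t$：

$$G_w = \sum_{j=1}^{k} G_{jj}\,p_j s_j$$

$$G_{nb} = \sum_{j<h} G_{jh}\,(p_j s_h + p_h s_j)\,D_{jh}$$

$$G_t = \sum_{j<h} G_{jh}\,(p_j s_h + p_h s_j)\,(1-D_{jh})$$

$$G = G_w + G_{nb} + G_t$$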

代码实例

由于测试输入涉及未发表数据，故暂不公开，目前仅可通过邮件索要包含人为错误信息的测试输入。

'''
    Dagum Gini coefficient decomposition.

    @author:    Xian Yuehui, <xianyuehui@stu.xjtu.edu.cn>
    @date:      2022.07.10
    @licence:   GPL clause
'''

from itertools import combinations, permutations, product
from typing import Dict, List, Set, Tuple

import numpy as np

def read_data(
    file_path: str = 'gini.txt',
) -> Tuple[Dict[int, Dict[int, List[float]]], Set[int]]:
    """Load a tab-separated table of yearly values grouped by subgroup.

    File layout, as consumed by the parsing below:

    * line 1 -- column labels (e.g. province names); read and discarded;
    * line 2 -- one integer subgroup id per data column;
    * each further line -- a year id followed by one value per data column.

    Every line is stripped of surrounding whitespace before being split on
    tabs, so an empty leading cell on a line is dropped.  Blank lines in the
    data section are skipped.

    Args:
        file_path: Path of the input file.

    Returns:
        A ``(data, subgroup_ids)`` tuple where ``data`` maps
        ``year_id -> subgroup_id -> list of values`` (values keep their
        column order) and ``subgroup_ids`` is the set of distinct ids found
        on line 2.

    Raises:
        ValueError: If a cell cannot be parsed as a number.
        IndexError: If a data row has more value columns than line 2 has ids.
    """
    with open(file_path, 'rt') as f:
        f.readline()    # column labels (provinces) are not used
        subgroup_ids = [int(cell) for cell in f.readline().strip().split('\t')]
        unique_ids = set(subgroup_ids)

        data = {}
        for line in f:
            stripped = line.strip()
            if not stripped:
                # A blank line (typically a trailing newline at the end of
                # the file) carries no data and must not reach float('').
                continue
            row = [float(cell) for cell in stripped.split('\t')]
            year_id = int(row[0])
            # Every subgroup gets an entry, even if no column maps to it.
            per_group = {subgroup_id: [] for subgroup_id in unique_ids}
            for idx, value in enumerate(row[1:]):
                per_group[subgroup_ids[idx]].append(value)
            data[year_id] = per_group

    return data, unique_ids

def sum_abs_product(a: List[float], b:List[float]) -> float:
    """Return the sum of |x - y| over every pair (x, y) with x in a, y in b.

    Pairs are taken from the Cartesian product of the two lists, so each
    element of ``a`` is compared with each element of ``b`` exactly once.
    """
    return sum(abs(x - y) for x, y in product(a, b))

def Gini(a: List[float], b:List[float]) -> float:
    """Return the Gini ratio between two groups of values.

    The result is the sum of absolute pairwise differences between ``a`` and
    ``b`` divided by ``(mean(a) + mean(b)) * len(a) * len(b)``.  Passing the
    same list for both arguments yields the Gini coefficient of that list.

    Raises:
        ZeroDivisionError: If either list is empty or the two means sum to 0.
    """
    size_a = len(a)
    size_b = len(b)
    mean_a = sum(a) / size_a
    mean_b = sum(b) / size_b
    denominator = (mean_a + mean_b) * size_a * size_b
    return sum_abs_product(a, b) / denominator

if __name__ == '__main__':
    # Script entry point: read the grouped yearly data, compute the Dagum
    # decomposition of the Gini coefficient for every year and write one
    # tab-separated result row per year to `result_save_path`.
    result_save_path = 'dagum_gini_coef_decomp.txt'
    data, subgroup_ids = read_data()

    # for debug: restrict the run to the first year only
    # year = list(data.keys())[0]
    # data = {year: data[year]}

    # Fixed column order of the per-pair outputs: G columns cover every
    # subgroup with itself plus each unordered pair; D columns cover every
    # ordered pair of distinct subgroups.
    ordered_ids = sorted(subgroup_ids)
    G_idx_list = [(idx, idx) for idx in ordered_ids]
    G_idx_list.extend(combinations(ordered_ids, 2))
    D_idx_list = list(permutations(ordered_ids, 2))

    with open(result_save_path, 'wt') as f:
        # result file header
        # macro: overall Gini, its three components, their sum and shares
        f.write('#year\tGini\tG_nb\tG_w\tG_t\tG_nb+G_w+G_t')
        f.write('\tG_nb/Gini\tG_w/Gini\tG_t/Gini')
        # micro: pairwise Gini ratios and relative impacts
        for idxs in G_idx_list:
            f.write('\tG_{}_{}'.format(idxs[0], idxs[1]))
        for idxs in D_idx_list:
            f.write('\tD_{}_{}'.format(idxs[0], idxs[1]))
        f.write('\n')

        for year, year_data in data.items():
            # Subgroups ordered by descending mean, so that in every pair
            # (j, h) visited below the mean of j is >= the mean of h.
            group_means = {k_j: sum(v_j) / len(v_j) for k_j, v_j in year_data.items()}
            sorted_keys = sorted(group_means, key=group_means.get, reverse=True)

            # All values of the year, concatenated in that subgroup order.
            year_data_list = []
            for key in sorted_keys:
                year_data_list.extend(year_data[key])
            n_total = len(year_data_list)

            # G: overall Gini coefficient of the year
            G = Gini(year_data_list, year_data_list)

            # G_jh: Gini ratio for every ordered pair of subgroups
            # (j == h gives the within-subgroup Gini)
            G_jh = {
                key_j: {
                    key_h: Gini(year_data[key_j], year_data[key_h])
                    for key_h in sorted_keys
                }
                for key_j in sorted_keys
            }

            # P_j: share of observations; S_j: share of the total value
            Y_avg = sum(year_data_list) / n_total
            P_j, S_j = {}, {}
            for k_j, v_j in year_data.items():
                P_j[k_j] = len(v_j) / n_total
                S_j[k_j] = P_j[k_j] * group_means[k_j] / Y_avg

            # D_jh: relative impact between subgroups j and h
            D_jh = {key_j: {} for key_j in sorted_keys}
            for key_j, key_h in product(sorted_keys, sorted_keys):
                # Pairwise differences are computed once and reused for both
                # the positive part (M) and the negative part (N).
                diffs = [y_j - y_h for y_j, y_h in product(year_data[key_j], year_data[key_h])]
                M_jh_tmp = np.array([d if d > 0 else 0 for d in diffs]).mean()
                N_jh_tmp = np.array([-d if -d > 0 else 0 for d in diffs]).mean()
                denom = M_jh_tmp + N_jh_tmp
                # When no pair of values differs, M = N = 0 and the ratio is
                # 0/0; use 0.0 so that NaN does not leak into G_nb and G_t
                # (G_jh is 0 for such a pair, so its contribution is 0).
                D_jh[key_j][key_h] = (M_jh_tmp - N_jh_tmp) / denom if denom != 0 else 0.0

            # G_w: within-subgroup contribution
            G_w = sum(G_jh[idx][idx] * P_j[idx] * S_j[idx] for idx in year_data)

            # G_nb: net between-subgroup contribution; G_t: transvariation.
            # Each unordered pair is visited once, higher-mean subgroup first.
            G_nb, G_t = 0., 0.
            for pos, j in enumerate(sorted_keys):
                for h in sorted_keys[pos + 1:]:
                    G_nb += G_jh[j][h] * D_jh[j][h] * (P_j[j] * S_j[h] + P_j[h] * S_j[j])
                    G_t += G_jh[j][h] * (P_j[j] * S_j[h] + P_j[h] * S_j[j]) * (1 - D_jh[j][h])

            # Shares of each component in G are undefined when G is 0 (all
            # values of the year equal); emit nan instead of dividing by 0.
            if G != 0:
                shares = (G_nb / G, G_w / G, G_t / G)
            else:
                shares = (float('nan'), float('nan'), float('nan'))

            f.write('{}\t{}\t{}\t{}\t{}\t{}'.format(year, G, G_nb, G_w, G_t, G_nb + G_w + G_t))
            f.write('\t{}\t{}\t{}'.format(shares[0], shares[1], shares[2]))
            for idxs in G_idx_list:
                f.write('\t{}'.format(G_jh[idxs[0]][idxs[1]]))
            for idxs in D_idx_list:
                f.write('\t{}'.format(D_jh[idxs[0]][idxs[1]]))
            f.write('\n')

            # for debug
            # print('{}'.format(year).center(50, '-'))
            # print('Gini: ', G)
            # print('G_nb: ', G_nb)
            # print('G_w: ', G_w)
            # print('G_t: ', G_t)
            # print('G_nb + G_w + G_t:', G_nb + G_w + G_t)

版权声明

禁止未进行原文章地址声明的文章转载。使用本文代码时请在适当位置给予感谢声明。

参考资料

1. Dagum C. A new approach to the decomposition of the Gini income inequality ratio[M]//Income Inequality, Poverty, and Economic Welfare. Physica-Verlag HD, 1998: 47-63.