#第２回コード例

####################################################
#p4
#pie関数（引数：データフレーム、列名）
def pie(data, column):
  """Draw a pie chart of how often each answer occurs in one column.

  Args:
    data: DataFrame that holds the survey answers.
    column: Name of the column to tabulate.
  """
  # Font size is set through the global matplotlib rcParams.
  plt.rcParams["font.size"] = 18

  # Count the occurrences of each distinct answer.
  counts = data[column].value_counts()

  # Pie chart with percentage labels and an empty axis label.
  counts.plot.pie(
    autopct="%.1f%%",
    figsize=(6, 6),
    wedgeprops={"linewidth": 0, "edgecolor": "white"},
    label="",
    colors=("#ff8080", "#8080ff", "#80ff80"),
  )
  plt.show()  # finalize and render the figure

# Call the function for the gender question.
pie(data=data, column="あなたの性別は？")


####################################################
#p5
#pie関数（引数：データフレーム、列名）
def pie(data, column):
  """Draw a pie chart for one column and display a count/percentage table.

  Args:
    data: DataFrame that holds the survey answers.
    column: Name of the column to tabulate.
  """
  # Font size is set through the global matplotlib rcParams.
  plt.rcParams["font.size"] = 18

  # Count the occurrences of each distinct answer.
  counts = data[column].value_counts()

  # Pie chart with percentage labels and an empty axis label.
  counts.plot.pie(
    autopct="%.1f%%",
    figsize=(6, 6),
    wedgeprops={"linewidth": 0, "edgecolor": "white"},
    label="",
    colors=("#ff8080", "#8080ff", "#80ff80"),
  )
  plt.show()  # finalize and render the figure

  total = data[column].count()  # number of non-missing answers
  counts.loc["総計"] = total     # append a grand-total row
  final = counts.to_frame()     # turn the tally into a DataFrame

  # Compute the percentage column from the Series itself rather than from
  # final[column]: pandas >= 2.0 names the value_counts() result "count",
  # so looking the counts up by the original column name raises KeyError.
  final["割合"] = counts.map(lambda x: f"{x / total * 100:.1f}%")
  display(final)  # render the summary table with display()

# Call the function for the gender and the age-group questions.
pie(data=data, column="あなたの性別は？")
pie(data=data, column="あなたの年代は？")

####################################################
#p6
#各要素に関数を適用（map）
def test(x):
  return x + "です"
# Apply the named function test to every element of the column (map).
ret = data["あなたの性別は？"].map(test)
display(ret)

# Apply a lambda expression to every element (map).
ret = data["あなたの性別は？"].map(lambda x: x + "です")
display(ret)

# Apply a lambda expression to every element (apply).
ret = data["あなたの性別は？"].apply(lambda x: x + "です")
display(ret)

####################################################
#p8
#pie関数（引数：データフレーム、列名）
def pie(data, column, sort_index=False):
  """Draw a pie chart for one column and display a count/percentage table.

  Args:
    data: DataFrame that holds the survey answers.
    column: Name of the column to tabulate.
    sort_index: When true, order the slices by answer label instead of
      the order returned by value_counts().
  """
  # Font size is set through the global matplotlib rcParams.
  plt.rcParams["font.size"] = 18

  # Count the occurrences of each distinct answer.
  counts = data[column].value_counts()
  # Optionally sort by the answer labels.
  if sort_index:
    counts = counts.sort_index()

  # Pie chart with percentage labels and an empty axis label.
  counts.plot.pie(
    autopct="%.1f%%",
    figsize=(6, 6),
    wedgeprops={"linewidth": 0, "edgecolor": "white"},
    label="",
    colors=(
      "#ff8080", "#8080ff", "#80ff80", "#ffc080", "#ff80ff", "#80ffff",
      "#c0c0c0", "#d0c060", "#c080ff", "#80ffc0",
    ),
  )
  plt.show()  # finalize and render the figure

  total = data[column].count()  # number of non-missing answers
  counts.loc["総計"] = total     # append a grand-total row
  final = counts.to_frame()     # turn the tally into a DataFrame

  # Compute the percentage column from the Series itself rather than from
  # final[column]: pandas >= 2.0 names the value_counts() result "count",
  # so looking the counts up by the original column name raises KeyError.
  final["割合"] = counts.map(lambda x: f"{x / total * 100:.1f}%")
  display(final)  # render the summary table with display()

# Call the function; the age-group chart is ordered by label.
pie(data=data, column="あなたの性別は？")
pie(data=data, column="あなたの年代は？", sort_index=True)


####################################################
#p11
# Split each multiple-choice answer on commas (plus any following
# whitespace) and build one row per respondent with a 1 under every option
# that respondent selected. The pattern is a raw string so that "\s" reaches
# the regex engine instead of being an invalid string escape sequence.
df = data["今回の静岡観光の目的は？（複数選択可）"].str.split(r',\s*').apply(lambda x: pd.Series(1, index=x))
# Options a respondent did not select come out as NaN; turn them into 0.
df = df.fillna(0).astype(int)
display(df)


####################################################
#p12
# Number of respondents who selected each option.
ret = df.sum()
# Ascending order for the horizontal bar chart. No positional argument is
# passed: Series.sort_values() takes keyword-only arguments on pandas >= 2.0,
# and the old positional 0 only meant axis=0, which is the default.
ret = ret.sort_values()
ret.plot.barh(figsize=(6, 8), color="red")
plt.show()

# Number of non-missing answers to the question, used as the denominator.
kensu = data["今回の静岡観光の目的は？（複数選択可）"].count()
ret = pd.DataFrame(ret)  # the unnamed Series becomes column 0
ret["割合"] = ret[0].apply(lambda x: f"{x / kensu * 100:.1f}%")
# Show the table with the most frequently selected option first.
ret = ret.sort_values(by=0, ascending=False)
display(ret)
print("N = ", kensu)


####################################################
#p13
# Take the column at position 7 and print its sum, mean, minimum, maximum
# and describe() summary, in that order.
col7 = data.iloc[:, 7]
for _stat in (col7.sum, col7.mean, col7.min, col7.max, col7.describe):
  print(_stat())
# Histogram of the same column with 10 bins.
col7.plot.hist(bins=10)
plt.show()


####################################################
#p14
# Cross-tabulate gender (rows) against age group (columns).
col1, col2 = data["あなたの性別は？"], data["あなたの年代は？"]

cross = pd.crosstab(index=col1, columns=col2)
display(cross)


####################################################
#p15
# Stacked horizontal bar chart of the raw cross-tabulated counts.
cross.plot(kind="barh", figsize=(10, 5), stacked=True)
plt.show()


####################################################
#p16
cross = pd.crosstab(col1, col2)
# Convert the count table cross into the percentage table cross2 by dividing
# every row by that row's total.
cross2 = cross.div(cross.sum(axis=1), axis=0) * 100

# Store each row's total in a new "計" column.
cross["計"] = cross.sum(axis=1)
# Build "label(N=total)" row labels with a list comprehension.
labels = [i + "(N=" + str(j) + ")" for i, j in zip(cross.index, cross["計"])]
# Use those labels as the row names of cross2.
cross2.index = labels

# Draw the stacked horizontal bar chart.
cross2.plot.barh(
  figsize=(10, 5), stacked=True, width=0.8,
  color=("#4472c4", "#ed7d31", "#a5a5a5", "#ffc000", "#5b9bd5",
         "#70ad47", "#264478", "#9e480e", "#636363", "#008000"))
# Anchor the legend's upper-left corner 0.02 to the right of the plot's
# upper-right corner, which is (1, 1) in axes coordinates.
plt.legend(bbox_to_anchor=(1.02, 1), loc="upper left", borderaxespad=0)
plt.ylabel("")
# Render the figure explicitly, as the other plotting sections do, so it is
# also shown when this runs as a script.
plt.show()


####################################################
#p20
cross = pd.crosstab(col1, col2)
# Append an overall row holding the column totals.
cross.loc["全体"] = cross.sum()
# Convert the count table cross into the percentage table cross2 by dividing
# every row by that row's total.
cross2 = cross.div(cross.sum(axis=1), axis=0) * 100

# Store each row's total in a new "計" column.
cross["計"] = cross.sum(axis=1)
# Build "label(N=total)" row labels with a list comprehension.
labels = [name + "(N=" + str(total) + ")"
          for name, total in zip(cross.index, cross["計"])]
# Use those labels as the row names of cross2.
cross2.index = labels

# Draw the stacked horizontal bar chart.
cross2.plot.barh(
  figsize=(10, 5), stacked=True, width=0.8,
  color=("#4472c4", "#ed7d31", "#a5a5a5", "#ffc000", "#5b9bd5",
         "#70ad47", "#264478", "#9e480e", "#636363", "#008000"))
# Anchor the legend's upper-left corner 0.02 to the right of the plot's
# upper-right corner, which is (1, 1) in axes coordinates.
plt.legend(bbox_to_anchor=(1.02, 1), loc="upper left", borderaxespad=0)
plt.ylabel("")

ax = plt.gca()  # current axes of the figure
for p in ax.patches:  # visit every bar segment in turn
  if p.get_width() >= 5:  # only segments at least 5 wide get a label
    # Write the percentage inside the segment with annotate().
    ax.annotate(
      f"{p.get_width():.1f}%",
      xy=(p.get_x() + p.get_width() * 0.25,
          p.get_y() + p.get_height() * 0.35),
      color="white")
plt.show()


####################################################
#p22
def crossbar(data, c1, c2, filename="図1.png"):
  """Plot a 100% stacked horizontal bar chart of c2 broken down by c1.

  The chart has one bar per distinct value of c1 plus an overall ("全体")
  bar. It is saved to filename and then shown.

  Args:
    data: DataFrame that holds the survey answers.
    c1: Column whose values become the bars (rows of the cross table).
    c2: Column whose values become the stacked segments.
    filename: Path the figure is saved to. Repeated calls that keep the
      default overwrite the same file, so pass a distinct name per chart
      to keep them all.
  """
  cross = pd.crosstab(data[c1], data[c2])
  cross.loc["全体"] = cross.sum()  # overall row with the column totals

  # Row totals, used both as the denominator and in the bar labels.
  row_totals = cross.sum(axis=1)
  percent = cross.div(row_totals, axis=0) * 100

  # "label(N=total)" row names; an f-string also handles non-string labels.
  percent.index = [f"{name}(N={n})" for name, n in zip(cross.index, row_totals)]

  percent.plot.barh(
    figsize=(10, 5), stacked=True, width=0.8,
    color=("#4472c4", "#ed7d31", "#a5a5a5", "#ffc000", "#5b9bd5",
           "#70ad47", "#264478", "#9e480e", "#636363", "#008000"))
  # Place the legend just outside the upper-right corner of the plot.
  plt.legend(bbox_to_anchor=(1.02, 1), loc="upper left", borderaxespad=0)
  plt.ylabel("")

  ax = plt.gca()
  for patch in ax.patches:
    width = patch.get_width()
    if width >= 5:  # only segments at least 5 wide get a label
      ax.annotate(
        f"{width:.1f}%",
        (patch.get_x() + width * 0.25,
         patch.get_y() + patch.get_height() * 0.35),
        color="white")
  plt.savefig(filename)
  plt.show()

# Gender against occupation, then gender against satisfaction.
crossbar(data=data, c1="あなたの性別は？", c2="あなたの職業は？")
crossbar(data=data, c1="あなたの性別は？", c2="今回の静岡観光の満足度は？")

####################################################
#p28
from scipy import stats
def crossbar(data, c1, c2, filename="図1.png"):
  """Run a chi-square test on c1 x c2 and plot a 100% stacked bar chart.

  The test results are printed. The chart has one bar per distinct value
  of c1 plus an overall ("全体") bar; it is saved to filename and shown.

  Args:
    data: DataFrame that holds the survey answers.
    c1: Column whose values become the bars (rows of the cross table).
    c2: Column whose values become the stacked segments.
    filename: Path the figure is saved to. Repeated calls that keep the
      default overwrite the same file, so pass a distinct name per chart
      to keep them all.
  """
  cross = pd.crosstab(data[c1], data[c2])

  # Chi-square test of independence, run on the raw counts before the
  # overall row is appended and with correction=False.
  chi2, p_value, dof, expected = stats.chi2_contingency(cross, correction=False)
  print("期待度数", "\n", expected)
  print("自由度", "\n", dof)
  print("カイ二乗値", "\n", chi2)
  print("p値", "\n", p_value)

  cross.loc["全体"] = cross.sum()  # overall row with the column totals

  # Row totals, used both as the denominator and in the bar labels.
  row_totals = cross.sum(axis=1)
  percent = cross.div(row_totals, axis=0) * 100

  # "label(N=total)" row names; an f-string also handles non-string labels.
  percent.index = [f"{name}(N={n})" for name, n in zip(cross.index, row_totals)]

  percent.plot.barh(
    figsize=(10, 5), stacked=True, width=0.8,
    color=("#4472c4", "#ed7d31", "#a5a5a5", "#ffc000", "#5b9bd5",
           "#70ad47", "#264478", "#9e480e", "#636363", "#008000"))
  # Place the legend just outside the upper-right corner of the plot.
  plt.legend(bbox_to_anchor=(1.02, 1), loc="upper left", borderaxespad=0)
  plt.ylabel("")

  ax = plt.gca()
  # The loop variable is not named p, so it cannot be confused with the
  # p-value computed above.
  for patch in ax.patches:
    width = patch.get_width()
    if width >= 5:  # only segments at least 5 wide get a label
      ax.annotate(
        f"{width:.1f}%",
        (patch.get_x() + width * 0.25,
         patch.get_y() + patch.get_height() * 0.35),
        color="white")
  plt.savefig(filename)
  plt.show()

# Gender against occupation, then gender against satisfaction.
crossbar(data=data, c1="あなたの性別は？", c2="あなたの職業は？")
crossbar(data=data, c1="あなたの性別は？", c2="今回の静岡観光の満足度は？")