numpy.histogram()
import numpy as np
# Sample data: 20 integer observations ranging from 2 to 90.
a = np.array([
    89, 34, 56, 87, 90, 23, 45, 12, 65, 78,
    9, 34, 12, 11, 2, 65, 78, 82, 28, 78,
])
# With an integer `bins`, np.histogram splits [a.min(), a.max()] into that
# many equal-width bins and returns a (counts, bin_edges) tuple.
counts, edges = np.histogram(a, bins=2)
histogram = (counts, edges)
print(histogram)
(array([10, 10]), array([ 2., 46., 90.]))
import numpy as np
# Same 20 observations as before.
a = np.array([
    89, 34, 56, 87, 90, 23, 45, 12, 65, 78,
    9, 34, 12, 11, 2, 65, 78, 82, 28, 78,
])
# Three equal-width bins over [2, 90]; the edges are no longer whole numbers.
counts, edges = np.histogram(a, bins=3)
histogram = (counts, edges)
print(histogram)
(array([7, 4, 9]), array([ 2. , 31.33333333, 60.66666667, 90. ]))
import numpy as np
# Same 20 observations as before.
a = np.array([
    89, 34, 56, 87, 90, 23, 45, 12, 65, 78,
    9, 34, 12, 11, 2, 65, 78, 82, 28, 78,
])
# A sequence passed as `bins` is used as the explicit bin edges:
# [0, 30), [30, 60), [60, 90] (only the last bin includes its right edge).
counts, edges = np.histogram(a, bins=[0, 30, 60, 90])
histogram = (counts, edges)
print(histogram)
(array([7, 4, 9]), array([ 0, 30, 60, 90]))
from matplotlib import pyplot as plt
import numpy as np
# Same 20 observations as in the np.histogram examples.
a = np.array([
    89, 34, 56, 87, 90, 23, 45, 12, 65, 78,
    9, 34, 12, 11, 2, 65, 78, 82, 28, 78,
])
# Draw the histogram with explicit bin edges at every 20 units, then
# title and display the figure.
plt.hist(a, bins=[0, 20, 40, 60, 80, 100])
plt.title("histogram")
plt.show()

import pandas as pd
import numpy as np
# Build the DataFrame with read_csv(); header=None means the first row of
# the file is treated as data, not as column names.
df = pd.read_csv('part5/auto-mpg.csv', header=None)
# Assign the column names.
df.columns = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
              'acceleration', 'model year', 'origin', 'name']
print(df['horsepower'])

# The script treats '?' as the missing-value marker in 'horsepower':
# convert it to NaN, drop the affected rows, then cast the column to float.
# The replace() result is assigned back to the column rather than calling
# replace(..., inplace=True) on df['horsepower']: an in-place call on a
# selected column is chained assignment, which is deprecated in pandas 2.x
# and does not modify df under Copy-on-Write.
df['horsepower'] = df['horsepower'].replace('?', np.nan)
df.dropna(subset=['horsepower'], axis=0, inplace=True)
df['horsepower'] = df['horsepower'].astype(float)

# Split the horsepower range into 3 equal-width bins; np.histogram returns
# the per-bin counts and the 4 bin edges.
count, bin_dividers = np.histogram(df['horsepower'], bins=3)
print(bin_dividers)

# Label each row with its bin (low / medium / high output).
# include_lowest=True makes the first interval closed on the left so the
# minimum horsepower value falls into the first bin instead of becoming NaN.
bin_names = ['저출력', '보통출력', '고출력']
df['hp_bin'] = pd.cut(
    x=df['horsepower'],
    bins=bin_dividers,
    labels=bin_names,
    include_lowest=True,
)
print(df[['horsepower', 'hp_bin']].head(10))

# One-hot encode the bin labels: one indicator column per category.
horsepower_dummies = pd.get_dummies(df['hp_bin'])
print(horsepower_dummies.head(10))
'python' 카테고리의 다른 글
시계열 데이터 (0) | 2022.02.22 |
---|---|
pandas index (0) | 2022.02.22 |
누락 데이터 치환 (0) | 2022.02.20 |
누락데이터 처리 (0) | 2022.02.20 |
pandas matplotlib folium (0) | 2022.02.20 |