Series

출처 : 금융 데이터 분석을 위한 파이썬 판다스

금융 데이터 분석을 위한 파이썬 판다스

최근 인공지능 AI(Artificial Intelligence)가 보급되면서 방대한 양의 데이터를 처리하는 방식이 중요해지기 시작했습니다. 판다스(Pandas)는 오픈 소 ...

wikidocs.net

Series( List , [ index , dtype ] )

Series( Dict , [ dtype ] )

### keys -> index

시리즈 생성

s = Series(data)

s = Series(data, index)
s = Series(data, index=index)
s = Series(data=data, index=index)
s = Series(index=index, data=data)

from pandas import Series data = [10, 20, 30] s = Series(data) print(s)	0 10 1 20 2 30 dtype: int64
from pandas import Series data = { "2019-05-31": 42500, "2019-05-30": 42550, "2019-05-29": 41800, "2019-05-28": 42550, "2019-05-27": 42650 } s = Series(data) print(s)	2019-05-31 42500 2019-05-30 42550 2019-05-29 41800 2019-05-28 42550 2019-05-27 42650 dtype: int64
from pandas import Series import numpy as np data = np.arange(5) s = Series(data) print(s)	0 0 1 1 2 2 3 3 4 4 dtype: int32
data = ["시가", "고가"] s = Series(data) print(s)	0 시가 1 고가 dtype: object
index = ["시가", "고가"] data = [100, 200] s = Series(data, index) print(s) print(s.index) # Index(['시가', '고가'], dtype='object') print(s.values) # [100 200] s.index.name = '구분' s.name = '자료' print(s)	시가 100 고가 200 dtype: int64 구분 시가 100 고가 200 Name: 자료, dtype: int64
df = DataFrame(s) print(df)	자료 구분 시가 100 고가 200

시리즈 인덱스

data = [1000, 2000, 3000]
s = Series(data)
print(s.index)            # RangeIndex(start=0, stop=3, step=1)
print(s.index.to_list())  # [0, 1, 2]

s.index = ["메로나", "구구콘", "하겐다즈"]
print(s)
"""
메로나     1000
구구콘     2000
하겐다즈    3000
dtype: int64
"""

data = [1000, 2000, 3000]
index = ["메로나", "구구콘", "하겐다즈"]
s = Series(data=data, index=index)
print(s)
"""
메로나     1000
구구콘     2000
하겐다즈    3000
dtype: int64
"""

reindex

data = [1000, 2000, 3000] index = ['메로나', '구구콘', '하겐다즈'] s = Series(data=data, index=index) #Series.reindex(index,fill_value=) s2 = s.reindex(["메로나", "비비빅", "구구콘"]) print(s2)	메로나 1000.0 비비빅 NaN 구구콘 2000.0 dtype: float64
s2=s2.fillna(0) print(s2)	메로나 1000.0 비비빅 0.0 구구콘 2000.0 dtype: float64
print(s.reindex(["메로나", "비비빅", "구구콘"], fill_value=0))	메로나 1000 비비빅 0 구구콘 2000 dtype: int64

시리즈 인덱싱/슬라이싱

from pandas import Series, DataFrame
data = [1000, 2000, 3000]
index = ["메로나", "구구콘", "하겐다즈"]
s = Series(data=data,index=index)
print(s)

메로나     1000
구구콘     2000
하겐다즈    3000
dtype: int64

print(s.iloc[0])  # 1000
print(s.iloc[1])  # 2000
print(s.iloc[2])  # 3000
print(s.iloc[-1]) # 3000

print(s.loc["메로나"])  # 1000
print(s.loc["구구콘"])  # 2000
print(s.loc["하겐다즈"])  # 3000

print(s[0])  # 1000
print(s[1])  # 2000
print(s[2])  # 3000

print(s["메로나"])  # 1000
print(s["구구콘"])  # 2000
print(s["하겐다즈"])  # 3000

# Series.iloc[시작인덱스:끝인덱스] - 시작 ~ 끝인덱스 전까지 (끝 미포함) print(s.iloc[0:2])	메로나 1000 구구콘 2000 dtype: int64
# Series.loc[시작라벨:끝라벨] - 시작 ~ 끝라벨까지 (끝 포함) print(s.loc['메로나':'구구콘'])	메로나 1000 구구콘 2000 dtype: int64
indice = [0, 2] print(s.iloc[ indice ])	메로나 1000 하겐다즈 3000 dtype: int64
indice = ["메로나", "하겐다즈"] print(s.loc[ indice ])	메로나 1000 하겐다즈 3000 dtype: int64

시리즈 수정/추가/삭제

# 값 수정 s.loc['메로나'] = 500 # 동일기능 s.iloc[0] = s['메로나'] = s[0] = 500 print(s) # s.iloc[ : 2] = 100 가능	메로나 500 구구콘 2000 하겐다즈 3000 dtype: int64
# 데이터 삭제 s1 = s.drop('메로나') print(s1)	구구콘 2000 하겐다즈 3000 dtype: int64
# 값 추가 s.loc['비비빅'] = 500 print(s)	메로나 500 구구콘 2000 하겐다즈 3000 비비빅 500 dtype: int64

시리즈 연산

철수 = Series([10, 20, 30], index=['NAVER', 'SKT', 'KT']) 영희 = Series([10, 30, 20], index=['SKT', 'KT', 'NAVER']) 가족 = 철수 + 영희 print(가족)	KT 60 NAVER 30 SKT 30 dtype: int64
print(철수 * 10)	NAVER 100 SKT 200 KT 300 dtype: int64
high = Series([42800, 42700, 42050, 42950, 43000]) low = Series([42150, 42150, 41300, 42150, 42350]) diff = high - low diff.index = ['월','화','수','목','금'] print(diff)	월 650 화 550 수 750 목 800 금 650 dtype: int64
# 최대.최소값의 index 반환 idx_max = diff.idxmax() # 목 idx_min = diff.idxmin() # 화
# max,min등 함수가능 print(diff.max()) # max(diff)	800
date = ["6/1", "6/2", "6/3", "6/4", "6/5"] high = Series([42800, 42700, 42050, 42950, 43000], index=date) low = Series([42150, 42150, 41300, 42150, 42350] , index=date) profit = high / low print(profit) # 누적곱 print( profit.cumprod( ) ) # 최종 수익률 print( profit.cumprod( ).iloc[ -1 ] )	6/1 1.015421 6/2 1.013049 6/3 1.018160 6/4 1.018980 6/5 1.015348 dtype: float64 6/1 1.015421 6/2 1.028671 6/3 1.047351 6/4 1.067230 6/5 1.083610 dtype: float64 1.0836101509172456
data = { "삼성전자": "전기,전자", "LG전자": "전기,전자", "현대차": "운수장비", "NAVER": "서비스업", "카카오": "서비스업" } s = Series(data) # 중복을 제거한 고유값 print(s.unique())	삼성전자 전기,전자 LG전자 전기,전자 현대차 운수장비 NAVER 서비스업 카카오 서비스업 dtype: object ['전기,전자' '운수장비' '서비스업']

map

from pandas import Series s = Series(["1,234", "5,678", "9,876"]) print( int(s) ) # 콤마가 포함된 문자열은 int()/astype(int)로 바로 변환 불가	에러발생 => Series.map 이용 (DataFrame은 applymap)
s = Series(["1,234", "5,678", "9,876"]) def remove_comma(x) : return int(x.replace(",", "")) result = s.map(remove_comma) # Series.map(함수) print(result)	0 1234 1 5678 2 9876 dtype: int64
def is_greater_than_5000(x): if x > 5000: return "크다" else: return "작다" s = Series([1234, 5678, 9876]) s = s.map(is_greater_than_5000) print(s)	0 작다 1 크다 2 크다 dtype: object

필터링

from pandas import Series data = [42500, 42550, 41800, 42550, 42650] index = ['2019-05-31', '2019-05-30', '2019-05-29', '2019-05-28', '2019-05-27'] s = Series(data=data, index=index) cond = s > 42000 print(cond)	2019-05-31 True 2019-05-30 True 2019-05-29 False 2019-05-28 True 2019-05-27 True dtype: bool
print(s[cond])	2019-05-31 42500 2019-05-30 42550 2019-05-28 42550 2019-05-27 42650 dtype: int64
close = [42500, 42550, 41800, 42550, 42650] open = [42600, 42200, 41850, 42550, 42500] index = ['2019-05-31', '2019-05-30', '2019-05-29', '2019-05-28', '2019-05-27'] open = Series(data=open, index=index) close = Series(data=close, index=index) cond = close > open print(cond)	2019-05-31 False 2019-05-30 True 2019-05-29 False 2019-05-28 False 2019-05-27 True dtype: bool
cond = close > open print(close[cond])	2019-05-30 42550 2019-05-27 42650 dtype: int64
print(close.index[close > open]) # Series.index[condition] print(close[close > open].index) # Series[condition].index	Index(['2019-05-30', '2019-05-27'], dtype='object')

정렬 및 순위

from pandas import Series data = [3.1, 2.0, 10.1, 5.1] index = ['000040','000010', '000030', '000020'] s = Series(data=data, index=index) print(s)	000040 3.1 000010 2.0 000030 10.1 000020 5.1 dtype: float64
# 정렬 (오름차순) s1 = s.sort_values() print(s1)	000010 2.0 000040 3.1 000020 5.1 000030 10.1 dtype: float64
# 정렬 (내림차순) s2 = s.sort_values(ascending=False) print(s2)	000030 10.1 000020 5.1 000040 3.1 000010 2.0 dtype: float64
# index 오름차순 정렬 s = s.sort_index() print(s)	000010 2.0 000020 5.1 000030 10.1 000040 3.1 dtype: float64
data = [3.1, 2.0, 10.1, 3.1] index = ['000010', '000020', '000030', '000040'] s = Series(data=data, index=index) print(s.rank()) # 오름차순 순위	000010 2.5 000020 1.0 000030 4.0 000040 2.5 dtype: float64
# 내림차순 순위 print(s.rank(ascending=False))	000010 2.5 000020 4.0 000030 1.0 000040 2.5 dtype: float64

ch03정리.ipynb

0.03MB

저작자표시 (새창열림)

'DataFrame' 카테고리의 다른 글

판다스 데이터프레임 (데이터 읽기/ 저장하기) (0)	2022.04.07
판다스 데이터프레임 (고급기능2) (0)	2022.04.07
판다스 데이터프레임 (고급기능1) (0)	2022.04.07
판다스 데이터프레임 (0)	2022.04.07
ndarray (0)	2022.04.07

자동매매

Series

출처 : 금융 데이터 분석을 위한 파이썬 판다스

Series( List , [ index , dtype ] )

Series( Dict , [ dtype ] )

시리즈 생성

시리즈 인덱스

reindex

시리즈 인덱싱/슬라이싱

시리즈 수정/추가/삭제

시리즈 연산

map

필터링

정렬 및 순위

'DataFrame' 카테고리의 다른 글

댓글

티스토리툴바

Series

출처 : 금융 데이터 분석을 위한 파이썬 판다스

Series( List , [ index , dtype ] )

Series( Dict , [ dtype ] )

시리즈 생성

시리즈 인덱스

reindex

시리즈 인덱싱/슬라이싱

시리즈 수정/추가/삭제

시리즈 연산

map

필터링

정렬 및 순위

'DataFrame' 카테고리의 다른 글

관련글

댓글

티스토리툴바