본문 바로가기
python 기본개념

네이버 증권 크롤링

by 자동매매 2022. 3. 18.

파이썬에서는 requests 모듈을 이용해 HTML 코드를 다운로드하고 BeautifulSoup 모듈로 원하는 데이터를 파싱합니다.

 

설치 모듈 : requests / bs4 / html5lib / lxml / pandas

 

1. 파싱

from bs4 import BeautifulSoup

# Sample document used to demonstrate CSS-selector based parsing.
html = '''
<html>
    <table border=1> 
        <tr>
            <td> 항목 </td> 
            <td> 2013 </td> 
            <td> 2014 </td> 
            <td> 2015 </td>
        </tr> 
        <tr>
            <td> 매출액 </td> 
            <td> 100 </td> 
            <td> 200 </td>
            <td> 300 </td>
        </tr> 
    </table>
    <ul>
        <li> 100 </li> 
        <li> 200 </li>
    </ul> 
    <ol>
        <li> 300 </li> 
        <li> 400 </li>
    </ol>
</html>
'''
soup = BeautifulSoup(html, 'html5lib')

# Each entry pairs the printed label with the CSS selector it demonstrates:
#   1 - every <td>, 2 - the first <td> of each row, 3 - <li> items under <ul>.
demo_selectors = (
    ("1: ", 'td'),
    ("2: ", 'td:nth-of-type(1)'),
    ("3: ", 'ul li'),
)
for label, selector in demo_selectors:
    result = soup.select(selector)
    print(label, result)

# `result` still holds the last selection ('ul li'); print only its text.
for item in result:
    print(item.text)

2. HTML 코드 다운로드

네이버 기업현황 크롤링

import requests

# Download the Naver company-overview page for stock code 035720 and dump its raw HTML.
company_page_url = "http://companyinfo.stock.naver.com/v1/company/c1010001.aspx?cmp_cd=035720"
response = requests.get(company_page_url)
print(response.text)

현금배당 수익률 추출

import requests
import re
import  pandas  as  pd


def get_financial_statements(code):
    """Fetch the '현금배당수익률' (cash dividend yield) row for a stock.

    The company page is downloaded first to extract the ``encparam`` and
    ``id`` tokens embedded in it; those tokens are then passed to the AJAX
    endpoint that serves the annual financial summary table.

    Args:
        code: Stock code inserted into the request URLs (e.g. ``"035720"``).

    Returns:
        dict: Maps the first seven characters of each column label under
        '연간연간컨센서스보기' to that column's value in the
        '현금배당수익률' row. Labels presumably look like ``YYYY/MM``
        after truncation -- TODO confirm against live data.

    Raises:
        ValueError: If the ``encparam`` or ``id`` token cannot be found in
            the company page.
    """
    # Local import keeps the file's import block untouched.
    from io import StringIO

    # Non-greedy capture: stop at the first closing quote instead of the last
    # quote on the line.
    re_enc = re.compile(r"encparam: '(.*?)'", re.IGNORECASE)
    re_id = re.compile(r"id: '([a-zA-Z0-9]*)' ?", re.IGNORECASE)

    url = "http://companyinfo.stock.naver.com/v1/company/c1010001.aspx?cmp_cd={}".format(code)
    # A timeout prevents the call from blocking forever on a stalled server.
    html = requests.get(url, timeout=10).text

    enc_match = re_enc.search(html)
    id_match = re_id.search(html)
    if enc_match is None or id_match is None:
        raise ValueError(
            "could not find encparam/id tokens in the company page for code {!r}".format(code)
        )
    encparam = enc_match.group(1)
    encid = id_match.group(1)

    url = "http://companyinfo.stock.naver.com/v1/company/ajax/cF1001.aspx?cmp_cd={}&fin_typ=0&freq_typ=A&encparam={}&id={}".format(code, encparam, encid)
    # NOTE(review): the endpoint presumably rejects requests without a Referer
    # header; the value itself looks arbitrary -- confirm.
    headers = {"Referer": "HACK"}
    html = requests.get(url, headers=headers, timeout=10).text

    # Passing literal HTML to read_html is deprecated; wrap it in a file-like object.
    dfs = pd.read_html(StringIO(html))
    df = dfs[1]['연간연간컨센서스보기']
    # Label the rows with the item names from the '주요재무정보' column.
    df.index = dfs[1]['주요재무정보'].values.flatten()
    df = df.loc['현금배당수익률']
    # Keep only the first seven characters of each column label.
    df.index = df.index.str[:7]

    return df.to_dict()

if __name__ == "__main__":
    # Demo run: fetch and print the dividend-yield mapping for stock code 035720.
    dividend_dict = get_financial_statements("035720")
    print(dividend_dict)

webreader.py

from bs4 import BeautifulSoup
import requests
from 배당률 import get_financial_statements
import datetime


def get_3year_treasury():
    """Scrape the index.go.kr table and key its cells by year from 1998.

    Every ``<td>`` found under a ``<tr>`` is assigned, in document order, to
    consecutive years starting at 1998 (the request asks for the period
    1998:2016). The mapping is printed and then returned.

    Returns:
        dict: ``{year: cell_text}`` with integer years as keys.
    """
    page = requests.get(
        "http://www.index.go.kr/strata/jsp/showStblGams3.jsp?stts_cd=288401&idx_cd=2884&freq=Y&period=1998:2016"
    ).text
    cells = BeautifulSoup(page, 'html5lib').select("tr td")

    first_year = 1998
    treasury_3year = {
        first_year + offset: cell.text for offset, cell in enumerate(cells)
    }

    print(treasury_3year)
    return treasury_3year

def get_current_3year_treasury():
    """Return the text of the second table cell on Naver's IRR_GOVT03Y page.

    Returns:
        str: Text of the second ``<td>`` matched by the ``tr td`` selector.
        Presumably this is the latest 3-year government bond yield -- verify
        against the page layout.
    """
    page = requests.get(
        "https://finance.naver.com/marketindex/interestDetail.naver?marketindexCd=IRR_GOVT03Y"
    ).text
    cells = BeautifulSoup(page, 'html5lib').select("tr td")
    second_cell = cells[1]
    return second_cell.text

def get_dividend_yield(code):
    """Scrape the dividend yield shown on a stock's Naver company page.

    Args:
        code: Stock code string appended to the request URL.

    Returns:
        str: The second space-separated token of the second-to-last
        ``td dl dt`` element, with its final character removed (presumably
        a trailing percent sign -- verify against the page).
    """
    # NOTE(review): this path has no "/v1/" segment, unlike the URLs built in
    # get_financial_statements -- confirm it is still served.
    base_url = "http://companyinfo.stock.naver.com/company/c1010001.aspx?cmp_cd="
    page = requests.get(base_url + code).text

    dt_nodes = BeautifulSoup(page, 'html5lib').select("td dl dt")

    label_text = dt_nodes[-2].text
    value_token = label_text.split(' ')[1]
    return value_token[:-1]

def get_estimated_dividend_yield(code):
    """Return the dividend yield stored under the key that sorts last.

    Args:
        code: Stock code forwarded to ``get_financial_statements``.

    Returns:
        The value paired with the greatest key of the mapping; presumably
        the most recent (estimated) period -- verify against the key format.
    """
    yields_by_period = get_financial_statements(code)
    ordered_items = sorted(yields_by_period.items())
    _, latest_value = ordered_items[-1]
    return latest_value

def get_previous_dividend_yield(code):
    """Return dividend yields for the five calendar years before this one.

    Args:
        code: Stock code forwarded to ``get_financial_statements``.

    Returns:
        dict: ``{year: value}`` with integer years as keys. Years with no
        matching entry in the fetched data are omitted.
    """
    dividend_yield = get_financial_statements(code)
    cur_year = datetime.datetime.now().year

    previous_dividend_yield = {}

    for year in range(cur_year - 5, cur_year):
        year_prefix = str(year)
        # get_financial_statements returns a dict whose keys are label
        # prefixes of up to seven characters, so match on the leading year
        # rather than relying on a pandas-style `.index` or an exact key.
        # `.items()` also works if a pandas Series is returned instead.
        for period, value in dividend_yield.items():
            if str(period).startswith(year_prefix):
                previous_dividend_yield[year] = value
                break

    return previous_dividend_yield

if __name__ == "__main__":
    # Demo run against a single stock code; each result is printed in turn.
    sample_code = '058470'

    print(get_dividend_yield(sample_code))
    print(get_estimated_dividend_yield(sample_code))
    print(get_current_3year_treasury())
    print(get_previous_dividend_yield(sample_code))

'python 기본개념' 카테고리의 다른 글

거래량 급등종목 포착  (0) 2022.03.18
보유종목현황  (0) 2022.03.17
주문창 구현  (0) 2022.03.17
pyinstaller로 만든 실행파일(exe) 에러처리  (0) 2022.03.15
pycharm 실행파일 만들기  (0) 2022.03.15

댓글