Python - Pandas Dataframe - los datos no coinciden con la fuente
Estoy tratando de usar datos de existencias mensuales de Yahoo para analizar patrones. Por alguna razón, las devoluciones mensuales que el programa está escupiendo en un marco de datos para un stock en particular (ATVI) no coinciden con las devoluciones del sitio real de Yahoo. Comparé los retornos mensuales para el período 2015 e incluí columnas para aumentos y disminuciones promedio, así como el número de ocurrencias de cada uno.
Enlace de Yahoo:https://finance.yahoo.com/q/hp?s=ATVI&a=00&b=1&c=2015&d=11&e=31&f=2015&g=m
Mi código:
from datetime import datetime
from pandas_datareader import data, wb
import pandas_datareader.data as web
import pandas as pd
from pandas_datareader._utils import RemoteDataError
import csv
import sys
import os
import time
class MonthlyChange(object):
months = { 0:'JAN', 1:'FEB', 2:'MAR', 3:'APR', 4:'MAY',5:'JUN', 6:'JUL', 7:'AUG', 8:'SEP', 9:'OCT',10:'NOV', 11:'DEC' }
def __init__(self,month):
self.month = MonthlyChange.months[month-1]
self.sum_of_pos_changes=0
self.sum_of_neg_changes=0
self.total_neg=0
self.total_pos=0
def add_change(self,change):
if change < 0:
self.sum_of_neg_changes+=change
self.total_neg+=1
elif change > 0:
self.sum_of_pos_changes+=change
self.total_pos+=1
def get_data(self):
if self.total_pos == 0:
return (self.month,0.0,0,self.sum_of_neg_changes/self.total_neg,self.total_neg)
elif self.total_neg == 0:
return (self.month,self.sum_of_pos_changes/self.total_pos,self.total_pos,0.0,0)
else:
return (self.month,self.sum_of_pos_changes/self.total_pos,self.total_pos,self.sum_of_neg_changes/self.total_neg,self.total_neg)
for ticker in ['ATVI']:
try:
data = web.DataReader(ticker.strip('\n'), "yahoo", datetime(2015,01,1), datetime(2015,12,31))
data['ymd'] = data.index
year_month = data.index.to_period('M')
data['year_month'] = year_month
first_day_of_months = data.groupby(["year_month"])["ymd"].min()
first_day_of_months = first_day_of_months.to_frame().reset_index(level=0)
last_day_of_months = data.groupby(["year_month"])["ymd"].max()
last_day_of_months = last_day_of_months.to_frame().reset_index(level=0)
fday_open = data.merge(first_day_of_months,on=['ymd'])
fday_open = fday_open[['year_month_x','Open']]
lday_open = data.merge(last_day_of_months,on=['ymd'])
lday_open = lday_open[['year_month_x','Open']]
fday_lday = fday_open.merge(lday_open,on=['year_month_x'])
monthly_changes = {i:MonthlyChange(i) for i in range(1,13)}
for index,ym, openf,openl in fday_lday.itertuples():
month = ym.strftime('%m')
month = int(month)
diff = (openf-openl)/openf
monthly_changes[month].add_change(diff)
changes_df = pd.DataFrame([monthly_changes[i].get_data() for i in monthly_changes],columns=["Month","Avg Inc.","Inc","Avg.Dec","Dec"])
print ticker
print changes_df