"""
Data-Frame tools module
*********************************
.. toctree::
:maxdepth: 2
ut_5tout_6
This is the main toolbox designed to aid main processsing the Mixed-layer height data frames through the use of
the Python Module `Pandas <https://pandas.pydata.org/>`_.
"""
# -*- coding: utf8 -*-
import matplotlib.pyplot as plt
import numpy as np
import matplotlib
import pandas as pd
import glob
import os
import sys
import datetime
from ceilotools import *
#from growth_rates import *
#filename='/home/jorge/Documentos/Python/Tesis/Results/201607_10min_db.csv'
#output='/home/jorge/Documentos/Python/Tesis/Results/Plots/'
#df=pd.read_csv(filename,index_col='Fecha y hora')
#df.index=df.index.rename('Fecha y hora')
#df.index=pd.to_datetime(df.index)
#df=df[df > 100]
#Function to reset index of data frame, changing from UT-5 to UT-6 the dates needed.
[docs]def ut_5tout_6(df):
r"""
Convert times in UTC-5 to UTC-6 and make database time-homogeneus.
**Parameters**
**df**: `pandas.dataframe`
Pandas Data-Frame object containing a datetime index and columns of MLH.
:rtype: `df` pandas dataframe
*See Also*
:ref:`mlhtodf`.
"""
# Index values
indice=df.index.values
for i,dt in enumerate(df.index):
if ((dt.month==4 and dt.day<=6)):
indice[i]=dt-datetime.timedelta(hours=1)
df.index=indice
return df
def hourly(df,show,save,name,wstd):
yerror=None
hourly=df.groupby(df.index.hour).std()
tvec=range(1,25)
hourly=np.asarray(hourly)
bsicplot(u'Desviacion estandar horaria',hourly,tvec,show,save,name,wstd,yerror)
def corrplot(dx,carpeta):
df=dx['C2']
odf=dx['Delice']
minavg=df.groupby([df.index.hour,df.index.minute]).mean()
total=odf.groupby([odf.index.hour,odf.index.minute]).mean()
minavg=np.asarray(minavg)
filtered=np.asarray(total)
r=np.corrcoef(minavg,filtered)
r0=np.around(r[0,1],4)
plt.scatter(minavg,filtered,s=5,c='k')
plt.xlabel('Raw MH [m]',fontsize=16)
plt.ylabel('Filtered MH [m]',fontsize=16)
plt.title('Mean MH daily evolution comparison',fontsize=18)
# plt.xlim([500,2500])
plt.text(800,3000,r'$r= '+str(r0)+'$'+'\n$n= '+str(len(minavg))+'$',fontsize=15)
plt.grid()
plt.savefig(carpeta+'scatcomparison_2.eps')
plt.show()
[docs]def minutes(df,odf,show,rmean,save,name,wstd,c):
r"""
Plot mean diurnal evolution of MLH
**Parameters**
**df**: `pandas.dataframe`
Pandas Data-Frame object containing a datetime index and columns of MLH.
**odf**: `pandas.dataframe`
Second Pandas Data-Frame object containing a datetime index and columns of MLH if anomaly is to be plotted.
**show**: `Boolean`
True or False if you wish the plot to be shown.
**rmean**: `Boolean`
True or False if you wish to see the plot smoothed by our own function of runnin mean :ref:`runningMeanFast`.
**save**:`Boolean`
True or False if you wish to save the plot.
**name**:`str`
String of saved plot
**wstd**:`Boolean`
True or False if intention is to plot standard deviaiton and not mean.
**c**:`string`
Color string: choose from `colors <https://matplotlib.org/examples/color/named_colors.html>`_
:rtype: `df` pandas dataframe
*See Also*
:ref:`bsicplot`.
"""
#print minavg
if wstd:
minavg=df.groupby([df.index.hour,df.index.minute]).std()
title=u'Desviacion estandar'
ylabel=r'$\sigma$ (m)'
else:
minavg=df.groupby([df.index.hour,df.index.minute]).mean()
total=odf.groupby([odf.index.hour,odf.index.minute]).mean()
anomal=minavg-total
#minavg=minavg-total
#title=u'Term diurnal evolution (anomaly)'
title=u'MH mean diurnal evolution'
ylabel=r'MH (m)'
minarray=np.asarray(minavg)
if rmean:
minarray=runningMeanFast(minarray,4)
tvec=frange(0,24,float(10)/60.0)
bsicplot(title,minarray,tvec,show,save,name,ylabel,c)
[docs]def bsicplot(titles,y,tvec,show,save,name,ylabel,c):
r"""
Basic plot function for a 24-hr plot
**Parameters**
**titles**: `string`
String for title of plot
**y**: `np.array`
Array to plot
**tvec**:`np.array`
Time-vector.
**show**: `Boolean`
True or False if you wish the plot to be shown.
**rmean**: `Boolean`
True or False if you wish to see the plot smoothed by our own function of runnin mean :ref:`runningMeanFast`.
**save**:`Boolean`
True or False if you wish to save the plot.
**name**:`str`
String of saved plot
**ylabel**:`string`
Label of y-axis.
**c**:`string`
Color string: choose from `colors <https://matplotlib.org/examples/color/named_colors.html>`_
:rtype: `df` pandas dataframe
*See Also*
:ref:`minutes`.
"""
plt.plot(tvec,y,c,label=name,linewidth=1.5)
plt.title(titles,fontsize=20)
plt.xlabel(u'Time (UTC-6)',fontsize=18)
plt.xticks(np.arange(0,25,2))
plt.xticks(size=16)
plt.yticks(size=16)
plt.ylabel(ylabel,fontsize=18)
plt.xlim([0,24])
plt.grid()
if save:
plt.savefig(name)
if show:
plt.show()
plt.close()
def bimonthly(df,out):
i=0
#print i
tvec=frange(0,24,float(10)/60.0)
j=1
plt.figure(figsize=(15,10))
c=['b','r','g','c']
while j<12:
if j==100:
ij=12
mn=2
else:
ij=j+2
mn=2
newdf=df[(df.index.month==j)|(df.index.month==j+1)]
j+=mn
total=newdf.groupby([newdf.index.hour,newdf.index.minute]).mean()
argu=df.groupby([df.index.hour,df.index.minute]).mean()
#for date in newdf.index:
# print date
total=np.asarray(total)
dif=total-np.array(argu)
dif=runningMeanFast(dif,6)
plt.plot(tvec,dif)
#minutes(newdf,df,False,True,False,'bimestral.png',False,'-')
i+=1
tot=df.groupby([df.index.hour,df.index.minute]).mean()
difi=np.asarray(tot)
#dif=np.diff(difi)
#plt.plot(tvec,difi,'--')
plt.grid()
#minutes(df,df,False,True,False,'b',False,'-')
plt.xlim([0,24])
plt.xlabel('Time (UT-6)',fontsize=18)
plt.ylabel(r'MLH anomaly (m)',fontsize=18)
#minutes(df,df,False,True,False,'bimestral.png',False,'--')
plt.grid()
# legend=plt.legend(['Jan-Feb','Mar-Apr','May-Jun','Jul-Aug','Sep-Oct','Nov-Dec'],loc='upper left',title='Term',fontsize=13)
legend=plt.legend(['JF','MA','MJ','JA','OS','ND','Annual mean'],loc='upper left',fontsize=14)
plt.grid()
# legend.get_title().set_fontsize('14')
#plt.grid()
plt.savefig(out)
plt.show()
plt.close()
#bimestre=df.groupby([df.index.hour,df.index.minute,df.index.month])
#print bimestre.get_group((0,0,1:2))
def longmaxmin(meses, maximo):
counter=0
cdate=meses[counter]
startyear=2011
mesesits=['Ene','Feb','Mar','Apr','May','Jun','Jul','Ago','Sep','Oct','Nov','Dic']
while startyear!=2017:
lista=[]
while cdate.year==startyear:
lista.append(maximo[counter])
counter+=1
cdate=meses[counter]
if cdate == meses[-1]:
break
lista=np.array(lista)
valmin=min(lista)
value=max(lista)
index=np.where(lista==value)
indicit=np.where(lista==valmin)
startyear+=1
def yearly(df,filtered,output,name):
yr1=df.groupby([df.index.hour,df.index.minute,df.index.year]).mean()
yrly=df.groupby([df.index.hour,df.index.minute,df.index.year]).mean().unstack()
# print yrly
columns=yrly.columns
tvec=frange(0,24,10.0/60.0)
index=range(2008,2016)
figu=plt.figure()
ax= figu.add_subplot(1,1,1)
for i,j in enumerate(index):
if filtered:
if j==2008 or j==2010:# or j==2012 or j==2009:
continue
y=np.asarray(yrly[columns[i]])
y=runningMeanFast(y,3)
ax.plot(tvec,y,label=str(index[i]))
plt.xticks(np.arange(0,24,2))
plt.grid()
ax=customax(ax,'Promedios anuales cada 10 minutos','lower right')
plt.show()
plt.savefig(output+name+'.png')
#plt.show()
plt.close()
def customax(ax,titles,legendloc):
plt.legend(loc=legendloc)
plt.title(titles)
plt.xlim([0, 24])
plt.xlabel('Horas')
plt.ylabel('Altura de capa de mezcla (m)')
#minutes(df,True,True,True,output+'10minavg_rmean_filtered.png',False)
#yearly(df,True)
#bimonthly(df)
def pullclouds(df,cloudfile):
cloudf=pd.read_csv(cloudfile)
indx=cloudf.columns[1]
dfsnubs=df.copy()
cloudate=pd.to_datetime(cloudf[indx])
cloudate=cloudate.tolist()
for i,j in enumerate(cloudate):
dfsnubs[j]=np.nan
return dfsnubs
def cleanday(df):
start=datetime.datetime(2008,1,1,12,0)
end=datetime.datetime(2008,1,1,18,50)
final=datetime.datetime(2016,5,1,0,0)
newdates=[]
index=0
df=pd.DataFrame(data=df,index=df.index)
df.index=pd.to_datetime(df.index)
horas=[]
maxi=[]
mini=[]
imin=[]
imax=[]
#print type(df.index)
while start != final:
startd=df.index.searchsorted(start)
endd=df.index.searchsorted(end)
newdf=df[startd:endd]
if len(newdf.index) > (4*6):
newdates.append(newdf.index[0])
nwdf=newdf.groupby([newdf.index.hour]).mean()
maxi.append(nwdf.max())
mini.append(nwdf.min())
imax.append(nwdf.idxmax())
imin.append(nwdf.idxmin())
index+=len(newdf.index)
start=start+datetime.timedelta(days=1)
end=end+datetime.timedelta(days=1)
#df=df.groupby([df.index.hour,df.ixndex.month,df.index.year]).mean()
#df=df.groupby([df.index.day,df.index.month,df.index.year]).max().unstack()
#print df
newdates=np.array(newdates)
maxis=np.array(maxi)
plt.plot(newdates,maxis)
plt.show()
def minimum(mlh,dat):
#First check
fc=np.min(mlh)
#Second check
sc=mlh[np.argmax(np.diff(mlh))]
if fc == sc:
#print 'first try'
return sc
else:
st=[]
diff=0
for j in range(len(mlh)-3):
dife=mlh[j+3]-mlh[j]
if dife > diff:
diff=dife
savej=j
if mlh[savej]==fc or mlh[savej]==sc:
# print 'second try'
return mlh[savej]
else:
dif2=0
dif4=0
for j in range(len(mlh)-4):
df4=mlh[j+4]-mlh[j]
df2=mlh[j+2]-mlh[j]
if df2 > dif2 and df4 >dif4:
dif4=df4
dif2=df2
fj=j
if mlh[fj]==fc or mlh[fj]==sc or fj == savej:
# print '3rd try'
return mlh[fj]
else:
# print 'missing'
# print savej,fj,np.argmin(mlh),np.argmax(np.diff(mlh))
if np.abs(savej - np.argmax(np.diff(mlh))) <=3:
nj=int((savej+np.argmax(np.diff(mlh)))/2.)
return mlh[nj]
else:
avgn=int((savej+np.argmax(np.diff(mlh))+fj)/3.)
return mlh[avgn]
def growth(dates,mlh):
mlh=np.array(mlh)
fmlh=mlh
imin=np.argmin(mlh)
imax=np.argmax(mlh)
if imin < imax:
dates=dates[imin:imax]
mlh=mlh[imin:imax]
date0=dates
xdate=[]
for dt in dates:
m=dt.minute
h=dt.hour
add=m/60.
xdate.append(h+add)
#print np.polyfit(xdate,mlh,1)
r0= np.corrcoef(xdate, mlh)[0,1]
r2= r0**2.
date0=xdate
stindex=len(mlh)
r=r0
x=True
while x:
back=mlh[:-1]
front=mlh[1:]
bdt=xdate[:-1]
fdt=xdate[1:]
backr=np.corrcoef(bdt,back)[0,1]
fr=np.corrcoef(fdt,front)[0,1]
if backr > r and backr> fr:
xdate=xdate[:-1]
mlh=mlh[:-1]
elif fr > r and fr > backr:
xdate=xdate[1:]
mlh=mlh[1:]
else:
x=False
r=np.corrcoef(xdate,mlh)[0,1]
if len(mlh)<=20:
x=False
# print 'entering while'
r0=r
# print r
maxindex=stindex-len(mlh)
m,b=np.polyfit(xdate,mlh,1)
xdate=np.array(xdate)
nmlh=xdate*m+b
#plt.figure(2)
#plt.plot(date0,fmlh,label='original')
#plt.plot(xdate,nmlh,label='model')
#plt.legend()
#plt.grid()
#plt.show()
#plt.close()
return m,maxindex
def grwthrate(dates,mlh):
onemax=np.argmax(np.diff(mlh))
nmlh=[]
nmlh2=[]
dat=[]
dat2=[]
meses=[]
#plt.figure(2)
#plt.plot(dates,mlh)
#plt.show()
mesesito=dates[0].month
meses.append(mesesito)
for i,dt in enumerate(dates):
if dt.hour > 6.0 and dt.hour < 15.0:
# print dt.minute
nmlh.append(mlh[i])
dat.append(dates[i])
if dt.hour < 11.5:
nmlh2.append(mlh[i])
dat2.append(dates[i])
dindex=np.where(mlh==nmlh[0])
indice=minimum(nmlh2,dat2)
indice=np.where(nmlh==indice)
indice=indice[0][0]
#print dat2[indice],nmlh2[indice]
nlmh=nmlh[indice:]
# print nlmh
dat=dat[indice:]
m,maxindex=growth(dat,nlmh)
return m,dindex+indice,maxindex
def mlhmax(df,c,f,axarr,lab,name):
df.index=pd.to_datetime(df.index)
#print df.tail
#dx=df.truncate(after='2016-05-30 00:00:00',before='2011-01-01 00:00:00')
#dx=df[(df.index.month<6)or(df.index.year<2017)]
# print dx.head, dx.tail
dx=df
xy=df.groupby([df.index.year,df.index.month,df.index.day,df.index.hour]).mean()
xyz=xy.unstack()
lista=[]
data=df
mes=xy.index[0][1]
year=xy.index[0][0]
day=xy.index[0][2]
list2=[]
maxday=[]
minday=[]
listmin=[]
dates=[]
maxi=[]
dates=[]
mini=[]
meses=[]
for i,j in enumerate(xy.index):
if xy.index[i][2]!=day:
if len(list2)>2:
try:
maxday.append(np.nanmax(list2))
except:
pass
if len(listmin)>2:
try:
minday.append(min(listmin))
except:
print('Vacio')
if xy.index[i][1]!= mes:
#print j
maxi.append(np.mean(maxday))
mini.append(np.mean(minday))
maxday=[]
minday=[]
m=datetime.datetime(xy.index[i-1][0],xy.index[i-1][1],1)
meses.append(m)
mes=xy.index[i][1]
listmin=[]
list2=[]
dates.append(datetime.datetime(xy.index[i-1][0],xy.index[i-1][1],xy.index[i-1][2]))
day=xy.index[i][2]
if xy.index[i][3] >11 and xy.index[i][3]<19:
list2.append(xy.loc[j])
elif xy.index[i][3] <10 and xy.index[i][3]>2:
listmin.append(xy.loc[j])
if xy.index[i][0]==2016 and xy.index[i][1]==5:
break
maxi=runningMeanFast(maxi,4)
# maxi=runningMeanFast(maxi,3)
mini=runningMeanFast(mini,5)
# mini=runningMeanFast(mini,3)
monthly=data.groupby([data.index.year,data.index.month,data.index.hour,data.index.minute]).mean()
meanly=data.groupby([data.index.year,data.index.month]).mean()
meanly=dx.groupby([dx.index.year,dx.index.month]).mean()
# print meanly
meanly=np.asarray(meanly)
# meanly=runningMeanFast(meanly,4)
meanly=runningMeanFast(meanly,3)
ano=monthly.index[0][0]
mes=monthly.index[0][1]
fdatey=2017
fdatem=5
j=0
# print ano, mes
#print meses, maxi
#longmaxmin(meses,maxi)
m=[]
dt=[]
while not(ano == fdatey and mes >= fdatem):
date=[datetime.datetime(monthly.index[j][0],monthly.index[j][1],1,monthly.index[j][2],monthly.index[j][3])]
x=np.array(monthly.loc[monthly.index[j]])
while mes == monthly.index[j][1]:
mes=monthly.index[j][1]
date.append(datetime.datetime(monthly.index[j][0],monthly.index[j][1],1,monthly.index[j][2],monthly.index[j][3]))
x=np.append(x,monthly.loc[monthly.index[j]])
j+=1
# x=yy
# print x
m1,minimo,maxis=grwthrate(date,x)
# print m1
m.append(m1)
dt.append(datetime.datetime(date[0].year,date[0].month,1))
# plt.plot(date,x)
ano=monthly.index[j][0]
mes=monthly.index[j][1]
j+=1
dt=np.array(dt)
#global m1
#m2=m
m1=runningMeanFast(m,4)
m2=runningMeanFast(m1,3)
#longmaxmin(meses,m2)
dates,areas=blhanomaly(df,'name')
axarr[0].set_title('Time series ',fontsize=24)
axarr[0].plot(meses,maxi,c,label=lab,linewidth=1.5)
#label = axarr[0].yaxis.get_major_ticks(.label
labels= axarr[0].yaxis.get_major_ticks()#.set_fontsize(16)
for label in labels:
label=label.label
label.set_fontsize(16)
# axarr[0].set_yticklabels(size=16)
axarr[0].set_ylabel('MH max [m]',fontsize=19)
axarr[0].grid()
# plt.show()
#axarr[0].set_title(r'MLH$_{max}$ time series',fontsize=16)
# axarr[1].set_ylabel(r'MH min (m)',fontsize=20)
# axarr[1].plot(meses,mini,c,label=lab,linewidth=1.5)
#label = axarr[0].yaxis.get_major_ticks(.label
# labels= axarr[1].yaxis.get_major_ticks()#.set_fontsize(16)
# for label in labels:
# label=label.label
# label.set_fontsize(16)
# axarr[0].set_yticklabels(size=16)
axarr[1].plot(dt,m2,c,label=lab,linewidth=1.5)
axarr[1].set_ylabel(r'Growth rate (m/h)',fontsize=20)
labels= axarr[1].yaxis.get_major_ticks()#.set_fontsize(16)
for label in labels:
label=label.label
label.set_fontsize(16)
axarr[1].grid()
# axarr[2].plot(meses,meanly,c,label=lab,linewidth=1.5)
# axarr[2].set_ylabel(r'MH mean [m]',fontsize=20)
# axarr[2].grid()
# labels= axarr[2].yaxis.get_major_ticks()#.set_fontsize(16)
# for label in labels:
# label=label.label
# label.set_fontsize(16)
# axarr[2].grid()
axarr[2].plot(dates,areas,c,label=lab,linewidth=1.5)
axarr[2].set_ylabel(r'Anomaly Area (-)',fontsize=20)
axarr[2].grid()
labels= axarr[2].yaxis.get_major_ticks()#.set_fontsize(16)
for label in labels:
label=label.label
label.set_fontsize(16)
axarr[2].grid()
#axarr[1].set_title(r'MLH$_{min}$ series',fontsize axarr[1].set_ylabel(r'MLH$_{min}$ (m)',fontsize=14)
#bsicplot(u'Serie de tiempo de $MLH_{max}$',maxi,meses,False,False,name,'Altura de capa de mezcla')
#plt.plot(dates,maxi)
#plt.plot(dates,mini)
#plt.show()
plt.savefig(name)
plt.show()#
#xy.plot()
def blhanomaly(df,name):
prom=df.mean()
start=datetime.datetime(2008,12,1,1,0)
df=df['2011':]
total=df.groupby([df.index.hour,df.index.minute]).mean()
year=2011
mes=1
tvec=frange(0,24,10.0/60.0)
areas=[]
dates=[]
while not (year==2017 and mes == 5):
newdf=df[(df.index.month==mes)&(df.index.year==year)]
#print mes, year
#print len(newdf)
media=newdf.groupby([newdf.index.hour,newdf.index.minute]).mean()
anomal=media-total
anomal=anomal.dropna()
#print np.array(anomal)
#print np.trapz(np.array(anomal),dx=10.0/60.0)
areas.append(np.trapz(anomal,dx=10.0/60.0))
dates.append(datetime.datetime(year,mes,1))
mes+=1
if mes>12:
year+=1
mes=1
bd=runningMeanFast(areas,3)
bd=runningMeanFast(bd,3)
#print areas
return dates,bd
#plt.plot(dates,bd)
#plt.grid()
#bd.plot()
# #plt.show()
# x=yy
# df[(df.index.month==j)|(df.index.month==j+1)]
# mes=df.index[0].month
# fechas=[df.index[0]]
# for date in df.index:
# if date.month != mes:
# fechas.append(date)
# mes=date.month
# bd=np.array(bd)
# # for i,b in enumerate(bd):
# #bd[i]=bd[i]-prom
# bd=runningMeanFast(bd,3)
# # bd=runningMeanFast(bd,3)
# print len(fechas), len(bd)
# plt.plot(bd,'k')
# # plt.xlabel('Year',fontsize=13)
# # plt.title('Time series')
# # plt.ylabel('Mean MLH')
# plt.grid()
# # plt.show()
# # plt.savefig(name)
# # x=yy
# plt.xticks(np.arange(0,12),['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dic'])
# plt.xlim([0,11])
# plt.xlabel('Month')
# plt.ylabel('MLH (m)')
# plt.title('Time Series Anomaly')
# # plt.grid()
# # plt.show()
# plt.savefig(name)
def monthtable(df):
total=df.groupby([df.index.hour,df.index.month]).mean().unstack()
df=pd.DataFrame(total)
df=np.around(df,0)
df.columns=['Enero','Febrero','Marzo','Abril','Mayo','Junio','Julio','Agosto','Septiembre','Octubre','Noviembre','Diciembre']
df.index.name='Hora/Mes'
df.to_csv('tablamensual.txt',sep='\t')