"""
$Revision: 1.13 $ by Prof. Richard Sonnenfeld, New Mexico Tech Physics Department
$Date: 2020/04/19 06:50:49 $
Runs in python 2.7 or python 3.
To upgrade to python 3 in a Debian-like distro
apt-get install python3-numpy python3-matplotlib python3-tk python3-pandas

Operates on csv files obtained here: https://github.com/nytimes/covid-19-data
Free for all non-commercial use.  Please retain this docstring in your altered code.

USAGE: python cv-trends.py
NOTE: The relevant .csv file should be in the same directory as virus_public.
The basename of the datafile is hard-coded below.
"""
#2019 populations
#https://www2.census.gov/programs-surveys/popest/tables/2010-2019/state/totals/nst-est2019-01.xlsx
pop={'CA':39.512223,'NM':2.096829,'NY':19.453561,'WA':7.614893,
        'TX':28.995,'AZ':7.278}
#These numbers are in millions

def main():
    import numpy as np
    import matplotlib.pyplot as plt
    import pandas as pd
    #Pandas LOVES mixed data.  It reads the table into a Dataframe, each
    #column of which can be treated like a numpy array, but the non numerical columns can
    #be dealt with separately.
    #basename='20200317us-states'
    import matplotlib as mpl
    import datetime
    from matplotlib.dates import DayLocator, DateFormatter, drange


    basename='20200520us-states'
    basename='20200602us-states'
    basename='20200617us-states'
    basename='20200625us-states'
    basename='20200704us-states'
    basename='20200718us-states'
    fname=basename+'.csv'
    fname2=basename[0:10]+'.csv'
    
    x=pd.read_csv(fname)
    y=pd.read_csv(fname2)
    #Note that the variable "cases" is cumulative cases
    cumcase=x.cases
    death=x.deaths
    dates=x.date
    window=5
    
    washcum=cumcase[x.state=='Washington']
    washnew=np.diff(washcum)
    washcum=washcum[1:]
    washsmooth=smooth(washnew,window,'np.hamming')
    
    azcum=cumcase[x.state=='Arizona']
    aznew=np.diff(azcum)
    azcum=azcum[1:]
    azsmooth=smooth(aznew,window,'np.hamming')
    azdeath=death[x.state=='Arizona']
    aznewdeath=np.diff(azdeath)
    azsmoothdth=smooth(aznewdeath,window,'np.hamming')
    azdate=dates[x.state=='Arizona']
    azdate=azdate[1:]
    
    txcum=cumcase[x.state=='Texas']
    txnew=np.diff(txcum)
    txcum=txcum[1:]
    txsmooth=smooth(txnew,window,'np.hamming')
    txdeath=death[x.state=='Texas']
    txnewdeath=np.diff(txdeath)
    txsmoothdth=smooth(txnewdeath,window,'np.hamming')
    txdate=dates[x.state=='Texas']
    txdate=txdate[1:]
    
    calcum=cumcase[x.state=='California']
    calnew=np.diff(calcum)
    calcum=calcum[1:]
    calsmooth=smooth(calnew,window,'np.hamming')
    
    nycum=cumcase[x.state=='New York']
    nynew=np.diff(nycum)
    nycum=nycum[1:]
    nysmooth=smooth(nynew,window,'np.hamming')
    
    nmcum=cumcase[x.state=='New Mexico']
    nmnew=np.diff(nmcum)
    nmcum=nmcum[1:]
    nmsmooth=smooth(nmnew,window,'np.hamming')
    nmdeath=death[x.state=='New Mexico']
    nmnewdeath=np.diff(nmdeath)
    nmsmoothdth=smooth(nmnewdeath,window,'np.hamming')
    nmdate=dates[x.state=='New Mexico']
    nmdate=nmdate[1:]
    
    uscum=y.cases
    usdate=y.date
    usdeath=y.deaths
    usnew=np.diff(uscum)
    ussmooth=smooth(usnew,window,'np.hamming')
    usnewdeath=np.diff(usdeath)
    ussmoothdth=smooth(usnewdeath,window,'np.hamming')
    uscum=uscum[1:]
    usdate=usdate[1:]
    
#   fig, ax = plt.subplots(nrows=1, ncols=1)
#    #plt.loglog(nycum,nynew,calcum,calnew,washcum,washnew,nmcum,nmnew,'k')
#    plt.loglog(nycum,nysmooth,calcum,calsmooth,washcum,washsmooth,nmcum,nmsmooth,'k')
#    plt.loglog(nycum,nysmooth,'bx',calcum,calsmooth,'rx',washcum,washsmooth,'gx',nmcum,nmsmooth,'kx')
#    plt.legend(('New York','California','Washington','New Mexico'),loc='lower right')
#    plt.xlabel('Total Cases')
#    plt.ylabel('Daily New Cases')
#    plt.xlim([1E1,3.3E6])
#    plt.ylim([1E1,3.3E5])
#    plt.title(fname)
#    fig.suptitle('Graph to detect epidemic control in selected US States')
#    #description1='NY, CA, WA begin to show decreasing slopes.  Keep it up!'
#    description2='See https://aatishb.com/covidtrends/ and Minutephysics for explanation.'
#    description3='[Data from The NY Times, based on state & local health agency reports.]'
#    description4='x denotes successive day'
#    authorstring='by Richard Sonnenfeld, New Mexico Tech Physics'
#    #plt.text(0.05, 0.94, description1,fontsize=16,transform=ax.transAxes)
#    plt.text(0.05, 0.89, description2, fontsize=14,transform=ax.transAxes)
#    plt.text(0.05, 0.85, description3, fontsize=14,transform=ax.transAxes)
#    plt.text(0.05, 0.79, description4, fontsize=14,transform=ax.transAxes)
#    plt.text(0.62, -0.13 , authorstring, fontsize=12, transform=ax.transAxes)
#    plt.savefig(basename+'_trend.png')

#    fig2, ax2 = plt.subplots(nrows=1, ncols=1)
#    azc=azcum/pop['AZ'];azs=nysmooth/pop['AZ'] 
#    cac=calcum/pop['CA'];cas=calsmooth/pop['CA'] 
#    nyc=nycum/pop['NY'];nys=nysmooth/pop['NY'] 
#    txc=txcum/pop['TX'];txs=txsmooth/pop['TX'] 
#    wac=washcum/pop['WA'];was=washsmooth/pop['WA'] 
#    plt.loglog(nyc,nys,cac,cas,wac,was,nmc,nms,'k',linewidth=5)
#    plt.legend(('New York','California','Washington','New Mexico'),loc='lower right')
#    plt.xlabel('Total Cases (per million population)')
#    plt.ylabel('Daily New Cases (per million population)')
#    plt.xlim([1E0,10.1E4])
#    plt.ylim([1E0,3.3E3])
#    plt.title(fname)
#    fig2.suptitle('COVID19 cases expressed as cases per million people')
#    description2='See https://aatishb.com/covidtrends/ and Minutephysics for explanation.'
#    description3='[Data from The NY Times, based on state & local health agency reports.]'
#    #description4='x denotes successive day'
#    authorstring='by Richard Sonnenfeld, New Mexico Tech Physics'
#    plt.text(0.05, 0.89, description2, fontsize=14,transform=ax.transAxes)
#    plt.text(0.05, 0.85, description3, fontsize=14,transform=ax.transAxes)
#    plt.text(0.62, -0.13 , authorstring, fontsize=12, transform=ax.transAxes)
#    plt.savefig(basename+'_ppm.png')
    
    fig, ax = plt.subplots(nrows=1, ncols=1)
    lg=np.log10
    plt.plot_date(usdate,lg(usnew),'bx')
    plt.plot_date(usdate,lg(ussmoothdth),'kx')
    hln=2920*np.linspace(1,1.00001,len(usdate))
    plt.plot_date(usdate,lg(hln),'r:')
    date1 = datetime.datetime(2020, 3, 2)
    date2 = datetime.datetime(2020, 8, 1 )
    delta = datetime.timedelta(hours=24)
    dates = drange(date1, date2, delta)
    ax.set_xlim(dates[0], dates[-1])
    plt.ylim([1,5.1])
    ax.set_yticks([lg(10),2,3,lg(2920),4,5])
    ax.set_yticklabels(['10','100','1000','2920','10,000','100,000'])
    majorFmt=mpl.dates.DateFormatter('%b-%d')
    ax.xaxis.set_major_formatter(majorFmt)
    daysLoc=mpl.dates.DayLocator(interval=1)
    ax.xaxis.set_minor_locator(daysLoc)
    #minorFmt=mpl.dates.DateFormatter('%a')
    #ax.xaxis.set_minor_formatter(minorFmt)
    fig.autofmt_xdate(rotation=30,bottom=0.2)
    plt.legend(('New Cases','Daily Deaths','9/11/2001 Deaths'),loc='lower right')
    plt.xlabel('Date')
    plt.ylabel('Daily Counts')
    plt.title(fname2)

    fig.suptitle('New Deaths and New Cases vs time in all USA')
    description2='[Data from The NY Times, based on state & local health agency reports.]'
    description3='-- On May 7, NYT changed from reporting "confirmed" to "probable" cases --'
    description4='x denotes successive day'
    authorstring='by Richard Sonnenfeld, New Mexico Tech Physics'
    plt.text(0.05, 0.96, description2, fontsize=14,transform=ax.transAxes)
    plt.text(0.05, 0.91, description3, fontsize=14,transform=ax.transAxes)
    plt.text(0.05, 0.86, description4, fontsize=14,transform=ax.transAxes)
    plt.text(0.62, -0.23 , authorstring, fontsize=12, transform=ax.transAxes)
    plt.savefig('usdeathvstime.png')
   
#===============
    fig, ax = plt.subplots(nrows=1, ncols=1)
    plt.plot_date(txdate,np.log10(txnew),'bx')
    plt.plot_date(txdate,np.log10(txsmoothdth),'kx')
    ax.set_xlim(dates[0], dates[-1])
    plt.ylim([0,lg(10000)])
    ax.set_yticks([0,lg(2),lg(4),lg(6),1,lg(30),2,lg(300),3,lg(3000),lg(10000)])
    ax.set_yticklabels(['1','2','4','6','10','30','100','300','1000','3000','10000'])
    majorFmt=mpl.dates.DateFormatter('%b-%d')
    ax.xaxis.set_major_formatter(majorFmt)
    daysLoc=mpl.dates.DayLocator(interval=1)
    ax.xaxis.set_minor_locator(daysLoc)
    fig.autofmt_xdate(rotation=30,bottom=0.2)
    plt.legend(('New Cases','Daily Deaths'),loc='lower right')
    plt.xlabel('Date')
    plt.ylabel('Daily Counts')
    plt.title(fname2)
    fig.suptitle('New Deaths and New Cases vs time in Texas')
    description3='[Data from The NY Times, based on state & local health agency reports.]'
    description4='Cases and deaths are 5-day (Hamming) averages (thus you can have 1.5 deaths)'
    authorstring='by Richard Sonnenfeld, New Mexico Tech Physics'
    plt.text(0.05, 0.94, description3, fontsize=14,transform=ax.transAxes)
    plt.text(0.05, 0.89, description4, fontsize=14,transform=ax.transAxes)
    plt.text(0.62, -0.23 , authorstring, fontsize=12, transform=ax.transAxes)
    plt.savefig('txdeathvstime.png')

#===============
    fig, ax = plt.subplots(nrows=1, ncols=1)
    plt.plot_date(azdate,np.log10(aznew),'bx')
    plt.plot_date(azdate,np.log10(azsmoothdth),'kx')
    ax.set_xlim(dates[0], dates[-1])
    plt.ylim([0,lg(10000)])
    ax.set_yticks([0,lg(2),lg(4),lg(6),1,lg(30),2,lg(300),3,lg(3000),lg(10000)])
    ax.set_yticklabels(['1','2','4','6','10','30','100','300','1000','3000','10000'])
    majorFmt=mpl.dates.DateFormatter('%b-%d')
    ax.xaxis.set_major_formatter(majorFmt)
    daysLoc=mpl.dates.DayLocator(interval=1)
    ax.xaxis.set_minor_locator(daysLoc)
    fig.autofmt_xdate(rotation=30,bottom=0.2)
    plt.legend(('New Cases','Daily Deaths'),loc='lower right')
    plt.xlabel('Date')
    plt.ylabel('Daily Counts')
    plt.title(fname2)
    fig.suptitle('New Deaths and New Cases vs time in Arizona')
    description3='[Data from The NY Times, based on state & local health agency reports.]'
    description4='Cases and deaths are 5-day (Hamming) averages (thus you can have 1.5 deaths)'
    authorstring='by Richard Sonnenfeld, New Mexico Tech Physics'
    plt.text(0.05, 0.94, description3, fontsize=14,transform=ax.transAxes)
    plt.text(0.05, 0.89, description4, fontsize=14,transform=ax.transAxes)
    plt.text(0.62, -0.23 , authorstring, fontsize=12, transform=ax.transAxes)
    plt.savefig('azdeathvstime.png')

#===============
    fig, ax = plt.subplots(nrows=1, ncols=1)
    plt.plot_date(nmdate,np.log10(nmnew),'bx')
    plt.plot_date(nmdate,np.log10(nmsmoothdth),'kx')
   # ax.axhline(4,color='green',linestyle='dotted')
    ax.set_xlim(dates[0], dates[-1])
    plt.ylim([0,3.1])
    ax.set_yticks([0,lg(2),lg(4),lg(6),lg(8),1,lg(30),2,lg(300),3])
    ax.set_yticklabels(['1','2','4','6','8','10','30','100','300','1000'])
    majorFmt=mpl.dates.DateFormatter('%b-%d')
    ax.xaxis.set_major_formatter(majorFmt)
    daysLoc=mpl.dates.DayLocator(interval=1)
    ax.xaxis.set_minor_locator(daysLoc)
    #minorFmt=mpl.dates.DateFormatter('%a')
    #ax.xaxis.set_minor_formatter(minorFmt)
    fig.autofmt_xdate(rotation=30,bottom=0.2)
    plt.legend(('New Cases','Daily Deaths'),loc='lower right')
    plt.xlabel('Date')
    plt.ylabel('Daily Counts')
    plt.title(fname2)

    fig.suptitle('New Deaths and New Cases vs time in New Mexico')
    description3='[Data from The NY Times, based on state & local health agency reports.]'
    description4='Cases and deaths are 5-day (Hamming) averages (thus you can have 1.5 deaths)'
    authorstring='by Richard Sonnenfeld, New Mexico Tech Physics'
    plt.text(0.05, 0.94, description3, fontsize=14,transform=ax.transAxes)
    plt.text(0.05, 0.89, description4, fontsize=14,transform=ax.transAxes)
    plt.text(0.62, -0.23 , authorstring, fontsize=12, transform=ax.transAxes)
    plt.savefig('nmdeathvstime.png')


    plt.show()

def isEven(n):
    """Return True for even, False otherwise"""
    return n % 2 == 0


def smooth(x,window_len,window):
    import numpy as np
    """smooth the data using a window with requested size.
    
    This method is based on the convolution of a scaled window with the signal.
    The signal is prepared by introducing reflected copies of the signal 
    (with the window size) in both ends so that transient parts are minimized
    in the begining and end part of the output signal.
    
    input:
        x: the input signal 
        window_len: the dimension of the smoothing window; should be an odd integer
        window: the type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'
            flat window will produce a moving average smoothing.

    output:
        the smoothed signal
        
    example:

    t=linspace(-2,2,31)
    x=sin(t)+np.random.randn(len(t))*0.1
    y=smooth(x) [If put defaults back ... otherwise]
    y=smooth(x,9,'np.hamming')
    
    see also: 
    
    numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve
    scipy.signal.lfilter
 
    TODO: the window parameter could be the window itself if an array instead of a string
    NOTE: length(output) != length(input), to correct this: return y[(window_len/2-1):-(window_len/2)] instead of just y.
    """

    if x.ndim != 1:
        raise (ValueError, "smooth only accepts 1 dimension arrays.")

    if x.size < window_len:
        raise (ValueError, "Input vector needs to be bigger than window size.")

    if x.size < 2*window_len:
        print("Warning [smooth]: This code was intended for long data arrays.  Your array is only slightly longer than the window.  Find another tool!")

    if window_len<3:
        raise (ValueError, "Why are you smoothing with a length 1 or 2 Window?")

    if isEven(window_len):
        raise (ValueError, "Windows for smooth function must be odd numbers.")

    if not window in ['np.flat', 'np.hanning', 'np.hamming', 'np.bartlett', 'np.blackman']:
        raise (ValueError, "Window is not one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")


    s=np.r_[x[window_len-1:0:-1],x,x[-2:-window_len-1:-1]]
    #This construction takes x and appends the last window_len/2 elements of itself, reversed, to its end
    #It also preprends the first window_len/2 elements of itself, reversed, to its beginning
    #print(len(s))

    w=eval(window+'(window_len)')

    y=np.convolve(w/w.sum(),s,mode='valid')
    #The 'valid' mode returns the part of the convolution that is valid (it removes the end effects)

    pad=int(window_len/2)
    #The final result, y, while "valid" is longer than the original vector x because it has
    #pad extra elements on the front and the back
    return y[pad:-pad]
    #This conveniently returns an array the same length that was passed ... which is certainly more convenient since,
    #in general, I am interesting in filtering very long arrays.


main()

#$Log: cv-trends.py,v $
#Revision 1.13  2020/04/19 06:50:49  richard
#Added New Mexico Deaths and Cases vs. time.
#
#Revision 1.12  2020/04/17 18:04:43  richard
#Added national plotting of Deaths and new cases vs. date.
#
#Revision 1.11  2020/04/11 08:46:30  richard
#Added instructions for what libraries to install for python3.
#
#Revision 1.10  2020/04/11 08:44:01  richard
#This is now python3 compatible.
#
#Revision 1.8  2020/04/07 05:36:53  richard
#Now generates ppm figure as well as raw numbers.
#
#Revision 1.7  2020/04/06 05:29:38  richard
#Prepare to add ppm.
#
#Revision 1.6  2020/04/04 13:02:43  richard
#Changed name of program from virus_public to cv-trends!
#
#Revision 1.5  2020/04/04 02:02:16  richard
#Added x's for daily reports.
#
#Revision 1.4  2020/04/03 16:40:06  richard
#Added author string.
#
#Revision 1.3  2020/04/03 16:22:58  richard
#Delete unneeded commented out lines.
#
#Revision 1.2  2020/04/03 16:20:33  richard
#Get version control started.
#