# -*- coding: utf-8 -*-
'''**Calculates the economic optimum nitrogen rate and plots the results**
``EONR`` is a Python package for computing the economic optimum nitrogen
fertilizer rate using data from agronomic field trials under economic
conditions defined by the user (i.e., grain price and fertilizer cost).
It can be used for any crop (e.g., corn, wheat, potatoes, etc.), but the
current version supports only the quadratic and quadratic-plateau
models.
**Therefore, use caution in making sure that one of these models is
appropriate for your application.** Future versions should add support for
other models (e.g., linear-plateau, spherical) that may improve the fit of
experimental yield response to nitrogen for other crops.
*Optional arguments when creating an instance of* ``EONR``:
Parameters:
cost_n_fert (float, optional): Cost of N fertilizer (default: 0.50)
cost_n_social (float, optional): Social cost of N fertilizer (default: 0.00)
costs_fixed (float, optional): Fixed costs on a per area basis (default:
0.00)
price_grain (float, optional): Price of grain (default: 3.00)
col_n_app (str, optional): Column name pointing to the rate of applied N
fertilizer data (default: 'rate_n_applied_lbac')
col_yld (str, optional): Column name pointing to the grain yield data. This
column is multiplied by price_grain to create the 'grtn' column in
``EONR.df_data`` (default: 'yld_grain_dry_buac')
col_crop_nup (str, optional): Column name pointing to crop N uptake data
(default: 'nup_total_lbac')
col_n_avail (str, optional): Column name pointing to available soil N plus
fertilizer (default: 'soil_plus_fert_n_lbac')
col_year (str, optional): Column name pointing to year (default: 'year')
col_location (str, optional): Column name pointing to location (default:
'location')
col_time_n (str, optional): Column name pointing to nitrogen application
timing (default: 'time_n')
unit_currency (str, optional): String describing the currency unit (default:
'$')
unit_fert (str, optional): String describing the "fertilizer" unit
(default: 'lbs')
unit_grain (str, optional): String describing the "grain" unit (default:
'bu')
unit_area (str, optional): String describing the "area" unit (default: 'ac')
model (str, optional): Statistical model used to fit N rate response.
*'quad_plateau'* = quadratic plateau; *'quadratic'* = quadratic;
``None`` = fits each of the preceding models, and uses the one with the
highest adjusted R2 (default: 'quad_plateau').
ci_level (float, optional): Confidence interval level to save in
``EONR.df_results`` and to display in the EONR plot; note that
confidence intervals are calculated at a fixed list of alpha levels,
so ci_level should be one of [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.667,
0.7, 0.8, 0.9, 0.95, or 0.99] (default: 0.90)
base_dir (str, optional): Base file directory when saving results (default:
None)
base_zero (``bool``, optional): Determines if gross return to N is
expressed as an absolute value or relative to the yield/return at the
zero rate. If base_zero is True, observed data for the zero nitrogen
rate will be standardized by subtracting the y-intercept
(:math:`\\beta_0`) from the 'grtn' column of ``EONR.df_data``.
(default: True)
print_out (``bool``, optional): Determines if "non-essential" results are
printed in the Python console (default: False)
Requirements:
The minimum data requirement to utilize this package is observed (or
simulated) experimental data of agronomic yield response to nitrogen
fertilizer. In other words, your experiment should have multiple nitrogen
rate treatments, and you should have measured the yield for each
experimental plot at the end of the season. Suitable experimental design
for your particular experiment is always suggested (e.g., it should
probably be replicated).
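Example:
    A minimal sketch of typical usage. The file name below is
    hypothetical, and ``calculate_eonr()`` is assumed from the
    package's public API (it is not defined in this module section)::
        import pandas as pd
        from eonr import EONR
        df_data = pd.read_csv('my_trial_data.csv')  # hypothetical file
        my_eonr = EONR(cost_n_fert=0.88, price_grain=3.25)
        my_eonr.calculate_eonr(df_data)
        print(my_eonr.df_results[['eonr', 'mrtn', 'grtn_r2_adj']])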
'''
import numpy as np
import os
import pandas as pd
import re
from scikits import bootstrap
from scipy import stats
from scipy.optimize import curve_fit
from scipy.optimize import minimize
from scipy.optimize import minimize_scalar
from scipy.optimize import OptimizeWarning
import uncertainties as unc
from uncertainties import unumpy as unp
import warnings
from eonr import Models
from eonr import Plotting_tools
class EONR(object):
'''
``EONR`` is a Python tool for computing the optimum nitrogen rate and its
confidence intervals from agricultural research data.
'''
def __init__(self,
cost_n_fert=0.5,
cost_n_social=0.0,
costs_fixed=0.0,
price_grain=3.00,
col_n_app='rate_n_applied_lbac',
col_yld='yld_grain_dry_buac',
col_crop_nup='nup_total_lbac',
col_n_avail='soil_plus_fert_n_lbac',
col_year='year', col_location='location', col_time_n='time_n',
unit_currency='$',
unit_fert='lbs', unit_grain='bu', unit_area='ac',
model='quad_plateau', ci_level=0.9, base_dir=None,
base_zero=True, print_out=False):
self.df_data = None
self.cost_n_fert = cost_n_fert
self.cost_n_social = cost_n_social
self.costs_fixed = costs_fixed
self.price_grain = price_grain
self.price_ratio = ((self.cost_n_fert + self.cost_n_social) /
self.price_grain)
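# Price ratio is (cost of N fertilizer + social cost of N) / grain
# price; with the defaults, (0.50 + 0.00) / 3.00 ~= 0.167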
self.col_n_app = col_n_app
self.col_yld = col_yld
self.col_crop_nup = col_crop_nup
self.col_n_avail = col_n_avail
self.col_year = col_year
self.col_location = col_location
self.col_time_n = col_time_n
self.unit_currency = unit_currency
self.unit_fert = unit_fert
self.unit_grain = unit_grain
self.unit_area = unit_area
self.unit_rtn = '{0} per {1}'.format(unit_currency, unit_area)
self.unit_nrate = '{0} per {1}'.format(unit_fert, unit_area)
self.model = model
self.model_temp = model
self.ci_level = ci_level
self.base_dir = base_dir
self.base_zero = base_zero
self.print_out = print_out
self.location = None
self.year = None
self.time_n = None
self.onr_name = None
self.onr_acr = None
self.R = 0 # price_ratio to use when finding theta2
self.coefs_grtn = {}
self.coefs_grtn_primary = {} # only used if self.base_zero is True
self.coefs_nrtn = {}
self.coefs_social = {}
self.results_temp = {}
self.mrtn = None
self.eonr = None
self.costs_at_onr = None
self.df_ci = None
self.df_ci_pdf = None
self.df_delta_tstat = None
self.df_der = None
self.df_linspace = None
self.fig_delta_tstat = None
self.fig_derivative = None
self.fig_eonr = None
self.fig_tau = None
self.ci_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.667, 0.7, 0.8, 0.9,
0.95, 0.99]
self.alpha_list = [1 - xi for xi in self.ci_list]
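# alpha levels are the complements of the CI levels (alpha = 1 - level)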
self.df_results = pd.DataFrame(columns=['price_grain', 'cost_n_fert',
'cost_n_social', 'costs_fixed',
'price_ratio',
'unit_price_grain',
'unit_cost_n',
'location', 'year', 'time_n',
'model', 'base_zero', 'eonr',
'eonr_bias', 'R*',
'costs_at_onr', 'ci_level',
'ci_wald_l', 'ci_wald_u',
'ci_pl_l', 'ci_pl_u',
'ci_boot_l', 'ci_boot_u',
'mrtn', 'grtn_r2_adj',
'grtn_rmse',
'grtn_max_y', 'grtn_crit_x',
'grtn_y_int', 'scn_lin_r2',
'scn_lin_rmse', 'scn_exp_r2',
'scn_exp_rmse'])
self.bootstrap_ci = None
if self.unit_grain == 'kg' and self.unit_area == 'ha':
self.metric = True
elif self.unit_grain == 'lbs' and self.unit_area == 'ac':
self.metric = False
else: # unknown
self.metric = False
if self.base_dir is not None:
if not os.path.isdir(self.base_dir):
os.makedirs(self.base_dir)
else:
self.base_dir = os.getcwd()
folder_name = 'eonr_temp_output'
self.base_dir = os.path.join(self.base_dir, folder_name)
if not os.path.isdir(self.base_dir):
os.makedirs(self.base_dir)
self.base_dir = os.path.join(self.base_dir, 'trad_000')
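# Label the optimum N rate by the costs considered: social cost > 0
# gives the socially optimal rate (SONR); fertilizer cost > 0 gives the
# economic optimum (EONR); if both are zero, it is agronomic (AONR)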
if self.cost_n_social > 0:
self.onr_name = 'Socially'
self.onr_acr = 'SONR'
elif self.cost_n_fert > 0:
self.onr_name = 'Economic'
self.onr_acr = 'EONR'
else:
self.onr_name = 'Agronomic'
self.onr_acr = 'AONR'
self.models = Models(self)
self.plotting_tools = Plotting_tools(self)
self.plot_eonr.__func__.__doc__ = self.plotting_tools.plot_eonr.__doc__
self.plot_tau.__func__.__doc__ = self.plotting_tools.plot_tau.__doc__
self.plot_save.__func__.__doc__ = self.plotting_tools.plot_save.__doc__
# Following are the miscellaneous functions
def _reset_temp(self):
'''
Resets temporary variables to be sure nothing carries through from a
previous run
'''
self.results_temp = {'grtn_y_int': None,
'scn_lin_r2': None,
'scn_lin_rmse': None,
'scn_exp_r2': None,
'scn_exp_rmse': None}
def _find_trial_details(self):
'''
Uses EONR.col_XXXXXXX to get year, location, and time_n from
EONR.df_data
'''
df = self.df_data.copy()
try:
self.location = df.iloc[0][self.col_location]
except KeyError:
if self.location is not None:
print('Was not able to infer "{0}" from EONR.df_data; '
'"{0}" is currently set to {1}. If this is not '
'correct, adjust prior to plotting using '
'EONR.set_column_names(col_location="your_loc_col_name")'
' or EONR.set_trial_details({0}="your_location")'
''.format('location', self.location))
else:
print('{0} is not currently set. You may want to set prior to '
'plotting using '
'EONR.set_column_names(col_location="your_loc_col_name")'
' or EONR.set_trial_details({0}="your_location")'
''.format('location'))
try:
self.year = df.iloc[0][self.col_year]
except KeyError:
if self.year is not None:
print('Was not able to infer "{0}" from EONR.df_data; '
'"{0}" is currently set to {1}. If this is not '
'correct, adjust prior to plotting using '
'EONR.set_column_names(col_year="your_year_col_name")'
' or EONR.set_trial_details({0}="your_year")'
''.format('year', self.year))
else:
print('{0} is not currently set. You may want to set prior to '
'plotting using '
'EONR.set_column_names(col_year="your_year_col_name")'
' or EONR.set_trial_details({0}="your_year")'
''.format('year'))
try:
self.time_n = df.iloc[0][self.col_time_n]
except KeyError:
if self.time_n is not None:
print('Was not able to infer "{0}" from EONR.df_data; '
'"{0}" is currently set to {1}. If this is not '
'correct, adjust prior to plotting using '
'EONR.set_column_names(col_time_n="your_time_n_col_name")'
' or EONR.set_trial_details({0}="your_time_n")'
''.format('time_n', self.time_n))
else:
print('{0} is not currently set. You may want to set prior to '
'plotting using '
'EONR.set_column_names(col_time_n="your_time_n_col_name")'
' or EONR.set_trial_details({0}="your_time_n")'
''.format('time_n'))
def _set_df(self, df):
'''
Basic setup for dataframe
'''
self.df_data = df.copy()
self._find_trial_details()
print('\nComputing {0} for {1} {2} {3}'
'\nCost of N fertilizer: {4}{5:.2f} per {6}'
'\nPrice grain: {4}{7:.2f} per {8}'
'\nFixed costs: {4}{9:.2f} per {10}'
''.format(self.onr_acr, self.location, self.year, self.time_n,
self.unit_currency, self.cost_n_fert, self.unit_fert,
self.price_grain, self.unit_grain,
self.costs_fixed, self.unit_area))
if self.cost_n_social > 0:
print('Social cost of N: {0}{1:.2f} per {2}'
''.format(self.unit_currency, self.cost_n_social,
self.unit_fert))
def _replace_missing_vals(self, missing_val='.'):
'''
Finds missing data in pandas dataframe and replaces with np.nan
'''
df = self.df_data.copy()
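# Note: a vectorized alternative would be df.replace(missing_val, np.nan)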
for row in df.index:
for col in df.columns:
if df.at[row, col] == missing_val:
df.at[row, col] = np.nan
self.df_data = df.copy()
# Following are curve_fit helpers and statistical calculations
def _best_fit_lin(self, col_x, col_y):
'''
Computes slope, intercept, r2, and RMSE of linear best fit line between
<col_x> and <col_y> of eonr.df_data
Note that the 'social_cost_n' (<col_y>) already considers the economic
scenario, so it doesn't have to be added again later
'''
df = self.df_data.copy()
X = df[col_x].values.reshape(-1)
y = df[col_y].values.reshape(-1)
mx, b, r_value, p_value, std_err = stats.linregress(X, y)
lin_r2 = r_value**2
res = y - (b + mx*X)
ss_res = np.sum(res**2)
lin_rmse = (ss_res / len(y))**0.5
self.coefs_social['lin_mx'] = mx
self.coefs_social['lin_b'] = b
self.coefs_social['lin_r2'] = lin_r2
self.coefs_social['lin_rmse'] = lin_rmse
if self.print_out is True:
print('\ny = {0:.5} + {1:.5}x'.format(b, mx))
print('lin_r2 = {0:.3}'.format(lin_r2))
print('RMSE = {0:.1f}\n'.format(lin_rmse))
def _best_fit_exp(self, col_x, col_y, guess_a=10, guess_b=0.0001,
guess_c=-10):
'''
Computes a, b, and c of best fit exponential function between <col_x>
and <col_y>
Note that the 'social_cost_n' (<col_y>) already considers the economic
scenario, so it doesn't have to be added again later
'''
df = self.df_data.copy()
x = df[col_x].values.reshape(-1)
y = df[col_y].values.reshape(-1)
popt = None
info = ('func = {0}\ncol_x = {1}\ncol_y = {2}\n'
''.format('_best_fit_exp() -> _f_exp', col_x, col_y))
try:
popt, pcov = self._curve_fit_opt(self.models.exp, x, y,
p0=(guess_a, guess_b, guess_c),
maxfev=1500, info=info)
except RuntimeError as err:
print('\n{0}\nTrying a new guess before giving up..'.format(err))
pass
if popt is None:
try:
popt, pcov = self._curve_fit_opt(self.models.exp, x, y,
p0=(guess_a*10,
guess_b*10,
guess_c*10),
info=info)
except RuntimeError as err:
print('\n{0}\nTry adjusting the initial guess parameters..'
''.format(err))
pass
except np.linalg.LinAlgError as err:
print('\n{0}'.format(err))
pass
if popt is None or np.any(popt == np.inf):
if self.print_out is True:
print("Couldn't fit data to an exponential function..\n")
self.coefs_social['exp_gamma0'] = None
self.coefs_social['exp_gamma1'] = None
self.coefs_social['exp_gamma2'] = None
self.coefs_social['exp_r2'] = 0
self.coefs_social['exp_rmse'] = None
else:
exp_r2, _, _, _, exp_rmse = self._get_rsq(self.models.exp, x, y,
popt)
gamma0, gamma1, gamma2 = popt
if self.print_out is True:
print('y = {0:.5} * exp({1:.5}x) + {2:.5}'
''.format(gamma0, gamma1, gamma2))
print('exp_r2 = {0:.3}'.format(exp_r2))
print('RMSE = {0:.1f}\n'.format(exp_rmse))
self.coefs_social['exp_gamma0'] = gamma0
self.coefs_social['exp_gamma1'] = gamma1
self.coefs_social['exp_gamma2'] = gamma2
self.coefs_social['exp_r2'] = exp_r2
self.coefs_social['exp_rmse'] = exp_rmse
def _calc_aic(self, x, y, dist='gamma'):
'''
Calculate the Akaike information criterion (AIC) using either a gamma
(<dist>='gamma') or normal (<dist>='normal') distribution
'''
if dist == 'gamma':
fitted_params = stats.gamma.fit(y)
log_lik = np.sum(stats.gamma.logpdf(y, fitted_params[0],
loc=fitted_params[1],
scale=fitted_params[2]))
elif dist == 'normal':
fitted_params = stats.norm.fit(y)
log_lik = np.sum(stats.norm.logpdf(y, loc=fitted_params[0],
scale=fitted_params[1]))
k = len(fitted_params)
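# AIC = 2k - 2*ln(L), where k is the number of fitted distribution
# parameters and ln(L) is the log-likelihood; lower AIC indicates a
# better fit after penalizing model complexity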
aic = 2 * k - 2 * log_lik
return aic
def _curve_fit_bs(self, f, xdata, ydata, p0=None, maxfev=800):
'''
Helper function to suppress the OptimizeWarning. The bootstrap
computation doesn't use the covariance matrix anyway (which is the
main cause of the OptimizeWarning being thrown).
(added so I can figure out what part of the code is causing it)
'''
with warnings.catch_warnings():
warnings.simplefilter('error', OptimizeWarning)
try:
popt, pcov = curve_fit(f, xdata, ydata, p0=p0,
maxfev=maxfev)
return popt, pcov
except OptimizeWarning:
pass
warnings.simplefilter('ignore', OptimizeWarning) # hides warning
popt, pcov = curve_fit(f, xdata, ydata, p0=p0, maxfev=maxfev)
return popt, pcov
def _curve_fit_opt(self, f, xdata, ydata, p0=None, maxfev=800, info=None):
'''
Helper function to suppress the OptimizeWarning. The bootstrap
computation doesn't use the covariance matrix anyway (which is the
main cause of the OptimizeWarning being thrown).
(added so I can figure out what part of the code is causing it)
<info> holds the column headings of the x and y data and is printed
out to identify which curve_fit call is throwing the OptimizeWarning
'''
if info is None:
popt, pcov = curve_fit(f, xdata, ydata, p0=p0, maxfev=maxfev)
else:
with warnings.catch_warnings():
warnings.simplefilter('error', OptimizeWarning)
try:
popt, pcov = curve_fit(f, xdata, ydata, p0=p0,
maxfev=maxfev)
except OptimizeWarning:
if self.print_out is True:
print('Information for which the OptimizeWarning was '
'thrown:\n{0}'.format(info))
warnings.simplefilter('ignore', OptimizeWarning) # hides warn
# essentially ignore warning and run anyway
popt, pcov = curve_fit(f, xdata, ydata, p0=p0,
maxfev=maxfev)
return popt, pcov
def _curve_fit_runtime(self, func, x, y, guess, maxfev=800, info=None):
'''
Helper function to run curve_fit() and watch for a RuntimeError. If we
get a RuntimeError, then increase maxfev by 10x and try again.
Sometimes this solves the problem of not being able to fit the
function. If so, returns popt and pcov; if not, prints the error and
returns None.
'''
popt = None
try:
popt, pcov = self._curve_fit_opt(func, x, y, p0=guess,
maxfev=maxfev, info=info)
except RuntimeError as err:
print(err)
maxfev *= 10
print('Increasing the maximum number of calls to the function to '
'{0} before giving up.\n'.format(maxfev))
if popt is None:
try:
popt, pcov = self._curve_fit_opt(func, x, y, p0=guess,
maxfev=maxfev, info=info)
except RuntimeError as err:
print(err)
print('Was not able to fit data to the function.')
if popt is not None:
return popt, pcov
else:
return None, None
def _compute_R(self, col_x, col_y, epsilon=1e-3, step_size=0.1):
'''
Given the true EONR with social cost considered, the goal is to find
the price ratio R at which the sum-of-squares fit recovers the true
EONR as theta2
'''
df_data = self.df_data.copy()
x = df_data[col_x].values
y = df_data[col_y].values
if self.cost_n_social > 0 and self.eonr is not None:
b2 = self.coefs_grtn['b2'].n
b1 = self.coefs_grtn['b1'].n
self.R = b1 + (2 * b2 * self.eonr) # as a starting point
self.models.update_eonr(self)
guess = (self.coefs_grtn['b0'].n,
self.eonr,
self.coefs_grtn['b2'].n)
if self.model_temp == 'quadratic':
f_model = self.models.quadratic
f_model_theta2 = self.models.q_theta2
elif self.model_temp == 'quad_plateau':
f_model = self.models.quad_plateau
f_model_theta2 = self.models.qp_theta2
info = ('func = {0}\ncol_x = {1}\ncol_y = {2}\n'
''.format(f_model, col_x, col_y))
popt, pcov = self._curve_fit_runtime(f_model_theta2, x, y, guess,
maxfev=800)
dif = abs(popt[1] - self.eonr)
count = 0
while dif > epsilon:
print('Using the optimize_R() algorithm')
if count == 10:
epsilon *= 10
elif count >= 100:
print('Could not converge R to fit the '
'EONR after {0} attempts. Fitted '
'model is within {1} {2} of the '
'computed EONR.'
''.format(count, dif,
self.unit_nrate))
break
count += 1
if popt[1] > self.eonr:
self.R += step_size # increase R
else: # assumes R should always be positive
# go back one and increase at finer step
self.R -= step_size
step_size *= 0.1 # change step_size
self.R += step_size
self.models.update_eonr(self)
popt, pcov = self._curve_fit_runtime(f_model_theta2,
x, y, guess, maxfev=800)
dif = abs(popt[1] - self.eonr)
res = y - f_model_theta2(x, *popt)
ss_res = np.sum(res**2)
if (popt is None or np.any(popt == np.inf) or
np.any(pcov == np.inf)):
b0 = unc.ufloat(popt[0], 0)
theta2 = unc.ufloat(popt[1], 0)
b2 = unc.ufloat(popt[2], 0)
else:
b0, theta2, b2 = unc.correlated_values(popt, pcov)
info = ('func = {0}\ncol_x = {1}\ncol_y = {2}\n'
''.format(f_model, col_x,
col_y + ' (residuals)'))
# func = self.models.quad_plateau
popt, pcov = self._curve_fit_opt(lambda x, b1: f_model(
x, b0.n, b1, b2.n), x, y, info=info)
popt = np.insert(popt, 0, b2.n)
popt = np.insert(popt, 2, b0.n)
f = np.poly1d(popt)
result = minimize_scalar(-f)
self.coefs_nrtn['b0'] = b0
self.coefs_nrtn['theta2'] = result['x']
self.coefs_nrtn['b2'] = b2
self.coefs_nrtn['theta2_social'] = theta2
self.coefs_nrtn['popt_social'] = [popt[2],
theta2,
popt[0]]
self.coefs_nrtn['ss_res_social'] = ss_res
self.coefs_nrtn['eonr_bias'] = theta2 - self.eonr
elif self.cost_n_social == 0:
self.R = self.price_ratio * self.price_grain
else:
assert self.eonr is not None, 'Please compute EONR'
self.models.update_eonr(self)
def _get_rsq(self, func, x, y, popt):
'''
Calculate the r-square (and adjusted r-square) of <y> for function
<func> with <popt> parameters
'''
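# R2 = 1 - SS_res/SS_tot; adjusted R2 penalizes the p fitted
# parameters; RMSE is the square root of the mean squared residual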
res = y - func(x, *popt)
ss_res_mean = np.mean(res**2)
rmse = (ss_res_mean)**0.5
y_mean = np.mean(y)
ss_res = np.sum(res**2)
ss_tot = np.sum((y-y_mean)**2)
r2 = 1-ss_res/ss_tot
p = len(popt)
n = len(x)
r2_adj = 1-(1-r2)*(n-1)/(n-p-1)
return r2, r2_adj, ss_res, ss_tot, rmse
# Following are higher level functions
def _build_mrtn_lines(self, n_steps=None):
'''
Builds the net return to N (RTN) line for plotting
Parameters:
n_steps (``int``): Number of nitrogen rates to be calculated to
represent the GRTN curve (default: None - set dynamically as two
steps per unit N).
'''
df = self.df_data.copy()
x_min = float(df.loc[:, [self.col_n_app]].min(axis=0))
x_max = float(df.loc[:, [self.col_n_app]].max(axis=0))
if n_steps is None:
n_steps = int((x_max - x_min)*2)
eonr_social_n = 0
x1, y_fert_n, x1a = self._setup_ncost_curve(x_min, x_max, n_steps)
eonr_fert_n = self.eonr * self.cost_n_fert
y_grtn = self._setup_grtn_curve(x1, x1a, n_steps)
if self.cost_n_social > 0:
y_social_n, eonr_social_n, _, _ = self._build_social_curve(
x1, fixed=False)
rtn = y_grtn - (y_fert_n + y_social_n)
self.linspace_cost_n_social = (x1, y_social_n)
else:
rtn = y_grtn - y_fert_n
while len(y_grtn) != n_steps:
if len(y_grtn) < n_steps:
y_grtn = np.append(y_grtn, y_grtn[-1])
else:
y_grtn = y_grtn[:-1]
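# First and second discrete derivatives (absolute differences) of the
# return to N curve; used later (e.g., by _rtn_derivative())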
rtn_der = np.insert(abs(np.diff(rtn)), 0, np.nan)
rtn_der2 = np.insert(abs(np.diff(rtn_der)), 0, np.nan)
df_linspace = pd.DataFrame({'x': x1,
'cost_n_fert': y_fert_n,
'quad_plat': y_grtn,
'rtn': rtn,
'rtn_der': rtn_der,
'rtn_der2': rtn_der2})
if self.cost_n_social > 0:
df_linspace['cost_n_social'] = y_social_n
self.df_linspace = df_linspace
self.costs_at_onr = eonr_fert_n + eonr_social_n
def _ci_pdf(self, run_n=None, n_steps=1000):
'''
Calculates the probability density function across all calculated
confidence interval levels
Parameters:
run_n (``int``): (default: ``None``)
'''
if run_n is None:
df_ci = self.df_ci[self.df_ci['run_n'] ==
self.df_ci['run_n'].max()]
else:
df_ci = self.df_ci[self.df_ci['run_n'] == run_n]
val_min = df_ci[df_ci['level'] == 0.99]['pl_l'].values[0]
val_max = df_ci[df_ci['level'] == 0.99]['pl_u'].values[0]
x_all = np.linspace(val_min, val_max, n_steps)
level_list = list(df_ci['level'].unique())
weights = np.zeros(x_all.shape)
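# For each CI level, spread (level * 100) weight uniformly across the
# rates inside [pl_l, pl_u] and the remaining weight outside, so rates
# covered by many nested CIs accumulate the most density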
for level in level_list:
if level != 0:
pl_l = df_ci[df_ci['level'] == level]['pl_l'].values[0]
pl_u = df_ci[df_ci['level'] == level]['pl_u'].values[0]
weight_in = (level*100)
weight_out = 100 - weight_in # weight allotted outside the CI
# idx_l = (np.abs(x_all - pl_l)).argmin() # find nearest index
# idx_u = (np.abs(x_all - pl_u)).argmin()
dif_l = (x_all - pl_l) # find index above
dif_u = (pl_u - x_all) # find index below
idx_l = np.where(dif_l>0, dif_l, dif_l.max()).argmin()
idx_u = np.where(dif_u>0, dif_u, dif_u.max()).argmin()
unit_weight_in = weight_in / (idx_u - idx_l)
unit_weight_out = weight_out / ((idx_l - x_all.argmin()) + (n_steps - idx_u))
weights[:idx_l] += unit_weight_out # add to weights
weights[idx_u:] += unit_weight_out
weights[idx_l:idx_u] += unit_weight_in
df_ci_pdf = pd.DataFrame({'rate_n': x_all,
'weights': weights})
self.df_ci_pdf = df_ci_pdf
def _rtn_derivative(self):
'''
Calculates the first derivative of the return to N curve within the
profile-likelihood confidence bounds at EONR.ci_level
'''
df_ci = self.df_ci[self.df_ci['run_n'] ==
self.df_ci['run_n'].max()].copy()
pl_l = df_ci[df_ci['level'] == self.ci_level]['pl_l'].values[0]
pl_u = df_ci[df_ci['level'] == self.ci_level]['pl_u'].values[0]
df = self.df_linspace[['x', 'rtn_der', 'rtn_der2']].copy()
df_trim = df[(df['x'] >= pl_l) & (df['x'] <= pl_u)]
df_trim = df_trim.loc[~(df_trim == 0).any(axis=1)]
der_max = df_trim['rtn_der'].iloc[-10:].max()
df_trim = df_trim[(df_trim['rtn_der'] <= der_max) &
(df_trim['rtn_der2'] > 0.001)]
df_der_left = df_trim[df_trim['x'] < self.eonr]
df_der_right = df_trim[df_trim['x'] > self.eonr]
slope_coef = (len(df_trim['x']) /
(df_trim['x'].max() - df_trim['x'].min()))
try:
slope_l, _, _, _, _ = stats.linregress(df_der_left['x'],
df_der_left['rtn_der'])
self.coefs_nrtn['der_slope_lower'] = slope_l * slope_coef
except ValueError:
self.coefs_nrtn['der_slope_lower'] = np.nan
try:
slope_u, _, _, _, _ = stats.linregress(df_der_right['x'],
df_der_right['rtn_der'])
self.coefs_nrtn['der_slope_upper'] = slope_u * slope_coef
except ValueError:
self.coefs_nrtn['der_slope_upper'] = np.nan
def _build_social_curve(self, x1, fixed=False):
'''
Generates an array for the Social cost of N curve
'''
ci_l, ci_u = None, None
if fixed is True:
y_social_n = x1 * self.cost_n_social
eonr_social_n = self.eonr * self.cost_n_social
else:
if self.coefs_social['lin_r2'] > self.coefs_social['exp_r2']:
y_social_n = self.coefs_social['lin_b'] +\
(x1 * self.coefs_social['lin_mx'])
eonr_social_n = self.coefs_social['lin_b'] +\
(self.eonr * self.coefs_social['lin_mx'])
else:
x1_exp = self.coefs_social['exp_gamma0'] *\
unp.exp(self.coefs_social['exp_gamma1'] * x1) +\
self.coefs_social['exp_gamma2']
y_social_n = unp.nominal_values(x1_exp)
eonr_social_n = self.coefs_social['exp_gamma0'] *\
unp.exp(self.coefs_social['exp_gamma1'] * self.eonr) +\
self.coefs_social['exp_gamma2']
std = unp.std_devs(x1_exp)
ci_l = (y_social_n - 2 * std)
ci_u = (y_social_n + 2 * std)
return y_social_n, eonr_social_n, ci_l, ci_u
def _calc_grtn(self):
'''
Computes Gross Return to N and saves in df_data under column heading of
'grtn'
'''
self.df_data['grtn'] = self.df_data[self.col_yld]*self.price_grain
self._fit_model(col_x=self.col_n_app, col_y='grtn')
# if model == 'quad_plateau':
# # Calculate the coefficients describing the quadratic plateau model
# self._quad_plateau(col_x=self.col_n_app, col_y='grtn')
# elif model == 'quadratic':
# self.
# elif model == 'lin_plateau':
# self._r_lin_plateau(col_x=self.col_n_app, col_y='grtn')
# self._r_confint(level=0.8)
# else:
# raise NotImplementedError('{0} model not implemented'
# ''.format(model))
self.results_temp['grtn_y_int'] = self.coefs_grtn['b0'].n
if self.base_zero is True:
self.df_data['grtn'] = (self.df_data['grtn'] -
self.coefs_grtn['b0'].n)
self._fit_model(col_x=self.col_n_app, col_y='grtn', rerun=True)
self.models.update_eonr(self)
def _calc_nrtn(self, col_x, col_y):
'''
Calculates the net return to N. If cost_n_social > 0,
_f_qp_theta2 uses actual N uptake data to derive the absolute
cost of excess (or net negative) fertilizer (see _best_fit_lin() and
_best_fit_exp()). This cost is already in units of $ based on the
economic scenario, but keep in mind that it will almost certainly have
an intercept other than zero.
For example, if more N is taken up than applied, there is a net
negative use (-net_use); in terms of dollars, net_use can be multiplied
by the social cost/price of N to get into units of $, which is a unit
that can be used with the price ratio R.
'''
df_data = self.df_data.copy()
x = df_data[col_x].values
y = df_data[col_y].values
if self.model_temp == 'quadratic':
# f_model = self.models.quadratic
f_model_theta2 = self.models.q_theta2
elif self.model_temp == 'quad_plateau':
# f_model = self.models.quad_plateau
f_model_theta2 = self.models.qp_theta2
guess = (self.coefs_grtn['b0'].n,
self.coefs_grtn['crit_x'],
self.coefs_grtn['b2'].n)
info = ('func = {0}\ncol_x = {1}\ncol_y = {2}\n'
''.format(f_model_theta2,
col_x, col_y))
popt, pcov = self._curve_fit_opt(f_model_theta2, x, y,
p0=guess, maxfev=1000, info=info)
res = y - f_model_theta2(x, *popt)
# if cost_n_social > 0, this will be dif than coefs_grtn['ss_res']
ss_res = np.sum(res**2)
if popt is None or np.any(popt == np.inf) or np.any(pcov == np.inf):
b0 = unc.ufloat(popt[0], 0)
theta2 = unc.ufloat(popt[1], 0)
b2 = unc.ufloat(popt[2], 0)
else:
b0, theta2, b2 = unc.correlated_values(popt, pcov)
self.coefs_nrtn = {
'b0': b0,
'theta2': theta2,
'b2': b2,
'popt': popt,
'pcov': pcov,
'ss_res': ss_res
}
def _calc_social_cost(self, col_x, col_y):
'''
Computes the slope and intercept for the model describing the added
social cost of N
'''
self.df_data['resid_n'] = (self.df_data[self.col_n_avail] -
self.df_data[self.col_crop_nup])
self.df_data['social_cost_n'] = self.df_data['resid_n'] *\
self.cost_n_social
if self.print_out is True:
print('Computing best-fit line between {0} and {1}..'
''.format(col_x, col_y))
self._best_fit_lin(col_x, col_y)
self._best_fit_exp(col_x, col_y)
self.results_temp['scn_lin_r2'] = self.coefs_social['lin_r2']
self.results_temp['scn_lin_rmse'] = self.coefs_social['lin_rmse']
self.results_temp['scn_exp_r2'] = self.coefs_social['exp_r2']
self.results_temp['scn_exp_rmse'] = self.coefs_social['exp_rmse']
def _print_grtn(self):
'''
Prints results of Gross Return to N calculation
'''
print('\nN Rate vs. Gross Return to N ({0} at ${1:.2f} per {2})'
''.format(self.unit_rtn, self.price_grain, self.unit_grain))
print('y = {0:.5} + {1:.5}x + {2:.5}x^2'.format(
self.coefs_grtn['b0'].n,
self.coefs_grtn['b1'].n,
self.coefs_grtn['b2'].n))
print('Critical N Rate: {0:.4}'.format(self.coefs_grtn['crit_x']))
print('Maximum Y (approximate): {0:.4}'.format(
self.coefs_grtn['max_y']))
print('Adjusted R2: {0:.3f}'.format(self.coefs_grtn['r2_adj']))
print('RMSE: {0:.1f} {1}'.format(self.coefs_grtn['rmse'],
self.unit_rtn))
def _print_results(self):
'''
Prints results of Economic Optimum N Rate calculation
'''
last_run_n = self.df_ci['run_n'].max()
df_ci_last_all = self.df_ci[self.df_ci['run_n'] == last_run_n]
fert_l = df_ci_last_all[df_ci_last_all['level'] == 0.5]['pl_l'].values[0]
fert_u = df_ci_last_all[df_ci_last_all['level'] == 0.5]['pl_u'].values[0]
df_ci_last = self.df_ci[(self.df_ci['run_n'] == last_run_n) &
(self.df_ci['level'] == self.ci_level)]
try:
pl_l = df_ci_last['pl_l'].values[0]
pl_u = df_ci_last['pl_u'].values[0]
wald_l = df_ci_last['wald_l'].values[0]
wald_u = df_ci_last['wald_u'].values[0]
if self.bootstrap_ci is True:
boot_l = df_ci_last['boot_l'].values[0]
boot_u = df_ci_last['boot_u'].values[0]
except TypeError as err:
print(err)
print('{0} optimum N rate ({1}): {2:.1f} {3} [{4:.1f}, '
'{5:.1f}] ({6:.1f}% confidence)'
''.format(self.onr_name, self.onr_acr, self.eonr,
self.unit_nrate, pl_l, pl_u, self.ci_level*100))
print('Maximum return to N (MRTN): {0}{1:.2f} per {2}'
''.format(self.unit_currency, self.mrtn, self.unit_area))
# print('Acceptable range in recommended fertilizer rate for {0} {1} '
# '{2}: {3:.0f} to {4:.0f} {5}\n'
# ''.format(self.location, self.year, self.time_n, fert_l, fert_u,
# self.unit_nrate))
if self.print_out is True:
print('Profile likelihood confidence bounds (90%): [{0:.1f}, '
'{1:.1f}]'.format(pl_l, pl_u))
print('Wald confidence bounds (90%): [{0:.1f}, {1:.1f}]'
''.format(wald_l, wald_u))
print('Bootstrapped confidence bounds (90%): [{0:.1f}, {1:.1f}]\n'
''.format(boot_l, boot_u))
def _fit_model(self, col_x, col_y, rerun=False):
'''
Fits the specified model (EONR.model); if EONR.model is None, fits both
the quadratic and quadratic-plateau models, then uses the one with the
highest adjusted R^2 hereafter
<col_x> (``str``): df column name for x axis
<col_y> (``str``): df column name for y axis
'''
df_data = self.df_data.copy()
x = df_data[col_x].values
y = df_data[col_y].values
info = ('func = {0}\ncol_x = {1}\ncol_y = {2}\n'
''.format('_fit_model() -> _f_quad_plateau', col_x, col_y))
guess = self._get_guess_qp(rerun=rerun)
# TODO: Add a try/except to catch a bad guess.. or at least warn the
# user that the guess is *extremely* sensitive
if self.model is None and rerun is False:
print('Checking quadratic and quadratic-plateau models for best '
'fit..')
model_q = self.models.quadratic
model_qp = self.models.quad_plateau
popt_q, pcov_q = self._curve_fit_opt(model_q, x, y,
p0=guess, info=info)
_, r2_adj_q, _, _, rmse_q = self._get_rsq(
model_q, x, y, popt_q)
popt_qp, pcov_qp = self._curve_fit_opt(model_qp, x, y,
p0=guess, info=info)
_, r2_adj_qp, _, _, rmse_qp = self._get_rsq(
model_qp, x, y, popt_qp)
print('Quadratic model adjusted R2: {0:.2f}'.format(r2_adj_q))
print('Quadratic-plateau model adjusted R2: {0:.2f}'.format(r2_adj_qp))
if r2_adj_q > r2_adj_qp:
self.model_temp = 'quadratic'
# model = self.models.quadratic
# popt, pcov = popt_q, pcov_q
print('Using the quadratic model..')
else:
self.model_temp = 'quad_plateau'
# model = self.models.quad_plateau
# popt, pcov = popt_qp, pcov_qp
print('Using the quadratic-plateau model..')
elif self.model is None and rerun is True:
# Using self.model_temp because it was already determined
pass
else:
self.model_temp = self.model
if self.model_temp == 'quadratic':
f_model = self.models.quadratic
elif self.model_temp == 'quad_plateau':
f_model = self.models.quad_plateau
else:
raise NotImplementedError('{0} model not implemented'
''.format(self.model_temp))
popt, pcov = self._curve_fit_opt(f_model, x, y, p0=guess, info=info)
# the following should be made robust to dynamically find the starting values for a dataset..
# print(guess)
# print(popt, pcov)
# if popt[0] < 100 and rerun is False:
# guess = (100, guess[1], guess[2])
# if popt[1] < 1 and rerun is False:
# guess = (guess[0], guess[1] * 4, guess[2])
# if popt[2] > 0 and rerun is False:
# guess = (guess[0], guess[1], guess[2] * 5)
# popt, pcov = self._curve_fit_opt(f_model, x, y, p0=guess, info=info)
# print('\n\n')
# print(guess)
# print(popt, pcov)
if popt is None or np.any(popt == np.inf) or np.any(pcov == np.inf):
b0 = unc.ufloat(popt[0], 0)
b1 = unc.ufloat(popt[1], 0)
b2 = unc.ufloat(popt[2], 0)
else:
try:
b0, b1, b2 = unc.correlated_values(popt, pcov)
except np.linalg.LinAlgError:
b0 = unc.ufloat(popt[0], 0)
b1 = unc.ufloat(popt[1], 0)
b2 = unc.ufloat(popt[2], 0)
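# Vertex of the fitted quadratic: crit_x = -b1 / (2 * b2) is the N
# rate at which the response reaches its maximum (max_y)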
crit_x = -b1.n/(2*b2.n)
max_y = f_model(crit_x, b0.n, b1.n, b2.n)
r2, r2_adj, ss_res, ss_tot, rmse = self._get_rsq(
f_model, x, y, popt)
aic = self._calc_aic(x, y, dist='gamma')
if rerun is False:
self.coefs_grtn = {
'b0': b0,
'b1': b1,
'b2': b2,
'pcov': pcov,
'pval_a': None,
'pval_b': None,
'pval_c': None,
'r2': r2,
'r2_adj': r2_adj,
'AIC': aic,
'BIC': None,
'max_y': max_y,
'crit_x': crit_x,
'ss_res': ss_res,
'ss_tot': ss_tot,
'rmse': rmse
}
self.coefs_nrtn = {
'b0': b0,
'theta2': None,
'b2': b2,
'popt': None,
'pcov': None,
'ss_res': None,
'eonr_bias': None,
'theta2_social': None,
'popt_social': None,
'ss_res_social': None
}
else:
self.coefs_grtn_primary = self.coefs_grtn.copy()
self.coefs_grtn = {}
self.coefs_grtn = {
'b0': b0,
'b1': b1,
'b2': b2,
'pcov': pcov,
'pval_a': None,
'pval_b': None,
'pval_c': None,
'r2': r2,
'r2_adj': r2_adj,
'AIC': aic,
'BIC': None,
'max_y': max_y,
'crit_x': crit_x,
'ss_res': ss_res,
'ss_tot': ss_tot,
'rmse': rmse
}
def _setup_ncost_curve(self, x_min, x_max, n_steps):
'''
Generates an array for the N cost curve
'''
if self.coefs_grtn['crit_x'] >= x_max:
num_thresh = n_steps
else:
num_thresh = int(n_steps * (self.coefs_grtn['crit_x'] / (x_max)))
step_size = (x_max - x_min) / (n_steps-1)
x1a, ss1 = np.linspace(x_min,
((x_min + (num_thresh-1) * step_size)),
num=num_thresh, retstep=True)
x1b, ss2 = np.linspace(((x_min + (num_thresh) * step_size)),
x_max,
num=n_steps-num_thresh,
retstep=True)
x1 = np.concatenate((x1a, x1b))
y_fert_n = (x1 * self.cost_n_fert) + self.costs_fixed
return x1, y_fert_n, x1a # x1a used in _setup_grtn_curve()
def _setup_grtn_curve(self, x1, x1a, n_steps):
'''
Generates an array for GRTN curve
'''
y_max = (self.coefs_grtn['b0'].n +
(self.coefs_grtn['crit_x'] * self.coefs_grtn['b1'].n) +
(self.coefs_grtn['crit_x'] * self.coefs_grtn['crit_x'] *
self.coefs_grtn['b2'].n))
# Find the index where the unconstrained quadratic reaches its maximum
y_temp = (self.coefs_grtn['b0'].n +
(x1*self.coefs_grtn['b1'].n) +
(x1*x1*self.coefs_grtn['b2'].n))
y_max_idx = np.argmax(y_temp)
y2a = (self.coefs_grtn['b0'].n +
(x1a[:y_max_idx]*self.coefs_grtn['b1'].n) +
(x1a[:y_max_idx]*x1a[:y_max_idx]*self.coefs_grtn['b2'].n))
if self.eonr <= self.df_data[self.col_n_app].max():
y2b = np.linspace(y_max, y_max, num=n_steps-y_max_idx)
else: # EONR is past the point of available data, plot last val again
last_pt = (self.coefs_grtn['b0'].n +
(x1a[-1]*self.coefs_grtn['b1'].n) +
(x1a[-1]*x1a[-1]*self.coefs_grtn['b2'].n))
y2b = np.linspace(last_pt, last_pt, num=n_steps-y_max_idx)
y_grtn = np.concatenate((y2a, y2b))
# if necessary, modify y_grtn so it has correct number of values
if len(y_grtn) < n_steps:
while len(y_grtn) < n_steps:
y_grtn = np.concatenate((y_grtn, np.array(([y_max]))))
elif len(y_grtn) > n_steps:
while len(y_grtn) > n_steps:
y_grtn = y_grtn[:-1].copy()
else:
pass
return y_grtn
def _solve_eonr(self):
'''
Uses scipy.optimize to find the maximum value of the return curve
'''
f_eonr1 = np.poly1d([self.coefs_nrtn['b2'].n,
self.coefs_nrtn['theta2'].n,
self.coefs_nrtn['b0'].n])
f_eonr2 = np.poly1d([self.coefs_grtn['b2'].n,
self.coefs_grtn['b1'].n,
self.coefs_grtn['b0'].n])
if self.cost_n_social > 0:
# if self.coefs_social['lin_r2'] > self.coefs_social['exp_r2']:
# print('method 1')
# # subtract only cost of fertilizer
# first_order = self.cost_n_fert + self.coefs_social['lin_mx']
# f_eonr1 = self._modify_poly1d(f_eonr1, 1,
# f_eonr1.coef[1] - first_order)
# f_eonr1 = self._modify_poly1d(f_eonr1, 0,
# self.coefs_social['lin_b'])
# result = minimize_scalar(-f_eonr1)
# self.f_eonr = f_eonr1
# else: # add together the cost of fertilizer and cost of social N
# print('method 2')
x_max = self.df_data[self.col_n_app].max()
result = minimize_scalar(self.models.combine_rtn_cost,
bounds=[-100, x_max+100],
method='bounded')
else:
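# At the optimum, the marginal gross return to N equals the unit cost
# of N, so subtract cost_n_fert from the first-order coefficient and
# maximize the resulting net-return polynomial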
first_order = self.coefs_grtn['b1'].n - self.cost_n_fert
f_eonr2 = self._modify_poly1d(f_eonr2, 1, first_order)
f_eonr2 = self._modify_poly1d(
f_eonr2, 0, self.coefs_grtn['b0'].n-self.costs_fixed)
result = minimize_scalar(-f_eonr2)
# theta2 is the EONR (minimum) only if total cost of N increases
# linearly at a first order with an intercept of zero..
self.eonr = result['x']
self.mrtn = -result['fun']
def _theta2_error(self):
'''
Calculates the error between the EONR and theta2 from _f_qp_theta2
'''
df = self.df_data.copy()
x = df[self.col_n_app].values
y = df['grtn'].values
if self.model_temp == 'quadratic':
# f_model = self.models.quadratic
f_model_theta2 = self.models.q_theta2
elif self.model_temp == 'quad_plateau':
# f_model = self.models.quad_plateau
f_model_theta2 = self.models.qp_theta2
guess = (self.coefs_grtn['b0'].n,
self.eonr,
self.coefs_grtn['b2'].n)
popt, pcov = self._curve_fit_opt(f_model_theta2, x, y, p0=guess,
maxfev=800)
self.coefs_nrtn['eonr_bias'] = popt[1] - self.eonr
# Following are functions used in calculating confidence intervals
def _bs_statfunction(self, x, y):
'''
Statistic function for bootstrap.ci(): refits the model to a bootstrap
sample of (x, y) and returns theta2 (the EONR estimate)
'''
maxfev = 1000
b0 = self.coefs_grtn['b0'].n
b2 = self.coefs_grtn['b2'].n
if self.model_temp == 'quadratic':
# f_model = self.models.quadratic
f_model_theta2 = self.models.q_theta2
elif self.model_temp == 'quad_plateau':
# f_model = self.models.quad_plateau
f_model_theta2 = self.models.qp_theta2
guess = (b0, self.eonr, b2)
# y = self.models.quad_plateau(x, a, b, c) + res
# try_n = 0
popt = [None, None, None]
try:
popt, _ = self._curve_fit_bs(f_model_theta2, x, y,
p0=guess, maxfev=maxfev)
except RuntimeError as err:
print(err)
maxfev = 10000
print('Increasing the maximum number of calls to the function to '
'{0} before giving up.\n'.format(maxfev))
if popt[1] is None:
try:
popt, _ = self._curve_fit_bs(f_model_theta2, x,
y, p0=guess, maxfev=maxfev)
except RuntimeError as err:
print(err)
return popt[1]
def _build_df_ci(self):
'''
Builds a template to store confidence intervals in a dataframe
'''
df_ci = pd.DataFrame(data=[[self.df_data.iloc[0]['location'],
self.df_data.iloc[0]['year'],
self.df_data.iloc[0]['time_n'],
self.price_grain,
self.cost_n_fert,
self.cost_n_social,
self.price_ratio,
0, 0, 0, np.nan, np.nan, np.nan, np.nan,
np.nan, np.nan,
'N/A', 'N/A']],
columns=['location', 'year', 'time_n',
'price_grain',
'cost_n_fert', 'cost_n_social',
'price_ratio', 'f_stat', 't_stat',
'level', 'wald_l', 'wald_u',
'pl_l', 'pl_u', 'boot_l', 'boot_u',
'opt_method_l', 'opt_method_u'])
return df_ci
def _calc_sse_full(self, x, y):
'''
Calculates the sum of squares across the full set of parameters,
solving for theta2
'''
if self.model_temp == 'quadratic':
# f_model = self.models.quadratic
f_model_theta2 = self.models.q_theta2
elif self.model_temp == 'quad_plateau':
# f_model = self.models.quad_plateau
f_model_theta2 = self.models.qp_theta2
guess = (self.coefs_grtn['b0'].n,
self.eonr,
self.coefs_grtn['b2'].n)
col_x = None # Perhaps we should keep col_x/col_y until curve_fit runs..?
col_y = None
info = ('func = {0}\ncol_x = {1}\ncol_y = {2}\n'
''.format(f_model_theta2,
col_x, col_y))
popt, pcov = self._curve_fit_opt(f_model_theta2, x, y, p0=guess,
info=info)
res = y - f_model_theta2(x, *popt)
sse_full_theta2 = np.sum(res**2)
return sse_full_theta2
def _check_progress_pl(self, step_size, tau_temp, tau, f_stat,
step_size_start, tau_delta_flag, count):
'''
Checks progress of profile likelihood function and adjusts step
size accordingly. Without this function, there is a chance some
datasets will take millions of tries if the CI is wide
'''
print('tau_temp', tau_temp)
print('tau', tau)
print('f_stat', f_stat)
tau_delta = tau_temp - tau
progress = tau_temp / f_stat
print('progress', progress)
print('')
if progress < 0.9 and tau_delta < 1e-3:
step_size = step_size_start
step_size *= 1000
tau_delta_flag = True
elif progress < 0.9 and tau_delta < 1e-2:
step_size = step_size_start
step_size *= 100
tau_delta_flag = True
elif progress < 0.95 and count > 100:
step_size = step_size_start
step_size *= 1000
tau_delta_flag = True
elif progress < 0.9:
step_size = step_size_start
step_size *= 10
tau_delta_flag = True
else:
if tau_delta_flag is True and count < 100:
step_size = step_size_start
# else keep step size the same
return step_size, tau_delta_flag
def _compute_bootstrap(self, alpha=0.1, samples_boot=9999):
'''
Uses bootstrapping to estimate EONR based on the sampling distribution
0) Initialize vector x with residuals - mean
1) Sample with replacement from vector x (n = number of initial
samples..)
2) Calculate the percentile and "bias corrected and accelerated"
bootstrapped CIs
'''
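# bootstrap.ci() resamples the (x, y) pairs with replacement and
# returns bias-corrected and accelerated (BCa) percentile bounds for
# the theta2 statistic computed by _bs_statfunction()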
boot_ci = [np.nan, np.nan]
x = self.df_data[self.col_n_app].values
y = self.df_data['grtn'].values
try:
boot_ci = bootstrap.ci((x, y), statfunction=self._bs_statfunction,
alpha=alpha, n_samples=samples_boot,
method='bca')
except TypeError:
if not isinstance(alpha, list):
print('Unable to compute bootstrap confidence intervals at '
'alpha = {0}'.format(alpha))
return boot_ci
def _compute_cis(self, col_x, col_y, bootstrap_ci=True,
delta_tstat=False, samples_boot=9999):
'''
Computes Wald, Profile Likelihood, and Bootstrap confidence intervals.
'''
alpha_list = self.alpha_list
df_ci = self._build_df_ci()
cols = df_ci.columns
self.df_delta_tstat = None # reset from any previous datasets
if bootstrap_ci is True:
# df_ci = self._run_bootstrap(df_ci, alpha_list, n_samples=9999)
pctle_list = self._parse_alpha_list(alpha_list)
boot_ci = self._compute_bootstrap(alpha=pctle_list,
samples_boot=samples_boot)
else:
boot_ci = [None] * ((len(self.alpha_list) * 2))
# else:
# boot_ci = np.insert(boot_ci, [0], [np.nan, np.nan])
if boot_ci is not None:
df_boot = self._parse_boot_ci(boot_ci)
for alpha in alpha_list:
level = 1 - alpha
f_stat = stats.f.ppf(1-alpha, dfn=1, dfd=len(self.df_data)-3)
t_stat = stats.t.ppf(1-alpha/2, len(self.df_data)-3)
# if level == self.ci_level:
pl_l, pl_u, wald_l, wald_u, opt_method_l, opt_method_u =\
self._get_likelihood(alpha, col_x, col_y, stat='t',
delta_tstat=delta_tstat)
# else:
# pl_l, pl_u, wald_l, wald_u, opt_method_l, opt_method_u =\
# self._get_likelihood(alpha, col_x, col_y, stat='t',
# delta_tstat=False)
if bootstrap_ci is True:
if boot_ci is None:
pctle = self._parse_alpha_list(alpha)
boot_l, boot_u = self._compute_bootstrap(
alpha=pctle, samples_boot=samples_boot)
else:
boot_l = df_boot[df_boot['alpha']==alpha]['boot_l'].values[0]
boot_u = df_boot[df_boot['alpha']==alpha]['boot_u'].values[0]
else:
boot_l, boot_u = np.nan, np.nan
df_row = pd.DataFrame([[self.df_data.iloc[0]['location'],
self.df_data.iloc[0]['year'],
self.df_data.iloc[0]['time_n'],
self.price_grain,
self.cost_n_fert,
self.cost_n_social,
self.price_ratio,
f_stat, t_stat, level,
wald_l, wald_u,
pl_l, pl_u, boot_l, boot_u,
opt_method_l, opt_method_u]],
columns=cols)
df_ci = df_ci.append(df_row, ignore_index=True)
# if df_row['level'].values[0] == self.ci_level:
# df_ci_last = df_row
# if bootstrap_ci is True:
## df_ci = self._run_bootstrap(df_ci, alpha_list, n_samples=9999)
# pctle_list = self._parse_alpha_list(alpha_list)
# df_boot = self._run_bootstrap(pctle_list, n_samples=9999)
# df_ci = pd.concat([df_ci, df_boot], axis=1)
if self.df_ci is None:
df_ci.insert(loc=0, column='run_n', value=1)
self.df_ci = df_ci
else:
last_run_n = self.df_ci.iloc[-1, :]['run_n']
df_ci.insert(loc=0, column='run_n', value=last_run_n+1)
self.df_ci = self.df_ci.append(df_ci, ignore_index=True)
last_run_n = self.df_ci.iloc[-1, :]['run_n']
# self.df_ci_last = self.df_ci[(self.df_ci['run_n'] == last_run_n) &
# (self.df_ci['level'] == self.ci_level)]
def _compute_residuals(self):
'''
Computes the residuals of the gross return values and saves to
<df_data> for use in the _compute_cis() function (confidence intervals)
'''
col_x = self.col_n_app
col_y = 'grtn'
df_data = self.df_data.copy()
x = df_data[col_x].values
y = df_data[col_y].values
if self.model_temp == 'quadratic':
f_model = self.models.quadratic
elif self.model_temp == 'quad_plateau':
f_model = self.models.quad_plateau
info = ('func = {0}\ncol_x = {1}\ncol_y = {2}\n'
''.format(f_model, col_x, col_y))
guess = (self.coefs_grtn['b0'].n, self.coefs_grtn['b1'].n,
self.coefs_grtn['b2'].n)
# print('popt: {0}'.format(guess))
popt, pcov = self._curve_fit_opt(f_model, x, y, p0=guess, info=info)
# if self.base_zero is False:
# popt, pcov = self._curve_fit_opt(f_model, x, y,
# p0=(600, 3, -0.01), info=info)
# else:
# popt, pcov = self._curve_fit_opt(f_model, x, y,
# p0=(0, 3, -0.01), info=info)
# print('popt: {0}'.format(popt)) # if same, do we need previous 4 lines?
# res = y - self.models.quad_plateau(x, *popt)
res = y - f_model(x, *popt)
res -= res.mean()
df_temp = pd.DataFrame(data=res, index=df_data.index,
columns=['grtn_res'])
self.df_temp = df_temp
df_data = pd.concat([df_data, df_temp], axis=1)
self.df_data = df_data
def _compute_wald(self, n, p, alpha, s2c=None):
'''
Computes the Wald confidence intervals for a range of alphas. <n> and
<p> are used to determine tau (from the t-statistic)
From page 104 - 105 (Gallant, 1987)
<n> (``int``): number of samples
<p> (``int``): number of parameters
<s2c> (``float``): the variance of the EONR value (notation from
Gallant, 1987)
s2 = SSE / (n - p)
c = s2c / s2
tau * (s2 * c)**(0.5)
'''
if s2c is None:
if self.cost_n_social > 0:
s2c = self.coefs_nrtn['theta2_social'].s**2
else:
s2c = self.coefs_nrtn['theta2'].s**2
tau = stats.t.ppf(1-alpha/2, n-p) # Wald should use t_stat
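# Wald CI: EONR +/- tau * sqrt(s2c), where sqrt(s2c) is the standard
# error of theta2 propagated from the covariance of the fitted model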
ci_l = self.eonr - (tau * s2c**(0.5))
ci_u = self.eonr + (tau * s2c**(0.5))
return ci_l, ci_u
def _get_guess_qp(self, rerun=False):
'''
Gets a reasonable guess for p0 of curve_fit function
* Note that these guesses are *extremely* sensitive to the result that
will be generated from the curve_fit() function. The parameter guesses
are set realistically based on yield response to N in MN, but there is
no guarantee it will work for all datasets..
'''
if rerun is False and self.metric is False:
guess = (600, 3, -0.01)
elif rerun is False and self.metric is True:
guess = (900, 10, -0.03)
# if rerun is True, don't we already know the beta1 and beta2 params?
elif rerun is True:
if self.base_zero is True:
b0 = 0
else:
b0 = self.coefs_grtn['b0'].n
b1 = self.coefs_grtn['b1'].n
b2 = self.coefs_grtn['b2'].n
guess = (b0, b1, b2)
return guess
class _pl_steps_init(object):
'''Initializes variables required for _get_pl_steps()'''
def __init__(self, theta2_start, alpha, x, y, side, tau_start,
step_size, guess, sse_full, col_x, col_y,
cost_n_social, **kwargs):
self.theta2_start = theta2_start
self.alpha = alpha
self.x = x
self.y = y
self.side = side
self.tau_start = tau_start
self.step_size = step_size
self.guess = guess
self.sse_full = sse_full
# self.col_x = col_x
# self.col_y = col_y
self.cost_n_social = cost_n_social
self.__dict__.update(kwargs)
msg = ('Please choose either "upper" or "lower" for <side> of '
'confidence interval to compute.')
assert side.lower() in ['upper', 'lower'], msg
self.q = 1
self.n = len(x)
self.p = len(guess)
self.f_stat = stats.f.ppf(1-alpha, dfn=self.q, dfd=self.n-self.p) # ppf is inv of cdf
self.t_stat = stats.t.ppf(1-alpha/2, self.n-self.p)
# f_stat = stats.f.ppf(1-alpha, dfn=q, dfd=n-p)
self.s2 = sse_full / (self.n - self.p)
self.theta2 = theta2_start
self.tau = tau_start
self.step_size_start = step_size
self.tau_delta_flag = False
self.stop_flag = False
str_func = '_get_likelihood() -> _f_qp_theta2'
self.info = ('func = {0}\ncol_x = {1}\ncol_y = {2}\n'
''.format(str_func, col_x, col_y))
def _get_pl_steps(self, theta2_start, alpha, x, y, side, tau_start=0,
step_size=1, epsilon=1e-9, df=None, sse_full=None,
stat='t', count=0):
'''
Computes the profile-likelihood confidence values by incrementing by
<step_size> until tau and test statistic are within <epsilon>
<theta2_start>: starting point of theta2 (should be set to maximum
likelihood value)
<alpha>: the significance level to compute the likelihood for
<x> and <y>: the x and y data that likelihood should be computed for
<side>: The side of the confidence interval to compute the likelihood
for - should be either "upper" or "lower" (dtype = str)
Uses <alpha> to calculate the inverse of the cdf (cumulative
distribution function) of the F statistic. The T statistic can be used
as well (they will give the same result).
'''
if self.model_temp == 'quadratic':
# f_model = self.models.quadratic
f_model_theta2 = self.models.q_theta2
elif self.model_temp == 'quad_plateau':
# f_model = self.models.quad_plateau
f_model_theta2 = self.models.qp_theta2
guess = (self.coefs_grtn['b0'].n,
self.coefs_grtn['crit_x'],
self.coefs_grtn['b2'].n)
if sse_full is None:
sse_full = self._calc_sse_full(x, y)
if df is None:
df = pd.DataFrame(columns=['theta', 'f_stat', 't_stat'])
col_x = self.col_n_app
col_y = 'grtn'
cost_n_social = self.cost_n_social
# Second, call the _pl_steps_init() class to initialize the
# _get_pl_steps() variables
li = self._pl_steps_init(
theta2_start=theta2_start, alpha=alpha, x=x, y=y, side=side,
tau_start=tau_start, step_size=step_size, epsilon=epsilon,
guess=guess, sse_full=sse_full, col_x=col_x,
col_y=col_y, cost_n_social=cost_n_social)
if stat == 't':
crit_stat = li.t_stat
else:
crit_stat = li.f_stat
# Third, minimize the difference between tau and the test statistic
# call, anything in _pl_steps_init() using li.<variable>
# e.g., li.tau will get you the <tau> variable
# Rule of thumb: if saved in both EONR and _pl_steps_init, use
# variable from EONR; if passed directly to _get_pl_steps(), use the
# variable directly
# Testing quadratic model 8/7/2019
# b0 = my_eonr.coefs_grtn['b0'].n
# b1 = my_eonr.coefs_grtn['b1'].n
# b2 = my_eonr.coefs_grtn['b2'].n
# theta2 = my_eonr.coefs_nrtn['theta2'].n
#
# y1 = my_eonr.models.quadratic(x, b0, b1, b2)
# y2 = my_eonr.models.q_theta2(x, b0, theta2, b2)
#
# sns.scatterplot(x, y1)
# sns.scatterplot(x, y2)
while li.tau < crit_stat:
popt, pcov = self._curve_fit_runtime(
lambda x, b0, b2: f_model_theta2(
x, b0, li.theta2, b2), x, y,
guess=(1, 1), maxfev=800, info=li.info)
if popt is not None:
popt = np.insert(popt, 1, li.theta2)
res = y - f_model_theta2(x, *popt)
sse_res = np.sum(res**2)
tau_temp_f = ((sse_res - sse_full) / li.q) / li.s2
with warnings.catch_warnings():
warnings.simplefilter('error', RuntimeWarning)
try:
tau_temp_t = tau_temp_f**0.5
except RuntimeWarning:
tau_temp_t = 1e-6 # when 0, we get an overflow error
warnings.simplefilter('ignore', RuntimeWarning)
# print(err)
# tau_temp_t = tau_temp_f**0.5
if stat == 't': # Be SURE tau is compared to correct stat!
tau_temp = tau_temp_t
else:
tau_temp = tau_temp_f
# The following is used in Hernandez and Mulla (2008), but adds
# an unnecessary complexity/confusion to testing significance
# tau_temp = abs(li.theta2 - self.eonr)**0.5 * tau_temp_f**0.5
# print(alpha)
# print(tau_temp - crit_stat)
if count >= 1000:
print('{0:.1f}% {1} profile likelihood failed to converge '
'after {2} iterations. Stopping calculation and using '
'{3:.1f} {4}'.format((1-alpha)*100, side.capitalize(),
count, li.theta2,
self.unit_nrate))
tau_temp_f = li.f_stat
tau_temp_t = li.t_stat
li.stop_flag = True
theta_same = df['theta'].iloc[-1]
df2 = pd.DataFrame([[theta_same, li.f_stat, li.t_stat]],
columns=['theta', 'f_stat', 't_stat'])
df = df.append(df2, ignore_index=True)
break # break out of while loop
elif popt is None:
print('{0:.1f}% {1} profile likelihood failed to find optimal '
'parameters. Stopping calculation.'
''.format((1-alpha)*100, side.capitalize()))
tau_temp_f = li.f_stat
tau_temp_t = li.t_stat
li.stop_flag = True
if df['theta'].iloc[-1] is not None:
theta_rec = df['theta'].iloc[-1]
if theta_rec < 1.0:
theta_rec = 0
else:
theta_rec = np.NaN
df2 = pd.DataFrame([[theta_rec, li.f_stat, li.t_stat]],
columns=['theta', 'f_stat', 't_stat'])
df = df.append(df2, ignore_index=True)
break # break out of while loop
else:
df2 = pd.DataFrame([[li.theta2, tau_temp_f, tau_temp_t]],
columns=['theta', 'f_stat', 't_stat'])
df = df.append(df2, ignore_index=True)
count += 1
# if count > 3:
# step_size, li.tau_delta_flag = self._check_progress_pl(
# step_size, tau_temp, li.tau, crit_stat,
# li.step_size_start, li.tau_delta_flag, count)
if side.lower() == 'upper':
li.theta2 += step_size
elif side.lower() == 'lower':
li.theta2 -= step_size
li.tau = tau_temp
if len(df) <= 1:
# start over, reducing step size
_, _, df = self._get_pl_steps(
theta2_start, alpha, x, y, side, tau_start=tau_start,
step_size=(step_size/10), epsilon=epsilon, df=df,
sse_full=sse_full, stat=stat, count=count)
elif li.stop_flag is True: # If we had to stop calculation..
pass # stop trying to compute profile-likelihood
# if CI is moving faster than epsilon
elif abs(df['theta'].iloc[-1] - df['theta'].iloc[-2]) > epsilon:
# Can't stop when within x of epsilon because sometimes convergence
# isn't reached
# elif abs(tau_temp - crit_stat) > epsilon:
df = df[:-1]
# At this point, we could get stuck in a loop if we never make any
# headway on closing in on epsilon
if stat == 't':
tau_start = df['t_stat'].iloc[-1]
else:
tau_start = df['f_stat'].iloc[-1]
_, _, df = self._get_pl_steps(
df['theta'].iloc[-1], alpha, x, y, side,
tau_start=tau_start, step_size=(step_size/10),
epsilon=epsilon, df=df, sse_full=sse_full, stat=stat,
count=count)
else:
pass #
# boot_l, boot_u = self._compute_bootstrap(alpha, n_samples=5000)
# theta2_out = df['theta'].iloc[-2:].mean()
# tau_out = df['f_stat'].iloc[-2:].mean()
theta2_out = df['theta'].iloc[-1]
tau_out = df['t_stat'].iloc[-1]
return theta2_out, tau_out, df
def _run_minimize_pl(self, f, theta2_opt, pl_guess, method='Nelder-Mead',
side='lower', pl_guess_init=None):
'''
Runs the minimize function making sure the result is suitable/as
expected for the profile likelihood
'''
if pl_guess_init is None:
pl_guess_init = pl_guess
if side == 'lower':
initial_guess = theta2_opt - pl_guess
elif side == 'upper':
initial_guess = theta2_opt + pl_guess
result = minimize(f, initial_guess, method=method)
# print(result)
if pl_guess > 800:
pl_out = None
elif result.success is not True:
return self._run_minimize_pl(f, theta2_opt,
pl_guess*1.05,
method=method,
side=side,
pl_guess_init=pl_guess_init)
elif result.success is True and side == 'lower':
# if result.x[0] > self.eonr:
if result.x[0] > theta2_opt:
return self._run_minimize_pl(f, theta2_opt,
pl_guess*1.05,
method=method,
side=side,
pl_guess_init=pl_guess_init)
else:
pl_out = result.x[0]
elif result.success is True and side == 'upper':
if result.x[0] < theta2_opt:
return self._run_minimize_pl(f, theta2_opt,
pl_guess*1.05,
method=method,
side=side,
pl_guess_init=pl_guess_init)
else:
pl_out = result.x[0]
# else: # finally, return result
# pl_out = result.x[0]
return pl_out
def _get_likelihood(self, alpha, col_x, col_y, stat='t',
last_ci=[None, None], delta_tstat=False):
'''
Computes the profile likelihood confidence values using the sum of
squares (see Gallant (1987), p. 107)
<alpha>: the significance level to compute the likelihood for
<x> and <y>: the x and y data that likelihood should be computed for
Uses <alpha> to calculate the inverse of the cdf (cumulative
distribution function) of the F statistic. The T statistic can be used
as well (they will give the same result).
'''
# First, initialize variables
df = self.df_data.copy()
x = df[col_x].values
y = df[col_y].values
guess = (self.coefs_grtn['b0'].n,
self.eonr,
self.coefs_grtn['b2'].n)
sse_full = self._calc_sse_full(x, y)
q = 1 # number of params being checked (held constant)
n = len(x)
p = len(guess)
f_stat = stats.f.ppf(1-alpha, dfn=q, dfd=n-p) # ppf is inv of cdf
t_stat = stats.t.ppf(1-alpha/2, n-p)
s2 = sse_full / (n - p) # variance
self.str_func = '_get_likelihood() -> _f_qp_theta2'
info = ('func = {0}\ncol_x = {1}\ncol_y = {2}\n'
''.format(self.str_func, col_x, col_y))
# Second, minimize the difference between tau and the test statistic
# call, anything in _get_likelihood_init() using li.<variable>
# e.g., li.tau will get you the <tau> variable
# Rule of thumb: if saved in both EONR and _get_likelihood_init, use
# variable from EONR; if passed directly to _get_likelihood(), use the
# variable directly
def _f_like_opt(theta2):
'''
Function for scipy.optimize.minimize() to optimize (find the minimum)
of the difference between tau and the test statistic. This function
returns <dif>, which will equal zero when the likelihood ratio is
exactly equal to the test statistic (e.g., t-test or f-test)
'''
if self.model_temp == 'quadratic':
# f_model = self.models.quadratic
f_model_theta2 = self.models.q_theta2
elif self.model_temp == 'quad_plateau':
# f_model = self.models.quad_plateau
f_model_theta2 = self.models.qp_theta2
try:
popt, pcov = self._curve_fit_runtime(
lambda x, b0, b2: f_model_theta2(
x, b0, theta2, b2), x, y, guess=(1, 1),
maxfev=800, info=info)
except TypeError as e:
popt = None
pcov = None
print('{0}\n{1}\nAlpha: {2}\n'.format(e, info, alpha))
if popt is not None:
popt = np.insert(popt, 1, theta2)
res = y - f_model_theta2(x, *popt)
sse_res = np.sum(res**2)
tau_temp_f = ((sse_res - sse_full) / q) / s2
with warnings.catch_warnings():
warnings.simplefilter('error', RuntimeWarning)
try:
tau_temp_t = tau_temp_f**0.5
except RuntimeWarning:
tau_temp_t = 1e-6 # tau_temp_f can be slightly negative (float error), raising a warning
warnings.simplefilter('ignore', RuntimeWarning)
if stat == 't': # Be SURE tau is compared to correct stat!
tau_temp = tau_temp_t
crit_stat = t_stat
else:
tau_temp = tau_temp_f
crit_stat = f_stat
dif = abs(crit_stat - tau_temp)
elif popt is None:
dif = None
return dif
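# Numeric illustration of the likelihood-ratio statistic computed above
# (made-up values): if sse_full = 100 with n - p = 9 (so s2 = 100 / 9)
# and the constrained fit gives sse_res = 140, then
#   tau_f = ((140 - 100) / 1) / (100 / 9) = 3.6
#   tau_t = 3.6**0.5 ~= 1.90
# and <dif> is the distance of tau from the critical t or F value.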
def _delta_tstat(theta2_opt, pl_l, pl_u, alpha):
'''
Executes _f_like_opt() for a range of N rates/theta2 values
'''
if pl_l is np.nan:
theta2_l = theta2_opt-100
else:
theta2_l = theta2_opt - (abs(theta2_opt-pl_l) * 1.1)
if pl_u is np.nan:
theta2_u = theta2_opt+100
else:
theta2_u = theta2_opt + (abs(theta2_opt-pl_u) * 1.1)
theta2_hats = np.linspace(theta2_l, theta2_u, 400)
dif_list = []
# level_list = []
for theta2_hat in theta2_hats:
dif_list.append(_f_like_opt(theta2_hat))
# level_list.append(1-alpha)
df_delta_tstat = pd.DataFrame(data={'rate_n': theta2_hats,
'delta_tstat': dif_list,
'level': 1-alpha})
return df_delta_tstat
def _check_convergence(f, theta2_opt, pl_guess, pl_l, pl_u, alpha,
thresh=0.5, method='Nelder-Mead'):
'''
Check initial guess to see if delta_tau is close to zero - if not,
redo with another initial guess.
Parameters
thresh (float): tau(theta_2) threshold; anything greater than this
is considered a poor fit, probably due to finding a local
minimum.
'''
if pl_l is None:
print('\nLower {0:.2f} profile-likelihood CI may not have '
'optimized..'.format(alpha))
pl_l = np.nan
elif f(pl_l) > thresh:
guess_l = theta2_opt - (pl_guess / 2)
pl_l_reduced = minimize(f, guess_l, method=method)
guess_l = theta2_opt - (pl_guess * 2)
pl_l_increased = minimize(f, guess_l, method=method)
if f(pl_l_reduced.x) < f(pl_l):
pl_l = pl_l_reduced.x[0]
elif f(pl_l_increased.x) < f(pl_l):
pl_l = pl_l_increased.x[0]
else:
print('\nLower {0:.2f} profile-likelihood CI may not have '
'optimized..'.format(alpha))
if pl_u is None:
print('\nUpper {0:.2f} profile-likelihood CI may not have '
'optimized..'.format(alpha))
pl_u = np.nan
elif f(pl_u) > thresh:
guess_u = theta2_opt + (pl_guess / 2)
pl_u_reduced = minimize(f, guess_u, method=method)
guess_u = theta2_opt + (pl_guess * 2)
pl_u_increased = minimize(f, guess_u, method=method)
if f(pl_u_reduced.x) < f(pl_u):
pl_u = pl_u_reduced.x[0]
elif f(pl_u_increased.x) < f(pl_u):
pl_u = pl_u_increased.x[0]
else:
print('\nUpper {0:.2f} profile-likelihood CI may not have '
'optimized..'.format(alpha))
return pl_l, pl_u
# popt, pcov = self._curve_fit_opt(self._f_qp_theta2, x, y, p0=guess, maxfev=800, info=info)
wald_l, wald_u = self._compute_wald(n, p, alpha)
pl_guess = (wald_u - self.eonr) # Adjust +/- init guess based on Wald
theta2_bias = self.coefs_nrtn['eonr_bias']
theta2_opt = self.eonr + theta2_bias # check if this should add the 2
# Both the lower and upper CIs use the Nelder-Mead algorithm
method = 'Nelder-Mead'
pl_l = self._run_minimize_pl(_f_like_opt, theta2_opt, pl_guess,
method=method, side='lower')
pl_u = self._run_minimize_pl(_f_like_opt, theta2_opt, pl_guess,
method=method, side='upper')
pl_l, pl_u = _check_convergence(_f_like_opt, theta2_opt, pl_guess,
pl_l, pl_u, alpha, thresh=0.5,
method=method)
# if pl_l is not None:
# pl_l += theta2_bias
# if pl_u is not None:
# pl_u += theta2_bias
# if pl_l is None:
# pl_l = np.nan
# if pl_u is None:
# pl_u = np.nan
if pl_l > self.eonr or pl_u < self.eonr: # can't trust the data
print('Profile-likelihood calculations are not realistic: '
'[{0:.1f}, {1:.1f}] setting to NaN'.format(pl_l, pl_u))
pl_l = np.nan
pl_u = np.nan
# TODO: this will still add CIs to df_ci after the CI falls
# above/below the EONR. Could assume a uniform distribution and
# add this to theta2_bias for subsequent calculations. It seems
# as if theta2_bias changed from when it was set in coefs_nrtn:
# perhaps it should be recalculated
if delta_tstat is True:
df_temp = _delta_tstat(theta2_opt, pl_l, pl_u, alpha)
if self.df_delta_tstat is None:
self.df_delta_tstat = df_temp
else:
self.df_delta_tstat = self.df_delta_tstat.append(df_temp)
return pl_l, pl_u, wald_l, wald_u, method, method
def _handle_no_ci(self):
'''
If critical x is greater than the max N rate, don't calculate
confidence intervals, but fill out df_ci with Wald CIs only
'''
df_ci = self._build_df_ci()
guess = (self.coefs_grtn['b0'].n,
self.coefs_grtn['b1'].n,
self.coefs_grtn['b2'].n)
for alpha in self.alpha_list:
level = 1 - alpha
n = len(self.df_data[self.col_n_app])
p = len(guess)
wald_l, wald_u = self._compute_wald(n, p, alpha)
f_stat = stats.f.ppf(1-alpha, dfn=1, dfd=len(self.df_data)-3)
t_stat = stats.t.ppf(1-alpha/2, len(self.df_data)-3)
df_row = pd.DataFrame([[self.df_data.iloc[0]['location'],
self.df_data.iloc[0]['year'],
self.df_data.iloc[0]['time_n'],
self.price_grain,
self.cost_n_fert,
self.cost_n_social,
self.price_ratio,
f_stat, t_stat, level,
wald_l, wald_u, np.nan, np.nan,
np.nan, np.nan, 'N/A', 'N/A']],
columns=df_ci.columns)
df_ci = df_ci.append(df_row, ignore_index=True)
# boot_ci = [None] * ((len(self.ci_list) * 2))
# boot_ci = [self.eonr, self.eonr] + list(boot_ci)
# df_boot = self._parse_boot_ci(boot_ci)
# df_ci = pd.concat([df_ci, df_boot], axis=1)
if self.df_ci is None:
df_ci.insert(loc=0, column='run_n', value=1)
self.df_ci = df_ci
else:
last_run_n = self.df_ci.iloc[-1, :]['run_n']
df_ci.insert(loc=0, column='run_n', value=last_run_n+1)
self.df_ci = self.df_ci.append(df_ci, ignore_index=True)
last_run_n = self.df_ci.iloc[-1, :]['run_n']
# self.df_ci_last = self.df_ci[(self.df_ci['run_n'] == last_run_n) &
# (self.df_ci['level'] == self.ci_level)]
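# For reference, the Wald bounds filled in above take the familiar form
#   theta2_hat +/- t_(1 - alpha/2, n - p) * SE(theta2_hat)
# (see _compute_wald); they are the only interval type reported when the
# optimum falls beyond the range of the observed N rates.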
def _modify_poly1d(self, f, idx, new_val):
'''
Modifies a poly1d object, and returns the modified object.
'''
assert idx in [0, 1, 2], 'Choose idx of 0, 1, or 2.'
if idx == 2:
f_new = np.poly1d([new_val, f[1], f[0]])
elif idx == 1:
f_new = np.poly1d([f[2], new_val, f[0]])
elif idx == 0:
f_new = np.poly1d([f[2], f[1], new_val])
return f_new
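# Illustrative example of the np.poly1d indexing this helper relies on:
# >>> import numpy as np
# >>> f = np.poly1d([2.0, 3.0, 5.0])  # 2x^2 + 3x + 5 (highest degree first)
# >>> f[0], f[1], f[2]  # (5.0, 3.0, 2.0); f[i] is the x^i coefficient
# >>> np.poly1d([f[2], f[1], -1.0])  # same parabola with constant term -1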
def _parse_alpha_list(self, alpha):
'''
Creates a lower and upper percentile from a list of alpha values. The
lower is (alpha / 2) and upper is (1 - (alpha / 2)). Required for
scikits-bootstrap
'''
if isinstance(alpha, list):
alpha_pctle = []
for item in alpha:
pctle_l = item / 2
pctle_u = 1 - (item / 2)
alpha_pctle.extend([pctle_l, pctle_u])
else:
pctle_l = alpha / 2
pctle_u = 1 - (alpha / 2)
alpha_pctle = [pctle_l, pctle_u]
return alpha_pctle
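# Illustrative example (hypothetical alpha values):
# >>> self._parse_alpha_list([0.1, 0.05])
# [0.05, 0.95, 0.025, 0.975]
# >>> self._parse_alpha_list(0.32)
# [0.16, 0.84]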
def _parse_boot_ci(self, boot_ci):
'''
Parses a flat list of values by separating it into pairs; the first
item of each pair is assigned to the lower column ('boot_l') and the
second to the upper column ('boot_u'). Returns a dataframe with both
columns.
'''
def grouped(iterable, n):
return zip(*[iter(iterable)]*n)
boot_l = []
boot_u = []
for lower, upper in grouped(boot_ci, 2):
boot_l.append(lower)
boot_u.append(upper)
df_boot = pd.DataFrame({'alpha': self.alpha_list,
'boot_l': boot_l,
'boot_u': boot_u})
return df_boot
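# Illustrative example: with self.alpha_list = [0.1, 0.05], a flat list of
# bootstrap bounds [140.2, 208.6, 133.0, 215.1] (hypothetical values) is
# paired off so that boot_l = [140.2, 133.0] and boot_u = [208.6, 215.1].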
# def _run_bootstrap(self, alpha, n_samples=9999):
# '''
# Calls the _compute_bootstrap() function.
# '''
# pctle = self._parse_alpha_list(alpha)
# boot_ci = self._compute_bootstrap(alpha=pctle,
# n_samples=n_samples)
## if boot_ci is None:
## boot_ci = []
## for item in pctle_list:
## boot_ci_temp = self._compute_bootstrap(alpha=item,
## n_samples=n_samples)
## boot_ci.append(boot_ci_temp)
## if boot_ci is None:
## boot_ci = [None] * ((len(self.ci_list) * 2) + 2)
## boot_ci = [self.eonr, self.eonr] + list(boot_ci)
## df_boot = self._parse_boot_ci(boot_ci)
## df_ci = pd.concat([df_ci, df_boot], axis=1)
# return boot_ci
def calc_delta(self, df_results=None):
'''
Calculates the change in EONR among economic scenarios.
``EONR.calc_delta`` filters all data by location, year, and
nitrogen timing, then the "delta" is calculated as the difference
relative to the economic scenario resulting in the highest EONR.
Parameters:
df_results (``Pandas dataframe``, optional): The dataframe
containing the results from ``EONR.calculate_eonr()``
(default: None).
Returns:
``pandas.DataFrame``:
**df_delta** -- The dataframe with the newly inserted EONR
delta.
Example:
Please complete the `EONR.calculate_eonr`_ example first because
this example builds on the results of the ``my_eonr`` object.
Change the economic scenario (using ``EONR.update_econ``) and
calculate the EONR again for the same dataset (using
``EONR.calculate_eonr``)
>>> price_grain = 0.314 # in USD per kg grain
>>> my_eonr.update_econ(price_grain=price_grain)
>>> my_eonr.calculate_eonr(df_data)
Computing EONR for Minnesota 2012 Pre
Cost of N fertilizer: $0.88 per kg
Price grain: $0.31 per kg
Fixed costs: $0.00 per ha
Checking quadratic and quadratic-plateau models for best fit..
Quadratic model r^2: 0.72
Quadratic-plateau model r^2: 0.73
Using the quadratic-plateau model..
Economic optimum N rate (EONR): 169.9 kg per ha [135.2, 220.9] (90.0% confidence)
Maximum return to N (MRTN): $1682.04 per ha
Use ``EONR.calc_delta`` to compute the change in EONR between the
two economic scenarios:
>>> df_delta = my_eonr.calc_delta(my_eonr.df_results)
.. image:: ../img/calc_delta.png
.. _EONR.calculate_eonr: eonr.EONR.html#eonr.EONR.calculate_eonr
'''
if df_results is None:
df = self.df_results.copy()
else:
df = df_results.copy()
years = df['year'].unique()
years.sort()
df_delta = None
for year in years:
df_year = df[df['year'] == year]
locs = df_year['location'].unique()
locs.sort()
for loc in locs:
df_loc = df_year[df_year['location'] == loc]
times = df_loc['time_n'].unique()
for time in times:
df_yloct = df_loc[df_loc['time_n'] == time]
eonr_base = df_yloct['eonr'].max() # scenario with the lowest fert:grain price ratio
eonr_delta = df_yloct['eonr'] - eonr_base
df_yloct.insert(8, 'eonr_delta', eonr_delta)
if df_delta is None:
df_delta = pd.DataFrame(columns=df_yloct.columns)
df_delta = df_delta.append(df_yloct)
return df_delta
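# The loop above is equivalent in spirit to this vectorized pandas sketch
# (illustrative only; assumes the df_results column names):
# df['eonr_delta'] = df['eonr'] - df.groupby(
#     ['year', 'location', 'time_n'])['eonr'].transform('max')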
def calculate_eonr(self, df, col_n_app=None, col_yld=None,
col_crop_nup=None, col_n_avail=None,
col_year=None, col_location=None, col_time_n=None,
bootstrap_ci=False, samples_boot=9999,
delta_tstat=False):
'''
Calculates the EONR and its confidence intervals.
``col_n_app`` and ``col_yld`` are required by ``EONR``, but not
necessarily by ``EONR.calculate_eonr()``. They must either be set
during the initialization of ``EONR``, before running
``EONR.calculate_eonr`` (using ``EONR.set_column_names``), or they
must be passed in this ``EONR.calculate_eonr`` method.
Parameters:
df (``Pandas dataframe``): The dataframe containing the
experimental data.
col_n_app (``str``, optional): Column name pointing to the rate of
applied N fertilizer data (default: None).
col_yld (``str``, optional): Column name pointing to the grain
yield data. This column is multiplied by price_grain to create
the 'grtn' column in ``EONR.df_data`` (default: None).
col_crop_nup (``str``, optional): Column name pointing to crop N
uptake data (default: None).
col_n_avail (``str``, optional): Column name pointing to available
soil N at planting plus fertilizer throughout the season
(default: None).
col_year (``str``, optional): Column name pointing to year
(default: None).
col_location (``str``, optional): Column name pointing to location
(default: None).
col_time_n (``str``, optional): Column name pointing to nitrogen
application timing (default: None).
bootstrap_ci (``bool``, optional): Indicates whether bootstrap
confidence intervals are to be computed. If calculating the
EONR for many sites and/or economic scenarios, it may be
desirable to set to ``False`` because the bootstrap confidence
intervals take the most time to compute (default: False).
samples_boot (``int``, optional): Number of samples in the
bootstrap computation (default: 9999).
delta_tstat (``bool``, optional): Indicates whether the
difference from the t-statistic will be computed (as a function
of theta2/N rate). May be useful to observe what optimization
method is best suited to reach convergence when computing the
profile-likelihood CIs (default: False).
Note:
``col_crop_nup`` and ``col_n_avail`` are required to calculate the
socially optimum nitrogen rate, SONR. The SONR is the optimum
nitrogen rate considering the social cost of nitrogen, so
``EONR.cost_n_social`` must also be set. ``col_year``,
``col_location``, and ``col_time_n`` are purely optional. They
only affect the titles and axes labels of the plots.
Example:
Load and initialize ``eonr``
>>> from eonr import EONR
>>> import os
>>> import pandas as pd
Load the sample data
>>> base_dir = r'F:\\nigo0024\Documents\GitHub\eonr\eonr'
>>> df_data = pd.read_csv(os.path.join(base_dir, 'data', 'minnesota_2012.csv'))
Set column names
>>> col_n_app = 'rate_n_applied_kgha'
>>> col_yld = 'yld_grain_dry_kgha'
Set units
>>> unit_currency = '$'
>>> unit_fert = 'kg'
>>> unit_grain = 'kg'
>>> unit_area = 'ha'
Set economic conditions
>>> cost_n_fert = 0.88 # in USD per kg nitrogen
>>> price_grain = 0.157 # in USD per kg grain
Initialize ``EONR``
>>> my_eonr = EONR(cost_n_fert=cost_n_fert,
price_grain=price_grain,
col_n_app=col_n_app,
col_yld=col_yld,
unit_currency=unit_currency,
unit_grain=unit_grain,
unit_fert=unit_fert,
unit_area=unit_area,
model=None,
base_dir=base_dir)
Calculate the economic optimum nitrogen rate using
``EONR.calculate_eonr``
>>> my_eonr.calculate_eonr(df_data)
Computing EONR for Minnesota 2012 Pre
Cost of N fertilizer: $0.88 per kg
Price grain: $0.16 per kg
Fixed costs: $0.00 per ha
Checking quadratic and quadratic-plateau models for best fit..
Quadratic model r^2: 0.72
Quadratic-plateau model r^2: 0.73
Using the quadratic-plateau model..
Economic optimum N rate (EONR): 162.3 kg per ha [130.5, 207.8] (90.0% confidence)
Maximum return to N (MRTN): $767.93 per ha
'''
msg = ('Please set EONR.price_grain > 0.')
assert self.price_grain > 0, msg
if col_n_app is not None:
self.col_n_app = str(col_n_app)
if col_yld is not None:
self.col_yld = str(col_yld)
if col_crop_nup is not None:
self.col_crop_nup = str(col_crop_nup)
if col_n_avail is not None:
self.col_n_avail = str(col_n_avail)
self.bootstrap_ci = bootstrap_ci
self._reset_temp()
self._set_df(df)
self._replace_missing_vals(missing_val='.')
self._calc_grtn()
if self.cost_n_social > 0:
self._calc_social_cost(col_x=self.col_n_avail,
col_y='social_cost_n')
self._calc_nrtn(col_x=self.col_n_app, col_y='grtn')
self._solve_eonr()
self._compute_R(col_x=self.col_n_app, col_y='grtn') # models.update_eonr
self._theta2_error()
if self.eonr > self.df_data[self.col_n_app].max():
print('\n{0} is past the point of available data, so confidence '
'bounds are not being computed'.format(self.onr_acr))
self._handle_no_ci()
else:
self._compute_residuals()
self._compute_cis(col_x=self.col_n_app, col_y='grtn',
bootstrap_ci=bootstrap_ci,
samples_boot=samples_boot,
delta_tstat=delta_tstat)
self._build_mrtn_lines()
if self.costs_at_onr != 0:
self._rtn_derivative()
# self._ci_pdf()
if self.print_out is True:
self._print_grtn()
self._print_results()
if self.base_zero is True:
base_zero = self.coefs_grtn_primary['b0'].n
grtn_y_int = self.coefs_grtn['b0'].n
else:
base_zero = np.nan
grtn_y_int = self.coefs_grtn['b0'].n
unit_price_grain = self.unit_rtn
unit_cost_n = '{0} per {1}'.format(self.unit_currency,
self.unit_fert)
last_run_n = self.df_ci['run_n'].max()
df_ci_last = self.df_ci[(self.df_ci['run_n'] == last_run_n) &
(self.df_ci['level'] == self.ci_level)]
results = [[self.price_grain, self.cost_n_fert, self.cost_n_social,
self.costs_fixed,
self.price_ratio, unit_price_grain, unit_cost_n,
self.location, self.year, self.time_n, self.model_temp,
base_zero, self.eonr,
self.coefs_nrtn['eonr_bias'],
self.R, self.costs_at_onr,
self.ci_level, df_ci_last['wald_l'].values[0],
df_ci_last['wald_u'].values[0],
df_ci_last['pl_l'].values[0],
df_ci_last['pl_u'].values[0],
df_ci_last['boot_l'].values[0],
df_ci_last['boot_u'].values[0],
self.mrtn, self.coefs_grtn['r2_adj'],
self.coefs_grtn['rmse'],
self.coefs_grtn['max_y'],
self.coefs_grtn['crit_x'],
grtn_y_int,
self.results_temp['scn_lin_r2'],
self.results_temp['scn_lin_rmse'],
self.results_temp['scn_exp_r2'],
self.results_temp['scn_exp_rmse']]]
self.df_results = self.df_results.append(pd.DataFrame(
results, columns=self.df_results.columns),
ignore_index=True)
def plot_delta_tstat(self, level_list=None, style='ggplot'):
'''Plots the test statistic as a function of nitrogen rate
Parameters:
level_list (``list``): The confidence levels to plot; should be a
subset of items in EONR.ci_list (default: None).
style (``str``, optional): The style of the plot; can be any of
the options supported by ``matplotlib`` (default: 'ggplot').
Example:
Load and initialize ``eonr``, then load the sample data
>>> from eonr import EONR
>>> import os
>>> import pandas as pd
>>> base_dir = r'F:\\nigo0024\Documents\GitHub\eonr\eonr'
>>> df_data = pd.read_csv(os.path.join(base_dir, 'data', 'minnesota_2012.csv'))
Set column names, units, and economic conditions
>>> col_n_app = 'rate_n_applied_kgha'
>>> col_yld = 'yld_grain_dry_kgha'
>>> unit_currency = '$'
>>> unit_fert = 'kg'
>>> unit_grain = 'kg'
>>> unit_area = 'ha'
>>> cost_n_fert = 0.88 # in USD per kg nitrogen
>>> price_grain = 0.157 # in USD per kg grain
Initialize ``EONR``
>>> my_eonr = EONR(cost_n_fert=cost_n_fert, price_grain=price_grain,
col_n_app=col_n_app, col_yld=col_yld,
unit_currency=unit_currency, unit_grain=unit_grain,
unit_fert=unit_fert, unit_area=unit_area,
model=None, base_dir=base_dir)
Calculate the economic optimum nitrogen rate using
``EONR.calculate_eonr``, being sure to set ``delta_tstat`` to
``True``
>>> my_eonr.calculate_eonr(df_data, delta_tstat=True)
Computing EONR for Minnesota 2012 Pre
Cost of N fertilizer: $0.88 per kg
Price grain: $0.16 per kg
Fixed costs: $0.00 per ha
Checking quadratic and quadratic-plateau models for best fit..
Quadratic model r^2: 0.72
Quadratic-plateau model r^2: 0.73
Using the quadratic-plateau model..
Economic optimum N rate (EONR): 162.3 kg per ha [130.5, 207.8] (90.0% confidence)
Maximum return to N (MRTN): $767.93 per ha
Plot the Delta t-stat plot using ``EONR.plot_delta_tstat``
>>> my_eonr.plot_delta_tstat()
.. image:: ../img/plot_delta_tstat.png
'''
if self.plotting_tools is None:
self.plotting_tools = Plotting_tools(self)
else:
self.plotting_tools.update_eonr(self)
self.plotting_tools.plot_delta_tstat(level_list=level_list,
style=style)
self.fig_delta_tstat = self.plotting_tools.fig_delta_tstat
def plot_derivative(self, ci_type='profile-likelihood', ci_level=None,
style='ggplot'):
'''
Plots a zoomed-in view of the ONR and the derivative
Parameters:
ci_type (``str``, optional): Indicates which confidence interval
type should be plotted. Options are 'wald', to plot the Wald
CIs; 'profile-likelihood', to plot the profile-likelihood
CIs; or 'bootstrap', to plot the bootstrap CIs (default:
'profile-likelihood').
ci_level (``float``, optional): The confidence interval level to
be plotted; must be one of the values in EONR.ci_list. If
``None``, uses ``EONR.ci_level`` (default: None).
style (``str``, optional): The style of the plot; can be any of
the options supported by ``matplotlib`` (default: 'ggplot').
Example:
Please complete the `EONR.calculate_eonr`_ example first because
this example builds on the results of the ``my_eonr`` object.
>>> my_eonr.plot_derivative()
.. image:: ../img/plot_derivative.png
.. _EONR.calculate_eonr: eonr.EONR.html#eonr.EONR.calculate_eonr
'''
if self.plotting_tools is None:
self.plotting_tools = Plotting_tools(self)
else:
self.plotting_tools.update_eonr(self)
self.plotting_tools.plot_derivative(ci_type=ci_type, ci_level=ci_level,
style=style)
self.fig_derivative = self.plotting_tools.fig_derivative
def plot_eonr(self, ci_type='profile-likelihood', ci_level=None,
run_n=None, x_min=None, x_max=None, y_min=None, y_max=None,
show_model=True, style='ggplot'):
'''
Plots EONR, MRTN, GRTN, net return, and nitrogen cost.
If left as ``None``, ``x_min``, ``x_max``, ``y_min``, and ``y_max``
are set by ``Matplotlib``.
Parameters:
ci_type (``str``, optional): Indicates which confidence interval
type should be plotted. Options are 'wald', to plot the Wald
CIs; 'profile-likelihood', to plot the profile-likelihood
CIs; or 'bootstrap', to plot the bootstrap CIs (default:
'profile-likelihood').
ci_level (``float``, optional): The confidence interval level to be
plotted, and must be one of the values in EONR.ci_list. If
``None``, uses the
``EONR.ci_level`` (default: None).
run_n (``int``, optional): NOT IMPLEMENTED. The run number to plot,
as indicated in EONR.df_results; if None, uses the most recent,
or maximum, run_n in EONR.df_results (default: None).
x_min (``int``, optional): The minimum x-bounds of the plot
(default: None)
x_max (``int``, optional): The maximum x-bounds of the plot
(default: None)
y_min (``int``, optional): The minimum y-bounds of the plot
(default: None)
y_max (``int``, optional): The maximum y-bounds of the plot
(default: None)
show_model (``bool``, optional): Whether to display the type of
fitted model in the helper legend (default: True).
style (``str``, optional): The style of the plot; can be any of
the options supported by `matplotlib`_ (default: 'ggplot').
Example:
Please complete the `EONR.calculate_eonr`_ example first because
this example builds on the results of the ``my_eonr`` object.
>>> my_eonr.plot_eonr(x_min=-5, x_max=300, y_min=-100, y_max=1400)
.. image:: ../img/plot_eonr.png
.. _matplotlib: https://tonysyu.github.io/raw_content/matplotlib-style-gallery/gallery.html
.. _EONR.calculate_eonr: eonr.EONR.html#eonr.EONR.calculate_eonr
'''
if self.plotting_tools is None:
self.plotting_tools = Plotting_tools(self)
else:
self.plotting_tools.update_eonr(self)
self.plotting_tools.plot_eonr(ci_type=ci_type, ci_level=ci_level,
run_n=run_n, x_min=x_min, x_max=x_max,
y_min=y_min, y_max=y_max, style=style)
self.fig_eonr = self.plotting_tools.fig_eonr
def plot_modify_size(self, fig=None, plotsize_x=7, plotsize_y=4,
labelsize=11):
'''
Modifies the size of the last plot generated
Parameters:
fig (``Matplotlib Figure``, optional): Matplotlib figure to modify
(default: None)
plotsize_x (``float``, optional): Sets x size of plot in inches
(default: 7)
plotsize_y (``float``, optional): Sets y size of plot in inches
(default: 4)
labelsize (``float``, optional): Sets the tick and axis label
font size (default: 11)
Example:
Please complete the `EONR.calculate_eonr`_ and
`EONR.plot_eonr`_ examples first because this example builds on
the results of the ``my_eonr.fig_eonr.fig`` object.
>>> my_eonr.plot_modify_size(fig=my_eonr.fig_eonr.fig, plotsize_x=5, plotsize_y=3, labelsize=9)
.. image:: ../img/plot_modify_size.png
.. _EONR.calculate_eonr: eonr.EONR.html#eonr.EONR.calculate_eonr
.. _EONR.plot_eonr: eonr.EONR.html#eonr.EONR.plot_eonr
'''
self.plotting_tools.modify_size(fig=fig, plotsize_x=plotsize_x,
plotsize_y=plotsize_y,
labelsize=labelsize)
def plot_modify_title(self, title_text, g=None, size_font=12):
'''
Allows user to replace the title text
Parameters:
title_text (``str``): New title text
g (``matplotlib.figure``): Matplotlib figure object to modify
(default: None)
size_font (``float``): Font size to use (default: 12)
Example:
Please complete the `EONR.calculate_eonr`_ and
`EONR.plot_eonr`_ examples first because this example builds on
the results of the ``my_eonr.fig_eonr.fig`` object.
>>> my_eonr.plot_modify_title('Preplant N fertilizer - Stewart, MN 2012', g=my_eonr.fig_eonr.fig, size_font=15)
.. image:: ../img/plot_modify_title.png
.. _EONR.calculate_eonr: eonr.EONR.html#eonr.EONR.calculate_eonr
.. _EONR.plot_eonr: eonr.EONR.html#eonr.EONR.plot_eonr
'''
self.plotting_tools.modify_title(title_text, g=g, size_font=size_font)
def plot_save(self, fname=None, base_dir=None, fig=None, dpi=300):
'''Saves a generated matplotlib figure to file
Parameters:
fname (``str``, optional): Filename to save plot to (default: None)
base_dir (``str``, optional): Base file directory when saving
results (default: None)
fig (eonr.fig, optional): EONR figure object to save (default:
None)
dpi (``int``, optional): Resolution to save the figure to in dots
per inch (default: 300)
Example:
Please complete the `EONR.calculate_eonr`_ and
`EONR.plot_eonr`_ examples first because this example builds on
the results of the ``my_eonr.fig_eonr.fig`` object.
Set output filename
>>> fname = r'F:\\nigo0024\Downloads\eonr_fig.png'
Save the most recent figure
>>> my_eonr.plot_save(fname)
``fig`` is None, so saving the current (most recent) figure.
>>> os.path.isfile(fname)
True
.. _EONR.calculate_eonr: eonr.EONR.html#eonr.EONR.calculate_eonr
.. _EONR.plot_eonr: eonr.EONR.html#eonr.EONR.plot_eonr
'''
self.plotting_tools.plot_save(fname=fname, base_dir=base_dir, fig=fig,
dpi=dpi)
def plot_tau(self, y_axis='t_stat', emphasis='profile-likelihood',
run_n=None, style='ggplot'):
'''Plots the test statistic as a function of nitrogen rate
Parameters:
y_axis (``str``, optional): Value to plot on the y-axis. Options
are 't_stat', to plot the *T statistic*; 'f_stat', to plot the
*F statistic*; or 'level', to plot the *confidence level*
(default: 't_stat').
emphasis (``str``, optional): Indicates which confidence interval
type, if any, should be emphasized. Options are 'wald', to
emphasize the Wald CIs; 'profile-likelihood', to emphasize the
profile-likelihood CIs; 'bootstrap', to emphasize the bootstrap
CIs; or ``None``, to emphasize no CI (default:
'profile-likelihood').
run_n (``int``, optional): The run number to plot, as indicated in
``EONR.df_ci``; if ``None``, uses the most recent, or maximum,
run_n in ``EONR.df_ci`` (default: None).
style (``str``, optional): The style of the plot; can be any of
the options supported by ``matplotlib`` (default: 'ggplot').
Example:
Please complete the `EONR.calculate_eonr`_ example first because
this example builds on the results of the ``my_eonr`` object.
>>> my_eonr.plot_tau()
.. image:: ../img/plot_tau.png
.. _EONR.calculate_eonr: eonr.EONR.html#eonr.EONR.calculate_eonr
'''
if self.plotting_tools is None:
self.plotting_tools = Plotting_tools(self)
else:
self.plotting_tools.update_eonr(self)
self.plotting_tools.plot_tau(y_axis=y_axis, emphasis=emphasis,
run_n=run_n, style=style)
self.fig_tau = self.plotting_tools.fig_tau
def print_results(self):
'''
Prints the results of the optimum nitrogen rate computation
Example:
Please complete the `EONR.calculate_eonr`_ example first because
this example builds on the results of the ``my_eonr`` object.
>>> my_eonr.print_results()
Economic optimum N rate (EONR): 162.3 kg per ha [130.5, 207.8] (90.0% confidence)
Maximum return to N (MRTN): $767.93 per ha
'''
self._print_results()
def set_column_names(self, col_n_app=None, col_yld=None, col_crop_nup=None,
col_n_avail=None, col_year=None,
col_location=None, col_time_n=None):
'''
Sets the column name(s) for ``EONR.df_data``
``col_n_app`` and ``col_yld`` determine which data are used to
compute the EONR, and ``col_crop_nup`` and ``col_n_avail`` are
required for the SONR; ``col_year``, ``col_location``, and
``col_time_n`` are metadata used for plotting and reporting
purposes only.
Parameters:
col_n_app (``str``, optional): Column name pointing to the rate of
applied N fertilizer data (default: None).
col_yld (``str``, optional): Column name pointing to the grain
yield data. This column is multiplied by price_grain to create
the 'grtn' column in ``EONR.df_data`` (default: None).
col_crop_nup (``str``, optional): Column name pointing to crop N
uptake data (default: None).
col_n_avail (``str``, optional): Column name pointing to available
soil N at planting plus fertilizer throughout the season
(default: None).
col_year (``str``, optional): Column name pointing to year
(default: None).
col_location (``str``, optional): Column name pointing to location
(default: None).
col_time_n (``str``, optional): Column name pointing to nitrogen
application timing (default: None).
Example:
Load and initialize ``eonr``
>>> from eonr import EONR
>>> import os
>>> import pandas as pd
>>> base_dir = r'F:\\nigo0024\Documents\GitHub\eonr\eonr'
>>> my_eonr = EONR(model=None, base_dir=base_dir)
Set the column names using ``EONR.set_column_names``
>>> my_eonr.set_column_names(col_n_app='rate_n_applied_kgha', col_yld='yld_grain_dry_kgha')
>>> print(my_eonr.col_n_app)
>>> print(my_eonr.col_yld)
rate_n_applied_kgha
yld_grain_dry_kgha
'''
if col_n_app is not None:
self.col_n_app = str(col_n_app)
if col_yld is not None:
self.col_yld = str(col_yld)
if col_crop_nup is not None:
self.col_crop_nup = str(col_crop_nup)
if col_n_avail is not None:
self.col_n_avail = str(col_n_avail)
if col_year is not None:
self.col_year = str(col_year)
if col_location is not None:
self.col_location = str(col_location)
if col_time_n is not None:
self.col_time_n = str(col_time_n)
if self.df_data is not None:
self._find_trial_details() # Use new col_name(s) to update details
def set_units(self, unit_currency=None, unit_fert=None, unit_grain=None,
unit_area=None):
'''
Sets the units data in ``EONR.df_data`` and for reporting
Parameters:
unit_currency (``str``, optional): Currency unit, e.g., "$"
(default: None).
unit_fert (``str``, optional): Fertilizer unit, e.g., "lbs"
(default: None).
unit_grain (``str``, optional): Grain unit, e.g., "bu" (default:
None).
unit_area (``str``, optional): Area unit, e.g., "ac" (default:
None).
Example:
Load and initialize ``eonr``
>>> from eonr import EONR
>>> import os
>>> import pandas as pd
>>> base_dir = r'F:\\nigo0024\Documents\GitHub\eonr\eonr'
>>> my_eonr = EONR(model=None, base_dir=base_dir)
Set the units using ``EONR.set_units``
>>> my_eonr.set_units(unit_currency='USD', unit_fert='kg', unit_grain='kg', unit_area='ha')
>>> print(my_eonr.unit_currency)
>>> print(my_eonr.unit_fert)
>>> print(my_eonr.unit_grain)
>>> print(my_eonr.unit_area)
USD
kg
kg
ha
'''
if unit_currency is not None:
self.unit_currency = str(unit_currency)
if unit_fert is not None:
self.unit_fert = str(unit_fert)
if unit_grain is not None:
self.unit_grain = str(unit_grain)
if unit_area is not None:
self.unit_area = str(unit_area)
def set_trial_details(self, year=None, location=None, n_timing=None):
'''
Sets the year, location, or nitrogen timing
If these descriptions are used as metadata in the input dataset, they
are accessed for plotting purposes. These parameters do not affect the
calculation of the EONR or its confidence intervals in any way.
Parameters:
year (``str`` or ``int``, optional): Year of experimental trial
(default: None)
location (``str`` or ``int``, optional): Location of experimental
trial (default: None)
n_timing (``str`` or ``int``, optional): Nitrogen timing of
experimental trial (default: None)
Example:
Load and initialize ``eonr``
>>> from eonr import EONR
>>> import os
>>> import pandas as pd
>>> base_dir = r'F:\\nigo0024\Documents\GitHub\eonr\eonr'
>>> my_eonr = EONR(model=None, base_dir=base_dir)
Set the trial details using ``EONR.set_trial_details``
>>> my_eonr.set_trial_details(year=2019, location='St. Paul, MN', n_timing='At planting')
>>> print(my_eonr.year)
>>> print(my_eonr.location)
>>> print(my_eonr.n_timing)
2019
St. Paul, MN
At planting
'''
if year is not None:
self.year = int(year)
if location is not None:
self.location = location
if n_timing is not None:
self.n_timing = n_timing
def update_econ(self, cost_n_fert=None, cost_n_social=None,
costs_fixed=None, price_grain=None):
'''
Sets or resets the nitrogen fertilizer cost, social cost of nitrogen,
fixed costs, and/or grain price.
The price ratio is recomputed based on the passed information, then
the lowest-level folder in the base directory (``EONR.base_dir``) is
renamed according to the price ratio. The folder name is set according
to the economic scenario (useful when running ``EONR`` for many
different economic scenarios, then plotting and saving results for
each scenario).
Parameters:
cost_n_fert (``float``, optional): Cost of nitrogen fertilizer
(default: None).
cost_n_social (``float``, optional): Cost of pollution caused by
excess nitrogen (default: None).
costs_fixed (``float``, optional): Fixed costs on a per area basis
(default: None).
price_grain (``float``, optional): Price of grain (default: None).
Example:
Load and initialize ``eonr``
>>> from eonr import EONR
>>> import os
>>> import pandas as pd
>>> base_dir = r'F:\\nigo0024\Documents\GitHub\eonr\eonr'
>>> my_eonr = EONR(model=None, base_dir=base_dir)
Set/update the cost of fertilizer and price of grain using
``EONR.update_econ``
>>> my_eonr.update_econ(cost_n_fert=0.88, price_grain=0.157)
>>> print(my_eonr.price_ratio)
>>> print(my_eonr.base_dir)
5.605095541
F:\\nigo0024\Documents\GitHub\eonr\eonr\trad_5605
Set/update the social cost of nitrogen, again using
``EONR.update_econ``
>>> my_eonr.update_econ(cost_n_social=1.1)
>>> print(my_eonr.price_ratio)
>>> print(my_eonr.base_dir)
12.61146496
F:\\nigo0024\Documents\GitHub\eonr\eonr\social_12611_1100
'''
if cost_n_fert is not None:
self.cost_n_fert = cost_n_fert # in USD per lb
if cost_n_social is not None:
self.cost_n_social = cost_n_social # in USD per lb lost
if costs_fixed is not None:
self.costs_fixed = costs_fixed # in USD per area
if price_grain is not None:
self.price_grain = price_grain # in USD
self.price_ratio = ((self.cost_n_fert + self.cost_n_social) /
self.price_grain)
if self.base_dir is not None:
if self.cost_n_social != 0 and self.metric is False:
join_name = '{0}_{1:.3f}_{2:.3f}'.format(
'social', self.price_ratio, self.cost_n_social)
elif self.cost_n_social != 0 and self.metric is True:
join_name = '{0}_{1:.1f}_{2:.3f}'.format(
'social', self.price_ratio, self.cost_n_social)
elif self.cost_n_social == 0 and self.metric is False:
join_name = '{0}_{1:.3f}'.format('trad', self.price_ratio)
elif self.cost_n_social == 0 and self.metric is True:
join_name = '{0}_{1:.1f}'.format('trad', self.price_ratio)
else:
join_name = '{0}_{1:.3f}'.format('trad', self.price_ratio)
join_name = re.sub(r'[.]', '', join_name)
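# e.g., 'trad_5.605' becomes 'trad_5605' (periods stripped so the
# ratio can be used in a folder name)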
self.base_dir = os.path.join(os.path.split(self.base_dir)[0],
join_name)
if self.cost_n_social > 0:
self.onr_name = 'Socially'
self.onr_acr = 'SONR'
elif self.cost_n_fert > 0:
self.onr_name = 'Economic'
self.onr_acr = 'EONR'
else:
self.onr_name = 'Agronomic'
self.onr_acr = 'AONR'