start_date = ['2019-06-03', '2019-06-13', '2018-11-05', '2019-05-31', '2019-06-01', '2019-09-01']
end_date = ['2019-08-31', '2019-08-23', '2018-11-25', '2019-07-1', '2019-07-31', '2019-10-25']
data = pd.DataFrame(list(zip(start_date,end_date)), columns = ['Start Date', 'End Date'])
# Store data index as a new column which will act as a primary key will be used later and initiate a blank new dataframe
data['Key'] = data.reset_index(drop = True).index
final = pd.DataFrame()
# Start the loop which will iterate on each particular combination of start data and end date -
for i in range(0,len(data)):
start_date = data['Start Date'][i]
end_date = data['End Date'][i]
print("-----------------------------------------------------------------------------")
print("Start Date is {0}, End Date is {1}".format(start_date, end_date))
print("-----------------------------------------------------------------------------")
# If both dates fall in same month and year, then the date range is the same ( no consecutive months exists), check for same and proceed if the any of these are not same
# Store Month start date falling between two dates in a new dataframe object
# Store Month end dates falling between two dates
# If first month start date is not equal to the start date of the date range, then we need to shift the index by one to accommodate for the enddate of the first month
if ((pd.to_datetime(start_date).month != pd.to_datetime(end_date).month) | (pd.to_datetime(start_date).year != pd.to_datetime(end_date).year)):
month_startdates = pd.DataFrame(pd.date_range(start_date, end_date, freq = 'MS'), columns=['Month_StartDate'])
month_enddates = pd.DataFrame(pd.date_range(start_date, end_date, freq = 'M'), columns = ['Month_EndDate'])
print("Ranges of months in the above date range : \n {0} \n {1}".format(month_startdates, month_enddates))
if not pd.to_datetime(start_date).month == pd.to_datetime(month_startdates['Month_StartDate'][0]).month:
month_startdates.index = range(1,len(month_startdates)+1)
df = pd.concat([month_startdates,month_enddates], axis = 1)
print("\n After adjusting for indexes : \n {0} ".format(df))
df['Key'] = i
final = final.append(df).reset_index(drop = True)
print("\n Map it to orignal index, so that it can be merged with oringal DF : \n {0}".format(df))
else:
# If month and year of both start date and end date are same, simply, create the record with the same
final.append({'Month_StartDate':pd.to_datetime(start_date),
'Month_EndDate': pd.to_datetime(end_date),
'Key': i}, ignore_index =True)
-----------------------------------------------------------------------------
Start Date is 2019-06-13, End Date is 2019-08-23
-----------------------------------------------------------------------------
Ranges of months in the above date range :
Month_StartDate
0 2019-07-01
1 2019-08-01
Month_EndDate
0 2019-06-30
1 2019-07-31
After adjusting for indexes :
Month_StartDate Month_EndDate
0 NaT 2019-06-30
1 2019-07-01 2019-07-31
2 2019-08-01 NaT
Map it to orignal index, so that it can be merged with oringal DF :
Month_StartDate Month_EndDate Key
0 NaT 2019-06-30 1
1 2019-07-01 2019-07-31 1
2 2019-08-01 NaT 1
-----------------------------------------------------------------------------
Start Date is 2019-05-31, End Date is 2019-07-1
-----------------------------------------------------------------------------
Ranges of months in the above date range :
Month_StartDate
0 2019-06-01
1 2019-07-01
Month_EndDate
0 2019-05-31
1 2019-06-30
After adjusting for indexes :
Month_StartDate Month_EndDate
0 NaT 2019-05-31
1 2019-06-01 2019-06-30
2 2019-07-01 NaT
Map it to orignal index, so that it can be merged with oringal DF :
Month_StartDate Month_EndDate Key
0 NaT 2019-05-31 3
1 2019-06-01 2019-06-30 3
2 2019-07-01 NaT 3