<pre><code>#read in data
#columns in `data` are separated by runs of two or more whitespace characters
#the separator is a raw string so that \s reaches the regex engine untouched
#(in a plain string '\s' is an invalid escape sequence, which triggers
#a warning on recent Python versions)
df = pd.read_csv(StringIO(data), sep=r'\s{2,}', engine='python')
#give index a name
df.index.name = 'Date'
#convert the index to datetime, then sort it
#(a sorted datetime index is usually safer to work with in pandas)
df.index = pd.to_datetime(df.index)
df = df.sort_index()
#count the rows for every (calendar day, C/IC label) pair
daily_counts = df.groupby([pd.Grouper(freq='1D',level='Date'),"C/IC"]).size()
#keep only the "incorrect" entries that occur more than twice in a day
frequent_incorrect = daily_counts.loc[daily_counts > 2,"incorrect"]
#drop the C/IC level so that only the day remains in the index
flagged_days = frequent_incorrect.droplevel(-1).index
#the grouped index holds one entry per day, which differs from the
#original datetimes, so convert the days to strings; a fresh batch
#of dates is built from these strings later on
res = flagged_days.astype(str).tolist()
res
['2019-05-17', '2019-05-20']
#turn the flagged date strings into a numpy array of datetime64 values
idx = np.array(res, dtype='datetime64')
#compare on the calendar date only, so the time of day is ignored
row_dates = df.index.date
on_flagged_date = np.isin(row_dates,idx)
#final value: every row whose date is not among the flagged dates
df.loc[~on_flagged_date]
t_value C/IC
Date
2019-05-18 01:00:00 0 incorrect
2019-05-18 02:00:00 6 correct
2019-05-18 03:00:00 7 correct
2019-05-19 04:00:00 0 incorrect
2019-05-19 09:00:00 0 incorrect
2019-05-19 11:00:00 8 correct
</code></pre>