import numpy as np
import pandas as pd
# Float Series that contains one missing value (np.nan).
float_data = pd.Series([1.2, -3.5, np.nan, 0])

float_data
# Out:
# 0    1.2
# 1   -3.5
# 2    NaN
# 3    0.0
# dtype: float64
import numpy as np
import pandas as pd
# Float Series that contains one missing value (np.nan).
float_data = pd.Series([1.2, -3.5, np.nan, 0])

float_data
# Out:
# 0    1.2
# 1   -3.5
# 2    NaN
# 3    0.0
# dtype: float64

# isna() returns a boolean Series that is True where a value is missing.
float_data.isna()
# Out:
# 0    False
# 1    False
# 2     True
# 3    False
# dtype: bool
# Both np.nan and None are treated as missing values by isna().
string_data = pd.Series(["aardvark", np.nan, None, "avocado"])

string_data
# Out (as captured):
# 0    aardvark
# 1         NaN
# 2        None
# 3     avocado
# dtype: object

string_data.isna()
# Out:
# 0    False
# 1     True
# 2     True
# 3    False
# dtype: bool
# With an explicit float64 dtype, None is stored as NaN.
float_data = pd.Series([1, 2, None], dtype='float64')

float_data
# Out:
# 0    1.0
# 1    2.0
# 2    NaN
# dtype: float64

float_data.isna()
# Out:
# 0    False
# 1    False
# 2     True
# dtype: bool
data = pd.Series([1, np.nan, 3.5, np.nan, 7])

data
# Out:
# 0    1.0
# 1    NaN
# 2    3.5
# 3    NaN
# 4    7.0
# dtype: float64

# dropna() on a Series keeps only the non-missing values; the original
# index labels are preserved.
data.dropna()
# Out:
# 0    1.0
# 2    3.5
# 4    7.0
# dtype: float64
data = pd.Series([1, np.nan, 3.5, np.nan, 7])

data
# Out:
# 0    1.0
# 1    NaN
# 2    3.5
# 3    NaN
# 4    7.0
# dtype: float64

# Boolean indexing with notna() selects the same rows as data.dropna().
data[data.notna()]
# Out:
# 0    1.0
# 2    3.5
# 4    7.0
# dtype: float64
data = pd.DataFrame(
    [
        [1., 6.5, 3.],
        [1., np.nan, np.nan],
        [np.nan, np.nan, np.nan],
        [np.nan, 6.5, 3.],
    ]
)

data
# Out:
#      0    1    2
# 0  1.0  6.5  3.0
# 1  1.0  NaN  NaN
# 2  NaN  NaN  NaN
# 3  NaN  6.5  3.0

# By default dropna() drops every row that contains at least one missing value.
data.dropna()
# Out:
#      0    1    2
# 0  1.0  6.5  3.0
data = pd.DataFrame(
    [
        [1., 6.5, 3.],
        [1., np.nan, np.nan],
        [np.nan, np.nan, np.nan],
        [np.nan, 6.5, 3.],
    ]
)

data
# Out:
#      0    1    2
# 0  1.0  6.5  3.0
# 1  1.0  NaN  NaN
# 2  NaN  NaN  NaN
# 3  NaN  6.5  3.0

# how="all" drops only the rows in which every value is missing.
data.dropna(how="all")
# Out:
#      0    1    2
# 0  1.0  6.5  3.0
# 1  1.0  NaN  NaN
# 3  NaN  6.5  3.0
# Random 7x3 frame; the numbers shown in the outputs below come from one
# captured run and will differ on every execution.
df = pd.DataFrame(np.random.standard_normal((7, 3)))

df
# Out:
#           0         1         2
# 0  0.360151  0.722188 -0.281666
# 1 -1.552161  0.397278 -0.739955
# 2 -0.254815  0.410693 -0.602461
# 3  0.708214 -0.420772 -1.016734
# 4 -0.807647 -0.482419 -1.228014
# 5  1.645039  1.722498 -0.467191
# 6  0.870788  0.311962 -0.009629

# Blank out the first four rows of column 1 and the first two of column 2.
df.iloc[:4, 1] = np.nan
df.iloc[:2, 2] = np.nan

df
# Out:
#           0         1         2
# 0  0.360151       NaN       NaN
# 1 -1.552161       NaN       NaN
# 2 -0.254815       NaN -0.602461
# 3  0.708214       NaN -1.016734
# 4 -0.807647 -0.482419 -1.228014
# 5  1.645039  1.722498 -0.467191
# 6  0.870788  0.311962 -0.009629

df.dropna()
# Out:
#           0         1         2
# 4 -0.807647 -0.482419 -1.228014
# 5  1.645039  1.722498 -0.467191
# 6  0.870788  0.311962 -0.009629

# thresh=2 keeps the rows that have at least two non-missing values.
df.dropna(thresh=2)
# Out:
#           0         1         2
# 2 -0.254815       NaN -0.602461
# 3  0.708214       NaN -1.016734
# 4 -0.807647 -0.482419 -1.228014
# 5  1.645039  1.722498 -0.467191
# 6  0.870788  0.311962 -0.009629

# Replace every missing value with a single constant.
df.fillna(0)
# Out:
#           0         1         2
# 0  0.360151  0.000000  0.000000
# 1 -1.552161  0.000000  0.000000
# 2 -0.254815  0.000000 -0.602461
# 3  0.708214  0.000000 -1.016734
# 4 -0.807647 -0.482419 -1.228014
# 5  1.645039  1.722498 -0.467191
# 6  0.870788  0.311962 -0.009629

# A dict maps column label -> fill value, so each column gets its own constant.
df.fillna({1: 0.5, 2: 0})
# Out:
#           0         1         2
# 0  0.360151  0.500000  0.000000
# 1 -1.552161  0.500000  0.000000
# 2 -0.254815  0.500000 -0.602461
# 3  0.708214  0.500000 -1.016734
# 4 -0.807647 -0.482419 -1.228014
# 5  1.645039  1.722498 -0.467191
# 6  0.870788  0.311962 -0.009629
# Random 6x3 frame with trailing gaps in columns 1 and 2; the numbers shown in
# the outputs below come from one captured run and will differ per execution.
df = pd.DataFrame(np.random.standard_normal((6, 3)))
df.iloc[2:, 1] = np.nan
df.iloc[4:, 2] = np.nan

df
# Out:
#           0         1         2
# 0 -1.139638  0.509662  1.349670
# 1 -0.373828 -0.521660 -0.321083
# 2  0.543520       NaN  0.721912
# 3  0.111382       NaN -1.103382
# 4  0.293745       NaN       NaN
# 5  0.628866       NaN       NaN

# Forward fill: propagate the last valid observation down each column.
# fillna(method="ffill") is deprecated since pandas 2.1; ffill() is the
# supported spelling and produces the same result.
df.ffill()
# Out:
#           0         1         2
# 0 -1.139638  0.509662  1.349670
# 1 -0.373828 -0.521660 -0.321083
# 2  0.543520 -0.521660  0.721912
# 3  0.111382 -0.521660 -1.103382
# 4  0.293745 -0.521660 -1.103382
# 5  0.628866 -0.521660 -1.103382

# limit=2 fills at most two consecutive missing values per gap.
df.ffill(limit=2)
# Out:
#           0         1         2
# 0 -1.139638  0.509662  1.349670
# 1 -0.373828 -0.521660 -0.321083
# 2  0.543520 -0.521660  0.721912
# 3  0.111382 -0.521660 -1.103382
# 4  0.293745       NaN -1.103382
# 5  0.628866       NaN -1.103382
data = pd.Series([1., np.nan, 3.5, np.nan, 7])

# mean() skips missing values, so the gaps are filled with the mean of the
# observed values (1 + 3.5 + 7) / 3.
data.fillna(data.mean())
# Out:
# 0    1.000000
# 1    3.833333
# 2    3.500000
# 3    3.833333
# 4    7.000000
# dtype: float64
data = pd.DataFrame(
    {
        "k1": ["one", "two"] * 3 + ["two"],
        "k2": [1, 1, 2, 3, 3, 4, 4],
    }
)

data
# Out:
#     k1  k2
# 0  one   1
# 1  two   1
# 2  one   2
# 3  two   3
# 4  one   3
# 5  two   4
# 6  two   4

# duplicated() marks a row True when an identical row appeared earlier.
data.duplicated()
# Out:
# 0    False
# 1    False
# 2    False
# 3    False
# 4    False
# 5    False
# 6     True
# dtype: bool

# drop_duplicates() keeps the rows for which duplicated() is False.
data.drop_duplicates()
# Out:
#     k1  k2
# 0  one   1
# 1  two   1
# 2  one   2
# 3  two   3
# 4  one   3
# 5  two   4

# Add a column of distinct values; from here on no two full rows are equal,
# so duplicates are detected on a subset of the columns instead.
data["v1"] = range(7)

data
# Out:
#     k1  k2  v1
# 0  one   1   0
# 1  two   1   1
# 2  one   2   2
# 3  two   3   3
# 4  one   3   4
# 5  two   4   5
# 6  two   4   6

# Compare on "k1" only; the first occurrence of each value is kept.
data.drop_duplicates(subset=["k1"])
# Out:
#     k1  k2  v1
# 0  one   1   0
# 1  two   1   1

# keep="last" retains the last row of each duplicate group (row 6, not row 5).
data.drop_duplicates(subset=["k1", "k2"], keep="last")
# Out:
#     k1  k2  v1
# 0  one   1   0
# 1  two   1   1
# 2  one   2   2
# 3  two   3   3
# 4  one   3   4
# 6  two   4   6