Python Pandas - Filtering data

import pandas as pd

# Read csv data from file
wideCrimesDF = pd.read_csv('Crime.csv')
wideCrimesDF.shape
(72,25)

wideCrimesDF.head()
Offence State 2000 2001 2002 2003 ... 0 Homicide NSW 262 313 256 233 ... 1 Assault NSW 68,714 75,461 80,028 79,890 ... 2 Sexual assault NSW 5,975 6,268 6,477 6,799 ... 3 Kidnapping NSW 385 470 435 421 ... 4 Robbery NSW 13,328 15,234 11,707 10,849 ... 5 rows × 25 columns


# Convert data to long format
crimesDF = pd.melt(wideCrimesDF, id_vars = ['Offence','State'], var_name = 'Year', value_name = 'Count')

crimesDF.head()
Offence State Year Count 0 Homicide NSW 2000 262 1 Assault NSW 2000 68,714 2 Sexual assault NSW 2000 5,975 3 Kidnapping NSW 2000 385 4 Robbery NSW 2000 13,328


crimesDF.shape
(1656, 4)

crimesDF.info()
RangeIndex: 1656 entries, 0 to 1655 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Offence 1656 non-null object 1 State 1656 non-null object 2 Year 1656 non-null int64 3 Count 1607 non-null float64 dtypes: float64(1), int64(1), object(2) memory usage: 51.9+ KB


## FILTERS ##
# Filter for columns in a dataframe
crimesDF[['State', 'Offence']]
State Offence 0 NSW Homicide 1 NSW Assault 2 NSW Sexual assault 3 NSW Kidnapping 4 NSW Robbery ... ... ... 1651 ACT Robbery 1652 ACT Blackmail 1653 ACT Unlawful entry into property 1654 ACT Motor vehicle theft 1655 ACT Other theft 1656 rows × 2 columns

# Filter dataframe by values in more than one column
crimesDF[(crimesDF.State == 'NSW') & (crimesDF.Year == 2022)]
Offence State Year Count 1584 Homicide NSW 2022 79.0 1585 Assault NSW 2022 69493.0 1586 Sexual assault NSW 2022 12412.0 1587 Kidnapping NSW 2022 216.0 1588 Robbery NSW 2022 2087.0 1589 Blackmail NSW 2022 845.0 1590 Unlawful entry into property NSW 2022 30934.0 1591 Motor vehicle theft NSW 2022 9909.0 1592 Other theft NSW 2022 103714.0

# Filter columns by value in another column
crimesDF[['Offence', 'Count', 'State']][crimesDF.State == 'NSW']	
Offence Count State 0 Homicide 262.0 NSW 1 Assault 68714.0 NSW 2 Sexual assault 5975.0 NSW 3 Kidnapping 385.0 NSW 4 Robbery 13328.0 NSW ... ... ... ... 1588 Robbery 2087.0 NSW 1589 Blackmail 845.0 NSW 1590 Unlawful entry into property 30934.0 NSW 1591 Motor vehicle theft 9909.0 NSW 1592 Other theft 103714.0 NSW 207 rows × 3 columns

# Filter columns by values in more than one column
crimesDF[['Offence','Count','State']][(crimesDF.State == 'NSW') & (crimesDF.Year == 2022)]	
Offence Count State 1584 Homicide 79.0 NSW 1585 Assault 69493.0 NSW 1586 Sexual assault 12412.0 NSW 1587 Kidnapping 216.0 NSW 1588 Robbery 2087.0 NSW 1589 Blackmail 845.0 NSW 1590 Unlawful entry into property 30934.0 NSW 1591 Motor vehicle theft 9909.0 NSW 1592 Other theft 103714.0 NSW

# Filter using a list
crimesDF[['Offence','Count', 'State']][crimesDF.State.isin(['NSW','WA'])]	
Offence Count State 0 Homicide 262.0 NSW 1 Assault 68714.0 NSW 2 Sexual assault 5975.0 NSW 3 Kidnapping 385.0 NSW 4 Robbery 13328.0 NSW ... ... ... ... 1624 Robbery 1236.0 WA 1625 Blackmail 215.0 WA 1626 Unlawful entry into property 22059.0 WA 1627 Motor vehicle theft 6718.0 WA 1628 Other theft 71852.0 WA 414 rows × 3 columns

# Filter using a list and a condition
crimesDF[['Offence','Count', 'State']][(crimesDF.State.isin(['NSW','WA'])) & (crimesDF.Year == 2022)]	
Offence Count State 1584 Homicide 79.0 NSW 1585 Assault 69493.0 NSW 1586 Sexual assault 12412.0 NSW 1587 Kidnapping 216.0 NSW 1588 Robbery 2087.0 NSW 1589 Blackmail 845.0 NSW 1590 Unlawful entry into property 30934.0 NSW 1591 Motor vehicle theft 9909.0 NSW 1592 Other theft 103714.0 NSW 1620 Homicide 49.0 WA 1621 Assault 38743.0 WA 1622 Sexual assault 3158.0 WA 1623 Kidnapping 24.0 WA 1624 Robbery 1236.0 WA 1625 Blackmail 215.0 WA 1626 Unlawful entry into property 22059.0 WA 1627 Motor vehicle theft 6718.0 WA 1628 Other theft 71852.0 WA

# Filter for data containing a string
crimesDF['State'][crimesDF.State.str.contains('W')]
0 NSW 1 NSW 2 NSW 3 NSW 4 NSW ... 1624 WA 1625 WA 1626 WA 1627 WA 1628 WA Name: State, Length: 414, dtype: object