import pandas as pd
# Read csv data from file
wideCrimesDF = pd.read_csv('Crime.csv')
wideCrimesDF.shape
wideCrimesDF.head()
Offence State 2000 2001 2002 2003 ...
0 Homicide NSW 262 313 256 233 ...
1 Assault NSW 68,714 75,461 80,028 79,890 ...
2 Sexual assault NSW 5,975 6,268 6,477 6,799 ...
3 Kidnapping NSW 385 470 435 421 ...
4 Robbery NSW 13,328 15,234 11,707 10,849 ...
5 rows × 25 columns
# Convert data to long format
crimesDF = pd.melt(wideCrimesDF, id_vars = ['Offence','State'], var_name = 'Year', value_name = 'Count')
crimesDF.head()
Offence State Year Count
0 Homicide NSW 2000 262
1 Assault NSW 2000 68,714
2 Sexual assault NSW 2000 5,975
3 Kidnapping NSW 2000 385
4 Robbery NSW 2000 13,328
crimesDF.shape
crimesDF.info()
RangeIndex: 1656 entries, 0 to 1655
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Offence 1656 non-null object
1 State 1656 non-null object
2 Year 1656 non-null int64
3 Count 1607 non-null float64
dtypes: float64(1), int64(1), object(2)
memory usage: 51.9+ KB
## FILTERS ##
# Filter for columns in a dataframe
crimesDF[['State', 'Offence']]
State Offence
0 NSW Homicide
1 NSW Assault
2 NSW Sexual assault
3 NSW Kidnapping
4 NSW Robbery
... ... ...
1651 ACT Robbery
1652 ACT Blackmail
1653 ACT Unlawful entry into property
1654 ACT Motor vehicle theft
1655 ACT Other theft
1656 rows × 2 columns
# Filter dataframe by values in more than one column
crimesDF[(crimesDF.State == 'NSW') & (crimesDF.Year == 2022)]
Offence State Year Count
1584 Homicide NSW 2022 79.0
1585 Assault NSW 2022 69493.0
1586 Sexual assault NSW 2022 12412.0
1587 Kidnapping NSW 2022 216.0
1588 Robbery NSW 2022 2087.0
1589 Blackmail NSW 2022 845.0
1590 Unlawful entry into property NSW 2022 30934.0
1591 Motor vehicle theft NSW 2022 9909.0
1592 Other theft NSW 2022 103714.0
# Filter columns by value in another column
crimesDF[['Offence', 'Count', 'State']][crimesDF.State == 'NSW']
Offence Count State
0 Homicide 262.0 NSW
1 Assault 68714.0 NSW
2 Sexual assault 5975.0 NSW
3 Kidnapping 385.0 NSW
4 Robbery 13328.0 NSW
... ... ... ...
1588 Robbery 2087.0 NSW
1589 Blackmail 845.0 NSW
1590 Unlawful entry into property 30934.0 NSW
1591 Motor vehicle theft 9909.0 NSW
1592 Other theft 103714.0 NSW
207 rows × 3 columns
# Filter columns by values in more than one column
crimesDF[['Offence','Count','State']][(crimesDF.State == 'NSW') & (crimesDF.Year == 2022)]
Offence Count State
1584 Homicide 79.0 NSW
1585 Assault 69493.0 NSW
1586 Sexual assault 12412.0 NSW
1587 Kidnapping 216.0 NSW
1588 Robbery 2087.0 NSW
1589 Blackmail 845.0 NSW
1590 Unlawful entry into property 30934.0 NSW
1591 Motor vehicle theft 9909.0 NSW
1592 Other theft 103714.0 NSW
# Filter using a list
crimesDF[['Offence','Count', 'State']][crimesDF.State.isin(['NSW','WA'])]
Offence Count State
0 Homicide 262.0 NSW
1 Assault 68714.0 NSW
2 Sexual assault 5975.0 NSW
3 Kidnapping 385.0 NSW
4 Robbery 13328.0 NSW
... ... ... ...
1624 Robbery 1236.0 WA
1625 Blackmail 215.0 WA
1626 Unlawful entry into property 22059.0 WA
1627 Motor vehicle theft 6718.0 WA
1628 Other theft 71852.0 WA
414 rows × 3 columns
# Filter using a list and a condition
crimesDF[['Offence','Count', 'State']][(crimesDF.State.isin(['NSW','WA'])) & (crimesDF.Year == 2022)]
Offence Count State
1584 Homicide 79.0 NSW
1585 Assault 69493.0 NSW
1586 Sexual assault 12412.0 NSW
1587 Kidnapping 216.0 NSW
1588 Robbery 2087.0 NSW
1589 Blackmail 845.0 NSW
1590 Unlawful entry into property 30934.0 NSW
1591 Motor vehicle theft 9909.0 NSW
1592 Other theft 103714.0 NSW
1620 Homicide 49.0 WA
1621 Assault 38743.0 WA
1622 Sexual assault 3158.0 WA
1623 Kidnapping 24.0 WA
1624 Robbery 1236.0 WA
1625 Blackmail 215.0 WA
1626 Unlawful entry into property 22059.0 WA
1627 Motor vehicle theft 6718.0 WA
1628 Other theft 71852.0 WA
# Filter for data containing a string
crimesDF['State'][crimesDF.State.str.contains('W')]
0 NSW
1 NSW
2 NSW
3 NSW
4 NSW
...
1624 WA
1625 WA
1626 WA
1627 WA
1628 WA
Name: State, Length: 414, dtype: object