Tracking Rental Prices Accurately
Tracking rental prices accurately is essential for renters and investors. This project scrapes rental listings from Rentals.ca and organizes them into an easy-to-analyze dataset. Initially, the average rent calculations were inconsistent due to some listings skewing the results. To ensure accuracy, I used SQL and pandas to clean the data, filter out anomalies, and compute correct averages for each bedroom type.
With this tool, users can:
Future Enhancements: Future enhancements may include interactive dashboards and predictive rent analysis using machine learning.
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import time
# Open websites
firefox_options = Options()
driver = webdriver.Firefox(options=firefox_options)
driver.get('https://rentals.ca/vancouver')
#Accept cookies when opening the website
cookies = driver.find_element(By.XPATH, '//*[@id="ez-cookie-notification__accept"]')
cookies.click()
#Dictionary to save each room number price
bedroom_data = {
'0 Bedroom': {'prices': []},
'1 Bedroom': {'prices': []},
'2 Bedroom': {'prices': []},
'3 Bedroom': {'prices': []},
'4 Bedroom': {'prices': []},
'5 Bedroom': {'prices': []},
'1.5 Bedroom': {'prices': []},
'2.5 Bedroom': {'prices': []},
'3.5 Bedroom': {'prices': []},
}
all_data = []
cities = ["Vancouver", "Burnaby", "Surrey", "Richmond"]
print("Working...")
for city in cities:
print(f"Locating at {city}")
driver.get(f"https://rentals.ca/{city}")
addresses = []
for press in range(7): # Loop to go through multiple pages
list_container = driver.find_elements(By.CSS_SELECTOR, '.listing-card-container.col-12')
for x in list_container:
print("Working...")
x.click()
time.sleep(0.3) # Allow detail page to load
# Get listing details (number of bedrooms and prices)
info_div = driver.find_elements(By.CLASS_NAME, "floor-plan-group")
try:
address_element = driver.find_element(By.CSS_SELECTOR,
".listing-overview > div:nth-child(3) > div:nth-child(4) > h2:nth-child(6)")
address = address_element.text.strip()
except NoSuchElementException:
address = ""
addresses.append(address)
for n in info_div:
bed_elem = n.find_element(By.CSS_SELECTOR, 'div.d-flex.align-items-center.justify-content-between')
bed_text = bed_elem.text.strip() # Remove extra spaces
clean_bed = bed_text.split("\n")[0]
# Find price (adjust the selector based on the element's structure)
price_elem = n.find_element(By.CSS_SELECTOR, "li.unit-details__infos--price")
bedroom_data[clean_bed]['prices'].append(price_elem.text)
time.sleep(1.5) # Give time for page to load before going back
back = driver.find_element(By.CSS_SELECTOR, ".listing-overview__close")
back.click()
# Navigate to next page
next_button = driver.find_element(By.CSS_SELECTOR, "ul.pagination li.pagination__item a")
next_button.click()
# Append all data after collecting for each city
for n in range(len(bedroom_data['1 Bedroom']['prices'])):
listings = {
"City": city,
"Address": addresses[n] if n < len(addresses) else None,
"0 Bedroom": bedroom_data['0 Bedroom']['prices'][n] if '0 Bedroom' in bedroom_data and 'prices' in bedroom_data['0 Bedroom'] and n < len(bedroom_data['0 Bedroom']['prices']) else None,
"1 Bedroom": bedroom_data['1 Bedroom']['prices'][n] if n < len(bedroom_data['1 Bedroom']['prices']) else None,
"2 Bedroom": bedroom_data['2 Bedroom']['prices'][n] if n < len(bedroom_data['2 Bedroom']['prices']) else None,
"3 Bedroom": bedroom_data['3 Bedroom']['prices'][n] if n < len(bedroom_data['3 Bedroom']['prices']) else None,
"4 Bedroom": bedroom_data['4 Bedroom']['prices'][n] if n < len(bedroom_data['4 Bedroom']['prices']) else None,
"5 Bedroom": bedroom_data['5 Bedroom']['prices'][n] if n < len(bedroom_data['5 Bedroom']['prices']) else None,
"1.5 Bedroom": bedroom_data['1.5 Bedroom']['prices'][n] if n < len(bedroom_data['5 Bedroom']['prices']) else None,
"2.5 Bedroom": bedroom_data['2.5 Bedroom']['prices'][n] if n < len(bedroom_data['5 Bedroom']['prices']) else None,
"3.5 Bedroom": bedroom_data['3.5 Bedroom']['prices'][n] if n < len(bedroom_data['5 Bedroom']['prices']) else None
}
all_data.append(listings)
# Save data to Excel
df = pd.DataFrame(all_data)
df.to_excel('rent_info.xlsx', index=False)
print("Done")
driver.quit()
-- Create new table and make a copy of the unclean table
CREATE TABLE rent
LIKE unclean_prices;
INSERT rent
SELECT *
FROM unclean_prices;
-- Clean prices by deleting $ sings, commands and white spaces
UPDATE rent
SET `0 Bedroom` = REPLACE(REPLACE(`0 Bedroom`, '$', ''), ',', '');
UPDATE rent
SET `1 Bedroom` = REPLACE(REPLACE(`1 Bedroom`, '$', ''), ',', '');
UPDATE rent
SET `2 Bedroom` = REPLACE(REPLACE(`2 Bedroom`, '$', ''), ',', '');
UPDATE rent
SET `3 Bedroom` = REPLACE(REPLACE(`3 Bedroom`, '$', ''), ',', '');
UPDATE rent
SET `0 Bedroom` = TRIM(`0 Bedroom`);
UPDATE rent
SET `1 Bedroom` = TRIM(`1 Bedroom`);
UPDATE rent
SET `2 Bedroom` = TRIM(`2 Bedroom`);
UPDATE rent
SET `3 Bedroom` = TRIM(`3 Bedroom`);
-- -----------------------------------------------------------------
-- Add a key column
ALTER TABLE rent ADD COLUMN id INT AUTO_INCREMENT PRIMARY KEY;
-- Set all empty spaces to null so we can switch the bedroom columns to ints
UPDATE rent
SET `0 Bedroom` = NULL
WHERE `0 Bedroom` = '';
UPDATE rent
SET `1 Bedroom` = NULL
WHERE `1 Bedroom` = '';
UPDATE rent
SET `2 Bedroom` = NULL
WHERE `2 Bedroom` = '';
UPDATE rent
SET `3 Bedroom` = NULL
WHERE `3 Bedroom` = '';
-- Modify columns from text to float
ALTER TABLE rent MODIFY COLUMN `0 Bedroom` FLOAT;
ALTER TABLE rent MODIFY COLUMN `1 Bedroom` FLOAT;
ALTER TABLE rent MODIFY COLUMN `2 Bedroom` FLOAT;
ALTER TABLE rent MODIFY COLUMN `3 Bedroom` FLOAT;
-- Tableau average was not the same as SQL average so I compared averages from both tools
SELECT COUNT(*)
FROM rent;
SELECT AVG(`2 Bedroom`)
FROM rent
WHERE CITY LIKE '%Van%';
SELECT City,AVG(`0 Bedroom`),AVG(`1 Bedroom`),AVG(`2 Bedroom`), AVG(`3 Bedroom`)
FROM rent
GROUP BY `City`;
SELECT *
FROM rent
WHERE `0 Bedroom` IS NULL
OR `1 Bedroom` IS NULL
OR `2 Bedroom` IS NULL
OR `3 Bedroom` IS NULL