PROJECT

Description

Tracking Rental Prices Accurately

Tracking rental prices accurately is essential for renters and investors. This project scrapes rental listings from Rentals.ca and organizes them into an easy-to-analyze dataset. Initially, the average rent calculations were inconsistent due to some listings skewing the results. To ensure accuracy, I used SQL and pandas to clean the data, filter out anomalies, and compute correct averages for each bedroom type.

With this tool, users can:

  • Compare rental prices by city and bedroom count.
  • Identify trends in the Lower Mainland rental market.
  • Access accurate and structured data, verified through SQL queries.

Future Enhancements: Future enhancements may include interactive dashboards and predictive rent analysis using machine learning.

Python Code


from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import time

# Open websites
firefox_options = Options()
driver = webdriver.Firefox(options=firefox_options)
driver.get('https://rentals.ca/vancouver')

#Accept cookies when opening the website
cookies = driver.find_element(By.XPATH, '//*[@id="ez-cookie-notification__accept"]')
cookies.click()

#Dictionary to save each room number price
bedroom_data = {
    '0 Bedroom': {'prices': []},
    '1 Bedroom': {'prices': []},
    '2 Bedroom': {'prices': []},
    '3 Bedroom': {'prices': []},
    '4 Bedroom': {'prices': []},
    '5 Bedroom': {'prices': []},
    '1.5 Bedroom': {'prices': []},
    '2.5 Bedroom': {'prices': []},
    '3.5 Bedroom': {'prices': []},
}

all_data = []
cities = ["Vancouver", "Burnaby", "Surrey", "Richmond"]
print("Working...")

for city in cities:
    print(f"Locating at {city}")
    driver.get(f"https://rentals.ca/{city}")
    addresses = []
    for press in range(7):  # Loop to go through multiple pages
        list_container = driver.find_elements(By.CSS_SELECTOR, '.listing-card-container.col-12')

        for x in list_container:
            print("Working...")
            x.click()
            time.sleep(0.3)  # Allow detail page to load

            # Get listing details (number of bedrooms and prices)
            info_div = driver.find_elements(By.CLASS_NAME, "floor-plan-group")

            try:
                address_element = driver.find_element(By.CSS_SELECTOR,
                                                        ".listing-overview > div:nth-child(3) > div:nth-child(4) > h2:nth-child(6)")
                address = address_element.text.strip()
            except NoSuchElementException:
                address = ""

            addresses.append(address)

            for n in info_div:
                bed_elem = n.find_element(By.CSS_SELECTOR, 'div.d-flex.align-items-center.justify-content-between')
                bed_text = bed_elem.text.strip()  # Remove extra spaces

                clean_bed = bed_text.split("\n")[0]

                # Find price (adjust the selector based on the element's structure)
                price_elem = n.find_element(By.CSS_SELECTOR, "li.unit-details__infos--price")

                bedroom_data[clean_bed]['prices'].append(price_elem.text)

            time.sleep(1.5)  # Give time for page to load before going back
            back = driver.find_element(By.CSS_SELECTOR, ".listing-overview__close")
            back.click()

        # Navigate to next page
        next_button = driver.find_element(By.CSS_SELECTOR, "ul.pagination li.pagination__item a")
        next_button.click()

    # Append all data after collecting for each city
    for n in range(len(bedroom_data['1 Bedroom']['prices'])):
        listings = {
            "City": city,
            "Address": addresses[n] if n < len(addresses) else None,
            "0 Bedroom": bedroom_data['0 Bedroom']['prices'][n] if '0 Bedroom' in bedroom_data and 'prices' in bedroom_data['0 Bedroom'] and n < len(bedroom_data['0 Bedroom']['prices']) else None,
            "1 Bedroom": bedroom_data['1 Bedroom']['prices'][n] if n < len(bedroom_data['1 Bedroom']['prices']) else None,
            "2 Bedroom": bedroom_data['2 Bedroom']['prices'][n] if n < len(bedroom_data['2 Bedroom']['prices']) else None,
            "3 Bedroom": bedroom_data['3 Bedroom']['prices'][n] if n < len(bedroom_data['3 Bedroom']['prices']) else None,
            "4 Bedroom": bedroom_data['4 Bedroom']['prices'][n] if n < len(bedroom_data['4 Bedroom']['prices']) else None,
            "5 Bedroom": bedroom_data['5 Bedroom']['prices'][n] if n < len(bedroom_data['5 Bedroom']['prices']) else None,
            "1.5 Bedroom": bedroom_data['1.5 Bedroom']['prices'][n] if n < len(bedroom_data['5 Bedroom']['prices']) else None,
            "2.5 Bedroom": bedroom_data['2.5 Bedroom']['prices'][n] if n < len(bedroom_data['5 Bedroom']['prices']) else None,
            "3.5 Bedroom": bedroom_data['3.5 Bedroom']['prices'][n] if n < len(bedroom_data['5 Bedroom']['prices']) else None
        }
        all_data.append(listings)

# Save data to Excel
df = pd.DataFrame(all_data)
df.to_excel('rent_info.xlsx', index=False)
print("Done")
driver.quit()
                            

SQL

                    

-- Create new table and make a copy of the unclean table
CREATE TABLE rent
LIKE unclean_prices;

INSERT rent 
SELECT * 
FROM unclean_prices;

-- Clean prices by deleting $ sings, commands and white spaces 
UPDATE rent
SET `0 Bedroom` = REPLACE(REPLACE(`0 Bedroom`, '$', ''), ',', '');

UPDATE rent
SET `1 Bedroom` = REPLACE(REPLACE(`1 Bedroom`, '$', ''), ',', '');

UPDATE rent
SET `2 Bedroom` = REPLACE(REPLACE(`2 Bedroom`, '$', ''), ',', '');

UPDATE rent
SET `3 Bedroom` = REPLACE(REPLACE(`3 Bedroom`, '$', ''), ',', '');

UPDATE rent 
SET `0 Bedroom` = TRIM(`0 Bedroom`);

UPDATE rent 
SET `1 Bedroom` = TRIM(`1 Bedroom`);

UPDATE rent 
SET `2 Bedroom` = TRIM(`2 Bedroom`);

UPDATE rent 
SET `3 Bedroom` = TRIM(`3 Bedroom`);

-- -----------------------------------------------------------------
-- Add a key column
ALTER TABLE rent ADD COLUMN id INT AUTO_INCREMENT PRIMARY KEY;

-- Set all empty spaces to null so we can switch the bedroom columns to ints

UPDATE rent 
SET `0 Bedroom` = NULL
WHERE `0 Bedroom` = '';

UPDATE rent 
SET `1 Bedroom` = NULL
WHERE `1 Bedroom` = '';

UPDATE rent 
SET `2 Bedroom` = NULL
WHERE `2 Bedroom` = '';

UPDATE rent 
SET `3 Bedroom` = NULL
WHERE `3 Bedroom` = '';

-- Modify columns from text to float
ALTER TABLE rent MODIFY COLUMN `0 Bedroom` FLOAT;

ALTER TABLE rent MODIFY COLUMN `1 Bedroom` FLOAT;

ALTER TABLE rent MODIFY COLUMN `2 Bedroom` FLOAT;

ALTER TABLE rent MODIFY COLUMN `3 Bedroom` FLOAT;


-- Tableau average was not the same as SQL average so I compared averages from both tools
SELECT COUNT(*)
FROM rent;

SELECT AVG(`2 Bedroom`)
FROM rent
WHERE CITY LIKE '%Van%';

SELECT City,AVG(`0 Bedroom`),AVG(`1 Bedroom`),AVG(`2 Bedroom`), AVG(`3 Bedroom`)
FROM rent 
GROUP BY `City`;

SELECT * 
FROM rent
WHERE `0 Bedroom` IS NULL 
OR `1 Bedroom` IS NULL
OR `2 Bedroom` IS NULL 
OR `3 Bedroom` IS NULL                        

                        

Documentation

  • No documentation for this project