Initial commit

This commit is contained in:
Dennis Thiessen
2018-09-12 21:33:44 +02:00
commit 2ace4c3044
8 changed files with 661 additions and 0 deletions

11
.idea/ImoBot.iml generated Normal file
View File

@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  IDE module descriptor for the ImoBot project (module type PYTHON_MODULE).
  The content root is $MODULE_DIR$, the SDK is inherited from the project,
  and the project test runner is set to "Unittests".
-->
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="TestRunnerService">
    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
  </component>
</module>

4
.idea/misc.xml generated Normal file
View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Project-level IDE settings: names the SDK ("Python 3.7") used by the project. -->
<project version="4">
  <component name="ProjectRootManager"
             version="2"
             project-jdk-name="Python 3.7"
             project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml generated Normal file
View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Registers the modules that make up the project; here the single ImoBot module. -->
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/ImoBot.iml"
              filepath="$PROJECT_DIR$/.idea/ImoBot.iml" />
    </modules>
  </component>
</project>

244
.idea/workspace.xml generated Normal file
View File

@@ -0,0 +1,244 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="784f4e67-6877-44d9-879c-93318a68f68b" name="Default Changelist" comment="" />
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="FUSProjectUsageTrigger">
<session id="462988823">
<usages-collector id="statistics.lifecycle.project">
<counts>
<entry key="project.closed" value="13" />
<entry key="project.open.time.1" value="1" />
<entry key="project.open.time.3" value="8" />
<entry key="project.open.time.4" value="4" />
<entry key="project.open.time.6" value="1" />
<entry key="project.opened" value="14" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.extensions.edit">
<counts>
<entry key="dummy" value="10" />
<entry key="py" value="7571" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.edit">
<counts>
<entry key="PLAIN_TEXT" value="10" />
<entry key="Python" value="7571" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.extensions.open">
<counts>
<entry key="log" value="1" />
<entry key="php" value="1" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.open">
<counts>
<entry key="PLAIN_TEXT" value="2" />
</counts>
</usages-collector>
</session>
</component>
<component name="FileEditorManager">
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/main.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="146">
<caret line="143" column="92" lean-forward="true" selection-start-line="143" selection-start-column="92" selection-end-line="143" selection-end-column="92" />
<folding>
<element signature="e#44#53#0" expanded="true" />
<marker date="1536774843817" expanded="true" signature="2613:2969" ph="..." />
<marker date="1536774843817" expanded="true" signature="4193:4343" ph="..." />
<marker date="1536774843817" expanded="true" signature="4488:4493" ph="..." />
<marker date="1536774843817" expanded="true" signature="8295:8300" ph="..." />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/output/history.log">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="272">
<caret line="16" column="63" selection-start-line="16" selection-start-column="63" selection-end-line="16" selection-end-column="63" />
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="FileTemplateManagerImpl">
<option name="RECENT_TEMPLATES">
<list>
<option value="Python Script" />
</list>
</option>
</component>
<component name="FindInProjectRecents">
<findStrings>
<find>history.lo</find>
<find>_output</find>
<find>scrape_page</find>
</findStrings>
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/ImoBot.py" />
<option value="$PROJECT_DIR$/main.py" />
</list>
</option>
</component>
<component name="ProjectFrameBounds" extendedState="6">
<option name="x" value="-88" />
<option name="y" value="312" />
<option name="width" value="2141" />
<option name="height" value="993" />
</component>
<component name="ProjectView">
<navigator proportions="" version="1">
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="Scope" />
<pane id="ProjectPane">
<subPane>
<expand>
<path>
<item name="ImoBot" type="b2602c69:ProjectViewProjectNode" />
<item name="ImoBot" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="ImoBot" type="b2602c69:ProjectViewProjectNode" />
<item name="ImoBot" type="462c0819:PsiDirectoryNode" />
<item name="driver" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="ImoBot" type="b2602c69:ProjectViewProjectNode" />
<item name="ImoBot" type="462c0819:PsiDirectoryNode" />
<item name="output" type="462c0819:PsiDirectoryNode" />
</path>
</expand>
<select />
</subPane>
</pane>
</panes>
</component>
<component name="PropertiesComponent">
<property name="last_opened_file_path" value="$PROJECT_DIR$/ImoBot.py" />
</component>
<component name="RecentsManager">
<key name="MoveFile.RECENT_KEYS">
<recent name="H:\Workspace\Projects\ImoBot\web" />
<recent name="H:\Workspace\Projects\ImoBot\output" />
<recent name="H:\Workspace\Projects\ImoBot\driver" />
</key>
</component>
<component name="RunDashboard">
<option name="ruleStates">
<list>
<RuleState>
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
</RuleState>
<RuleState>
<option name="name" value="StatusDashboardGroupingRule" />
</RuleState>
</list>
</option>
</component>
<component name="RunManager">
  <!--
    "Run" configuration: launches main.py with the project directory as the
    working directory and PYTHONUNBUFFERED=1.
    SCRIPT_NAME uses the $PROJECT_DIR$ macro (like WORKING_DIRECTORY) instead of
    an absolute drive path, so the configuration works from any checkout location.
    NOTE(review): SDK_HOME is still an absolute interpreter path from one machine
    and IS_MODULE_SDK is false, so this configuration does not follow the project
    SDK. Left unchanged because switching interpreters is a behavioural decision.
  -->
  <configuration name="Run" type="PythonConfigurationType" factoryName="Python">
    <module name="ImoBot" />
    <option name="INTERPRETER_OPTIONS" value="" />
    <option name="PARENT_ENVS" value="true" />
    <envs>
      <env name="PYTHONUNBUFFERED" value="1" />
    </envs>
    <option name="SDK_HOME" value="C:\Users\denni\AppData\Local\Programs\Python\Python37-32\python.exe" />
    <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
    <option name="IS_MODULE_SDK" value="false" />
    <option name="ADD_CONTENT_ROOTS" value="true" />
    <option name="ADD_SOURCE_ROOTS" value="true" />
    <option name="SCRIPT_NAME" value="$PROJECT_DIR$/main.py" />
    <option name="PARAMETERS" value="" />
    <option name="SHOW_COMMAND_LINE" value="false" />
    <option name="EMULATE_TERMINAL" value="false" />
    <option name="MODULE_MODE" value="false" />
    <option name="REDIRECT_INPUT" value="false" />
    <option name="INPUT_FILE" value="" />
    <method v="2" />
  </configuration>
</component>
<component name="SvnConfiguration">
<configuration />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="784f4e67-6877-44d9-879c-93318a68f68b" name="Default Changelist" comment="" />
<created>1534264450607</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1534264450607</updated>
</task>
<servers />
</component>
<component name="ToolWindowManager">
<frame x="-8" y="-8" width="2576" height="1416" extended-state="6" />
<layout>
<window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.13396375" />
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
<window_info id="Favorites" order="2" side_tool="true" />
<window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" weight="0.3299532" />
<window_info anchor="bottom" id="Run" order="2" weight="0.3299532" />
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
<window_info anchor="bottom" id="TODO" order="6" />
<window_info anchor="bottom" id="Version Control" order="7" show_stripe_button="false" />
<window_info anchor="bottom" id="Terminal" order="8" />
<window_info anchor="bottom" id="Event Log" order="9" side_tool="true" />
<window_info anchor="bottom" id="Python Console" order="10" />
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
</layout>
</component>
<component name="UnknownFeatures">
<option featureType="com.intellij.fileTypeFactory" implementationName="*.log" />
</component>
<component name="VcsContentAnnotationSettings">
<option name="myLimit" value="2678400000" />
</component>
<component name="editorHistoryManager">
<entry file="file://$USER_HOME$/Desktop/index.php" />
<entry file="file://$PROJECT_DIR$/main.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="146">
<caret line="143" column="92" lean-forward="true" selection-start-line="143" selection-start-column="92" selection-end-line="143" selection-end-column="92" />
<folding>
<element signature="e#44#53#0" expanded="true" />
<marker date="1536774843817" expanded="true" signature="2613:2969" ph="..." />
<marker date="1536774843817" expanded="true" signature="4193:4343" ph="..." />
<marker date="1536774843817" expanded="true" signature="4488:4493" ph="..." />
<marker date="1536774843817" expanded="true" signature="8295:8300" ph="..." />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/output/history.log">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="272">
<caret line="16" column="63" selection-start-line="16" selection-start-column="63" selection-end-line="16" selection-end-column="63" />
</state>
</provider>
</entry>
</component>
</project>

BIN
driver/chromedriver Normal file

Binary file not shown.

BIN
driver/chromedriver.exe Normal file

Binary file not shown.

338
main.py Normal file
View File

@@ -0,0 +1,338 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import os
import statistics
import sys
import logging
import codecs
import time
import xlsxwriter
import datetime
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException, ElementNotVisibleException, StaleElementReferenceException
# Map-search URLs on en.aruodas.lt, one per area. Each is paired with a
# human-readable area name in the `pages` list further down.
href_1 = 'https://en.aruodas.lt/nt_zemelapis/?obj=1&on_map=1&type=map&FSelectedArea=u99znh05tds4%2Cu99znqdmts7u%2Cu99znx4p99ee%2Cu99znvstves2%2Cu99znenb3dhq%2Cu99zne2djsev%2Cu99znkhh1th2%2Cu99zjgkyjwh3%2Cu99zjge8t8h4%2Cu99zjggyvwec%2Cu99znh0y3x5b#zoom:13;center:(54.698048468880216,25.254161356320424)'
href_2 = 'https://en.aruodas.lt/nt_zemelapis/?obj=1&on_map=1&type=map&FSelectedArea=u99znv59jpvx%2Cu99znu91thv0%2Cu99zndzq3ptt%2Cu99znfcyjnt1%2Cu99zp56n9jtn%2Cu99zp5n3ejmw%2Cu99zp71dg4t3%2Cu99zp7mwvnv7%2Cu99zp7tvt5tp%2Cu99zpk5um5me%2Cu99zph7t10tp%2Cu99znvp055j4%2Cu99znv58ejt2#zoom:14;center:(54.68989490798361,25.272123999999963)'
href_3 = 'https://en.aruodas.lt/nt_zemelapis/?obj=1&on_map=1&type=map&FSelectedArea=u99zp75y38fj%2Cu99zp7p5d91g%2Cu99zpe1mqt1s%2Cu99zpe499scs%2Cu99zpdfdwwcu%2Cu99zpddj8t1f%2Cu99zpd6nkt9r%2Cu99zpd1rqe1x%2Cu99zpd4368cz%2Cu99zp9uzfd9d%2Cu99zp9usnd64%2Cu99zp9ejdwcy%2Cu99zp90wud6p%2Cu99zp3pkref4%2Cu99zp3hq1e3s%2Cu99zp38xkw3d%2Cu99zp4nvqd3g%2Cu99zp4wdh96p%2Cu99zp4yvsxcz%2Cu99zp5r8399e%2Cu99zp760ddct%2Cu99zp7hj2t1u#zoom:14;center:(54.68006792227714,25.287418499999944)'
href_4 = 'https://en.aruodas.lt/nt_zemelapis/?obj=1&on_map=1&type=map&FSelectedArea=u99zpdtuqntv%2Cu99zpdt9d4jb%2Cu99zpdd531t6%2Cu99zpd6jdjm3%2Cu99zpd1rtpt9%2Cu99zpd41fnt7%2Cu99zpdj9tjm1%2Cu99zpdnz9pt1%2Cu99zpdrc1jvb%2Cu99zpfhd4pvw%2Cu99zpfpsm0tk%2Cu9dp042mk5vp%2Cu99zpfxvq4m0%2Cu99zpfy904tj%2Cu99zpghgqpte%2Cu99zpg5xf4m1%2Cu99zpgdcd0jh%2Cu99zpgdhs1jr%2Cu99zpg95h5j2%2Cu99zpdyjypvd%2Cu99zpdtg3njr#zoom:15;center:(54.68360773485153,25.302744000000075)'
# All paths below are relative; they are joined with get_script_path() where they are used.
# Spreadsheet written by scrape_pages() and attached to the result mail.
filename_output = os.path.join('output', 'ImoBot_output.xlsx')
# Append-mode log file configured in the __main__ block.
history_filepath = os.path.join('output', 'history.log')
# Chromedriver binary used when os.name == 'nt'.
driver_win_filepath = os.path.join('driver', 'chromedriver.exe')
# Despite the name, this is the binary used when os.name is NOT 'nt'.
driver_nt_filepath = os.path.join('driver', 'chromedriver')
# Recipients of the result mail sent at the end of scrape_pages().
mailto = ['dennis.thiessen@riskahead.de', 'kevin.gruendel@riskahead.de', 'florianbergel@yahoo.de']
# [area name, map URL] pairs; scrape_pages() reads index 0 as the name and index 1 as the URL.
pages = [['Zverynas', href_1], ['Gedimino Prospektas', href_2], ['Old City', href_3], ['Uzupis', href_4]]
# Reduced single-recipient / single-area overrides, kept commented out.
#mailto = ['dennis.thiessen@riskahead.de']
#pages = [['Zverynas', href_1]]
class Area:
    """Summary record for one scraped area.

    Holds the area's display name, the URL it was scraped from and one value
    per listing category. scrape_and_write_page() fills the category fields
    with the listing counts returned by write_to_ws(); they default to None.
    """

    def __init__(self, name, href, flats_for_rent=None, flats_for_sale=None, houses_for_sale=None, plots_for_sale=None):
        # Identification of the area.
        self.name, self.href = name, href
        # Per-category values, in the same order as the constructor arguments.
        self.flats_for_rent, self.flats_for_sale = flats_for_rent, flats_for_sale
        self.houses_for_sale, self.plots_for_sale = houses_for_sale, plots_for_sale
class Property:
    """A single listing scraped from the result list.

    ``price`` is the raw text of the listing's price element. It is expected to
    contain two euro amounts: the total price first, then the price per square
    metre (for example ``"125 000 €<sep>1 500 €/m²"``, where ``<sep>`` is a
    single separator character).
    """

    # Separator between the amounts. Written as an escape so it cannot be lost
    # when the file is re-encoded; splitting on an empty string raises
    # ValueError("empty separator").
    CURRENCY_SIGN = '\u20ac'

    def __init__(self, href, addr, desc, price):
        self.href = href
        self.addr = addr
        self.desc = desc
        self.price = price

    def _price_fields(self):
        """Return the price text split at the euro sign, with spaces removed
        and decimal commas turned into dots."""
        normalized = self.price.replace(" ", "").replace(",", ".")
        return normalized.split(self.CURRENCY_SIGN)

    def get_price(self):
        """Return the total price (the amount before the first euro sign) as a float."""
        return float(self._price_fields()[0])

    def get_price_m2(self):
        """Return the price per square metre (the amount between the euro signs) as a float."""
        # [1:] drops the one character that separates the two amounts
        # (presumably a line break in the element text -- confirm against the site).
        return float(self._price_fields()[1][1:])

    def get_m2(self):
        """Return the floor area derived as total price / price per square metre."""
        return float(self.get_price() / self.get_price_m2())

    def get_roi(self, avg_rent_m2):
        """Return the yearly rent (avg_rent_m2 * area * 12) as a fraction of the total price."""
        return float(avg_rent_m2 * self.get_m2() * 12 / self.get_price())
class PropertyCollection:
    """A list of listings together with summary statistics.

    The average and median of both the total price and the price per square
    metre are computed once, when the collection is created. Every statistic
    is 0 for an empty collection.
    """

    def __init__(self, properties):
        self.properties = properties
        self.average_price = self.calc_average_price()
        self.average_price_m2 = self.calc_average_price_m2()
        self.median_price = self.calc_median_price()
        self.median_price_m2 = self.calc_median_price_m2()

    def calc_average_price(self):
        """Arithmetic mean of the total prices, or 0 when there are no listings."""
        if len(self.properties) == 0:
            return 0
        total = sum(entry.get_price() for entry in self.properties)
        return total / len(self.properties)

    def calc_average_price_m2(self):
        """Arithmetic mean of the per-square-metre prices, or 0 when there are no listings."""
        if len(self.properties) == 0:
            return 0
        total = sum(entry.get_price_m2() for entry in self.properties)
        return total / len(self.properties)

    def calc_median_price(self):
        """Median of the total prices, or 0 when there are no listings."""
        if len(self.properties) == 0:
            return 0
        prices = [entry.get_price() for entry in self.properties]
        return statistics.median(prices)

    def calc_median_price_m2(self):
        """Median of the per-square-metre prices, or 0 when there are no listings."""
        if len(self.properties) == 0:
            return 0
        prices_m2 = [entry.get_price_m2() for entry in self.properties]
        return statistics.median(prices_m2)

    def sort_by_roi(self, price_m2):
        """Re-order the listings by get_roi(price_m2), highest first.

        A new list is assigned to ``self.properties``; nothing happens when
        the collection is empty.
        """
        if len(self.properties) == 0:
            return

        def roi_of(entry):
            return entry.get_roi(price_m2)

        self.properties = sorted(self.properties, key=roi_of, reverse=True)

    def count(self):
        """Number of listings in the collection."""
        return len(self.properties)
def get_script_path():
    """Return the directory containing the script named by sys.argv[0], with symlinks resolved."""
    script_file = os.path.realpath(sys.argv[0])
    directory, _ = os.path.split(script_file)
    return directory
def send_mail(send_from, send_to, subject, text, files=None, server="mail.riskahead.de"):
    """Send a plain-text mail with optional attachments over SMTP-over-SSL.

    :param send_from: sender address, used for the From header and the envelope.
    :param send_to: list of recipient addresses (asserted to be a list).
    :param subject: subject line.
    :param text: message body.
    :param files: optional iterable of file paths relative to the script
        directory; each is attached under its base name.
    :param server: SMTP host to connect to with SMTP_SSL.
    """
    assert isinstance(send_to, list)
    msg = MIMEMultipart()
    msg['From'] = send_from
    msg['To'] = ", ".join(send_to)
    msg['Subject'] = subject
    msg.attach(MIMEText(text))
    for f in files or []:
        attachment_name = os.path.basename(f)
        with open(os.path.join(get_script_path(), f), "rb") as fil:
            part = MIMEApplication(fil.read(), Name=attachment_name)
        # After the file is closed
        part['Content-Disposition'] = 'attachment; filename="%s"' % attachment_name
        msg.attach(part)

    # NOTE(review): the account password is committed in source. It can now be
    # overridden through the environment; the literals remain only as a
    # backward-compatible fallback and should be removed (and the password
    # rotated) once the deployment sets IMOBOT_SMTP_USER / IMOBOT_SMTP_PASSWORD.
    smtp_user = os.environ.get('IMOBOT_SMTP_USER', 'support@riskahead.de')
    smtp_password = os.environ.get('IMOBOT_SMTP_PASSWORD', "405risksupport")

    # The context manager closes the connection even when login or sending
    # raises; previously the socket leaked on any failure.
    with smtplib.SMTP_SSL(server) as smtp:
        smtp.login(smtp_user, smtp_password)
        smtp.sendmail(send_from, send_to, msg.as_string())
def switch_page(driver, page):
    """Select another listing category in the search form.

    Leaves the current frame, opens the ``searchFormField_obj`` control,
    clicks the list entry whose label text contains ``page`` and finally
    switches back into the ``sideListIframe`` frame.

    :param driver: Selenium WebDriver positioned on the map search page.
    :param page: label text of the category to select, e.g. 'Houses for sale'.
    """
    driver.switch_to.default_content()
    driver.find_element_by_id('searchFormField_obj').click()
    options = driver.find_element_by_id('options_obj')
    # The leading '.' keeps the search inside the options element; an XPath
    # starting with '//' is evaluated against the whole document even when it
    # is called on a sub-element.
    entry = options.find_element_by_xpath(".//li/label[contains(text(),'{}')]/..".format(page))
    entry.click()
    driver.switch_to.frame("sideListIframe")
def scrape_page(driver):
    """Collect all listings from the current result list, following pagination.

    Reads every ``result-item-v3`` element on the current page into a
    Property, then clicks the 'Next page' link (when exactly one is present
    inside the pagination element) and recurses for the following page.

    :param driver: Selenium WebDriver already switched into the result frame.
    :return: list of Property objects from this page and all following pages.
    """
    # Look the logger up by name instead of relying on the global that only
    # exists when the file runs as a script; this is the same logger object.
    log = logging.getLogger('imobot_logger')
    elements = driver.find_elements_by_class_name('result-item-v3')
    items = []
    for element in elements:
        href = element.find_element_by_class_name('object-image-link').get_attribute('href')
        addr = element.find_element_by_class_name('item-address-v3').text
        desc = element.find_element_by_class_name('item-description-v3').text
        price = element.find_element_by_class_name('item-price-main-v3').text
        items.append(Property(href, addr, desc, price))
    try:
        pagination = driver.find_element_by_class_name('sidebar-pagination')
        # './/' restricts the search to the pagination element; '//' would
        # match links anywhere in the document.
        next_page_element = pagination.find_elements_by_xpath('.//a[contains(text(), \'Next page\')]')
        if len(next_page_element) == 1:
            next_page_element[0].click()
            # The recursive call stays inside the try block, as before, so a
            # NoSuchElementException raised on a later page is also caught here.
            items += scrape_page(driver)
    except NoSuchElementException:
        log.debug("no pagination found")
    return items
def scrape_pages(pages):
    """Scrape every configured area, write the workbook and mail the result.

    Starts a headless Chrome, writes one spreadsheet section per area via
    scrape_and_write_page(), closes the workbook and sends it to ``mailto``
    together with a per-area count summary.

    :param pages: sequence of [area name, map URL] pairs.
    """
    log = logging.getLogger('imobot_logger')
    log.info('Start ImoBot...')
    start = time.time()

    script_dir = get_script_path()
    # 'nt' (Windows) uses the .exe driver; every other platform uses the plain binary.
    driver_rel_path = driver_win_filepath if os.name == 'nt' else driver_nt_filepath
    chrome_driver = os.path.join(script_dir, driver_rel_path)
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(chrome_options=chrome_options, executable_path=chrome_driver)

    try:
        workbook = xlsxwriter.Workbook(os.path.join(script_dir, filename_output))
        worksheet = workbook.add_worksheet()
        now = datetime.datetime.now()
        worksheet.write('A2', now.strftime("%Y-%m-%d %H:%M"))

        areas = []
        row = 1
        for page in pages:
            row, area = scrape_and_write_page(driver, worksheet, page[1], page[0], row)
            areas.append(area)
    finally:
        # Always shut the browser down; previously the headless Chrome and
        # chromedriver processes were left running after every run.
        driver.quit()

    # As before, the workbook is written only after every area was scraped successfully.
    workbook.close()
    end = time.time()

    subject = "ImoBot-Weekly-Results"
    text = "ImoBot-Weekly-Results\n\n" + \
           "Last run: {}\n".format(now.strftime("%Y-%m-%d %H:%M"))
    for area in areas:
        text += "{}: (Flats: {}, Houses: {}, Plots: {})\n".format(area.name, area.flats_for_sale, area.houses_for_sale, area.plots_for_sale)

    send_mail('webmaster@riskahead.de',
              mailto,
              subject,
              text,
              [filename_output])

    log.info('Finished in {:.0f} sec'.format(end-start))
def scrape_and_write_page(driver, worksheet, href, name, row):
    """Scrape one area and write its four category sections to the worksheet.

    :param driver: Selenium WebDriver used to load ``href``.
    :param worksheet: worksheet the sections are written to.
    :param href: map-search URL of the area.
    :param name: area name, written into column A above the sections.
    :param row: last used worksheet row before this area.
    :return: tuple ``(row, area)`` with the row reached after writing and an
        Area carrying the listing count of each category.
    """
    driver.get(href)
    flats_for_sale, houses_for_sale, plots_for_sale, flats_for_rent = get_prices_from_site(driver)

    row += 2
    worksheet.write('A' + str(row), name)

    # Sections are written in this order; the rent collection is also passed
    # as the first argument of every write_to_ws() call.
    sections = (
        ("Flats for rent", flats_for_rent),
        ("Flats for sale", flats_for_sale),
        ("Houses for sale", houses_for_sale),
        ("Plots for sale", plots_for_sale),
    )
    amounts = []
    for title, collection in sections:
        row, amount = write_to_ws(flats_for_rent, title, collection, worksheet, row)
        amounts.append(amount)

    rent_count, flat_count, house_count, plot_count = amounts
    area = Area(name, href, flats_for_rent=rent_count, flats_for_sale=flat_count,
                houses_for_sale=house_count, plots_for_sale=plot_count)
    return row, area
# Write one category section into the worksheet and return (next row, number of listings).
#   flats_for_rent: rent collection; its average/median m2 prices feed get_roi() below.
#   name:           section title written to column B; 'Flats for rent' selects the median layout.
#   properties:     collection whose count, averages, medians and listings are written.
#   worksheet:      target worksheet, addressed with 'A1'-style cell names.
#   row:            last used row; the section starts two rows below it.
# NOTE(review): indentation is not visible in this view. The header row and the
# per-listing loop below presumably both belong to the else branch - confirm
# against the original file before restructuring this function.
def write_to_ws(flats_for_rent, name, properties, worksheet, row):
row += 2
worksheet.write('B' + str(row), name)
row += 1
worksheet.write('C' + str(row), 'Amount')
worksheet.write('D' + str(row), properties.count())
row += 1
worksheet.write('C' + str(row), 'AVG Price')
worksheet.write('D' + str(row), properties.average_price)
row += 1
worksheet.write('C' + str(row), 'AVG m² Price')
worksheet.write('D' + str(row), properties.average_price_m2)
# Leave one empty row between the averages and what follows.
row += 2
if name == 'Flats for rent':
# Rent section: medians only.
worksheet.write('C' + str(row), 'MED Price')
worksheet.write('D' + str(row), properties.median_price)
row += 1
worksheet.write('C' + str(row), 'MED m² Price')
worksheet.write('D' + str(row), properties.median_price_m2)
else:
# Sale sections: header row for the listing table.
worksheet.write('C' + str(row), 'Address')
worksheet.write('D' + str(row), 'GRY (AVG)')
worksheet.write('E' + str(row), 'GRY (MED)')
worksheet.write('F' + str(row), 'Total Price')
worksheet.write('G' + str(row), 'Price per m²')
worksheet.write('H' + str(row), 'Description')
worksheet.write('I' + str(row), 'Link')
row += 1
# Only the first ten listings are logged and written.
for prop in properties.properties[:10]:
logger.info("ROI (Median): {:3.2%}, ROI (Average): {:3.2%}, Price: {}€, Address: {}, URL: {}".format(
prop.get_roi(flats_for_rent.median_price_m2), prop.get_roi(flats_for_rent.average_price_m2), prop.price,
prop.addr, prop.href))
worksheet.write('C' + str(row), prop.addr)
worksheet.write('D' + str(row), prop.get_roi(flats_for_rent.average_price_m2))
worksheet.write('E' + str(row), prop.get_roi(flats_for_rent.median_price_m2))
worksheet.write('F' + str(row), prop.get_price())
worksheet.write('G' + str(row), prop.get_price_m2())
worksheet.write('H' + str(row), prop.desc)
worksheet.write('I' + str(row), prop.href)
row += 1
return row, properties.count()
def _scrape_category(driver, label, switch_to=None, price_fmt="{:8.2f}", m2_fmt="{:5.2f}"):
    """Scrape one listing category into a PropertyCollection and log its statistics.

    :param label: lower-case category name used in the log messages.
    :param switch_to: category label to select first, or None to scrape the
        category that is already displayed.
    :param price_fmt: format used for the average price in the log.
    :param m2_fmt: format used for the per-square-metre prices in the log.
    """
    log = logging.getLogger('imobot_logger')
    log.info("Start scraping {}...".format(label))
    if switch_to is not None:
        switch_page(driver, switch_to)
        time.sleep(3)  # Avoid race condition by giving chromedriver enough time to load the page
    collection = PropertyCollection(scrape_page(driver))
    log.info("Found {} {}".format(collection.count(), label))
    log.info("Average price: " + price_fmt.format(collection.average_price))
    log.info("Average m2 price: " + m2_fmt.format(collection.average_price_m2))
    log.info("Median m2 price: " + m2_fmt.format(collection.median_price_m2))
    return collection


def get_prices_from_site(driver):
    """Scrape all four listing categories of the page currently loaded in ``driver``.

    Clicks the 'close-button' element when possible, switches into the
    ``sideListIframe`` frame and scrapes flats for sale, houses for sale,
    plots for sale and flats for rent, in that order. The three sale
    collections are then sorted by ROI against the average rent per m2.

    :return: tuple ``(flats_for_sale, houses_for_sale, plots_for_sale,
        flats_for_rent)`` of PropertyCollection objects.
    """
    log = logging.getLogger('imobot_logger')
    try:
        driver.find_element_by_class_name('close-button').click()
    except (ElementNotVisibleException, NoSuchElementException):
        # Dismissing the element is best effort: an absent button is as
        # harmless as an invisible one and must not abort the run.
        log.debug("no visible 'close-button' element to dismiss")

    driver.switch_to.frame("sideListIframe")

    flats_for_sale = _scrape_category(driver, 'flats for sale')
    houses_for_sale = _scrape_category(driver, 'houses for sale', switch_to='Houses for sale')
    plots_for_sale = _scrape_category(driver, 'plots for sale', switch_to='Plots for sale')
    flats_for_rent = _scrape_category(driver, 'flats for rent', switch_to='Flats for rent',
                                      price_fmt="{:4.2f}", m2_fmt="{:3.2f}")

    for collection in (flats_for_sale, houses_for_sale, plots_for_sale):
        collection.sort_by_roi(flats_for_rent.average_price_m2)

    return flats_for_sale, houses_for_sale, plots_for_sale, flats_for_rent
if __name__ == '__main__':
    # The script takes no command-line arguments.
    assert (len(sys.argv) == 1), "Wrong number of arguments given"
    os.environ["PYTHONIOENCODING"] = "utf-8"

    # 'logger' is a module-level name; the scraping functions log through it.
    logger = logging.getLogger('imobot_logger')
    logger.setLevel(logging.DEBUG)

    # File handler: everything from DEBUG upwards is appended to the history log.
    log_path = os.path.join(get_script_path(), history_filepath)
    # FileHandler does not create missing directories, and the output folder
    # is not part of the repository, so create it on first run.
    os.makedirs(os.path.dirname(log_path), exist_ok=True)
    fh = logging.FileHandler(log_path, "a", encoding='utf-8')
    fh.setLevel(logging.DEBUG)

    # Console handler: INFO and above, written to stdout re-wrapped as UTF-8.
    sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.INFO)

    formatter = logging.Formatter('%(asctime)s - %(message)s')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(ch)

    scrape_pages(pages)

56
web/index.php Normal file
View File

@@ -0,0 +1,56 @@
<?php
// Declare the response as UTF-8 HTML. header() has to run before any page output is sent.
header('Content-type: text/html; charset=utf-8'); ?>
<!DOCTYPE HTML>
<html>
<head>
<style>
.error {color: #FF0000;}
</style>
</head>
<body>
<?php
// Handles the form submission: validates the posted URL, runs the Python
// script with it and prints the script output line by line.

// define variables and set to empty values
$url = $urlErr = "";
if ($_SERVER["REQUEST_METHOD"] == "POST") {
    if (empty($_POST["url"])) {
        // Was assigned to an unused $nameErr, so the message never reached the form.
        $urlErr = "URL is required";
    } else {
        $url = test_input($_POST["url"]);
        if (!preg_match('%^((https?://)|(www\.))([a-z0-9-].?)+(:[0-9]+)?(/.*)?$%i',$url)) {
            $urlErr = "Not a valid URL";
        } else {
            // The URL is user input: escape it for HTML before echoing it back.
            echo "Used URL: ".htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
            echo "<br><br>";
            echo "Output: ";
            echo "<br>";
            // escapeshellarg() quotes the value for the shell. Wrapping it in
            // '...' by hand let a single quote in the URL inject commands.
            // NOTE(review): the Python file visible in this commit is main.py and
            // it asserts that it receives no arguments - confirm ImoBot.py exists.
            $scriptName = "python3 ImoBot.py ".escapeshellarg($url);
            $out = array();
            exec($scriptName,$out);
            foreach($out as $key => $value){
                // Script output is treated as plain text, not as markup.
                echo $key." ".htmlspecialchars($value, ENT_QUOTES, 'UTF-8')."<br>";
            }
        }
    }
}

// Normalizes raw form input: trims surrounding whitespace and removes backslashes.
function test_input($data) {
    $data = trim($data);
    $data = stripslashes($data);
    return $data;
}
?>
<h2>ImoBot</h2>
<p><span class="error">* required field</span></p>
<!-- Posts back to this page. $url and $urlErr are escaped so that submitted text cannot break out of the attribute or inject markup. -->
<form method="post" action="<?php echo htmlspecialchars($_SERVER["PHP_SELF"]);?>">
URL: <input type="text" size="100" name="url" value="<?php echo htmlspecialchars($url, ENT_QUOTES, 'UTF-8');?>">
<span class="error">* <?php echo htmlspecialchars($urlErr, ENT_QUOTES, 'UTF-8');?></span>
<br><br>
<br><br>
<input type="submit" name="submit" value="Submit">
</form>
</body>
</html>