Initial commit
This commit is contained in:
11
.idea/ImoBot.iml
generated
Normal file
11
.idea/ImoBot.iml
generated
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$" />
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
<component name="TestRunnerService">
|
||||||
|
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
|
||||||
|
</component>
|
||||||
|
</module>
|
||||||
4
.idea/misc.xml
generated
Normal file
4
.idea/misc.xml
generated
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
|
||||||
|
</project>
|
||||||
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/ImoBot.iml" filepath="$PROJECT_DIR$/.idea/ImoBot.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
244
.idea/workspace.xml
generated
Normal file
244
.idea/workspace.xml
generated
Normal file
@@ -0,0 +1,244 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ChangeListManager">
|
||||||
|
<list default="true" id="784f4e67-6877-44d9-879c-93318a68f68b" name="Default Changelist" comment="" />
|
||||||
|
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||||
|
<option name="SHOW_DIALOG" value="false" />
|
||||||
|
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
||||||
|
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
||||||
|
<option name="LAST_RESOLUTION" value="IGNORE" />
|
||||||
|
</component>
|
||||||
|
<component name="FUSProjectUsageTrigger">
|
||||||
|
<session id="462988823">
|
||||||
|
<usages-collector id="statistics.lifecycle.project">
|
||||||
|
<counts>
|
||||||
|
<entry key="project.closed" value="13" />
|
||||||
|
<entry key="project.open.time.1" value="1" />
|
||||||
|
<entry key="project.open.time.3" value="8" />
|
||||||
|
<entry key="project.open.time.4" value="4" />
|
||||||
|
<entry key="project.open.time.6" value="1" />
|
||||||
|
<entry key="project.opened" value="14" />
|
||||||
|
</counts>
|
||||||
|
</usages-collector>
|
||||||
|
<usages-collector id="statistics.file.extensions.edit">
|
||||||
|
<counts>
|
||||||
|
<entry key="dummy" value="10" />
|
||||||
|
<entry key="py" value="7571" />
|
||||||
|
</counts>
|
||||||
|
</usages-collector>
|
||||||
|
<usages-collector id="statistics.file.types.edit">
|
||||||
|
<counts>
|
||||||
|
<entry key="PLAIN_TEXT" value="10" />
|
||||||
|
<entry key="Python" value="7571" />
|
||||||
|
</counts>
|
||||||
|
</usages-collector>
|
||||||
|
<usages-collector id="statistics.file.extensions.open">
|
||||||
|
<counts>
|
||||||
|
<entry key="log" value="1" />
|
||||||
|
<entry key="php" value="1" />
|
||||||
|
</counts>
|
||||||
|
</usages-collector>
|
||||||
|
<usages-collector id="statistics.file.types.open">
|
||||||
|
<counts>
|
||||||
|
<entry key="PLAIN_TEXT" value="2" />
|
||||||
|
</counts>
|
||||||
|
</usages-collector>
|
||||||
|
</session>
|
||||||
|
</component>
|
||||||
|
<component name="FileEditorManager">
|
||||||
|
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
|
||||||
|
<file pinned="false" current-in-tab="true">
|
||||||
|
<entry file="file://$PROJECT_DIR$/main.py">
|
||||||
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
|
<state relative-caret-position="146">
|
||||||
|
<caret line="143" column="92" lean-forward="true" selection-start-line="143" selection-start-column="92" selection-end-line="143" selection-end-column="92" />
|
||||||
|
<folding>
|
||||||
|
<element signature="e#44#53#0" expanded="true" />
|
||||||
|
<marker date="1536774843817" expanded="true" signature="2613:2969" ph="..." />
|
||||||
|
<marker date="1536774843817" expanded="true" signature="4193:4343" ph="..." />
|
||||||
|
<marker date="1536774843817" expanded="true" signature="4488:4493" ph="..." />
|
||||||
|
<marker date="1536774843817" expanded="true" signature="8295:8300" ph="..." />
|
||||||
|
</folding>
|
||||||
|
</state>
|
||||||
|
</provider>
|
||||||
|
</entry>
|
||||||
|
</file>
|
||||||
|
<file pinned="false" current-in-tab="false">
|
||||||
|
<entry file="file://$PROJECT_DIR$/output/history.log">
|
||||||
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
|
<state relative-caret-position="272">
|
||||||
|
<caret line="16" column="63" selection-start-line="16" selection-start-column="63" selection-end-line="16" selection-end-column="63" />
|
||||||
|
</state>
|
||||||
|
</provider>
|
||||||
|
</entry>
|
||||||
|
</file>
|
||||||
|
</leaf>
|
||||||
|
</component>
|
||||||
|
<component name="FileTemplateManagerImpl">
|
||||||
|
<option name="RECENT_TEMPLATES">
|
||||||
|
<list>
|
||||||
|
<option value="Python Script" />
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</component>
|
||||||
|
<component name="FindInProjectRecents">
|
||||||
|
<findStrings>
|
||||||
|
<find>history.lo</find>
|
||||||
|
<find>_output</find>
|
||||||
|
<find>scrape_page</find>
|
||||||
|
</findStrings>
|
||||||
|
</component>
|
||||||
|
<component name="IdeDocumentHistory">
|
||||||
|
<option name="CHANGED_PATHS">
|
||||||
|
<list>
|
||||||
|
<option value="$PROJECT_DIR$/ImoBot.py" />
|
||||||
|
<option value="$PROJECT_DIR$/main.py" />
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</component>
|
||||||
|
<component name="ProjectFrameBounds" extendedState="6">
|
||||||
|
<option name="x" value="-88" />
|
||||||
|
<option name="y" value="312" />
|
||||||
|
<option name="width" value="2141" />
|
||||||
|
<option name="height" value="993" />
|
||||||
|
</component>
|
||||||
|
<component name="ProjectView">
|
||||||
|
<navigator proportions="" version="1">
|
||||||
|
<foldersAlwaysOnTop value="true" />
|
||||||
|
</navigator>
|
||||||
|
<panes>
|
||||||
|
<pane id="Scope" />
|
||||||
|
<pane id="ProjectPane">
|
||||||
|
<subPane>
|
||||||
|
<expand>
|
||||||
|
<path>
|
||||||
|
<item name="ImoBot" type="b2602c69:ProjectViewProjectNode" />
|
||||||
|
<item name="ImoBot" type="462c0819:PsiDirectoryNode" />
|
||||||
|
</path>
|
||||||
|
<path>
|
||||||
|
<item name="ImoBot" type="b2602c69:ProjectViewProjectNode" />
|
||||||
|
<item name="ImoBot" type="462c0819:PsiDirectoryNode" />
|
||||||
|
<item name="driver" type="462c0819:PsiDirectoryNode" />
|
||||||
|
</path>
|
||||||
|
<path>
|
||||||
|
<item name="ImoBot" type="b2602c69:ProjectViewProjectNode" />
|
||||||
|
<item name="ImoBot" type="462c0819:PsiDirectoryNode" />
|
||||||
|
<item name="output" type="462c0819:PsiDirectoryNode" />
|
||||||
|
</path>
|
||||||
|
</expand>
|
||||||
|
<select />
|
||||||
|
</subPane>
|
||||||
|
</pane>
|
||||||
|
</panes>
|
||||||
|
</component>
|
||||||
|
<component name="PropertiesComponent">
|
||||||
|
<property name="last_opened_file_path" value="$PROJECT_DIR$/ImoBot.py" />
|
||||||
|
</component>
|
||||||
|
<component name="RecentsManager">
|
||||||
|
<key name="MoveFile.RECENT_KEYS">
|
||||||
|
<recent name="H:\Workspace\Projects\ImoBot\web" />
|
||||||
|
<recent name="H:\Workspace\Projects\ImoBot\output" />
|
||||||
|
<recent name="H:\Workspace\Projects\ImoBot\driver" />
|
||||||
|
</key>
|
||||||
|
</component>
|
||||||
|
<component name="RunDashboard">
|
||||||
|
<option name="ruleStates">
|
||||||
|
<list>
|
||||||
|
<RuleState>
|
||||||
|
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
|
||||||
|
</RuleState>
|
||||||
|
<RuleState>
|
||||||
|
<option name="name" value="StatusDashboardGroupingRule" />
|
||||||
|
</RuleState>
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</component>
|
||||||
|
<component name="RunManager">
|
||||||
|
<configuration name="Run" type="PythonConfigurationType" factoryName="Python">
|
||||||
|
<module name="ImoBot" />
|
||||||
|
<option name="INTERPRETER_OPTIONS" value="" />
|
||||||
|
<option name="PARENT_ENVS" value="true" />
|
||||||
|
<envs>
|
||||||
|
<env name="PYTHONUNBUFFERED" value="1" />
|
||||||
|
</envs>
|
||||||
|
<option name="SDK_HOME" value="C:\Users\denni\AppData\Local\Programs\Python\Python37-32\python.exe" />
|
||||||
|
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
||||||
|
<option name="IS_MODULE_SDK" value="false" />
|
||||||
|
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||||
|
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||||
|
<option name="SCRIPT_NAME" value="H:\Workspace\Projects\ImoBot\main.py" />
|
||||||
|
<option name="PARAMETERS" value="" />
|
||||||
|
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||||
|
<option name="EMULATE_TERMINAL" value="false" />
|
||||||
|
<option name="MODULE_MODE" value="false" />
|
||||||
|
<option name="REDIRECT_INPUT" value="false" />
|
||||||
|
<option name="INPUT_FILE" value="" />
|
||||||
|
<method v="2" />
|
||||||
|
</configuration>
|
||||||
|
</component>
|
||||||
|
<component name="SvnConfiguration">
|
||||||
|
<configuration />
|
||||||
|
</component>
|
||||||
|
<component name="TaskManager">
|
||||||
|
<task active="true" id="Default" summary="Default task">
|
||||||
|
<changelist id="784f4e67-6877-44d9-879c-93318a68f68b" name="Default Changelist" comment="" />
|
||||||
|
<created>1534264450607</created>
|
||||||
|
<option name="number" value="Default" />
|
||||||
|
<option name="presentableId" value="Default" />
|
||||||
|
<updated>1534264450607</updated>
|
||||||
|
</task>
|
||||||
|
<servers />
|
||||||
|
</component>
|
||||||
|
<component name="ToolWindowManager">
|
||||||
|
<frame x="-8" y="-8" width="2576" height="1416" extended-state="6" />
|
||||||
|
<layout>
|
||||||
|
<window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.13396375" />
|
||||||
|
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
|
||||||
|
<window_info id="Favorites" order="2" side_tool="true" />
|
||||||
|
<window_info anchor="bottom" id="Message" order="0" />
|
||||||
|
<window_info anchor="bottom" id="Find" order="1" weight="0.3299532" />
|
||||||
|
<window_info anchor="bottom" id="Run" order="2" weight="0.3299532" />
|
||||||
|
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
|
||||||
|
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
|
||||||
|
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
|
||||||
|
<window_info anchor="bottom" id="TODO" order="6" />
|
||||||
|
<window_info anchor="bottom" id="Version Control" order="7" show_stripe_button="false" />
|
||||||
|
<window_info anchor="bottom" id="Terminal" order="8" />
|
||||||
|
<window_info anchor="bottom" id="Event Log" order="9" side_tool="true" />
|
||||||
|
<window_info anchor="bottom" id="Python Console" order="10" />
|
||||||
|
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
|
||||||
|
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
|
||||||
|
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
|
||||||
|
</layout>
|
||||||
|
</component>
|
||||||
|
<component name="UnknownFeatures">
|
||||||
|
<option featureType="com.intellij.fileTypeFactory" implementationName="*.log" />
|
||||||
|
</component>
|
||||||
|
<component name="VcsContentAnnotationSettings">
|
||||||
|
<option name="myLimit" value="2678400000" />
|
||||||
|
</component>
|
||||||
|
<component name="editorHistoryManager">
|
||||||
|
<entry file="file://$USER_HOME$/Desktop/index.php" />
|
||||||
|
<entry file="file://$PROJECT_DIR$/main.py">
|
||||||
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
|
<state relative-caret-position="146">
|
||||||
|
<caret line="143" column="92" lean-forward="true" selection-start-line="143" selection-start-column="92" selection-end-line="143" selection-end-column="92" />
|
||||||
|
<folding>
|
||||||
|
<element signature="e#44#53#0" expanded="true" />
|
||||||
|
<marker date="1536774843817" expanded="true" signature="2613:2969" ph="..." />
|
||||||
|
<marker date="1536774843817" expanded="true" signature="4193:4343" ph="..." />
|
||||||
|
<marker date="1536774843817" expanded="true" signature="4488:4493" ph="..." />
|
||||||
|
<marker date="1536774843817" expanded="true" signature="8295:8300" ph="..." />
|
||||||
|
</folding>
|
||||||
|
</state>
|
||||||
|
</provider>
|
||||||
|
</entry>
|
||||||
|
<entry file="file://$PROJECT_DIR$/output/history.log">
|
||||||
|
<provider selected="true" editor-type-id="text-editor">
|
||||||
|
<state relative-caret-position="272">
|
||||||
|
<caret line="16" column="63" selection-start-line="16" selection-start-column="63" selection-end-line="16" selection-end-column="63" />
|
||||||
|
</state>
|
||||||
|
</provider>
|
||||||
|
</entry>
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
BIN
driver/chromedriver
Normal file
BIN
driver/chromedriver
Normal file
Binary file not shown.
BIN
driver/chromedriver.exe
Normal file
BIN
driver/chromedriver.exe
Normal file
Binary file not shown.
338
main.py
Normal file
338
main.py
Normal file
@@ -0,0 +1,338 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import os
|
||||||
|
import statistics
|
||||||
|
import sys
|
||||||
|
import logging
|
||||||
|
import codecs
|
||||||
|
import time
|
||||||
|
import xlsxwriter
|
||||||
|
import datetime
|
||||||
|
import smtplib
|
||||||
|
|
||||||
|
from email.mime.multipart import MIMEMultipart
|
||||||
|
from email.mime.text import MIMEText
|
||||||
|
from email.mime.application import MIMEApplication
|
||||||
|
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.chrome.options import Options
|
||||||
|
from selenium.common.exceptions import NoSuchElementException, ElementNotVisibleException, StaleElementReferenceException
|
||||||
|
|
||||||
|
# --- Map search URLs -------------------------------------------------------
# One aruodas.lt map-search URL per area. Each URL carries the selected area
# (FSelectedArea) plus the zoom level and map centre in its fragment.
href_1 = 'https://en.aruodas.lt/nt_zemelapis/?obj=1&on_map=1&type=map&FSelectedArea=u99znh05tds4%2Cu99znqdmts7u%2Cu99znx4p99ee%2Cu99znvstves2%2Cu99znenb3dhq%2Cu99zne2djsev%2Cu99znkhh1th2%2Cu99zjgkyjwh3%2Cu99zjge8t8h4%2Cu99zjggyvwec%2Cu99znh0y3x5b#zoom:13;center:(54.698048468880216,25.254161356320424)'
href_2 = 'https://en.aruodas.lt/nt_zemelapis/?obj=1&on_map=1&type=map&FSelectedArea=u99znv59jpvx%2Cu99znu91thv0%2Cu99zndzq3ptt%2Cu99znfcyjnt1%2Cu99zp56n9jtn%2Cu99zp5n3ejmw%2Cu99zp71dg4t3%2Cu99zp7mwvnv7%2Cu99zp7tvt5tp%2Cu99zpk5um5me%2Cu99zph7t10tp%2Cu99znvp055j4%2Cu99znv58ejt2#zoom:14;center:(54.68989490798361,25.272123999999963)'
href_3 = 'https://en.aruodas.lt/nt_zemelapis/?obj=1&on_map=1&type=map&FSelectedArea=u99zp75y38fj%2Cu99zp7p5d91g%2Cu99zpe1mqt1s%2Cu99zpe499scs%2Cu99zpdfdwwcu%2Cu99zpddj8t1f%2Cu99zpd6nkt9r%2Cu99zpd1rqe1x%2Cu99zpd4368cz%2Cu99zp9uzfd9d%2Cu99zp9usnd64%2Cu99zp9ejdwcy%2Cu99zp90wud6p%2Cu99zp3pkref4%2Cu99zp3hq1e3s%2Cu99zp38xkw3d%2Cu99zp4nvqd3g%2Cu99zp4wdh96p%2Cu99zp4yvsxcz%2Cu99zp5r8399e%2Cu99zp760ddct%2Cu99zp7hj2t1u#zoom:14;center:(54.68006792227714,25.287418499999944)'
href_4 = 'https://en.aruodas.lt/nt_zemelapis/?obj=1&on_map=1&type=map&FSelectedArea=u99zpdtuqntv%2Cu99zpdt9d4jb%2Cu99zpdd531t6%2Cu99zpd6jdjm3%2Cu99zpd1rtpt9%2Cu99zpd41fnt7%2Cu99zpdj9tjm1%2Cu99zpdnz9pt1%2Cu99zpdrc1jvb%2Cu99zpfhd4pvw%2Cu99zpfpsm0tk%2Cu9dp042mk5vp%2Cu99zpfxvq4m0%2Cu99zpfy904tj%2Cu99zpghgqpte%2Cu99zpg5xf4m1%2Cu99zpgdcd0jh%2Cu99zpgdhs1jr%2Cu99zpg95h5j2%2Cu99zpdyjypvd%2Cu99zpdtg3njr#zoom:15;center:(54.68360773485153,25.302744000000075)'

# --- File locations (relative to the script directory) ---------------------
filename_output = os.path.join('output', 'ImoBot_output.xlsx')  # generated spreadsheet
history_filepath = os.path.join('output', 'history.log')        # append-only run log
driver_win_filepath = os.path.join('driver', 'chromedriver.exe')
# NOTE(review): despite the "nt" in its name, this is the driver used when
# os.name is NOT 'nt' (see scrape_pages).
driver_nt_filepath = os.path.join('driver', 'chromedriver')

# --- Report configuration --------------------------------------------------
# Recipients of the result mail.
mailto = [
    'dennis.thiessen@riskahead.de',
    'kevin.gruendel@riskahead.de',
    'florianbergel@yahoo.de',
]
# Areas to scrape, as [display name, map URL] pairs.
pages = [
    ['Zverynas', href_1],
    ['Gedimino Prospektas', href_2],
    ['Old City', href_3],
    ['Uzupis', href_4],
]

# Reduced configuration, presumably kept for quick manual test runs
# (single recipient, single area); uncomment to use.
#mailto = ['dennis.thiessen@riskahead.de']
#pages = [['Zverynas', href_1]]
|
||||||
|
|
||||||
|
class Area:
    """A named map area and the per-category results collected for it.

    Attributes:
        name: Display name of the area.
        href: Map-search URL the area was scraped from.
        flats_for_rent, flats_for_sale, houses_for_sale, plots_for_sale:
            Per-category results; each defaults to None. In this file
            scrape_and_write_page stores the listing counts returned by
            write_to_ws here.
    """

    def __init__(self, name, href, flats_for_rent=None, flats_for_sale=None, houses_for_sale=None, plots_for_sale=None):
        # Identity of the area.
        self.name, self.href = name, href

        # Results per listing category.
        self.flats_for_rent = flats_for_rent
        self.flats_for_sale, self.houses_for_sale, self.plots_for_sale = (
            flats_for_sale,
            houses_for_sale,
            plots_for_sale,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class Property:
    """A single scraped listing.

    Attributes:
        href: Link to the listing.
        addr: Address text as shown on the result item.
        desc: Description text as shown on the result item.
        price: Raw price text. The parsing below expects two '€'-terminated
            numbers: the total price first, then the price per m².
    """

    def __init__(self, href, addr, desc, price):
        self.href, self.addr = href, addr
        self.desc, self.price = desc, price

    def _price_fields(self):
        """Split the raw price text at every '€' after normalising it.

        Spaces are removed (thousands separators) and decimal commas are
        turned into dots so the pieces can be passed to float().
        """
        normalised = self.price.replace(" ", "").replace(",", ".")
        return normalised.split('€')

    def get_price(self):
        """Return the total price (the text before the first '€') as float."""
        total_text = self._price_fields()[0]
        return float(total_text)

    def get_price_m2(self):
        """Return the price per m² (the text between the first and second '€') as float."""
        per_m2_text = self._price_fields()[1]
        # The first character is a separator left over from the raw text
        # (presumably the line break between the two prices - TODO confirm).
        return float(per_m2_text[1:])

    def get_m2(self):
        """Return the floor area derived as total price / price per m²."""
        area = self.get_price() / self.get_price_m2()
        return float(area)

    def get_roi(self, avg_rent_m2):
        """Return the yearly rent (avg_rent_m2 * area * 12) divided by the total price."""
        yearly_rent = avg_rent_m2 * self.get_m2() * 12
        return float(yearly_rent / self.get_price())
|
||||||
|
|
||||||
|
|
||||||
|
class PropertyCollection:
    """A list of listings plus price statistics computed once at construction.

    Attributes:
        properties: The listings (objects offering get_price(),
            get_price_m2() and get_roi()).
        average_price / average_price_m2: Arithmetic means, 0 when empty.
        median_price / median_price_m2: Medians, 0 when empty.
    """

    def __init__(self, properties):
        self.properties = properties
        # Statistics are snapshots: they are not refreshed if the list changes later.
        self.average_price = self.calc_average_price()
        self.average_price_m2 = self.calc_average_price_m2()
        self.median_price = self.calc_median_price()
        self.median_price_m2 = self.calc_median_price_m2()

    def calc_average_price(self):
        """Return the mean total price, or 0 for an empty collection."""
        if not len(self.properties) > 0:
            return 0
        total = sum(item.get_price() for item in self.properties)
        return total / len(self.properties)

    def calc_average_price_m2(self):
        """Return the mean price per m², or 0 for an empty collection."""
        if not len(self.properties) > 0:
            return 0
        total = sum(item.get_price_m2() for item in self.properties)
        return total / len(self.properties)

    def calc_median_price(self):
        """Return the median total price, or 0 for an empty collection."""
        if not len(self.properties) > 0:
            return 0
        prices = [item.get_price() for item in self.properties]
        return statistics.median(prices)

    def calc_median_price_m2(self):
        """Return the median price per m², or 0 for an empty collection."""
        if not len(self.properties) > 0:
            return 0
        prices_m2 = [item.get_price_m2() for item in self.properties]
        return statistics.median(prices_m2)

    def sort_by_roi(self, price_m2):
        """Reorder the listings by get_roi(price_m2), highest first.

        A new list is bound to self.properties; the list object passed to
        the constructor is left untouched.
        """
        if len(self.properties) > 0:
            def roi_of(item):
                return item.get_roi(price_m2)

            self.properties = sorted(self.properties, key=roi_of, reverse=True)

    def count(self):
        """Return the number of listings."""
        return len(self.properties)
|
||||||
|
|
||||||
|
|
||||||
|
def get_script_path():
    """Return the directory of the script the interpreter was started with.

    The path is taken from sys.argv[0] and resolved with os.path.realpath
    (so symlinks are followed) before the file name is stripped.
    """
    entry_point = os.path.realpath(sys.argv[0])
    return os.path.dirname(entry_point)
|
||||||
|
|
||||||
|
|
||||||
|
def send_mail(send_from, send_to, subject, text, files=None, server="mail.riskahead.de"):
    """Send a plain-text mail with optional attachments over SMTP-over-SSL.

    Args:
        send_from: Sender address used for the header and the envelope.
        send_to: List of recipient addresses (must be a list).
        subject: Subject line.
        text: Plain-text body.
        files: Optional iterable of paths, relative to the script directory,
            to attach. None or empty means no attachments.
        server: Host name of the SMTP-over-SSL server.

    Raises:
        AssertionError: If send_to is not a list.
        OSError / smtplib.SMTPException: If a file cannot be read or the
            mail cannot be delivered.
    """
    assert isinstance(send_to, list)

    msg = MIMEMultipart()
    msg['From'] = send_from
    msg['To'] = ", ".join(send_to)
    msg['Subject'] = subject

    msg.attach(MIMEText(text))

    for f in files or []:
        attachment_name = os.path.basename(f)
        with open(os.path.join(get_script_path(), f), "rb") as fil:
            part = MIMEApplication(fil.read(), Name=attachment_name)
        # The payload is fully read, so the file can be closed before the
        # part is finished and attached.
        part['Content-Disposition'] = 'attachment; filename="%s"' % attachment_name
        msg.attach(part)

    # SECURITY NOTE(review): the fallback credentials below are committed to
    # version control. They are kept only so existing deployments keep
    # working; rotate the password and provide IMOBOT_SMTP_USER /
    # IMOBOT_SMTP_PASSWORD through the environment instead.
    smtp_user = os.environ.get('IMOBOT_SMTP_USER', 'support@riskahead.de')
    smtp_password = os.environ.get('IMOBOT_SMTP_PASSWORD', "405risksupport")

    # The context manager sends QUIT and closes the socket even when login or
    # delivery raises, so a failed run no longer leaks the connection.
    with smtplib.SMTP_SSL(server) as smtp:
        smtp.login(smtp_user, smtp_password)
        smtp.sendmail(send_from, send_to, msg.as_string())
|
||||||
|
|
||||||
|
|
||||||
|
def switch_page(driver, page):
    """Select another listing category in the search form.

    Leaves the current frame, opens the object-type selector, clicks the
    option whose label contains ``page`` and finally re-enters the
    "sideListIframe" frame.

    Args:
        driver: Selenium WebDriver positioned on the map search page.
        page: Label text of the category to select, e.g. 'Plots for sale'.
    """
    # The selector lives in the top-level document, not inside the iframe.
    driver.switch_to.default_content()

    selector = driver.find_element_by_id('searchFormField_obj')
    selector.click()

    option_list = driver.find_element_by_id('options_obj')
    # NOTE(review): an XPath starting with '//' is evaluated against the whole
    # document, not relative to option_list - confirm that this is intended.
    label_xpath = "//li/label[contains(text(),'" + page + "')]/.."
    option = option_list.find_element_by_xpath(label_xpath)
    option.click()

    # Back into the result list frame.
    driver.switch_to.frame("sideListIframe")
|
||||||
|
|
||||||
|
|
||||||
|
def scrape_page(driver):
    """Collect all listings of the current result list, following pagination.

    Every element with class 'result-item-v3' on the current page becomes a
    Property. While the pagination bar offers exactly one 'Next page' link,
    the link is clicked and the following page is collected as well.

    Pages are walked in a loop rather than by recursion, so the number of
    result pages is not limited by the interpreter's recursion depth, and
    only the pagination lookup is guarded: a listing that cannot be parsed
    raises on every page instead of being reported as "no pagination found"
    on pages after the first.

    Args:
        driver: Selenium WebDriver already switched into the result frame.

    Returns:
        list of Property, in page order.

    Raises:
        NoSuchElementException: If a result item lacks one of the expected
            child elements.
    """
    # Same logger the script configures under __main__; looking it up here
    # keeps the function usable when the module is imported.
    log = logging.getLogger('imobot_logger')
    items = []

    while True:
        for element in driver.find_elements_by_class_name('result-item-v3'):
            href = element.find_element_by_class_name('object-image-link').get_attribute('href')
            addr = element.find_element_by_class_name('item-address-v3').text
            desc = element.find_element_by_class_name('item-description-v3').text
            price = element.find_element_by_class_name('item-price-main-v3').text

            items.append(Property(href, addr, desc, price))

        try:
            pagination = driver.find_element_by_class_name('sidebar-pagination')
        except NoSuchElementException:
            log.debug("no pagination found")
            break

        # NOTE(review): an XPath starting with '//' searches the whole
        # document rather than only the pagination element - confirm intent.
        next_page_element = pagination.find_elements_by_xpath('//a[contains(text(), \'Next page\')]')
        if len(next_page_element) != 1:
            break  # last page reached (or the link is ambiguous)

        # NOTE(review): there is no explicit wait after the click; the next
        # iteration reads whatever the frame shows at that moment.
        next_page_element[0].click()

    return items
|
||||||
|
|
||||||
|
|
||||||
|
def scrape_pages(pages):
    """Scrape every configured area, write the spreadsheet and mail the result.

    Starts a headless Chrome, scrapes each area into one worksheet of the
    output workbook, closes the workbook and sends it as a mail attachment to
    the recipients in ``mailto``. The browser is shut down even when scraping
    fails, so aborted runs no longer leave chromedriver/Chrome processes
    behind.

    Args:
        pages: Iterable of [area name, map URL] pairs.
    """
    # Same logger the script configures under __main__; looking it up here
    # keeps the function usable when the module is imported.
    log = logging.getLogger('imobot_logger')
    log.info('Start ImoBot...')
    start = time.time()

    # os.name == 'nt' means Windows; every other platform uses the plain binary.
    driver_relpath = driver_win_filepath if os.name == 'nt' else driver_nt_filepath
    chrome_driver = os.path.join(get_script_path(), driver_relpath)

    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(chrome_options=chrome_options, executable_path=chrome_driver)

    try:
        workbook = xlsxwriter.Workbook(os.path.join(get_script_path(), filename_output))
        worksheet = workbook.add_worksheet()
        now = datetime.datetime.now()
        worksheet.write('A2', now.strftime("%Y-%m-%d %H:%M"))

        areas = []
        row = 1

        for page in pages:
            # page is [name, href]; row is threaded through so that each area
            # is written below the previous one.
            row, area = scrape_and_write_page(driver, worksheet, page[1], page[0], row)
            areas.append(area)

        # The file is only written to disk here, so a failed run does not
        # leave a partial spreadsheet behind.
        workbook.close()
    finally:
        driver.quit()

    end = time.time()

    subject = "ImoBot-Weekly-Results"

    text = "ImoBot-Weekly-Results\n\n" + \
           "Last run: {}\n".format(now.strftime("%Y-%m-%d %H:%M"))

    for area in areas:
        text += "{}: (Flats: {}, Houses: {}, Plots: {})\n".format(area.name, area.flats_for_sale, area.houses_for_sale, area.plots_for_sale)

    send_mail('webmaster@riskahead.de',
              mailto,
              subject,
              text,
              [filename_output])

    log.info('Finished in {:.0f} sec'.format(end-start))
|
||||||
|
|
||||||
|
|
||||||
|
def scrape_and_write_page(driver, worksheet, href, name, row):
    """Scrape one area and append its four category sections to the worksheet.

    Args:
        driver: Selenium WebDriver used for scraping.
        worksheet: Worksheet object offering write(cell, value).
        href: Map-search URL of the area.
        name: Display name of the area, written into column A.
        row: Last worksheet row used so far.

    Returns:
        (row, area): the last row used afterwards and an Area holding the
        listing count of each category.
    """
    driver.get(href)
    flats_for_sale, houses_for_sale, plots_for_sale, flats_for_rent = get_prices_from_site(driver)

    # Leave a gap, then put the area heading into column A.
    row += 2
    worksheet.write('A{}'.format(row), name)

    # (Area keyword, section title, listings) in output order. The rent
    # collection is passed to every section because the yields of the sale
    # listings are computed from the rent prices.
    sections = (
        ('flats_for_rent', "Flats for rent", flats_for_rent),
        ('flats_for_sale', "Flats for sale", flats_for_sale),
        ('houses_for_sale', "Houses for sale", houses_for_sale),
        ('plots_for_sale', "Plots for sale", plots_for_sale),
    )

    counts = {}
    for keyword, title, listings in sections:
        row, amount = write_to_ws(flats_for_rent, title, listings, worksheet, row)
        counts[keyword] = amount

    return row, Area(name, href, **counts)
|
||||||
|
|
||||||
|
|
||||||
|
def write_to_ws(flats_for_rent, name, properties, worksheet, row):
    """Write one category section to the worksheet.

    The section consists of the title (column B), amount and average prices
    (columns C/D) and then either the median prices (for 'Flats for rent')
    or a table of the first ten listings with their yields.

    Args:
        flats_for_rent: Rent collection; its average and median price per m²
            are the basis of the yield columns.
        name: Section title; 'Flats for rent' selects the median layout.
        properties: Collection to write (count(), average/median attributes,
            .properties list).
        worksheet: Worksheet object offering write(cell, value).
        row: Last worksheet row used so far.

    Returns:
        (row, count): the row position after the section and the number of
        listings in ``properties``.
    """
    def put(column, value):
        # Always targets the current value of `row`.
        worksheet.write('{}{}'.format(column, row), value)

    row += 2
    put('B', name)

    row += 1
    put('C', 'Amount')
    put('D', properties.count())

    row += 1
    put('C', 'AVG Price')
    put('D', properties.average_price)

    row += 1
    put('C', 'AVG m² Price')
    put('D', properties.average_price_m2)

    row += 2

    if name != 'Flats for rent':
        # Sale categories: header line followed by the first ten listings.
        headers = ('Address', 'GRY (AVG)', 'GRY (MED)', 'Total Price', 'Price per m²', 'Description', 'Link')
        for column, title in zip('CDEFGHI', headers):
            put(column, title)
        row += 1

        for prop in properties.properties[:10]:
            roi_median = prop.get_roi(flats_for_rent.median_price_m2)
            roi_average = prop.get_roi(flats_for_rent.average_price_m2)

            logger.info("ROI (Median): {:3.2%}, ROI (Average): {:3.2%}, Price: {}€, Address: {}, URL: {}".format(
                roi_median, roi_average, prop.price,
                prop.addr, prop.href))

            put('C', prop.addr)
            put('D', roi_average)
            put('E', roi_median)
            put('F', prop.get_price())
            put('G', prop.get_price_m2())
            put('H', prop.desc)
            put('I', prop.href)
            row += 1
    else:
        # Rent category: only the medians are added.
        put('C', 'MED Price')
        put('D', properties.median_price)
        row += 1
        put('C', 'MED m² Price')
        put('D', properties.median_price_m2)

    return row, properties.count()
|
||||||
|
|
||||||
|
|
||||||
|
def get_prices_from_site(driver):
    """Scrape all four listing categories of the currently loaded area.

    Dismisses the overlay if there is one, enters the result frame and then
    scrapes flats for sale (the category the page opens with), houses for
    sale, plots for sale and flats for rent. The three sale collections are
    finally sorted by yield based on the average rent per m².

    Args:
        driver: Selenium WebDriver that has already loaded the area URL.

    Returns:
        (flats_for_sale, houses_for_sale, plots_for_sale, flats_for_rent),
        each a PropertyCollection.
    """
    try:
        driver.find_element_by_class_name('close-button').click()
    except (ElementNotVisibleException, NoSuchElementException):
        # Best effort: the overlay may be hidden or not rendered at all;
        # neither case should abort the run.
        pass

    driver.switch_to.frame("sideListIframe")

    flats_for_sale = _scrape_category(driver, 'flats for sale', '{:8.2f}', '{:5.2f}')
    houses_for_sale = _scrape_category(driver, 'houses for sale', '{:8.2f}', '{:5.2f}',
                                       switch_to='Houses for sale')
    plots_for_sale = _scrape_category(driver, 'plots for sale', '{:8.2f}', '{:5.2f}',
                                      switch_to='Plots for sale')
    flats_for_rent = _scrape_category(driver, 'flats for rent', '{:4.2f}', '{:3.2f}',
                                      switch_to='Flats for rent')

    for collection in (flats_for_sale, houses_for_sale, plots_for_sale):
        collection.sort_by_roi(flats_for_rent.average_price_m2)

    return flats_for_sale, houses_for_sale, plots_for_sale, flats_for_rent


def _scrape_category(driver, label, price_fmt, m2_fmt, switch_to=None):
    """Scrape one listing category and log its summary statistics.

    Args:
        driver: Selenium WebDriver inside the result frame.
        label: Lower-case category name used in the log messages.
        price_fmt: Format spec for the average price log line.
        m2_fmt: Format spec for the per-m² log lines.
        switch_to: Label of the selector option to activate first, or None
            to scrape the category that is already displayed.

    Returns:
        PropertyCollection with the scraped listings.
    """
    # Same logger the script configures under __main__; looking it up here
    # keeps the function usable when the module is imported.
    log = logging.getLogger('imobot_logger')

    log.info("Start scraping {}...".format(label))
    if switch_to is not None:
        switch_page(driver, switch_to)
        time.sleep(3)  # Avoid race condition by giving chromedriver enough time to load the page

    collection = PropertyCollection(scrape_page(driver))
    log.info("Found {} {}".format(collection.count(), label))
    log.info(("Average price: " + price_fmt + " €").format(collection.average_price))
    log.info(("Average m2 price: " + m2_fmt + " €").format(collection.average_price_m2))
    log.info(("Median m2 price: " + m2_fmt + " €").format(collection.median_price_m2))
    return collection
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # The script takes no command-line arguments.
    assert (len(sys.argv) == 1), "Wrong number of arguments given"
    # NOTE(review): this only affects processes started from here on; the
    # streams of the running interpreter are re-wrapped explicitly below.
    os.environ["PYTHONIOENCODING"] = "utf-8"

    # Module-level logger used by the scraping functions.
    logger = logging.getLogger('imobot_logger')
    logger.setLevel(logging.DEBUG)

    # Everything (DEBUG and up) is appended to the history file ...
    history_file = os.path.join(get_script_path(), history_filepath)
    fh = logging.FileHandler(history_file, "a", encoding='utf-8')
    fh.setLevel(logging.DEBUG)

    # ... while the console, forced to UTF-8, only shows INFO and up.
    utf8_writer = codecs.getwriter("utf-8")
    sys.stdout = utf8_writer(sys.stdout.detach())

    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.INFO)

    formatter = logging.Formatter('%(asctime)s - %(message)s')
    for handler in (fh, ch):
        handler.setFormatter(formatter)
    for handler in (fh, ch):
        logger.addHandler(handler)

    scrape_pages(pages)
|
||||||
56
web/index.php
Normal file
56
web/index.php
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
<?php header('Content-type: text/html; charset=utf-8'); ?>
<!DOCTYPE HTML>
<html>
<head>
<style>
.error {color: #FF0000;}
</style>
</head>
<body>
<?php
// Form state: the submitted URL (echoed back into the form) and its
// validation message.
$url = $urlErr = "";

if ($_SERVER["REQUEST_METHOD"] === "POST") {
    if (empty($_POST["url"])) {
        // Was assigned to an undefined $nameErr, so the message never
        // reached the form.
        $urlErr = "URL is required";
    } elseif (!is_string($_POST["url"])) {
        // e.g. url[]=... arrives as an array and must not reach trim().
        $urlErr = "Not a valid URL";
    } else {
        $url = test_input($_POST["url"]);
        if (!preg_match('%^((https?://)|(www\.))([a-z0-9-].?)+(:[0-9]+)?(/.*)?$%i',$url)) {
            $urlErr = "Not a valid URL";
        } else {
            echo "Used URL: ".html_escape($url);
            echo "<br><br>";

            echo "Output: ";
            echo "<br>";

            // The URL is user input: escapeshellarg() quotes it as exactly one
            // shell argument, so quotes or ';' inside it cannot start a
            // second command.
            // NOTE(review): the Python entry point in this commit is main.py,
            // which asserts that it is started without arguments - confirm
            // that ImoBot.py exists on the server and accepts a URL.
            $scriptName = "python3 ImoBot.py ".escapeshellarg($url);
            exec($scriptName,$out);
            foreach($out as $key => $value){
                // Script output is untrusted text as well; escape it before
                // it is embedded in the page.
                echo $key." ".html_escape($value)."<br>";
            }
        }
    }
}

/**
 * Normalises raw form input: strips surrounding whitespace and removes
 * backslash escapes.
 *
 * @param string $data Raw value taken from the request.
 * @return string The cleaned value.
 */
function test_input($data) {
    $data = trim($data);
    $data = stripslashes($data);
    return $data;
}

/**
 * Escapes a value for output in HTML text and quoted attribute contexts.
 *
 * @param mixed $value Value to print; it is cast to string first.
 * @return string The HTML-escaped value.
 */
function html_escape($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}
?>

<h2>ImoBot</h2>
<p><span class="error">* required field</span></p>
<form method="post" action="<?php echo htmlspecialchars($_SERVER["PHP_SELF"]);?>">
URL: <input type="text" size="100" name="url" value="<?php echo html_escape($url);?>">
<span class="error">* <?php echo html_escape($urlErr);?></span>
<br><br>
<br><br>
<input type="submit" name="submit" value="Submit">
</form>


</body>
</html>
|
||||||
Reference in New Issue
Block a user