Code:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib
import re
import time
import os
import os.path
import string
# Pause in seconds that scrap() takes at the start of every polling cycle
# inside the trading hours.
sleeptime1 = 10  # 20 (presumably an alternative setting -- TODO confirm)
# Pause in seconds that scrap() takes before each URL of symbollist is fetched.
sleeptime2 = 2
# First and last hour of the day (24h clock, both inclusive) in which scrap()
# polls the pages.
StartStunde = 8
EndStunde = 22
# Pages that scrap() downloads and searches for quotes.
symbollist = ["https://www.ls-tc.de/de/aktien/deutschland/dax"]
def check_data(source, a, b):
    """Tell whether a quote differs from the last row stored for ``source``.

    Reads ``Kursdaten/<source>.csv`` and compares the given values with the
    second and third ``;``-separated fields of the last line of that file.

    Args:
        source: File name stem of the CSV file inside ``Kursdaten/``.
        a: Value compared with the second field of the last row.
        b: Value compared with the third field of the last row.

    Returns:
        ``False`` if ``a`` equals the second field or ``b`` equals the third
        field of the last row, otherwise ``True``.  ``True`` is also returned
        when the file is empty or its last row has fewer than three fields,
        because there is no stored value to compare against.

    Raises:
        IOError: If the CSV file cannot be opened (for example, it is missing).
    """
    last_line = None
    # The ``with`` block closes the file even if reading fails half-way.
    with open("Kursdaten/" + source + ".csv", "r") as fobj:
        for line in fobj:
            # Only the final row matters, so earlier rows are not kept.
            last_line = line
    if last_line is None:
        # Empty file: nothing stored yet, so the new data cannot be a repeat.
        return True
    daten_feld = last_line.split(";")
    if len(daten_feld) < 3:
        # Malformed or blank last row: treat it like "no previous data".
        return True
    return not (a == daten_feld[1] or b == daten_feld[2])
def check_double_data(bid, ask, source=None):
    """Read the rows stored for ``source``; the duplicate check is unfinished.

    The function only loads ``Kursdaten/<source>.csv``.  No comparison with
    ``bid`` or ``ask`` is implemented yet, so nothing is returned.

    Args:
        bid: Currently unused.
        ask: Currently unused.
        source: File name stem of the CSV file inside ``Kursdaten/``.  It is
            optional only to keep the two-argument signature valid; a value
            must be supplied for the file to be read.

    Returns:
        None.

    Raises:
        ValueError: If ``source`` is not given.
        IOError: If the CSV file cannot be opened.
    """
    if source is None:
        raise ValueError("check_double_data needs the 'source' file name stem")
    # The ``with`` block closes the file again after reading.
    with open("Kursdaten/" + source + ".csv", "r") as fobj:
        buf = list(fobj)
    # TODO: compare bid/ask with the rows collected in ``buf`` and return the
    # result; the intended rule is not defined anywhere in this file.
    del buf
    return None
def WriteData(wert, Datum, ti, bi, aa, vo):
    """Append one quote row to the CSV of ``wert`` if check_data() allows it.

    If ``Kursdaten/<wert>.csv`` does not exist, the row (prefixed with
    ``wert``) is appended to ``Report/Error.csv`` instead.  If the file exists,
    the row is appended only when ``check_data(wert, ti, bi)`` returns a true
    value; otherwise nothing is written.

    Args:
        wert: File name stem of the CSV file inside ``Kursdaten/``.
        Datum: First field of the row.
        ti: Second field of the row; also passed to check_data().
        bi: Third field of the row; also passed to check_data().
        aa: Fourth field of the row.
        vo: Fifth field of the row.

    Returns:
        None.

    Raises:
        IOError: If the target file cannot be opened for appending.
    """
    kursdatei = "Kursdaten/" + wert + ".csv"
    zeile = Datum + ";" + ti + ";" + bi + ";" + aa + ";" + vo + "\n"

    # The existence test has to come first: check_data() opens the CSV file
    # and would raise for a missing one before the error report is written.
    if not os.path.exists(kursdatei):
        with open("Report/" + "Error" + ".csv", "a") as fobj:
            fobj.write(wert + ";" + zeile)
        return

    if check_data(wert, ti, bi):  # TODO: also apply check_double_data()
        with open(kursdatei, "a") as fobj:
            fobj.write(zeile)
def format_filename(s):
    """Turn an arbitrary string into a lower-case file name stem.

    Works as a whitelist: only ASCII letters, digits and the characters
    ``-_.() `` survive; everything else is dropped.  Spaces and dots that
    survived are then turned into underscores and the result is lower-cased.

    Note that the result can be empty (for example when ``s`` contains no
    whitelisted character), so callers should add a prefix or an extension.
    """
    allowed = "-_.() " + string.ascii_letters + string.digits
    kept = [zeichen for zeichen in s if zeichen in allowed]
    cleaned = "".join(kept)
    # Neither spaces nor dots are wanted in the final name.
    for unwanted in (" ", "."):
        cleaned = cleaned.replace(unwanted, "_")
    return cleaned.lower()
def _compile_quote_patterns():
    """Compile the regular expressions for name, time, bid, ask and volume."""
    #=== HTML data tags ===
    Name = '<a href="/de/aktie/(.+?)</a>'
    Geld = 'bidWithCurrencySymbol" decimals="4">(.+?) €</span>'
    Brief = 'askWithCurrencySymbol" decimals="4">(.+?) €</span>'
    Zeit = 'midTime" decimals="4">(.+?)</span>'
    Volumen = 'tradeCumulativeTurnoverWithCurrencySymbol" decimals="4">(.+?) €</span>'
    # The order here fixes the order of the fields in every extracted row.
    return [re.compile(tag) for tag in (Name, Zeit, Geld, Brief, Volumen)]


def _read_url(url):
    """Download ``url`` and return its body, closing the connection afterwards."""
    htmlfile = urllib.urlopen(url)  # Python 2 API, as imported by this file
    try:
        return htmlfile.read()
    finally:
        htmlfile.close()


def _extract_rows(htmltext, patterns):
    """Return ``(name, time, bid, ask, volume)`` tuples found in ``htmltext``.

    The n-th match of every pattern forms the n-th row.  ``zip`` stops at the
    shortest match list, so a name without a complete set of values is left
    out instead of raising ``IndexError``.
    """
    spalten = [pattern.findall(htmltext) for pattern in patterns]
    return zip(*spalten)


def scrap():
    """Poll the pages in ``symbollist`` forever and store the quotes as CSV.

    In every cycle the current hour is compared with ``StartStunde`` and
    ``EndStunde`` (both inclusive).  Outside that window only a notice is
    printed.  Inside it, every URL is downloaded and each extracted row is
    written to ``Kursdaten/<name>.csv``, where ``<name>`` is the text after the
    first ``>`` of the matched link, passed through format_filename().

    During the first complete pass the rows are appended unconditionally,
    which creates the CSV files.  Every later pass goes through WriteData().

    Returns:
        Never returns normally; the loop only ends through an exception.

    Raises:
        IOError: If a page cannot be downloaded or a CSV file cannot be opened.
    """
    # Compiled once; the patterns are the same for every page and cycle.
    patterns = _compile_quote_patterns()
    erster_durchlauf = True

    # The loop is meant to run forever.  It no longer depends on a counter
    # that the inner loops overwrite, which could end it by accident.
    while True:
        Stunde = int(time.strftime("%H"))
        time.sleep(0.01)
        os.system("clear")
        print("Ausserhalb der Handelszeiten!!")
        if not (StartStunde <= Stunde <= EndStunde):
            # NOTE(review): outside the window this repeats every 10 ms and
            # runs "clear" each time; a longer pause may be preferable.
            continue

        os.system("clear")
        print("++++++++++++++++++++")
        time.sleep(sleeptime1)
        os.system("clear")
        print("####################")

        #=== read URLs ===
        for url in symbollist:
            os.system("clear")
            print("++++++++++++++++++++")
            os.system("clear")
            print("####################")
            time.sleep(sleeptime2)
            htmltext = _read_url(url)
            Datum = time.strftime("%d.%m.%Y")

            #=== extract and store data ===
            for na, ti, bi, aa, vo in _extract_rows(htmltext, patterns):
                print(na)
                stri = na.split(">")
                if len(stri) < 2:
                    # No link text behind a ">": nothing to name the file by.
                    continue
                korrekterfilename = format_filename(stri[1])
                if erster_durchlauf:
                    with open("Kursdaten/" + korrekterfilename + ".csv", "a") as fobj:
                        fobj.write(Datum + ";" + ti + ";" + bi + ";" + aa + ";" + vo + "\n")
                else:
                    WriteData(korrekterfilename, Datum, ti, bi, aa, vo)

        # After the first pass over all URLs the files exist, so the
        # check_data() test inside WriteData() can be used from now on.
        erster_durchlauf = False
if __name__ == "__main__":
    # Start the endless scraper only when the file is run as a script, so
    # that importing the module does not block.
    scrap()
Ich habe den Code vor vielen Jahren geschrieben; er müsste mal überarbeitet werden, läuft aber noch.