- ©«¤l
- 552
- ¥DÃD
- 3
- ºëµØ
- 0
- ¿n¤À
- 578
- ÂI¦W
- 0
- §@·~¨t²Î
- win7
- ³nÅ骩¥»
- office 2010
- ¾\ŪÅv
- 50
- ©Ê§O
- ¨k
- µù¥U®É¶¡
- 2015-2-8
- ³Ì«áµn¿ý
- 2024-7-9
|
¥»©«³Ì«á¥Ñ lpk187 ©ó 2016-9-29 12:53 ½s¿è
¦^´_ 30# c_c_lai
¤@Ó«Ü©_©Çªº°ÝÃD¡A§Ú¤]·d¤£À´¡I
¥H¤U±³o¬qµ{¦¡½X¨Ó»¡¡A·Ó²z»¡¡A·í
try:
c = conn.cursor()
sql="SELECT * FROM [" + regs[0] + "] WHERE ¤é´Á = '" + date + "';"
c.execute(sql)
c.close()
§ä¤£¨ì¦¹¤é´Á¡AÀ³¸Ó·|²£¥Í¿ù»~¡A¶Çµ¹except Exception as e: °õ¦æ¤~¹ï¡A¦ý
¦bJupyter NoteBook «o¬O¤£·|²£¥Í¿ù»~(¥H¤@¯ëªºpyhton ·|¶Ç¦^¿ù»~°T®§)
§Ú¸ÕµÛ¥hSQLiteStudio¤¤¥´¤WÀ˸߿ù»~ªº¤é´Á¡AÁöµM§ä¤£¨ì¡A¦ý¤]¨S¦³²£¥Í¿ù»~¡A
©Ò¥H§Ú§ï¤Fµ{¦¡½X¡A§A¸Õ¬Ý¬Ý¡A½Ð½Æ»s¥H¤U¥N½X
¨ä¤¤¤]×¥¿Â^¨úºô¸ô¸ê®Æªº°j°é¡A²{¦bÂ^¨úÅܫܧÖ- #-*- coding:utf-8 -*-
- import requests
- from bs4 import BeautifulSoup
- import random
- import os
- import sqlite3
- import datetime
- import time
- # ÀH¾÷§ó§ï headers
- def head_random():
- for i in range(10):
- hs = ['Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
- 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 3_0 like Mac OS X; en-us) AppleWebKit/528.18 (KHTML, like Gecko) Version/4.0 Mobile/7A341 Safari/528.16',
- 'Mozilla/5.0 (Linux; U; Android 4.1.2; zh-tw; GT-I9300 Build/JZO54K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30',
- 'Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10'
- 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)']
- hes = {"User-Agent":random.choice(hs)}
- return hes
- # ¶°¥xÃҩҿĸê¿Ä¨é¾lÃB¥þ³¡¸ê®Æ
- def read_network_data(date,from_number):
- url = 'http://www.twse.com.tw/ch/trading/exchange/MI_MARGN/MI_MARGN.php'
- head =head_random()
- payload = {"download":'',
- "qdate":date,
- "selectType":"ALL"}
- res = requests.post(url, headers=head, data=payload)
- soup=BeautifulSoup(res.text, "lxml")
- tab = soup.select('tbody')[from_number] # 2Óªí®æ¤§2
- regs=[] # ªì©l°}¦C
-
- le=len(tab.select('tr')[0].select('td'))
- for i in range(le): # ©w¸q2ºû°}¦Cºû«×
- regs.append([])
- for tr in tab.select('tr'): #Â^¨úªí®æ¸ê®Æ
- for i in range(le):
- if i==0 or i==1 or i==15: #§â¤å¦r¦ê¤¤ªºªÅ¥Õ¤å¦r¥h°£±¼
- strs=tr.select('td')[i].text
- strs=str(strs.replace(' ',''))
- regs[i].append(strs)
- else: # §â¼Æ¦r¤¤ªº¤d¤À²Å¸¹ "," ¥h°£±¼
- ints=tr.select('td')[i].text
- ints=str(ints.replace(',',''))
- regs[i].append(ints)
- return regs
- def check_db_exists(regs):
- if not os.path.exists('twsedata.db'): # Àˬdtwsedata.db¬O§_¦s¦b(©M¦¹µ{¦¡½X¬Û¦Pªº¸ê®Æ§¨)
- conn = sqlite3.connect('twsedata.db') # ¦pªG¨S¦³¡A´N«Ø¥ß¤@Ӫťոê®Æ®w¡A(sqlite3.connect) ¤]¬O³sµ²»y¥y¡A¦³¸ê®Æ®w´N³sµ²¡A¨S¦³´N«Ø¥ß
- sql = "CREATE TABLE ªÑ²¼(ªÑ²¼¥N¸¹ char(10) PRIMARY KEY,ªÑ²¼¦WºÙ char(20));"
- conn.execute(sql) #¦b·s¸ê®Æ®w«Ø¥ßªÅ¥Õ¸ê®Æªí (ªÑ²¼)
- sql = "CREATE TABLE ¿Ä¸ê¿Ä¨é«H¥Î¥æ©ö²Îp( \
- ¤é´Á datetime, \
- ¶µ¥Ø char(20), \
- ¶R¶i INTEGER, \
- ¶R¥X INTEGER, \
- ²{ª÷¡]¨é¡^ÀvÁÙ INTEGER, \
- «e¤é¾lÃB INTEGER, \
- ¤µ¤é¾lÃB INTEGER);"
- conn.execute(sql) #¦b·s¸ê®Æ«Ø¥ß ªÅ¥Õ¸ê®Æªí(¿Ä¸ê¿Ä¨é«H¥Î¥æ©ö²Îp)
- conn.commit() # Àx¦s
- conn.close() #Ãö³¬
-
- n=0
- conn = sqlite3.connect('twsedata.db') # conn©M¤W¤@Ó if ..¤¤ªº conn¬O¤£¤@¼Ëªº ¡A¤W¤@Ó¬O¨S¦³¸ê®Æ®w®É¤~§@¥Î¡A¬°§½³¡ÅܼƦӦ¹³B¡A«h¬°¥H¤U¨Ï¥Îªº¥þ§½ÅܼÆ
- for i in regs[0]: #¥Hª¦¦^¨Óªº¸ê®Æ°}¦Cªº²Ä¤@¦C¬°°j°é
- try: #¥ý¹Á¸Õ¬d¸ß i Åܼƪº¸ê®Æªí¬O§_¦s¦b¡AY¬O¤£¦s¦b·|µo¥X Exception ªº¿ù»~°T®§
- c = conn.cursor()
- sql = "select * from [" + i + "]"
- c.execute(sql) # table does not exist
- c.close()
- except Exception as e: #·ítry µo¥X¿ù»~°T®§(¤]´N¬OŪ¤£¨ì i ¸ê®Æªí)¡A¦¹®É«Ø¥ß¤@Ó i ¸ê®Æªí
- c = conn.cursor()
- sql="CREATE TABLE [" + i + "](¤é´Á DATETIME PRIMARY KEY, \
- ¸ê¶R¶i INTEGER, \
- ¸ê½æ¥X INTEGER, \
- ¸ê²{ª÷ÀvÁÙ INTEGER, \
- ¸ê«e¤é¾lÃB INTEGER, \
- ¸ê¤µ¤é¾lÃB INTEGER, \
- ¸êÃB INTEGER, \
- ¨é¶R¶i INTEGER, \
- ¨é½æ¥X INTEGER, \
- ¨é²{ª÷ÀvÁÙ INTEGER, \
- ¨é«e¤é¾lÃB INTEGER, \
- ¨é¤µ¤é¾lÃB INTEGER, \
- ¨éÃB INTEGER, \
- ¸ê¨é¤¬©è INTEGER, \
- µù°O char(5));"
- c.execute(sql) #«Ø¥ß i ¸ê®Æªí
- sql="INSERT INTO ªÑ²¼(ªÑ²¼¥N¸¹, ªÑ²¼¦WºÙ) \
- VALUES ('" + regs[0][n] + "','" + regs[1][n] + "');"
- c.execute(sql) # ¦bªÑ²¼¸ê®Æªí¤¤¼g¤JªÑ²¼¥N¸¹¡BªÑ²¼¦WºÙ
- c.close() # Ãö³¬ c ª«¥ó
- n += 1
- conn.commit()
- conn.close()
- def add_data(regs,date):
- conn = sqlite3.connect('twsedata.db')
- for i in range(len(regs[0])):
- try: #Àˬd¬Y¤é´Á¬O§_¦s¦b©ó¸ê®Æªí¤¤¡A·í¬Y¤é´Á¦s¦b®É¡A¸õÂ÷³o¦¸ªº´`Àô¡A³o¸Ì«üªº¶È¬O³o¦¸¡A¤£¬O¸õÂ÷ for
- c = conn.cursor()
- sql="INSERT INTO [" + regs[0][i] + "](¤é´Á,¸ê¶R¶i,¸ê½æ¥X,¸ê²{ª÷ÀvÁÙ,¸ê«e¤é¾lÃB, \
- ¸ê¤µ¤é¾lÃB,¸êÃB,¨é¶R¶i,¨é½æ¥X,¨é²{ª÷ÀvÁÙ, \
- ¨é«e¤é¾lÃB,¨é¤µ¤é¾lÃB,¨éÃB,¸ê¨é¤¬©è,µù°O) \
- VALUES('" + date + "'," + regs[2][i] + "," + regs[3][i] + ", \
- " + regs[4][i] + ", " + regs[5][i] + "," + regs[6][i] + ", \
- " + regs[7][i] + "," + regs[8][i] + ", " + regs[9][i] + ", \
- " + regs[10][i] + "," + regs[11][i] + "," + regs[12][i] + ", \
- " + regs[13][i] + "," + regs[14][i]+ ",'" + regs[15][i] + "') ;"
- c.execute(sql)
- except Exception as e: #·í¤é´Á¤£¦s¦b®É¡A¼g¤J¸ê®Æ¨ì¸ê®Æªí¤¤
- pass
- else:
- c.close()
-
- conn.commit()
- conn.close()
- # ¥H¤U¬°µ{¦¡ªº±Ò©lÂI¡G
- StartDate=input('½Ð¿é¤J¶}©lÂ^¨ú¤é´Á(¦è¤¸¤é´Á¦p¡G2016/1/1)¡G')
- EndDate=input('½Ð¿é¤Jµ²§ôÂ^¨ú¤é´Á(¦è¤¸¤é´Á¦p¡G2016/1/31)¡G')
- #³B²z¤é´ÁÁÙ¯uªº«Ü³Â·Ð
- ts=time.time()
- StartDate = time.strptime(StartDate, "%Y/%m/%d")
- EndDate =time.strptime(EndDate, "%Y/%m/%d")
- StartDate = datetime.date(StartDate[0], StartDate[1], StartDate[2])
- EndDate = datetime.date(EndDate[0], EndDate[1], EndDate[2])
- RangeDate = datetime.timedelta(days = 1)
- while StartDate <= EndDate:
- yy,mm,dd=str(StartDate).split('-')
- dat=datetime.datetime(int(yy), int(mm), int(dd))
- dd=dat.strftime('%Y/%m/%d')
- year=str(int(dd[0:4])-1911)
- date=dd.replace(dd[0:4], year)
- print('¥Ø«e°õ¦æºô¸ôÂ^¨ú',date ,'¤éªº¸ê®Æ¡A½Ðµy«á...')
- re=read_network_data(date,1)
- if re[0][0]=='¬dµL¸ê®Æ':
- print(date,'¤é¡A¥i¯à¬°¥ð¥«¤é¡A¬dµL¸ê®Æ¡C')
- StartDate = StartDate + RangeDate
- continue
- print('Àˬd¬O§_¦³·sªÑ²¼¥[¤J¡A½Ðµy«á...')
- check_db_exists(regs=re)
- print('±N¸ê®Æ¼g¤J¸ê®Æ®w¤¤¡A½Ðµy«á...')
- add_data(regs=re,date=dd)
- StartDate = StartDate + RangeDate
- te = time.time()
- print('ºI¦Ü¥Ø«e¡A®É¶¡¤w¯Ó¶O¡G', int(te - ts),'¬í')
- print('°õ¦æ§¹²¦¡I')
½Æ»s¥N½X |
|