将爬取数据存入mysql

百变鹏仔 4个月前 (02-07) #Python

文章标签数据

Python 连接 MySQL 数据库使用 pymysql 库。

import pymysql

# Open a connection to the local MySQL server (database "company").
conn = pymysql.connect(host='127.0.0.1', user='root', password='123456', db='company', charset="utf8")
cur = conn.cursor()

# Placeholder: put the SQL statement to run between the triple quotes.
sql = '''

'''

# execute() returns the number of affected rows.
employee = cur.execute(sql)
conn.commit()

# Release the cursor first, then the connection.
cur.close()
conn.close()

基本操作大概就是这样。
这次爬取的是淘宝商品页面。

过程就略了，直接放代码：

import json
import re

# Patterns for the JSON fragments embedded in the Taobao search page.
# The price pattern captures only the numeric text; the title pattern captures
# the whole quoted JSON string (escaped quotes included) so it can be decoded.
_PRICE_RE = re.compile(r'"view_price":"([\d.]*)"')
_TITLE_RE = re.compile(r'"raw_title":("(?:[^"\\]|\\.)*")')


def _decode_json_string(token):
    """Decode a quoted JSON string token; fall back to the raw inner text."""
    try:
        return json.loads(token)
    except ValueError:
        # Not valid JSON (e.g. a raw control character): strip the quotes only.
        return token[1:-1]


def getHTMLtext(url):
    """Fetch *url* and return the response body as text.

    Returns an empty string when the request fails (connection error,
    timeout or a non-2xx status), so callers can treat "" as "no page".
    """
    # Imported here so the parsing helpers remain importable (and testable)
    # on machines where requests is not installed.
    import requests

    try:
        r = requests.get(url, timeout=100)
        r.raise_for_status()
        # Let requests guess the real encoding from the body; the declared
        # one is not trusted.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        return ""


def getpage(itl, html):
    """Extract ``[price, title]`` pairs from *html* and append them to *itl*.

    Args:
        itl: list that receives one ``[price, title]`` entry per product;
            both values are strings.
        html: page source as returned by ``getHTMLtext``; an empty value is
            ignored.

    Prices and titles are matched independently and paired in order of
    appearance; if the counts differ, the surplus matches are dropped.
    """
    if not html:
        return
    prices = _PRICE_RE.findall(html)
    # Titles are decoded as JSON strings instead of being passed to eval(),
    # which must never see text downloaded from the network.
    titles = [_decode_json_string(token) for token in _TITLE_RE.findall(html)]
    for price, title in zip(prices, titles):
        itl.append([price, title])


def printgoods(itl):
    """Print the table header and store every ``[price, title]`` entry in MySQL.

    Creates the ``coffee`` table if it does not exist yet, then inserts one row
    per entry of *itl*. Each statement reports success or failure on stdout;
    a failed insert does not stop the remaining rows. Returns ``None``.
    """
    # Imported here for the same reason as requests in getHTMLtext.
    import pymysql

    tplt = "{:2}\t{:8}\t{:16}"
    print(tplt.format("序号", "价格", "商品名称"))

    create_sql = '''
        create table if not exists coffee(
        id int(11) not null auto_increment primary key,
        name varchar(255) not null,
        price float not null)DEFAULT CHARSET=utf8;
        '''
    insert_sql = '''
        insert into coffee(name,price)
        values(%s,%s);
        '''

    conn = pymysql.connect(host='127.0.0.1', user='root', password='123456', db='company', charset="utf8")
    try:
        with conn.cursor() as cur:
            try:
                # "if not exists" keeps reruns from failing on an existing table.
                cur.execute(create_sql)
                conn.commit()
                print('成功')
            except pymysql.MySQLError as exc:
                print("错误", exc)

            for price, title in itl:
                try:
                    # Parameterised query: the driver escapes the scraped text.
                    cur.execute(insert_sql, (title, price))
                    # Commit per row so earlier rows survive a later failure.
                    conn.commit()
                    print('成功')
                except pymysql.MySQLError as exc:
                    print("错误", exc)
    finally:
        # Always release the connection, even if something above raised.
        conn.close()


def main(goods="咖啡", depth=2):
    """Scrape *depth* result pages for *goods* and save the items to MySQL.

    Args:
        goods: search keyword.
        depth: number of result pages to fetch; the ``s`` query parameter
            advances by 44 per page.
    """
    start_url = 'https://s.taobao.com/search?q=' + goods
    items = []
    for page in range(depth):
        url = start_url + "&s=" + str(page * 44)
        # A failed download yields "", which getpage() ignores.
        getpage(items, getHTMLtext(url))
    printgoods(items)


if __name__ == "__main__":
    main()

可以看到所需要的数据已经存入数据库了

文章推荐

将爬取数据存入mysql

Python实现字典的key和values的交换

使用Python脚本来获取Cisco设备信息的示例

Python的Django中django-userena组件的简单使用教程

零基础写python爬虫之神器正则表达式

零基础写python爬虫之抓取百度贴吧代码分享