采集项目数据处理详细步骤

Outshine 2019-05-04 | 阅读 3491

第一步:去除新表重复记录

DELETE FROM 表名 WHERE memberid IN (SELECT * FROM (SELECT memberid FROM 表名 GROUP BY memberid HAVING COUNT(memberid) > 1) t1) AND id NOT IN (SELECT * FROM (SELECT MIN(id) FROM 表名 GROUP BY memberid HAVING COUNT(memberid) > 1) t2)

第二步:备注栏标注 update 表名 set remark='new' 同时 update alibaba set remark=null

第三步:把新数据按网址类型插入新建的表: insert into alibaba_new_one(company_name,company_url,memberid,is_cxt,contactor,phone,mobile,fax,address,product,business_scope,business_model,legal_person,reg_no,reg_date,company_type,update_on,remark) SELECT company_name,company_url,memberid,is_cxt,contactor,phone,mobile,fax,address,product,business_scope,business_model,legal_person,reg_no,reg_date,company_type,update_on,remark FROM alibaba_new where company_url not LIKE 'https://www.1688.com%' ORDER BY id DESC

第四步:将原数据按ID递增方式插入新建的表 插入表一 insert into alibaba_new_one(company_name,company_url,memberid,is_cxt,contactor,phone,mobile,fax,address,product,business_scope,business_model,legal_person,reg_no,reg_date,company_type,update_on,remark) SELECT company_name,company_url,memberid,is_cxt,contactor,phone,mobile,fax,address,product,business_scope,business_model,legal_person,reg_no,reg_date,company_type,update_on,remark FROM alibaba_update ORDER BY id ASC

多表去重复

delete from alibaba_tmp where memberid in (select memberid from alibaba) OR memberid in (select memberid from alibaba_two) OR memberid in (select memberid from alibaba_nophone)

数据表更新

update alibaba inner join alibaba_tmp on alibaba.memberid=alibaba_tmp.memberid set alibaba.is_cxt=alibaba_tmp.is_cxt;

update alibaba_two inner join alibaba_tmp on alibaba_two.memberid=alibaba_tmp.memberid set alibaba_two.is_cxt=alibaba_tmp.is_cxt;

update alibaba_two_copy inner join alibaba_tmp on alibaba_two_copy.memberid=alibaba_tmp.memberid set alibaba_two_copy.company_url=alibaba_tmp.company_url;