[1] SQL Tutorial.
[2] dt='20180709',该字段是数据库更新时间,若不选该字段,结果将返回过去时间的数据,尤其注意多表关联操作时为每一张表指定更新时间.
[3] 若数据量超限,则应考虑分块查询并下载,如定义抓取时间WHERE bbd_dotime<=to_date('2016-12-31','yyyy-mm-dd').
[4] Matlab生成包含中文的文本文件.
1 2 3 4 | str = '長和'; fod = fopen('test.txt','wt', 'n', 'GBK'); fprintf(fod,'%s',str); fclose(fod); |
[5] 查询语言返回某表的全部字段,限定100条,示例例子.
1 2 3 4 | SELECT * FROM table_name WHERE dt='updateday' LIMIT 100; |
[6] 部分字段内容为字符型,需将特定编码转换为字符再加入查询语言,如企业类型编码.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 | aa=[9300 1151 1151]; staa=[]; for ii=1:numel(aa) a=aa(ii); if ii<numel(aa) staa=[staa, '''', num2str(a), ''', ']; else staa=[staa, '''', num2str(a), '''']; end end % Output >> staa staa = '9300', '1151', '1151' >> |
[7] 部分数值型字段潜在混有字符型数值(如空格情形),M代码规避这类异常,如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 | try % 'value' is the column of numeric value value=cell2mat(value); catch % check the class of every cell indclass=cellfun(@class, value, 'UniformOutput',false); % some cell may be the 'char' data type indchar=cellfun(@strcmp, indclass, repmat({'char'}, size(indclass))); ind=find(indchar==1); % delete non-numberic value value(ind)=[]; % convert to numberic matrix value=cell2mat(value); end |
[8] 动态调整单行输出字符串数量,如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | str1=[]; str2=[]; for ii=1:numel(input) % Construct Execute Sentence if ii==numel(input) str1=[str1, '%s\n']; str2=[str2, 'strtrim(num2str(input{', num2str(ii), '}))']; else str1=[str1, '%s,']; str2=[str2, 'strtrim(num2str(input{', num2str(ii), '})),']; end end cmd='fprintf(fod, '; cmd=[cmd, '''', str1, ''', ', str2, ');']; eval(cmd); |
[9] 重复SQL代码生成器.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 | or operate_scope like '%音乐%' or operate_scope like '%明星包装%' or operate_scope like '%歌手%' or operate_scope like '%影视%' or operate_scope like '%动画%' or operate_scope like '%后期制作%' or operate_scope like '%艺人%' or operate_scope like '%影片%' or operate_scope like '%电影%' or operate_scope like '%专题%' or operate_scope like '%专栏%' or operate_scope like '%综艺%' or operate_scope like '%广播剧%' or operate_scope like '%电视剧%' or operate_scope like '%企业形象策划%' or operate_scope like '%摄制电影%' or operate_scope like '%影视项目的投资管理%' or operate_scope like '%图书%' or operate_scope like '%期刊%' |
[10] 示例代码.
1 2 3 4 5 6 7 8 9 | % 注册类型排除“个体” substring(company_companytype,1,2) !='93' % 规定时间范围 (esdate between date('2017-01-01') and date('2017-03-31') ) % 规定数据版本 dt='20190326' % 字段(字符型)相似查询,注意可能会收录混杂非需求数据 salary like '%null%' salary not like '%null%' |
1 2 3 4 5 6 7 8 | % 按照年月有序输出 select year(pubdate) as year, month(pubdate) as month, count(main) as num from dw.shgy_zhaobjg where dt='20190701' and (pubdate between date('2019-01-01') and date('2019-06-30') ) and title like '%上海%' group by year(pubdate), month(pubdate) order by year,month |
1 2 3 4 5 6 7 8 | % 按照年月省份等字段有序输出 SELECT year(sentence_date) as year ,month(sentence_date) as month , province, case_type, count(distinct bbd_xgxx_id) as num FROM dw.legal_adjudicative_documents WHERE dt='20190708' and case_type like '%刑事%' and (sentence_date between date('2016-01-01') and date('2019-07-08') ) group by year(sentence_date) ,month(sentence_date), province, case_type order by year, month, province, case_type |
1 2 | % 划定查询范围 company_companytype in ('5800','5600','5200'....) |
1 2 | % 模糊查询字段 regexp_like(operate_scope, '(锅炉|内燃机|轮机)') |
1 2 3 4 5 6 7 8 9 10 11 12 | --关联两表 select a.company_county, b.company_scale, a.address, a.operate_scope, a.company_industry, company_gis_lat, company_gis_lon from qyxx_basic a join (select bbd_qyxx_id, company_scale from dw.enterprisescaleout where dt ='20190802' )b on a.bbd_qyxx_id=b.bbd_qyxx_id and a.dt='20191018' and a.company_county like '11%' and a.company_type not like '%个体工商户%' and a.company_type not like '%合作社%' and regexp_like(a.enterprise_status, '存续|开业|在册|在营') and (esdate between date('1900-01-01') and date('2015-12-31') ) |
1 2 3 4 5 6 7 8 9 10 11 12 13 | --两张查询表格按照键值合并,可能有子表数据不包含在最终结果 SELECT * FROM ( VALUES (2, 4, 20) ) AS table_1 (key_A, key_B, y1) LEFT JOIN ( VALUES (1, 3, 100), (2, 4, 200) ) AS table_2 (key_A, key_B, y2) USING (key_A, key_B) |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 | --重要参考 select c2.year2 as year0, c2.month2 as month0, c2.industry2 as industry0, c2.province2 as province0, c2.count1_cpws2 as chengtounum, c1.count1_cpws1 as quanbunum from (select b.year as year1, b.month as month1, a.company_industry as industry1, a.company_province as province1, count(distinct b.bbd_xgxx_id) as count1_cpws1 from (select company_industry, company_province, bbd_qyxx_id from dw.qyxx_basic where dt='20191106' and not regexp_like(company_type, '个体|农民|合作社') and not regexp_like(enterprise_status, '注销|吊销') ) a join (select bbd_qyxx_id, bbd_xgxx_id, year(sentence_date) as year , month(sentence_date) as month from dw.legal_adjudicative_documents where dt='20191106' and (to_char(sentence_date, 'yyyy-mm-dd')>='2017-01-01' ) and bbd_qyxx_id is not null )b on a.bbd_qyxx_id=b.bbd_qyxx_id group by b.year, b.month, a.company_province, a.company_industry ) c1 right join (select b2.year as year2, b2.month as month2, a2.company_industry as industry2, a2.company_province as province2, count(distinct b2.bbd_xgxx_id) as count1_cpws2 from (select company_industry, company_province, bbd_qyxx_id from dw.qyxx_basic where dt='20191106' and not regexp_like(company_type, '个体|农民|合作社') and not regexp_like(enterprise_status, '注销|吊销') and regexp_like(company_name, '建设投资|建设开发|投资开发|投资控股|投资发展|投资集团|国有资产运营|国有资本经营') and company_companytype in ('1110', '1140', '1213', '1223', '3100', '3200', '3300', '3400', '3500') ) a2 join (select bbd_qyxx_id, bbd_xgxx_id, year(sentence_date) as year , month(sentence_date) as month from dw.legal_adjudicative_documents where dt='20191106' and (to_char(sentence_date, 'yyyy-mm-dd')>='2017-01-01' ) and bbd_qyxx_id is not null )b2 on a2.bbd_qyxx_id=b2.bbd_qyxx_id group by b2.year, b2.month, a2.company_province, a2.company_industry ) c2 on c1.year1=c2.year2 and c1.month1=c2.month2 and c1.industry1=c2.industry2 and c1.province1=c2.province2 order by c2.year2,c2.month2,c2.industry2,c2.province2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 | --重点体会CASE用法,注意前SELECT后GROUP BY SELECT (case when a.company_county like '1101%' then '北京市' when a.company_county like '1201%' then '天津市' when a.company_county like '1301%' then '石家庄市' when a.company_county like '1501%' then '呼和浩特市' when a.company_county like '1401%' then '太原市' when a.company_county like '2101%' then '沈阳市' when a.company_county like '2102%' then '大连市' when a.company_county like '2201%' then '长春市' when a.company_county like '2301%' then '哈尔滨市' when a.company_county like '3101%' then '上海市' when a.company_county like '3201%' then '南京市' when a.company_county like '3301%' then '杭州市' when a.company_county like '3302%' then '宁波市' when a.company_county like '3401%' then '合肥市' when a.company_county like '3501%' then '福州市' when a.company_county like '3502%' then '厦门市' when a.company_county like '3601%' then '南昌市' when a.company_county like '3701%' then '济南市' when a.company_county like '3702%' then '青岛市' when a.company_county like '4101%' then '郑州市' when a.company_county like '4201%' then '武汉市' when a.company_county like '4301%' then '长沙市' when a.company_county like '4401%' then '广州市' when a.company_county like '4403%' then '深圳市' when a.company_county like '4501%' then '南宁市' when a.company_county like '4601%' then '海口市' when a.company_county like '500%' then '重庆市' when a.company_county like '5101%' then '成都市' when a.company_county like '5201%' then '贵阳市' when a.company_county like '5301%' then '昆明市' when a.company_county like '6101%' then '西安市' when a.company_county like '6201%' then '兰州市' when a.company_county like '6301%' then '西宁市' when a.company_county like '6401%' then '银川市' when a.company_county like '6501%' then '乌鲁木齐市' when a.company_county like '5401%' then '拉萨市' else '其他' end) as city, count(b.company_name) from dw.qyxx_basic a right join dw.recruit b on a.bbd_qyxx_id=b.bbd_qyxx_id where b.pubdate >=to_date('2017-01-01','yyyy-mm-dd') and b.pubdate <=to_date('2017-03-31','yyyy-mm-dd') and b.dt='20180723' and a.dt='20180723' group by (case when a.company_county like '1101%' then '北京市' when a.company_county like '1201%' then '天津市' when a.company_county like '1301%' then '石家庄市' when a.company_county like '1501%' then '呼和浩特市' when a.company_county like '1401%' then '太原市' when a.company_county like '2101%' then '沈阳市' when a.company_county like '2102%' then '大连市' when a.company_county like '2201%' then '长春市' when a.company_county like '2301%' then '哈尔滨市' when a.company_county like '3101%' then '上海市' when a.company_county like '3201%' then '南京市' when a.company_county like '3301%' then '杭州市' when a.company_county like '3302%' then '宁波市' when a.company_county like '3401%' then '合肥市' when a.company_county like '3501%' then '福州市' when a.company_county like '3502%' then '厦门市' when a.company_county like '3601%' then '南昌市' when a.company_county like '3701%' then '济南市' when a.company_county like '3702%' then '青岛市' when a.company_county like '4101%' then '郑州市' when a.company_county like '4201%' then '武汉市' when a.company_county like '4301%' then '长沙市' when a.company_county like '4401%' then '广州市' when a.company_county like '4403%' then '深圳市' when a.company_county like '4501%' then '南宁市' when a.company_county like '4601%' then '海口市' when a.company_county like '500%' then '重庆市' when a.company_county like '5101%' then '成都市' when a.company_county like '5201%' then '贵阳市' when a.company_county like '5301%' then '昆明市' when a.company_county like '6101%' then '西安市' when a.company_county like '6201%' then '兰州市' when a.company_county like '6301%' then '西宁市' when a.company_county like '6401%' then '银川市' when a.company_county like '6501%' then '乌鲁木齐市' when a.company_county like '5401%' then '拉萨市' else '其他' end) |
1 2 3 4 5 | % 字符规范化 dat=cellfun(@(x) char(x), dat, 'UniformOutput', false); dat=cellfun(@(x) strtrim(x), dat, 'UniformOutput', false); dat(:, 1)=cellfun(@(x) [x, '*****'], dat(:, 1), 'UniformOutput', false); dat(:, 1)=cellfun(@(x) x(1:3), dat(:, 1), 'UniformOutput', false); |
1 2 3 4 5 6 | % 解析最底层Cell newinp=tempinp; while any(cellfun(@iscell, newinp)) newinp= [newinp{cellfun(@iscell, newinp)} newinp(~cellfun(@iscell, newinp))]; end newinp=newinp'; |
1 2 | --显示dt列表 show partitions from qyxx_wanfang_zhuanli |
1 2 3 4 5 6 7 8 9 | --存续企业示例 SELECT count(bbd_qyxx_id) FROM dw.qyxx_basic WHERE dt='20200720' and not regexp_like(company_type , '个体|农民专业合作|合作社') and operate_scope like '%水泥%' and not regexp_like(operate_scope, '石膏|预制构件|轻质建筑材料|石棉|玻璃纤维|木材|玻璃钢') and (esdate between date('1900-01-01') and date('2014-12-31') ) and ((company_enterprise_status like '存续') or (cancel_date between date('2014-01-01') and date('2014-12-31')) ) |
1 2 3 4 5 6 7 8 | --存续企业示例之二 select count(distinct bbd_qyxx_id) as company_num from (select esdate,cancel_date,revoke_date,bbd_qyxx_id from dw.qyxx_basic where dt = '20210505' and esdate<= date('2020-12-31') and (cancel_date> date('2020-12-31') or cancel_date is null) and (revoke_date> date('2020-12-31') or revoke_date is null) |
1 2 3 4 5 6 7 | --企业筛选条件 --1、剔除个体户和农专社 and company_companytype not in ('9100','9200','9300','9310','9320') --2、剔除个体工商户、外资企业、港澳台企业等 and company_companytype not in ('9100','9200','9300','9310','9320','5000','5500','5100','5110','5120','5130','5140','5150','5160','5190','5200','5210','5220','5230','5240','5290','5300','5310','5390','5400','5410','5420','5430','5490','5800','5810','5820','5830','5840','5890','6000','6100','6110','6120','6130','6140','6150','6160','6170','6190','6200','6210','6220','6230','6240','6250','6260','6270','6290','6300','6310','6320','6390','6400','6410','6420','6430','6490','6500','6510','6600','6610','6700','6710','6800','6810','6820','6830','6840','6890','7000','7100','7110','7120','7130','7190','7200','7300','7310','7390','9925','9926','9927','9929','9931','9936','9937','9941','1120','1121','1122','1123','1211','1221','2120','2121','2122','2123','2211','2221') --3、保留存续企业 and company_enterprise_status in ('正常','存续','开业','个体转企业','待迁入','迁入') |
No comments:
Post a Comment