python

"python"

Posted by zwt on November 19, 2020

环境

1
1. jupyter kernel: python -m ipykernel install --user --name py3.6

正则

1
2
3
re.findall(pattern, str): 输出为list
re.match(pattern, str, flags):通过.group来获取匹配结果
re.search(pattern, string, flags):和match类似,同样通过.group来获取匹配结果;区别是match只从字符串开头开始匹配,search会扫描整个字符串并返回第一个匹配

一些例子

车牌号:

1
2
3
([京津沪渝冀豫云辽黑湘皖鲁新苏浙赣鄂桂甘晋蒙陕吉闽贵粤青藏川宁琼使领A-Z]{1}[A-Z]-{0,1}(([0-9]{5}[DF])|([DF]([A-HJ-NP-Z0-9])[0-9]{4})))|([京津沪渝冀豫云辽黑湘皖鲁新苏浙赣鄂桂甘晋蒙陕吉闽贵粤青藏川宁琼使领A-Z]{1}[A-Z]-{0,1}[A-HJ-NP-Z0-9]{4}[A-HJ-NP-Z0-9挂学警港澳]{1})

^[.\s]*[一二三]+[.\s]*$ :匹配整行由"一""二""三"中的字组成的一个或多个字符(可任意组合、重复),首尾允许出现任意个"."或空白字符(字符类[]中的.表示字面量句点,而不是任意字符)

字典

字典按照value排序

1
# Sort the (key, value) pairs by value, largest first.
# NOTE: the result is a list of (key, value) tuples, not a dict.
# Assumes `reply` is a dict defined earlier -- TODO confirm.
reply = sorted(reply.items(), key=lambda item: item[1], reverse=True)

运行超时跳过

1
2
3
4
5
6
7
# Skip a block of code when it runs longer than a time limit.
import time
import eventlet  # import the eventlet module
eventlet.monkey_patch()  # this line is required
# NOTE(review): with False as the second argument the timeout appears to be
# swallowed silently instead of raised -- confirm against the eventlet docs.
with eventlet.Timeout(2,False):  # set the timeout to 2 seconds
   time.sleep(4)
   print('没有跳过这条输出')
print('跳过了输出')

注意

1
2
3
4
1.break 跳出最里层的一个循环
2.continue 跳过本次循环中剩余的语句,直接进入下一次循环
3.连接mysql或其他数据库时,如果密码中含有特殊符号,需要先对密码进行URL编码:urllib.parse.quote('password')
4.multiprocessing.cpu_count()获取cpu核数

时间戳

获取时间

1
2
3
4
import datetime
# Current local date and time as a datetime object.
now_time = datetime.datetime.now()
# Format the current time as a 'YYYY-MM-DD' string.
str_time = datetime.datetime.now().strftime('%Y-%m-%d')
# Parse a 'YYYY-MM-DD HH:MM:SS' string. Note that strptime returns a datetime
# object, not a str, despite the variable name.
# Assumes `string` is defined by the caller -- it is not set in this snippet.
str_time = datetime.datetime.strptime(string,'%Y-%m-%d %H:%M:%S')

比较大小

1
2
3
4
5
6
7
8
9
def _to_epoch_seconds(stamp):
    """Convert 'YYYY-MM-DD HH:MM:SS mmm' text into whole epoch seconds.

    The trailing millisecond field is re-attached with a '.' so that the
    '%f' directive can parse it; the fraction is dropped by the int cast.
    """
    parts = stamp.split(" ")
    normalized = "{}.{}".format(" ".join(parts[:2]), parts[2])
    parsed = time.strptime(normalized, "%Y-%m-%d %H:%M:%S.%f")
    return int(time.mktime(parsed))


# Difference between the two timestamps, in seconds.
s1 = _to_epoch_seconds("2021-07-13 18:49:12 169")
s = _to_epoch_seconds("2021-07-13 18:50:12 169")
print(s - s1)

多线程多进程

pool

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# 一般用法
from multiprocessing import Pool
pool = Pool(processes=10)
result=[]
    for i  in xrange(50000):
    	result.append(pool.apply_async(test, args=(i,)))#维持执行的进程总数为10,当一个进程执行完后添加新进程.       
    pool.join()
for i in result:
        print i.get()  
# 高级用法(依据子进程的返回值来区分是否来结束)
import Queue
def test(p):
    time.sleep(0.001)
    if p==10000:
        return True
    else:
        return False
pool = Pool(processes=10)
q=Queue.Queue()
for i  in xrange(50000):
	q.put(pool.apply_async(test, args=(i,)))#维持执行的进程总数为10,当一个进程执行完后添加新进程.  

相似度计算

scipy

1
2
3
4
5
from scipy import spatial

vec1 = [1, 2, 3, 4]
vec2 = [5, 6, 7, 8]

# scipy reports the cosine *distance*; the similarity is 1 minus that value.
_cosine_distance = spatial.distance.cosine(vec1, vec2)
cos_sim = 1 - _cosine_distance
print(cos_sim)

numpy

1
2
3
4
5
import numpy as np
vec1 = np.array([1, 2, 3, 4])
vec2 = np.array([5, 6, 7, 8])
# Cosine similarity = dot(a, b) / (|a| * |b|).
# The parentheses are essential: `x / n1 * n2` evaluates left to right as
# (x / n1) * n2, which is not a cosine and can exceed 1.
cos_sim = vec1.dot(vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
print(cos_sim)

sklearn

1
2
3
4
5
6
import numpy as np
# The module is `sklearn.metrics` (plural), and an import statement names the
# function without call parentheses.
from sklearn.metrics.pairwise import cosine_similarity
vec1 = np.array([1, 2, 3, 4])
vec2 = np.array([5, 6, 7, 8])
# cosine_similarity works on 2-D inputs, so each vector is reshaped to a
# single row; the result is a 1x1 matrix.
cos_sim = cosine_similarity(vec1.reshape(1, -1), vec2.reshape(1, -1))
print(cos_sim[0][0])

torch

1
2
3
4
5
6
import torch
import torch.nn.functional as F

# Two 1-D float32 tensors to compare.
vec1 = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
vec2 = torch.tensor([5, 6, 7, 8], dtype=torch.float32)

# dim=0 reduces over the only axis, giving a single scalar tensor.
cos_sim = F.cosine_similarity(vec1, vec2, dim=0)
print(cos_sim)

json文件比较

1
2
3
4
5
# Compare two nested dict structures (e.g. parsed JSON) and print the
# differences that DeepDiff reports.
from deepdiff import DeepDiff
a = {"a":0,"b":[{"v":"name","age":100}]}
b = {"a":0.1,"b":[{"v":"name1","age":100}]}
# NOTE(review): ignore_order=True presumably makes list element order
# irrelevant to the comparison -- confirm against the DeepDiff docs.
diff = DeepDiff(a,b,ignore_order=True)
print(diff.items())