Simon Shi的小站

Artificial intelligence, machine learning, reinforcement learning, large models, autonomous driving


Transform

[TOC]

angle2matrix

import numpy as np
from math import cos, sin

def angle2matrix(angles):
    ''' get rotation matrix from three rotation angles (degrees). right-handed.
    Args:
        angles: [3,]. x, y, z angles
            x: pitch. positive for looking down.
            y: yaw. positive for looking left.
            z: roll. positive for tilting head right.
    Returns:
        R: [3, 3]. rotation matrix.
    '''
    x, y, z = np.deg2rad(angles[0]), np.deg2rad(angles[1]), np.deg2rad(angles[2])
    # x
    Rx = np.array([[1,      0,       0],
                   [0, cos(x), -sin(x)],
                   [0, sin(x),  cos(x)]])
    # y
    Ry = np.array([[ cos(y), 0, sin(y)],
                   [      0, 1,      0],
                   [-sin(y), 0, cos(y)]])
    # z
    Rz = np.array([[cos(z), -sin(z), 0],
                   [sin(z),  cos(z), 0],
                   [     0,       0, 1]])

    R = Rz.dot(Ry.dot(Rx))
    return R.astype(np.float32)

angle2matrix_3ddfa

def angle2matrix_3ddfa(angles):
    ''' get rotation matrix from three rotation angles (radians). The same as in 3DDFA.
    Args:
        angles: [3,]. x, y, z angles
            x: pitch.
            y: yaw.
            z: roll.
    Returns:
        R: [3, 3]. rotation matrix.
    '''
    # x, y, z = np.deg2rad(angles[0]), np.deg2rad(angles[1]), np.deg2rad(angles[2])
    x, y, z = angles[0], angles[1], angles[2]

    # x
    Rx = np.array([[1,       0,      0],
                   [0,  cos(x), sin(x)],
                   [0, -sin(x), cos(x)]])
    # y
    Ry = np.array([[cos(y), 0, -sin(y)],
                   [     0, 1,       0],
                   [sin(y), 0,  cos(y)]])
    # z
    Rz = np.array([[ cos(z), sin(z), 0],
                   [-sin(z), cos(z), 0],
                   [      0,      0, 1]])
    R = Rx.dot(Ry).dot(Rz)
    return R.astype(np.float32)

1. transform (transform, project, camera)

3d-3d transform: transform an object in world space

rotate

def rotate(vertices, angles):
    ''' rotate vertices.
    X_new = R.dot(X). X: 3 x 1
    Args:
        vertices: [nver, 3].
        angles: degree angles [rx, ry, rz]
            rx: pitch. positive for looking down
            ry: yaw. positive for looking left
            rz: roll. positive for tilting head right
    Returns:
        rotated vertices: [nver, 3]
    '''
    R = angle2matrix(angles)
    rotated_vertices = vertices.dot(R.T)

    return rotated_vertices

def similarity_transform(vertices, s, R, t3d):
    ''' similarity transform. dof = 7.
    3D: s*R.dot(X) + t
    Homo: M = [[sR, t], [0^T, 1]]. M.dot(X)
    Args: (float32)
        vertices: [nver, 3].
        s: [1,]. scale factor.
        R: [3,3]. rotation matrix.
        t3d: [3,]. 3d translation vector.
    Returns:
        transformed vertices: [nver, 3]
    '''
    t3d = np.squeeze(np.array(t3d, dtype=np.float32))
    transformed_vertices = s * vertices.dot(R.T) + t3d[np.newaxis, :]

    return transformed_vertices

normalize

## -------------- Camera. from world space to camera space
# Ref: https://cs184.eecs.berkeley.edu/lecture/transforms-2
def normalize(x):
    epsilon = 1e-12
    norm = np.sqrt(np.sum(x**2, axis=0))
    norm = np.maximum(norm, epsilon)
    return x / norm

lookat camera

def lookat_camera(vertices, eye, at=None, up=None):
    """ 'look at' transformation: from world space to camera space
    standard camera space:
        camera located at the origin.
        looking down the negative z-axis.
        vertical vector is the y-axis.
    Xcam = R(X - C)
    Homo: [[R, -RC], [0, 1]]
    Args:
        vertices: [nver, 3]
        eye: [3,] the XYZ world space position of the camera.
        at: [3,] a position along the center of the camera's gaze.
        up: [3,] up direction
    Returns:
        transformed_vertices: [nver, 3]
    """
    if at is None:
        at = np.array([0, 0, 0], np.float32)
    if up is None:
        up = np.array([0, 1, 0], np.float32)

    eye = np.array(eye).astype(np.float32)
    at = np.array(at).astype(np.float32)
    z_axis = -normalize(at - eye)             # look forward
    x_axis = normalize(np.cross(up, z_axis))  # look right
    y_axis = np.cross(z_axis, x_axis)         # look up

    R = np.stack((x_axis, y_axis, z_axis))    # 3 x 3
    transformed_vertices = vertices - eye     # translation
    transformed_vertices = transformed_vertices.dot(R.T)  # rotation
    return transformed_vertices

orthographic_project (orthographic projection)

## --------- 3d-2d project. from camera space to image plane
# generally, the image plane only keeps the x,y channels; here the z channel is kept for computing the z-buffer.
def orthographic_project(vertices):
    ''' scaled orthographic projection (just drop z)
    assumes: variation in depth over the object is small relative to the mean distance from camera to object
    x -> x*f/z, y -> y*f/z, z -> f.
    for points i, j: zi ~= zj, so just drop z
    ** often used for faces
    Homo: P = [[1,0,0,0], [0,1,0,0], [0,0,1,0]]
    Args:
        vertices: [nver, 3]
    Returns:
        projected_vertices: [nver, 3]
    '''
    return vertices.copy()

perspective_project (perspective projection)

def perspective_project(vertices, fovy, aspect_ratio=1., near=0.1, far=1000.):
    ''' perspective projection.
    Args:
        vertices: [nver, 3]
        fovy: vertical angular field of view. degrees.
        aspect_ratio: width / height of the field of view
        near: depth of the near clipping plane
        far: depth of the far clipping plane
    Returns:
        projected_vertices: [nver, 3]
    '''
    fovy = np.deg2rad(fovy)
    top = near*np.tan(fovy)  # note: the standard OpenGL frustum uses tan(fovy/2)
    bottom = -top
    right = top*aspect_ratio
    left = -right

    #-- homo
    P = np.array([[near/right, 0, 0, 0],
                  [0, near/top, 0, 0],
                  [0, 0, -(far+near)/(far-near), -2*far*near/(far-near)],
                  [0, 0, -1, 0]])
    vertices_homo = np.hstack((vertices, np.ones((vertices.shape[0], 1))))  # [nver, 4]
    projected_vertices = vertices_homo.dot(P.T)
    projected_vertices = projected_vertices/projected_vertices[:, 3:]
    projected_vertices = projected_vertices[:, :3]
    projected_vertices[:, 2] = -projected_vertices[:, 2]

    #-- non homo. only fovy
    # projected_vertices = vertices.copy()
    # projected_vertices[:,0] = -(near/right)*vertices[:,0]/vertices[:,2]
    # projected_vertices[:,1] = -(near/top)*vertices[:,1]/vertices[:,2]
    return projected_vertices

to_image

def to_image(vertices, h, w, is_perspective=False):
    ''' change vertices to the image coordinate system
    3d system: XYZ, center (0, 0, 0)
    2d image: x(u), y(v). center (w/2, h/2), flipped y-axis.
    Args:
        vertices: [nver, 3]
        h: height of the rendering
        w: width of the rendering
    Returns:
        projected_vertices: [nver, 3]
    '''
    image_vertices = vertices.copy()
    if is_perspective:
        # if perspective, the projected vertices are normalized to [-1, 1], so scale to the image size first.
        image_vertices[:, 0] = image_vertices[:, 0]*w/2
        image_vertices[:, 1] = image_vertices[:, 1]*h/2
    # move to the center of the image
    image_vertices[:, 0] = image_vertices[:, 0] + w/2
    image_vertices[:, 1] = image_vertices[:, 1] + h/2
    # flip vertices along the y-axis.
    image_vertices[:, 1] = h - image_vertices[:, 1] - 1
    return image_vertices

estimate_affine_matrix_3d23d

#### ------------------------------------------- 2. estimate transform matrix from correspondences.
def estimate_affine_matrix_3d23d(X, Y):
    ''' Using a least-squares solution
    Args:
        X: [n, 3]. 3d points (fixed)
        Y: [n, 3]. corresponding 3d points (moving). Y = PX
    Returns:
        P_Affine: (3, 4). Affine camera matrix (the third row is [0, 0, 0, 1]).
    '''
    X_homo = np.hstack((X, np.ones([X.shape[0], 1])))  # n x 4
    P = np.linalg.lstsq(X_homo, Y, rcond=None)[0].T  # Affine matrix. 3 x 4
    return P

estimate_affine_matrix_3d22d

def estimate_affine_matrix_3d22d(X, x):
    ''' Using the Gold Standard Algorithm for estimating an affine camera
        matrix P from world to image correspondences.
        See Alg. 7.2 in MVGCV.
        Code Ref: https://github.com/patrikhuber/eos/blob/master/include/eos/fitting/affine_camera_estimation.hpp
        x_homo = X_homo.dot(P_Affine)
    Args:
        X: [n, 3]. corresponding 3d points (fixed)
        x: [n, 2]. n >= 4. 2d points (moving). x = PX
    Returns:
        P_Affine: [3, 4]. Affine camera matrix
    '''
    X = X.T; x = x.T
    assert(x.shape[1] == X.shape[1])
    n = x.shape[1]
    assert(n >= 4)

    #--- 1. normalization
    # 2d points
    mean = np.mean(x, 1)  # (2,)
    x = x - np.tile(mean[:, np.newaxis], [1, n])
    average_norm = np.mean(np.sqrt(np.sum(x**2, 0)))
    scale = np.sqrt(2) / average_norm
    x = scale * x

    T = np.zeros((3, 3), dtype=np.float32)
    T[0, 0] = T[1, 1] = scale
    T[:2, 2] = -mean*scale
    T[2, 2] = 1

    # 3d points
    X_homo = np.vstack((X, np.ones((1, n))))
    mean = np.mean(X, 1)  # (3,)
    X = X - np.tile(mean[:, np.newaxis], [1, n])
    average_norm = np.mean(np.sqrt(np.sum(X**2, 0)))
    scale = np.sqrt(3) / average_norm
    X = scale * X

    U = np.zeros((4, 4), dtype=np.float32)
    U[0, 0] = U[1, 1] = U[2, 2] = scale
    U[:3, 3] = -mean*scale
    U[3, 3] = 1

    # --- 2. equations
    A = np.zeros((n*2, 8), dtype=np.float32)
    X_homo = np.vstack((X, np.ones((1, n)))).T
    A[:n, :4] = X_homo
    A[n:, 4:] = X_homo
    b = np.reshape(x, [-1, 1])

    # --- 3. solution
    p_8 = np.linalg.pinv(A).dot(b)
    P = np.zeros((3, 4), dtype=np.float32)
    P[0, :] = p_8[:4, 0]
    P[1, :] = p_8[4:, 0]
    P[-1, -1] = 1

    # --- 4. denormalization
    P_Affine = np.linalg.inv(T).dot(P.dot(U))
    return P_Affine

P2sRt

def P2sRt(P):
    ''' decompose the affine camera matrix P
    Args:
        P: (3, 4). Affine Camera Matrix.
    Returns:
        s: scale factor.
        R: (3, 3). rotation matrix.
        t: (3,). translation.
    '''
    t = P[:, 3]
    R1 = P[0:1, :3]
    R2 = P[1:2, :3]
    s = (np.linalg.norm(R1) + np.linalg.norm(R2))/2.0
    r1 = R1/np.linalg.norm(R1)
    r2 = R2/np.linalg.norm(R2)
    r3 = np.cross(r1, r2)

    R = np.concatenate((r1, r2, r3), 0)
    return s, R, t
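
A quick sanity check: build P from a known scale, rotation, and translation, then recover them with P2sRt. This is a minimal sketch assuming the functions above are in scope; s0, R0, t0 are illustrative names.

s0 = 2.0
R0 = angle2matrix([10, 20, 30])              # known rotation (degrees)
t0 = np.array([1., 2., 3.], np.float32)      # known translation
P = np.hstack((s0 * R0, t0[:, np.newaxis]))  # (3, 4) affine camera matrix

s, R, t = P2sRt(P)
print(np.isclose(s, s0), np.allclose(R, R0, atol=1e-5), np.allclose(t, t0))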

isRotationMatrix

# Ref: https://www.learnopencv.com/rotation-matrix-to-euler-angles/
def isRotationMatrix(R):
    ''' checks whether a matrix is a valid rotation matrix (i.e., whether R^T R = I)
    '''
    Rt = np.transpose(R)
    shouldBeIdentity = np.dot(Rt, R)
    I = np.identity(3, dtype=R.dtype)
    n = np.linalg.norm(I - shouldBeIdentity)
    return n < 1e-6

matrix2angle

import math

def matrix2angle(R):
    ''' get three Euler angles from a rotation matrix
    Args:
        R: (3,3). rotation matrix
    Returns:
        x: pitch
        y: yaw
        z: roll
    '''
    assert(isRotationMatrix(R))
    sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0])

    singular = sy < 1e-6

    if not singular:
        x = math.atan2(R[2,1], R[2,2])
        y = math.atan2(-R[2,0], sy)
        z = math.atan2(R[1,0], R[0,0])
    else:
        x = math.atan2(-R[1,2], R[1,1])
        y = math.atan2(-R[2,0], sy)
        z = 0

    # rx, ry, rz = np.rad2deg(x), np.rad2deg(y), np.rad2deg(z)
    rx, ry, rz = x*180/np.pi, y*180/np.pi, z*180/np.pi
    return rx, ry, rz
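
As a round-trip check, angles converted by angle2matrix and decomposed by matrix2angle should come back (approximately) unchanged away from the singular configuration; a minimal sketch assuming the functions above are in scope:

angles = [10., 20., 30.]    # pitch, yaw, roll in degrees
R = angle2matrix(angles)
print(isRotationMatrix(R))  # True
print(matrix2angle(R))      # approximately (10, 20, 30)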

[TOC]

Algorithm taxonomy

Graphics algorithms

S, R, T

  • Scale
  • Rotation
  • Translation
  • Alignment (the MTCNN align algorithm)
  • Affine transform (CV-transformations.md)
  • Linear interpolation

CG Algorithm (Algorithm-CG.md)

  • LBS algorithm (CV-3D-BuildModel-CMPL.md)

Point cloud registration algorithms

CV-3D-Base.md

Statistical machine learning algorithms

  • k-means

Dimensionality reduction

  • PCA

Dimensionality reduction foundations

  • Singular value decomposition (SVD)
  • Eigenvalue decomposition (EVD)

Game tree algorithms

  • minimax
  • pn-search
  • alpha-beta pruning

Reinforcement learning algorithms

  • MCTS
  • UCT
  • CFR

[TOC]

OpenCV API (Python)

API Tutorials

imgproc module: image processing (OpenCV 2.3.2 documentation)

  • Image smoothing (blurring): blur / GaussianBlur / medianBlur / bilateralFilter

  • Erosion and dilation

  • Morphological transformations

1. Image I/O

imread

imwrite

imshow()

resize

The second argument of cv2.resize (dsize) is (W, H):

cv2.resize(array, (W, H))

copyMakeBorder

The 2nd through 5th arguments of cv2.copyMakeBorder are top, bottom, left, right: H first, then W.

In OpenCV the origin is the top-left corner, with x along the W direction and y along the H direction.

def copyMakeBorder(src, top, bottom, left, right, borderType, dst=None, value=None): # real signature unknown; restored from __doc__

**Pads the borders of src, enlarging the image,** then fills the new border automatically using the chosen extrapolation mode. Internally this calls cv::borderInterpolate. Its main use is handling borders: for example, in mean or median filtering, copyMakeBorder slightly enlarges the source image so that the border pixels can be processed.

CV demo (crop and pad)

Resizing and padding images with opencv-python.
Before an image is fed into a neural network it needs some preprocessing. Suppose the network input is 256x256 and a 224x224 random crop is then taken.

We need the following steps:

Read the original image

image = cv2.imread("img.jpg")

Crop the region of interest

region = image[y1:y2, x1:x2]

Find the long and short sides, resize the long side to 224, and resize the short side keeping the aspect ratio

w, h = x2 - x1, y2 - y1  # h, w = image.shape
m = max(w, h)
ratio = 224.0 / m
new_w, new_h = int(ratio * w), int(ratio * h)
assert new_w > 0 and new_h > 0
resized = cv2.resize(region, (new_w, new_h))

Pad the image to 256x256

W, H = 256, 256
top = (H - new_h) // 2
bottom = (H - new_h) // 2
if top + bottom + new_h < H:
    bottom += 1

left = (W - new_w) // 2
right = (W - new_w) // 2
if left + right + new_w < W:
    right += 1

white = (255, 255, 255)  # stand-in for the original snippet's self.white
pad_image = cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=white)

After the image goes into the network, random cropping during training cuts part of it away, which is exactly the augmentation we want.
At test time a center crop is taken, which captures the whole padded image, again as intended.
Note the conventions of image.shape, cv2.resize and cv2.copyMakeBorder:

image.shape returns (H, W, C)

3. Morphological transformations

  • Morphological transformations are simple operations based on the shape of objects in an image, usually applied to binarized images.

1. Erosion

The idea of erosion is like soil erosion: it eats away the boundary of the foreground (white) object. The kernel slides over the image; a pixel of the original image (1 or 0) is kept as 1 only if all pixels under the kernel are 1, otherwise it is eroded (set to zero).

The net effect is that all pixels near the foreground boundary are discarded; the larger the kernel, the more is discarded, so the white region shrinks. This is useful for removing small white noise (as seen in the color-space chapter), separating two connected objects, and so on.

def pltShow(img):
    dst = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(dst)

def testErosion():
    img = cv2.imread('test.png', 0)
    kernel = np.ones((5, 5), np.uint8)
    erosion = cv2.erode(img, kernel, iterations=1)
    plt.subplot(121), pltShow(img), plt.title("Origin")
    plt.subplot(122), pltShow(erosion), plt.title("Erosion")
    plt.show()

2. Dilation

Dilation is the opposite of erosion: a pixel becomes "1" if at least one pixel under the kernel is "1". It therefore enlarges the white (foreground) region of the image.

For denoising, the usual procedure is erosion followed by dilation: erosion removes the white noise but also shrinks the object, so dilation restores it; since the noise disappeared during erosion, it does not come back. Example:

img_3 = cv2.imread(img_path, mode)
dilation = cv2.dilate(img_3, kernel, iterations=1)

3. Opening

Opening is the erode-then-dilate operation described in section 2; think of it as syntactic sugar (removes specks outside the object).

def testOpen():
    img = cv2.imread("test2.png", 0)
    kernel = np.ones((5, 5), np.uint8)
    erosion = cv2.erode(img, kernel, iterations=1)
    dilateAfterErosion = cv2.dilate(erosion, kernel, iterations=1)
    opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
    plt.subplot(131), pltShow(img), plt.title("Origin")
    plt.subplot(132), pltShow(dilateAfterErosion), plt.title("DilateAfterErosion")
    plt.subplot(133), pltShow(opening), plt.title("Opening")
    plt.show()

4. Closing

As the name suggests, closing is the opposite sugar of opening: dilate first, then erode. Its effect is easy to picture: it fills in the specks inside an object.

def testClose():
    img = cv2.imread("test3.png", 0)
    kernel = np.ones((5, 5), np.uint8)
    dilate = cv2.dilate(img, kernel, iterations=1)
    erosionAfterDilate = cv2.erode(dilate, kernel, iterations=1)
    closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
    plt.subplot(131), pltShow(img), plt.title("Origin")
    plt.subplot(132), pltShow(erosionAfterDilate), plt.title("ErosionAfterDilate")
    plt.subplot(133), pltShow(closing), plt.title("Closing")
    plt.show()

5. Morphological gradient

The foreground is kept where the gradient changes and removed elsewhere; used for edge detection.

def testGradient():
    img = cv2.imread('test.png', 0)
    kernel = np.ones((2, 2), np.uint8)
    gradient = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, kernel)
    plt.subplot(121), pltShow(img), plt.title("Origin")
    plt.subplot(122), pltShow(gradient), plt.title("Gradient")
    plt.show()

6. Top hat

Top hat = original - opening

Opening removes the specks outside the object, so original minus opening yields exactly those removed specks.

API: morphologyEx(img, MORPH_TOPHAT, kernel)

import cv2
import numpy as np
cv2.namedWindow("img", cv2.WINDOW_NORMAL)
cv2.resizeWindow("img", 640, 480)
img = cv2.imread("i.png")
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
new_img = cv2.morphologyEx(img, cv2.MORPH_TOPHAT, kernel, iterations=1)
cv2.imshow("img", np.hstack((img, new_img)))
cv2.waitKey(0)
cv2.destroyAllWindows()

7. Black hat

Black hat = original - closing

Closing removes the noise points inside the object, so original minus closing yields exactly those interior noise points.

API: morphologyEx(img, MORPH_BLACKHAT, kernel)

import cv2
import numpy as np
cv2.namedWindow("img", cv2.WINDOW_NORMAL)
cv2.resizeWindow("img", 640, 480)
img = cv2.imread("is.png")
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
# black hat operation
new_img = cv2.morphologyEx(img, cv2.MORPH_BLACKHAT, kernel, iterations=1)
cv2.imshow("img", np.hstack((img, new_img)))
cv2.waitKey(0)
cv2.destroyAllWindows()

8. Custom kernels

4. Image processing

Image pyramids

Image thresholding

5. Affine transforms

  • Rotation (linear)

  • Translation (vector addition)

  • Scaling (linear transform)

getAffineTransform

warp_mat = getAffineTransform( srcTri, dstTri );
warpAffine( src, warp_dst, warp_mat, warp_dst.size() );

rotation

rot_mat = getRotationMatrix2D( center, angle, scale );
warpAffine( warp_dst, warp_rotate_dst, rot_mat, warp_dst.size() );

Contours

rectangle & boundingRect

Bounding Rectangle

Then draw the rectangle with cv2.rectangle(img, (x,y), (x+w,y+h), (0,255,0), 2).

Arguments:
  1st: img, the source image
  2nd: (x, y), the top-left corner of the rectangle
  3rd: (x+w, y+h), the bottom-right corner of the rectangle
  4th: (0, 255, 0), the line color (note that OpenCV uses BGR order)
  5th: 2, the line width
# draw the minimal upright bounding box in green (0, 255, 0)
x, y, w, h = cv2.boundingRect(cnt)
cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)

# draw the rotated (minimum-area) rectangle in red
rect = cv2.minAreaRect(cnt)
box = cv2.boxPoints(rect)  # cv2.cv.BoxPoints(rect) in legacy OpenCV
box = np.int0(box)
cv2.drawContours(img, [box], 0, (0, 0, 255), 2)
cv2.imwrite('contours.png', img)

7. Video API

# 1. open the laptop's built-in camera
cap = cv2.VideoCapture(0)

# 2. open a video file by path
cap = cv2.VideoCapture("../test.avi")


# 3. read a frame
ret, frame = cap.read()

# 4. release the capture
cap.release()

8. Applications

Sobel derivatives

How to compute gradients, and how to use them to detect edges.

Laplacian operator

An edge detection algorithm.

Canny edge detection

A more advanced edge detection algorithm.

Hough line transform

Detecting straight lines with the Hough transform.

Hough circle transform

Detecting circles with the Hough transform.

Remapping

Building a coordinate mapping between two images.

9. CV Face

CascadeClassifier

haarcascade_eye.xml
haarcascade_eye_tree_eyeglasses.xml
haarcascade_frontalcatface.xml
haarcascade_frontalcatface_extended.xml
haarcascade_frontalface_alt.xml
haarcascade_frontalface_alt2.xml
haarcascade_frontalface_alt_tree.xml
haarcascade_frontalface_default.xml
haarcascade_fullbody.xml
haarcascade_lefteye_2splits.xml
haarcascade_licence_plate_rus_16stages.xml
haarcascade_lowerbody.xml
haarcascade_profileface.xml
haarcascade_righteye_2splits.xml
haarcascade_russian_plate_number.xml
haarcascade_smile.xml
haarcascade_upperbody.xml

1. Face detection

cv.CascadeClassifier

# face detection with haarcascade_frontalface_default
faceCascade = cv2.CascadeClassifier("XML/haarcascade_frontalface_default.xml")

Dlib

Face extraction + landmark detection

CNN face detector model: mmod_human_face_detector.dat.bz2

68-point landmark model: shape_predictor_68_face_landmarks.dat.bz2

5-point landmark model: shape_predictor_5_face_landmarks.dat.bz2

detector = dlib.cnn_face_detection_model_v1(face_detector_model_path)  # dlib.cnn_face_detection_model_v1
# detector = dlib.get_frontal_face_detector()

predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')


detections = detector(img, 1)

# step 3. get the shape of one face, for example
detection = detections[0]  # dlib.mmod_rectangle
# the mmod_rectangle contains two parts: confidence and rect

shape = predictor(img, detection.rect)  # dlib.full_object_detection

# step 4. get all the face landmark points
landmark_points = shape.parts()  # dlib.points

APIs

[TOC]

Tensor

pytorch

#1 list -> tensor
data = [[1, 2], [3, 4]]
x_data = torch.tensor(data, dtype=torch.float)
### torch.FloatTensor, torch.DoubleTensor
float_tensor = torch.FloatTensor([4, 5, 6])

#2 np.array -> tensor
np_array = np.array(data)
x_np_data = torch.from_numpy(np_array)

#3 tensor -> tensor
x_ones = torch.ones_like(x_data)
x_rand = torch.rand_like(x_data, dtype=torch.float)

#4 new empty tensor
shape = (2, 3)
x_ones = torch.ones(shape)
x_zeros = torch.zeros(shape)
x_rand = torch.rand(shape)

#5 tensor to numpy
tensor.numpy()

libtorch(c++)

// 1. C array -> Tensor
float data[] = {3, 4, 6};
torch::Tensor x_data = torch::from_blob(data, {3}, torch::kFloat);

// 2. vector -> Tensor
std::vector<float> std_vector = {3, 4, 6};
torch::Tensor vector_data = torch::from_blob(std_vector.data(), {3}, torch::kFloat);

// 3. Tensor like
torch::Tensor x = torch::zeros({3, 4});
torch::Tensor x_zeros = torch::zeros_like(x);
torch::Tensor x_ones = torch::ones_like(x);
torch::Tensor x_rand = torch::rand_like(x);
// shallow copy
torch::Tensor y = x;
// deep copy
torch::Tensor z = x.clone();


// 4. new Tensor with a given shape
torch::Tensor x_ones = torch::ones({3, 4});
torch::Tensor x_zeros = torch::zeros({3, 4});
torch::Tensor x_eye = torch::eye(4);
torch::Tensor x_full = torch::full({3, 4}, 10);
torch::Tensor x_rand = torch::rand({3, 4});
torch::Tensor x_randn = torch::randn({3, 4});
torch::Tensor x_randint = torch::randint(0, 4, {3, 3});

Tensor operations

pytorch

# index + slice
tensor = torch.rand(4, 4)
a = tensor[:, 1]

# get mask
Tensor[Mask]

libtorch(c++)

// index
auto x = torch::rand({3, 4});
y = x[1];
y = x[1][3];

// slice
auto x = torch::rand({3, 4});
auto y = x.select(0, 1);    // the slice at index 1 along dim 0

auto x = torch::rand({3, 4});
auto y = x.narrow(0, 1, 2); // starting at index 1 along dim 0, take 2 slices
auto y = x.slice(0, 1, 3);  // along dim 0, take indices 1 to 3-1

3. Extracting specified elements into a new tensor (the index keyword means the selected elements are copied out to form a new tensor)

std::cout << b.index_select(0, torch::tensor({0, 3, 3})).sizes(); // pick indices 0,3,3 along dim 0 -> [3, 3, 28, 28]
std::cout << b.index_select(1, torch::tensor({0, 2})).sizes();    // pick indices 0 and 2 along dim 1 -> [10, 2, 28, 28]
std::cout << b.index_select(2, torch::arange(0, 8)).sizes();      // the first 8 rows of every channel of the 10 images -> [10, 3, 8, 28]

Tensor x_data = torch::rand({3, 4});
Tensor mask = torch::zeros({3, 4});

mask[1][1] = 1;
mask[0][0] = 1;

// index() takes an array of booleans and returns the values at the true positions as a new tensor (new memory)
Tensor x = x_data.index({ mask.to(kBool) });

Basic tensor arithmetic

3.1 pytorch
① Elementwise add/subtract/multiply/divide work with the usual + - * / operators.
② Matrix multiplication, e.g. x xᵀ:

x = torch.rand((3, 4))
y = x @ x.T
y = x.matmul(x.T)

3.2 libtorch
① Elementwise add/subtract/multiply/divide work with the usual + - * / operators.
② Matrix multiplication, e.g. x xᵀ:

auto x = torch::rand({3, 4});
x.mm(x.t());

Init Weight

torch.nn.init.xavier_uniform_(conv.weight)
torch.nn.init.constant_(conv.bias, 0)  # xavier cannot be applied to a 1-D bias; zero it instead


# REF: https://blog.csdn.net/qq_42995479/article/details/120598487

# kaiming uniform
# torch.nn.init.kaiming_uniform_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu')
# samples U(-a, a) with a = sqrt(6 / ((1 + b^2) * fan_in)), where b is the negative slope of the activation (0 for relu)
# mode can be fan_in or fan_out: fan_in keeps the variance stable in the forward pass, fan_out in the backward pass
# nonlinearity can be relu or leaky_relu; the default is leaky_relu

# kaiming normal: N(0, std) with std = sqrt(2 / ((1 + b^2) * fan_in))
# torch.nn.init.kaiming_normal_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu')

for m in net.modules():
    if isinstance(m, torch.nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

API-tensor

torch — PyTorch 2.0 documentation

torch.reshape()

torch.reshape changes a tensor's shape: torch.reshape(tensor, shape).
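
A quick sketch of the semantics (the values are illustrative):

import torch

t = torch.arange(12)           # shape (12,)
a = torch.reshape(t, (3, 4))   # shape (3, 4)
b = torch.reshape(t, (2, -1))  # -1 infers the remaining dimension -> (2, 6)
print(a.shape, b.shape)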

tensor.squeeze()

Dimension squeezing: removes dimensions of size 1.

If input has shape (A×1×B×C×1×D), the returned tensor has shape (A×B×C×D).

>>> x = torch.zeros(2, 1, 2, 1, 2)
>>> x.size()
torch.Size([2, 1, 2, 1, 2])
>>> y = torch.squeeze(x)
>>> y.size()
torch.Size([2, 2, 2])
>>> y = torch.squeeze(x, 0)
>>> y.size()
torch.Size([2, 1, 2, 1, 2])
>>> y = torch.squeeze(x, 1)
>>> y.size()
torch.Size([2, 2, 1, 2])
>>> y = torch.squeeze(x, (1, 2, 3))
>>> y.size()
torch.Size([2, 2, 2])

tensor.unsqueeze()

>>> x = torch.tensor([1, 2, 3, 4])
>>> torch.unsqueeze(x, 0)
tensor([[ 1, 2, 3, 4]])
>>> torch.unsqueeze(x, 1)
tensor([[ 1],
        [ 2],
        [ 3],
        [ 4]])

torch.gather

(todo)

Illustrated guide to torch.gather in PyTorch: https://zhuanlan.zhihu.com/p/352877584
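
Pending the full write-up, here is a minimal sketch of gather: along dim=1, output[i][j] = input[i][index[i][j]] (the values below are illustrative):

import torch

t = torch.tensor([[1, 2], [3, 4]])
idx = torch.tensor([[0, 0], [1, 0]])
out = torch.gather(t, 1, idx)
# out[i][j] = t[i][idx[i][j]] -> tensor([[1, 1], [4, 3]])
print(out)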

torch.flatten()

Flattening.

torch.flatten(t, start_dim=0, end_dim=-1) works as follows. Suppose t is a torch.tensor of shape (2,4,3,5,6); then torch.flatten(t, 1, 3).shape is (2, 60, 6). The sizes of the dimensions from start_dim through end_dim (inclusive) are multiplied together [dimensions 1, 2, 3 are flattened into a single dimension], and the other positions stay unchanged. Since the defaults are start_dim=0 and end_dim=-1, torch.flatten(t) returns a 1-D tensor.
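
A short sketch confirming the shape arithmetic described above:

import torch

t = torch.zeros(2, 4, 3, 5, 6)
print(torch.flatten(t, 1, 3).shape)  # torch.Size([2, 60, 6]): 4*3*5 = 60
print(torch.flatten(t).shape)        # torch.Size([720])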


mean()

Computes the mean.

var()

Computes the variance.

item()

.item() extracts the value from a tensor that contains exactly one element; for anything larger use .tolist().

tolist()

Converts a tensor to a Python list.
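
A combined sketch of these accessors (values illustrative):

import torch

t = torch.tensor([1.0, 2.0, 3.0, 4.0])
print(t.mean())         # tensor(2.5000)
print(t.var())          # unbiased variance by default
print(t.mean().item())  # 2.5 as a Python float (single-element tensor only)
print(t.tolist())       # [1.0, 2.0, 3.0, 4.0]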

permute
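
permute reorders a tensor's dimensions and returns a view; a minimal sketch:

import torch

x = torch.zeros(2, 3, 5)
y = x.permute(2, 0, 1)  # dims reordered -> shape (5, 2, 3)
print(y.shape)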

mask

PyTorch provides a mask mechanism for extracting the "interesting" parts of the data. The process: the left matrix is the original data and the middle mask is the masking matrix; positions marked 1 are "interesting" and kept, the rest are discarded. You can picture laying a mask over the data so that only the interesting parts (value 1) show through while everything else is covered.

torch.masked_fill(input, mask, value)

Arguments:

  • input: the original data
  • mask: the masking matrix
  • value: the value written into the "masked" positions; any numeric type (int, float, 0, 1, ...) works

Returns: a masked tensor with the same size as input

Usage:

  • output = torch.masked_fill(input, mask, value)
  • output = input.masked_fill(mask, value)
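
A minimal sketch (values illustrative): fill the positions where the mask is True.

import torch

x = torch.tensor([[1., 2.], [3., 4.]])
mask = torch.tensor([[True, False], [False, True]])
print(x.masked_fill(mask, 0.))  # tensor([[0., 2.], [3., 0.]])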

torch.masked_select(input, mask, out)

Arguments:

  • input: the original data
  • mask: the masking matrix
  • out: the output; it does not share memory with the original tensor, and is usually received on the left-hand side rather than passed as an argument

Returns: a 1-D tensor holding the "selected" values

Usage:

  • torch.masked_select(input, mask, out)
  • output = input.masked_select(mask)

selected_ele = torch.masked_select(input=imgs, mask=mask)  # True means selected, False means not, so no negation here
# tensor([182., 92., 86., 157., 148., 56.])

torch.masked_scatter(input, mask, source)

Description: copies values from source into input at the positions where mask is true.

Arguments:

  • input: the original data
  • mask: the masking matrix; true marks the positions to be overwritten
  • source: the tensor whose values are written into the masked positions

Returns: a masked tensor with the same size as input

Usage:

  • output = torch.masked_scatter(input, mask, source)
  • output = input.masked_scatter(mask, source)
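
A minimal sketch (values illustrative): source values are consumed in order and written where the mask is True.

import torch

x = torch.zeros(2, 3)
mask = torch.tensor([[True, False, True], [False, True, False]])
src = torch.tensor([[1., 2., 3.], [4., 5., 6.]])
print(x.masked_scatter(mask, src))  # tensor([[1., 0., 2.], [0., 3., 0.]])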

API-Model

train(mode=True)

Sets the module to training mode.

This only has an effect when the model contains Dropout or BatchNorm layers.

eval()

Sets the model to evaluation mode.

This only has an effect when the model contains Dropout or BatchNorm layers.

Training

PyTorch multi-node multi-GPU distributed training

BatchNorm usage notes

After model.eval(), PyTorch manages the BN statistics itself, so it is less fiddly than TF;

$$
Y = \frac{X - \mathrm{running\_mean}}{\sqrt{\mathrm{running\_var} + \epsilon}} \cdot \gamma + \beta
$$

Here gamma and beta are learnable parameters (called weight and bias in PyTorch), updated through backpropagation during training. running_mean and running_var are updated in the forward pass: mean and var are first computed from X, and then running_mean and running_var are updated from them with momentum.

So during training, running_mean and running_var are updated once on every forward pass;

At test time, net.eval() freezes the BN layer's running_mean and running_var at the values determined by the last forward pass of training, and they stay fixed throughout testing.
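
A minimal sketch of the running-statistics update described above (PyTorch's default momentum is 0.1; the names here are illustrative):

import torch

momentum, eps = 0.1, 1e-5
running_mean, running_var = torch.zeros(4), torch.ones(4)

x = torch.randn(32, 4)           # a training batch
mean, var = x.mean(0), x.var(0)  # batch statistics
# exponential moving average update, as done during training
running_mean = (1 - momentum) * running_mean + momentum * mean
running_var = (1 - momentum) * running_var + momentum * var

# at eval time the frozen statistics are used
y = (x - running_mean) / torch.sqrt(running_var + eps)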

torchrun

torchrun (Elastic Launch) — PyTorch 2.3 documentation

Notes on pitfalls of the torchrun command for distributed clusters (CSDN blog)

[Distributed training] The right way to do single-node multi-GPU training (part 3): PyTorch

Model Save & Load

save model

torch.save({
    'model_state_dict': {k: _models[k].state_dict() for k in _models},
    'optimizer_state_dict': {k: optimizers[k].state_dict() for k in optimizers},
    "stats": stats,
    'flags': vars(flags),
    'frames': frames,
    'position_frames': position_frames
}, checkpointpath)

save state_dict()

torch.save(learner_model.get_model(position).state_dict(), model_weights_dir)

Saving & Loading Model

Save:

torch.save(model.state_dict(), PATH)

Load:

model = TheModelClass(*args, **kwargs)
model.load_state_dict(torch.load(PATH))
model.eval()

Save/Load Entire Model

Official:

Save:

torch.save(model, PATH)

Load:

# Model class must be defined somewhere
model = torch.load(PATH)
model.eval()

Export/Load Model in TorchScript Format

Export:

model_scripted = torch.jit.script(model) # Export to TorchScript
model_scripted.save('model_scripted.pt') # Save

Load:

model = torch.jit.load('model_scripted.pt')
model.eval()

Saving & Loading a General Checkpoint for Inference and/or Resuming Training

Save:

torch.save({
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss,
    ...
}, PATH)

Load:

model = TheModelClass(*args, **kwargs)
optimizer = TheOptimizerClass(*args, **kwargs)

checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

model.eval()
# - or -
model.train()

Saving Multiple Models in One File

Save:

torch.save({
    'modelA_state_dict': modelA.state_dict(),
    'modelB_state_dict': modelB.state_dict(),
    'optimizerA_state_dict': optimizerA.state_dict(),
    'optimizerB_state_dict': optimizerB.state_dict(),
    ...
}, PATH)

Load:

modelA = TheModelAClass(*args, **kwargs)
modelB = TheModelBClass(*args, **kwargs)
optimizerA = TheOptimizerAClass(*args, **kwargs)
optimizerB = TheOptimizerBClass(*args, **kwargs)

checkpoint = torch.load(PATH)
modelA.load_state_dict(checkpoint['modelA_state_dict'])
modelB.load_state_dict(checkpoint['modelB_state_dict'])
optimizerA.load_state_dict(checkpoint['optimizerA_state_dict'])
optimizerB.load_state_dict(checkpoint['optimizerB_state_dict'])

modelA.eval()
modelB.eval()
# - or -
modelA.train()
modelB.train()

Warmstarting Model Using Parameters from a Different Model

Saving & Loading Model Across Devices

Save:

torch.save(modelA.state_dict(), PATH)

Load:

modelB = TheModelBClass(*args, **kwargs)
modelB.load_state_dict(torch.load(PATH), strict=False)

Save on GPU, Load on CPU

Save:

torch.save(model.state_dict(), PATH)

Load:

device = torch.device('cpu')
model = TheModelClass(*args, **kwargs)
model.load_state_dict(torch.load(PATH, map_location=device))

Save on GPU, Load on GPU

Save:

torch.save(model.state_dict(), PATH)

Load:

device = torch.device("cuda")
model = TheModelClass(*args, **kwargs)
model.load_state_dict(torch.load(PATH))
model.to(device)
# Make sure to call input = input.to(device) on any input tensors that you feed to the model

Save on CPU, Load on GPU

Save:

torch.save(model.state_dict(), PATH)

Load:

device = torch.device("cuda")
model = TheModelClass(*args, **kwargs)
model.load_state_dict(torch.load(PATH, map_location="cuda:0")) # Choose whatever GPU device number you want
model.to(device)
# Make sure to call input = input.to(device) on any input tensors that you feed to the model

Saving torch.nn.DataParallel Models

Save:

torch.save(model.module.state_dict(), PATH)

Load:

# Load to whatever device you want
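
Since the DataParallel checkpoint above was saved via model.module, it loads like a plain state_dict; a minimal sketch assuming TheModelClass and PATH from the preceding examples:

device = torch.device("cuda")  # or "cpu"
model = TheModelClass(*args, **kwargs)
model.load_state_dict(torch.load(PATH, map_location=device))
model.to(device)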

Model parameters (printing)

import torch

def print_model_param_names(model):
    for name, param in model.named_parameters():
        print(name)

def print_model_param_values(model):
    for name, param in model.named_parameters():
        print(name, param.data)

# create a model instance
model = torch.nn.Sequential(
    torch.nn.Linear(10, 5),
    torch.nn.ReLU(),
    torch.nn.Linear(5, 1)
)

# print all of the model's parameter names
print_model_param_names(model)

# print all of the model's parameter values
print_model_param_values(model)

Here we can use clone() to make copies of parameters: clone() creates a deep copy of a tensor, and detach() then removes it from the computation graph, yielding a new tensor that does not affect the original.

# print all parameter names and values, and take copies of the values
params = {}
for name, param in model.named_parameters():
    print('Parameter name:', name)
    print('Parameter value:', param)
    params[name] = param.clone().detach().numpy()

# break the link to the graph
model = None

[TOC]

Data initialization

1. read_csv

df = pd.read_csv('file_path')

2. Building a DataFrame

df = pd.DataFrame(columns=['index', 'v2'])
df['index'] = np.arange(10)
df['v2'] = np.random.randn(10)
# set column 'c' for the row whose index == 1 (the column is created if absent)
df.loc[df.index==1, 'c'] = '9'

3. Converting between pandas and NumPy

First import numpy and pandas and create a DataFrame df:

import numpy as np
import pandas as pd

df = pd.DataFrame({'A':[1,2,3],'B':[4,5,6],'C':[7,8,9]})

# 1. use the DataFrame's values attribute
df.values

# 2. use the DataFrame's as_matrix() method (removed in pandas 1.0; prefer df.to_numpy())
df.as_matrix()

# 3. use NumPy's array function
np.array(df)

Syntax        Operation                        Returns
df.head(n)    view the first n rows            DataFrame
df.tail(n)    view the last n rows             DataFrame
df.sample(n)  view n randomly sampled rows     DataFrame

Reading columns

Either of the following can refer to a column:

df['name'] # returns the column as a Series
df.name
df.Q1
# df.1Q cannot be used even if the column is named 1Q
# df.my name with a space cannot be used either; consider replacing spaces with underscores

Note: a column can be accessed as an attribute only when its name is a valid Python identifier.

Reading subsets of rows and columns

Sometimes we need to select some columns or rows by condition; the common operations are:

Operation                        Syntax          Returns
Select a column                  df[col]         Series
Select a row by label            df.loc[label]   Series
Select a row by integer index    df.iloc[loc]    Series
Slice rows                       df[5:10]        DataFrame
Filter rows by expression        df[bool_vec]    DataFrame

The operations above are known as fancy indexing, which comes from NumPy: pass a sequence of indices and get several elements at once. Unlike slicing, fancy indexing usually copies the elements into a new object. The index can include slices, ellipsis, newaxis, boolean arrays, or integer arrays, and can be viewed as a multidimensional slice.

The following sections cover these query methods in detail.

Iterating over data

1. Using .iterrows()

iterrows() returns the DataFrame's index labels together with the row data. It is an iterator producing (index, Series) pairs, where index is the row index (if any) and the Series holds that row's data.

import pandas as pd

# create a sample DataFrame
data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df = pd.DataFrame(data)

# iterate over the rows
for index, row in df.iterrows():
    print(f"Index: {index}")
    print(f"Row data: {row}")
    print()

2. Using .itertuples()

itertuples() returns an iterator over the DataFrame's rows as namedtuples. It is somewhat faster than iterrows() because no Series objects need to be created.

# iterate over the rows
for row in df.itertuples(index=True, name='Pandas'):
    print(f"Index: {row.Index}")
    print(f"Data: A={row.A}, B={row.B}")
    print()

3. Using .apply()

apply() can be applied to a whole DataFrame, or along an axis to each column or row. Here axis=1 applies the function to each row.

def process_row(row):
    print(f"Processing row: {row}")

df.apply(process_row, axis=1)

4. Using .loc or .iloc

For more flexible access to particular rows or columns, use the .loc and .iloc methods.

# access by label with .loc
for index in df.index:
    print(f"Index: {index}")
    print(f"Row data: {df.loc[index]}")
    print()

# access by position with .iloc
for i in range(len(df)):
    print(f"Row {i} data: {df.iloc[i]}")
    print()

Slicing [rows/columns]

Rows can be selected with list-style slicing, though a single row cannot be indexed this way:

df[:2] # first two rows
df[4:10]
df[:] # all rows; rarely used like this
df[:10:2] # by step
s[::-1] # reversed order

Columns can be selected as well:

df['name'] # a single column, as a Series
df[['Q1', 'Q2']] # two columns
df[['name']] # a single column, returned as a DataFrame; note the difference from the first line

By label: .loc (rows)

df.loc() has the form df.loc[<row expression>, <column expression>], and the expressions support the following forms:

A single label:

# the index label; quote it if it is a string
df.loc[0] # the row with index 0
df.loc[8]

A list of labels:

name, value, score, grade
Eli,9,23,C
Ben,8,89,A
Tom,7,65,B
Toni,6,34,C

df.loc[[0,5,10]] # the rows with index 0, 5, 10
df.loc[['Eli', 'Ben']] # if the row index is name: all columns
df.loc[['Eli', 'Ben'],["name", "grade"]] # if the row index is name: the given columns
df.loc[:,["name", "grade"]]
# boolean selection; the length must match the index
df.loc[[False, True]*50] # rows where True are shown: every other row

A label slice includes both its start and stop (start:stop, either may be omitted), and returns the data between them inclusive:

df.loc[0:5] # index slice: rows 0 through 5, including 5
df.loc['2010':'2014'] # a datetime-like index can be queried with strings
df.loc[:] # everything
# this method also works on Series

For a more detailed introduction to loc, see: loc queries on rows and columns.

Slicing requires the index to be sorted, i.e., monotonically increasing or decreasing; one of the following must be True, otherwise a KeyError is raised.

# index monotonicity
(
    df.index.is_monotonic_increasing,
    df.index.is_monotonic_decreasing
)
# (True, False)

With this rule, sort the index first and then run range queries, e.g.:

# names starting from 'Ad' through 'Bo'
df.set_index('name').sort_index().loc['Ad':'Bo']
# names from the beginning through 'Bo'
df.set_index('name').sort_index().loc[:'Bo']
# team names from C to D
df.set_index('team').sort_index().loc['C': 'D']
# names from 'Te' to 'X'
df.sort_values('name').set_index('name').loc['Te': 'X']

Column selection requires a row expression:

dft.loc[:, ['Q1', 'Q2']] # all rows, columns Q1 and Q2
dft.loc[:10, 'Q1':] # rows 0-10, column Q1 and everything after it

By position: .iloc

df.iloc is similar to df.loc, but it takes only integer positions (the 0..n positions of rows and columns), not labels.

df.iloc[:3]
df.iloc[:]
df.iloc[:, [1, 2]]
df.iloc[2:20:3]
s.iloc[:3]

To select several non-contiguous rows or columns, use np.r_:

# select index 0-4, 10, every 2nd of 15-29, and 70-74
df.iloc[np.r_[:5, 10, 15:30:2, 70:75]] # rows
df.iloc[:, np.r_[0, 2:6]] # columns: column 0 and columns 2-5
# or stitch pieces together by appending (DataFrame.append was removed in pandas 2.0; prefer pd.concat)
df.loc[:5].append(df.loc[10]).append(df.loc[15:30:2])

For a more detailed introduction to iloc, see: iloc integer-position selection.

Getting a single value: .at

Similar to loc, but fetches exactly one value; the form is at[<index>, <column>]:

# note: quote the index if it is a string
df.at[4, 'Q1'] # 65
df.at['lily', 'Q1'] # 65, assuming the index is name
df.at[0, 'name'] # 'Liver'
df.loc[0].at['name'] # 'Liver'
# the value of another column corresponding to a given column's value
df.set_index('name').at['Eorge', 'team'] # 'C'
df.set_index('name').team.at['Eorge'] # 'C'
# the value of a column at a given index
df.team.at[3] # 'C'

Similarly, iat works like iloc and supports only integer positions:

df.iat[4, 2] # 65
df.loc[0].iat[1] # 'E'

.get behaves like a dict lookup, returning a default value when the key is missing (0 in these examples):

df.get('name', 0) # the name column
df.get('nameXXX', 0) # 0, the default value
s.get(3, 0) # 93; for a Series, pass an index to get its value
df.name.get(99, 0) # 'Ben'

Filtering with expressions

Expressions can be used inside [] slices for filtering:

df[df['Q1'] == 8] # Q1 equal to 8
df[~(df['Q1'] == 8)] # not equal to 8
df[df.name == 'Ben'] # name is Ben
df.loc[df['Q1'] > 90, 'Q1':] # Q1 > 90, showing Q1 and all later columns
df.loc[(df.Q1 > 80) & (df.Q2 < 15)] # and
df.loc[(df.Q1 > 90) | (df.Q2 < 90)] # or
df[df.Q1 > df.Q2]

The index part of df.loc can also use an expression to filter data.

df.loc[df['Q1'] == 8] # equal to 8
df.loc[df.Q1 == 8] # equal to 8
df.loc[df['Q1'] > 90, 'Q1'] # Q1 > 90, showing only Q1 as a Series
df.loc[df['Q1'] > 90, ['Q1']] # Q1 > 90, showing only Q1 as a DataFrame
# other expressions work the same as in slices

# select columns by a property of their names
df.loc[:, lambda df: df.columns.str.len()==4] # a boolean sequence
df.loc[:, lambda df: [i for i in df.columns if 'Q' in i]] # a list of column names
df.iloc[:3, lambda df: df.columns.str.len()==2] # a boolean sequence

Comparison methods and functions:

df.eq() # equal ==
df.ne() # not equal !=
df.le() # less than or equal <=
df.lt() # less than <
df.ge() # greater than or equal >=
df.gt() # greater than >
# all support axis{0 or 'index', 1 or 'columns'}, default 'columns'
df[df.Q1.ne(89)] # Q1 not equal to 89
df.loc[df.Q1.gt(90) & df.Q2.lt(90)] # and: Q1>90 and Q2<90

Other functions:

# isin
df[df.team.isin(['A','B'])] # rows whose team is A or B
df[df.isin({'team': ['C', 'D'], 'Q1':[36,93]})] # complex query; non-matching values become NaN

Filtering with functions

A function can produce concrete label values, or a boolean index of matching length, to use for filtering:

df[lambda df: df['Q1'] == 8] # rows where Q1 is 8
df.loc[lambda df: df.Q1 == 8, 'Q1':'Q2'] # rows where Q1 is 8, showing Q1 through Q2
# prefer bracket access when selecting fields; attribute access can misbehave in some conditional expressions

Functions can be applied to columns as well as rows.

String queries

Fuzzy matching

data[data.col.str.contains('foo')]   # contains 'foo'
data[data.col.str.contains('^foo')]  # starts with 'foo'
data[data.col.str.contains('foo$')]  # ends with 'foo'


data['col'] = data['col'].apply(str)  # convert non-string values to strings

# multiple patterns
data[data.source.str.contains('foo|bar')]
# drop the rows matching the query
data[-data.source.str.contains('foo|bar')]

# deduplicate a DataFrame
DataFrame.drop_duplicates(subset=None, keep='first', inplace=False)
#    deduplicate by one column
data.drop_duplicates(subset='col', keep='first', inplace=False)
#    deduplicate by several columns
data.drop_duplicates(subset=['col1', 'col2'], keep='first', inplace=False)

Built-in DataFrame methods

where and mask

s.where(s > 90) # entries not matching become NaN
s.where(s > 90, 0) # entries not matching become 0
# np.where: True where > 80, otherwise False
np.where(s>80, True, False)
np.where(df.num>=60, 'pass', 'fail')

s.mask(s > 90) # matching entries become NaN
s.mask(s > 90, 0) # matching entries become 0

# e.g., show values divisible by 3, otherwise show their negation
m = df.loc[:,'Q1':'Q4'] % 3 == 0
df.loc[:,'Q1':'Q4'].where(m, -df.loc[:,'Q1':'Q4'])

# same number of rows and columns; returns an array (DataFrame.lookup was removed in pandas 2.0)
df.lookup([1,3,4], ['Q1','Q2','Q3']) # array([36, 96, 61])
df.lookup([1], ['Q1']) # array([36])

mask and where can also turn a filter into a boolean series:

# boolean series; matching rows are True
(df.where((df.team=='A') & (df.Q1>60)) == df).Q1

# boolean series; matching rows are False
(df.mask((df.team=='A') & (df.Q1>60)) == df).Q1

query

df.query('Q1 > Q2 > 90') # written like a SQL where clause
df.query('Q1 + Q2 > 180')
df.query('Q1 == Q2')
df.query('(Q1<50) & (Q2>40) and (Q3>90)')
df.query('Q1 > Q2 > Q3 > Q4')
df.query('team != "C"')
df.query('team in ["A","B"]')
df.query('team not in ("E","A","B")')
df.query('name.str.contains("am")') # name contains "am"
# column names containing spaces can be wrapped in backticks
df.query('B == `team name`')

# variables can be passed in with @: e.g., more than 40 above the mean
a = df.Q1.mean()
df.query('Q1 > @a+40')
df.query('Q1 > `Q2`+@a')

# df.eval() is used like df.query
df[df.eval("Q1 > 90 > Q3 > 10")]
df[df.eval("Q1 > `Q2`+@a")]

filter

filter selects by row and column labels.

df.filter(items=['Q1', 'Q2']) # select two columns
df.filter(regex='Q', axis=1) # columns whose name contains Q
df.filter(regex='e$', axis=1) # columns ending in e
df.filter(regex='1$', axis=0) # regex: index labels ending in 1
df.filter(like='2', axis=0) # index labels containing 2
# index labels starting with 2, column names containing Q
df.filter(regex='^2', axis=0).filter(like='Q', axis=1)

For a detailed introduction to filter, see: Pandas filter label filtering.

Index selector: pd.IndexSlice

pd.IndexSlice is used much like slicing in df.loc[]. It is common with MultiIndexes and in functions that take a subset argument to restrict where they apply, especially in method chains.

df.loc[pd.IndexSlice[:, ['Q1', 'Q2']]]
# as a variable
idx = pd.IndexSlice
df.loc[idx[:, ['Q1', 'Q2']]]
df.loc[idx[:, 'Q1':'Q4'], :] # multiple indexes

Complex selections:

# build a complex conditional selector
selected = df.loc[(df.team=='A') & (df.Q1>90)]
idxs = pd.IndexSlice[selected.index, 'name']
# apply the selector
df.loc[idxs]
# style that region (styling is covered later in the tutorial)
df.style.applymap(style_fun, subset=idxs)

By data type

Columns can be selected or excluded by dtype:

df.select_dtypes(include=['float64']) # select float64 columns
df.select_dtypes(include='bool')
df.select_dtypes(include=['number']) # numeric columns only
df.select_dtypes(exclude=['int']) # exclude int columns
df.select_dtypes(exclude=['datetime64'])

any and all

any is True if at least one value is True; all requires every value to be True. Both accept axis=1 to test along rows.

# rows where both Q1 and Q2 are above 80
df[(df.loc[:,['Q1','Q2']] > 80).all(1)]
# rows where at least one of Q1, Q2 is above 80
df[(df.loc[:,['Q1','Q2']] > 80).any(1)]

Understanding how filtering works

Taking the expression inside df[<expression>] out on its own, you can see:

df.Q1.gt(90)
'''
0 False
1 False
2 False
3 True
4 False
...
Name: Q1, Length: 100, dtype: bool
'''

This yields a series of boolean values; the filtered result consists of the rows where the value is True.

mean() / median() / unique() / value_counts()

  • mean()

  • median()

  • unique()

  • value_counts()

map & apply

# modify via a Series; returns the current column as a Series
review_points_mean = reviews.points.mean()
reviews.points.map(lambda p: p - review_points_mean)

# modify via the DataFrame; returns the complete DF
def remean_points(row):
    row.points = row.points - review_points_mean
    return row

reviews.apply(remean_points, axis='columns')

Worked examples

Finding the index of a given value

Sometimes we need to know where a given value sits in the table, i.e., its index; use the following:

# index pairs of the given value
np.argwhere(df.values == 'Eorge')
# array([[3, 0]])
# index pairs of the value 90
np.argwhere(df.values == 90)
'''
array([[33, 2],
       [64, 5]])
'''

Modifying the value at a given row and column

import pandas as pd
x2 = pd.read_csv("submit.csv")
# set 'isDefault' for the row where id == 800000
x2.loc[x2.id==800000,'isDefault'] = 1
x2

Related content

[TOC]

2020 professional development

Linear algebra

MIT OpenCourseWare: Linear Algebra http://open.163.com/newview/movie/courseintro?newurl=M6V0BQC4M

The rank of a matrix: a good primer for linear algebra https://zhuanlan.zhihu.com/p/108093909

Graphics transforms

Basic transforms in computer graphics (Basic Transforms)

Conferences:

CVPR 2019 paper roundup: human pose (https://bbs.cvmart.net/topics/531/CVPR 2019 Human Pose)

AI fundamentals

Books

  • Fundamentals of Computer Graphics

AI integrated project learning

[TOC]

NumPy initialization

np.arange()
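
np.arange produces evenly spaced values over a half-open interval; a minimal sketch:

import numpy as np

print(np.arange(5))        # [0 1 2 3 4]
print(np.arange(2, 10, 2)) # [2 4 6 8]: start, stop (exclusive), step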

np.random

# random.randn(x1, x2, x3, ...)
# returns an array of samples from the standard normal distribution
# args: x1: size of the first dimension
#       x2: size of the second dimension
#       ...

x = np.random.randn(3, 5) # shape (3,5)
print(x)


# random.rand(x1, x2, x3, ...)
# returns an array of values in [0, 1)
# args: x1: size of the first dimension
#       x2: size of the second dimension
#       ...

x = np.random.rand(2, 4)
print(x)


# numpy.random.randint(low, high=None, size=None, dtype='l')
# returns an array of random integers
# args: low: minimum value
#       high: maximum value (exclusive)
#       size: shape of the array
#       dtype: data type, int by default
#       when high is None, values are drawn from [0, low)

x = np.random.randint(1, 11, (3, 4))
y = np.random.randint(10)
print("x:", x)
print("y:", y)
# output:
# x: [[9 2 4 6]
#     [1 3 7 7]
#     [8 5 5 5]]
# y: 3

# np.random.choice(a, size=None, replace=True, p=None)
# returns an array sampled from a
# args: a: 1-D array-like or int; an int is treated as np.arange(a)
#       size: shape of the array
#       replace: if True, sample with replacement; if False, without replacement (all values distinct)
#       p: the probability of each entry being drawn
# example:
x = np.random.choice(5, 3, replace=False, p=[0.1, 0, 0.3, 0.6, 0])
# output: [0 2 3]


# a random float in [0, 1)
np.random.random()


# seed() fixes the integer used to initialize the generator: the same seed reproduces
# the same random numbers every time; without it the seed is derived from the current
# time, so results differ from run to run.
np.random.seed()


# a NumPy function that draws samples from a normal (Gaussian) distribution
np.random.normal

np.meshgrid()

Generates coordinate matrices for a grid of points.
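
A minimal sketch: meshgrid expands 1-D coordinate axes into 2-D coordinate matrices.

import numpy as np

x = np.arange(3)           # [0 1 2]
y = np.arange(2)           # [0 1]
xx, yy = np.meshgrid(x, y)
print(xx.shape, yy.shape)  # (2, 3) (2, 3): xx repeats x along rows, yy repeats y along columns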

np.eye()

Prototype: numpy.eye(N, M=None, k=0, dtype=float, order='C')

Returns a 2-D array (N, M) with ones on the diagonal and zeros everywhere else.

Parameters:

(1) N: int, the number of output rows

(2) M: int, optional, the number of columns; defaults to N

(3) k: int, optional, the diagonal offset; 0 (the default) is the main diagonal, a negative value is a lower diagonal, a positive value an upper diagonal

(4) dtype: optional, the data type of the returned array

(5) order: {'C','F'}, optional; whether the output is stored in memory in C-style row-major order or Fortran-style column-major order

Example (basic usage):

import numpy as np

a = np.eye(3)
print(a)

a = np.eye(4, k=1)
print(a)

a = np.eye(4, k=-1)
print(a)

a = np.eye(4, k=-3)
print(a)
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]

[[0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 0.]]

[[0. 0. 0. 0.]
 [1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]]

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [1. 0. 0. 0.]]

Advanced usage (one-hot encoding):

import numpy as np

labels = np.array([[1],[2],[0],[1]])
print("labels shape:", labels.shape, "\n")

# the classes run from 0 to 2, so there are 3 classes
a = np.eye(3)[1]
print("class 1 as one-hot:", a, "\n")

a = np.eye(3)[2]
print("class 2 as one-hot:", a, "\n")

a = np.eye(3)[1, 0]
print("first entry of the one-hot vector for 1:", a, "\n")

# note the difference from the result above!
a = np.eye(3)[[1, 2, 0, 1]]
print("classes 1,2,0,1 as one-hot:\n", a)

res = np.eye(3)[labels.reshape(-1)]
print("labels as one-hot:\n", res, "\n")
print("shape after one-hot:", res.shape)

labels shape: (4, 1)

class 1 as one-hot: [0. 1. 0.]

class 2 as one-hot: [0. 0. 1.]

first entry of the one-hot vector for 1: 0.0

classes 1,2,0,1 as one-hot:
[[0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 1. 0.]]
labels as one-hot:
[[0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 1. 0.]]

shape after one-hot: (4, 3)

np.identity()

Unlike np.eye, this can only create square matrices, i.e., N = M.

Prototype: np.identity(n, dtype=None)

Parameters: n: int, the number of rows and columns of the output

dtype: the output type, float by default

Returns an n x n array with ones on the main diagonal and zeros elsewhere.

import numpy as np

a = np.identity(3)
print(a)


[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]

np.ravel()

np.expand_dims()

import numpy as np
a = np.zeros((32,32))
print(a.shape)
a1 = np.expand_dims(a, 0)
print(a1.shape)
>>> (32,32) (1,32,32)

Alternative:

import numpy as np
a = np.zeros((32,32))
print(a.shape)
a1 = a[None,:,:]
print(a1.shape)
>>> (32,32) (1,32,32)

np.squeeze()

import numpy as np
a = np.zeros((1,32,32))
print(a.shape)
a1 = a.squeeze()
print(a1.shape)

>>> (1,32,32) (32,32)

Alternative:

import numpy as np
a = np.zeros((3,32,32))
print(a.shape)
a1 = a[0,:,:]
print(a1.shape)

flatten() vs ravel()

The two do the same thing, collapsing a multidimensional array to one dimension; the difference is whether a copy or a view is returned.

ndarray.flatten() returns a copy: modifying the copy does not affect the original matrix.

np.ravel() returns a view: modifying it does affect the original matrix.

import numpy as np
a = np.array([[1, 2], [3, 4]])
b = a.flatten()
print('b:', b)
c = a.ravel()
print('c:', c)
d = a.ravel('F')
print('d:', d)

# the difference between the two
b[0] = 10
print('a:', a)
c[0] = 10
print('a:', a)
b: [1 2 3 4]
c: [1 2 3 4]
d: [1 3 2 4]
a: [[1 2]
 [3 4]]
a: [[10 2]
 [ 3 4]]

allclose

numpy.allclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False)

absolute(a - b) <= (atol + rtol * absolute(b))

e.g., check that two arrays differ by less than 2:

np.allclose(a, b, atol=2)

The two operands can be arrays of the same shape, for example

a = [149.820974 188.13338026 145.44900513]
b = [151.086161 186.89028926 144.17222595]

diag
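
np.diag either extracts a diagonal or builds a diagonal matrix, depending on its input; a minimal sketch:

import numpy as np

m = np.arange(9).reshape(3, 3)
print(np.diag(m))         # [0 4 8]: the main diagonal of a 2-D array
print(np.diag([1, 2, 3])) # a 3x3 matrix with 1, 2, 3 on the diagonal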

np.argmax()

import numpy as np
a = np.array([3, 1, 2, 4, 6, 1])
b = np.argmax(a) # the index of the maximum of a; the maximum 6 sits at index 4 (indices start at 0)
print(b) # 4

https://wenku.baidu.com/view/1d3dbe48ac1ffc4ffe4733687e21af45b307fe78.html

https://blog.csdn.net/weixin_38145317/article/details/79650188

np.where()

np.where(condition, x, y)

Where the condition holds, yield x; otherwise yield y.

For a 1-D array this is equivalent to [xv if c else yv for (c, xv, yv) in zip(condition, x, y)]

>>> aa = np.arange(10)
>>> np.where(aa,1,-1)
array([-1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) # 0 is False, so the first output is -1
>>> np.where(aa > 5,1,-1)
array([-1, -1, -1, -1, -1, -1, 1, 1, 1, 1])

np.where(condition)

With only a condition and no x or y, it outputs the coordinates of the elements satisfying the condition (i.e., the non-zero ones), equivalent to numpy.nonzero. The coordinates are given as a tuple: it contains as many arrays as the original array has dimensions, each holding the coordinates of the matching elements along one dimension.

>>> a = np.array([2,4,6,8,10])
>>> np.where(a > 5) # returns indices
(array([2, 3, 4]),)

bid_history = np.array([0, 1, 2, 0, 2])
np.argwhere(bid_history == max(bid_history))
>>> array([[2],
           [4]], dtype=int64)

np.repeat

a = np.zeros(54, dtype=np.int8)
batch_num = 4
batch_a = np.repeat(a[np.newaxis,:], batch_num, axis=0)

Slice

:: (reversed sequences)

a = np.arange(10)
print(a)
print(a[-3:][::])
print(a[-3:][::-1])

[0 1 2 3 4 5 6 7 8 9]
[7 8 9]
[9 8 7]

stack-related functions

stack() Join a sequence of arrays along a new axis.

vstack() Stack along first axis. == np.concatenate(tup, axis=0)

hstack() Stack along second axis. (column wise). == np.concatenate(tup, axis=1)

dstack() Stack arrays in sequence depth wise (along third dimension).==np.concatenate(tup, axis=2)

concatenate() Join a sequence of arrays along an existing axis.

np.stack()

np.hstack()

np.vstack()

np.dstack()

np.concatenate()

np.split()

np.hsplit

np.vsplit

np.dsplit

split  : 1D; indices_or_sections [2, 3] would, for axis=0, result in
  - ary[:2]
  - ary[2:3]
  - ary[3:]
hsplit : Split array into multiple sub-arrays horizontally (column-wise).
vsplit : Split array into multiple sub-arrays vertically (row wise).
dsplit : Split array into multiple sub-arrays along the 3rd axis (depth).
import numpy as np
a = np.zeros((4, 3))
b = np.ones((4, 1))
print(a.shape)
print(a)
print(b)

c = np.hstack((a, b))
print(c)

d = np.ones((1, 3))
print(d)
c = np.vstack((a, d))
print(c)

Out:

(4, 3)
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
[[1.]
 [1.]
 [1.]
 [1.]]
[[0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]]
[[1. 1. 1.]]
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [1. 1. 1.]]

NumPy matrix operations:

mean: average

var: variance

item

dot

The dot product (also called the inner product or scalar product): corresponding elements are multiplied and summed, and the result is a scalar (a single number).

cross

The cross product (also called the vector product or outer product).

multiply, *

Elementwise product: corresponding elements are multiplied, and the result is still an array.
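
A minimal sketch contrasting the three products:

import numpy as np

a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
print(np.dot(a, b))       # 32: scalar, the sum of elementwise products
print(np.cross(a, b))     # [-3 6 -3]: a vector orthogonal to a and b
print(np.multiply(a, b))  # [4 10 18]: elementwise, same as a * b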

np.linalg

np.linalg.norm computes vector and matrix norms.
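
A minimal sketch of the common norms:

import numpy as np

v = np.array([3.0, 4.0])
print(np.linalg.norm(v))         # 5.0: L2 norm by default
print(np.linalg.norm(v, 1))      # 7.0: L1 norm
print(np.linalg.norm(v, np.inf)) # 4.0: max-abs norm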

Serializing NumPy arrays

bcolz

Write:
carr = bcolz.carray(arr, rootdir=fname, mode='w')
carr.flush()

Read:
carr = bcolz.carray(arr, rootdir=fname, mode='r')
https://vimsky.com/zh-tw/examples/detail/python-method-bcolz.carray.html

pickle
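
A minimal sketch of pickling a NumPy array (np.save/np.load is the more idiomatic choice for plain arrays):

import pickle
import numpy as np

arr = np.arange(10)
with open('arr.pkl', 'wb') as f:
    pickle.dump(arr, f)
with open('arr.pkl', 'rb') as f:
    arr2 = pickle.load(f)
print(np.array_equal(arr, arr2))  # True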

Path utilities

glob is one of the simplest modules, with very little in it. It finds file paths matching a given pattern, much like file search on Windows. Only three wildcards are needed: "*", "?" and "[]". "*" matches 0 or more characters; "?" matches a single character; "[]" matches a character in the given range, e.g. [0-9] matches a digit.

import glob

# all pngs under the given directory
print(glob.glob(r"/home/qiaoyunhao/*/*.png"), "\n") # the r prefix keeps the string raw

# all .py files in the parent directory
print(glob.glob(r'../*.py')) # relative path

[Point Set Matching/Registration Benchmark](http://gwang-cv.github.io/2019/05/01/Point Set Matching-Registration Benchmark/)

A list of point set matching/registration resources collected by Gang Wang. If you find that important resources are not included, please feel free to contact me.

Point Set Matching/Registration Material

POINT SET MATCHING/REGISTRATION METHODS [WIKI]

Point Matching/Registration Methods
  • [MCT] A mathematical analysis of the motion coherence theory, IJCV’1989 [pdf]
  • [ICP: point-to-point] Method for Registration of 3-D Shapes, Robotics-DL tentative’1992 [pdf] [code][code] [code] [material] [tutorial]
  • [ICP: point-to-plane] Object modeling by registration of multiple range images, IVC’1992 [pdf]
  • [ICP] Iterative point matching for registration of free-form curves and surfaces, IJCV’1994 [pdf]
  • [RPM/Softassign] New algorithms for 2d and 3d point matching: pose estimation and correspondence, PR’1998 [pdf] [code]
  • [MultiviewReg] Multiview registration for large data sets, 3DDIM’1999 [pdf]
  • [SC] Shape matching and object recognition using shape contexts, TPAMI’2002 [pdf][wiki] [project] [code]
  • [EM-ICP] Multi-scale EM-ICP: A Fast and Robust Approach for Surface Registration, ECCV’2002 [pdf] [code]
  • [LM-ICP] Robust registration of 2D and 3D point sets, IVC’2003 [pdf] [code]
  • [TPS-RPM] A new point matching algorithm for non-rigid registration, CVIU’2003 [pdf] [project] [code]
  • [Survey] Image registration methods: a survey, IVC’2003 [pdf]
  • [KCReg] A correlation-based approach to robust point set registration, ECCV’2004 [pdf] [code]
  • [3DSC] Recognizing objects in range data using regional point descriptors, ECCV’2004 [pdf] [code_pcl]
  • [RGR] Robust Global Registration, ESGP’2005 [pdf]
  • [RPM-LNS] Robust point matching for nonrigid shapes by preserving local neighborhood structures, TPAMI’2006 [pdf] [code]
  • [IT-FFD] Shape registration in implicit spaces using information theory and free form deformations, TPAMI’2006 [pdf]
  • [Rigid] Rigid Body Registration, 2007 [ch02]
  • [CDC] Simultaneous covariance driven correspondence (cdc) and transformation estimation in the expectation maximization framework, CVPR’2007 [pdf] [project]
  • [Nonrigid-ICP] Optimal step nonrigid icp algorithms for surface registration, CVPR’2007 [pdf] [code] [code]
  • [GNA] Global non-rigid alignment of 3D scans, TOG’2007 [pdf]
  • [PF] Particle filtering for registration of 2D and 3D point sets with stochastic dynamics, CVPR’2008 [pdf]
  • [JS] Simultaneous nonrigid registration of multiple point sets and atlas construction, TPAMI’2008 [pdf]
  • [4PCS] 4-points congruent sets for robust pairwise surface registration, TOG’2008 [pdf] [project]
  • [GICP] Generalized ICP, RSS’2009 [pdf] [code]
  • [MP] 2D-3D registration of deformable shapes with manifold projection, ICIP’2009 [pdf]
  • [SMM] The mixtures of Student’s t-distributions as a robust framework for rigid registration, IVC’2009 [pdf]
  • [Algebraic-PSR] An Algebraic Approach to Affine Registration of Point Sets, ICCV’2009 [pdf]
  • [SM] Subspace matching: Unique solution to point matching with geometric constraints, ICCV’2009 [pdf]
  • [FPFH] Fast Point Feature Histograms (FPFH) for 3D Registration, ICRA’2009 [pdf] [code]
  • [GO] Global optimization for alignment of generalized shapes, CVPR’2009 [pdf]
  • [ISO] Isometric registration of ambiguous and partial data, CVPR’2009 [pdf]
  • [GF] A new method for the registration of three-dimensional point-sets: The Gaussian fields framework, IVC’2010 [pdf]
  • [RotInv] Rotation invariant non-rigid shape matching in cluttered scenes, ECCV’2010 [pdf] [code]
  • [CDFHC] Group-wise point-set registration using a novel cdf-based havrda-charvát divergence, IJCV’2010 [pdf] [code]
  • [QPCCP] A quadratic programming based cluster correspondence projection algorithm for fast point matching, CVIU’2010 [pdf] [code]
  • [CPD] Point set registration: Coherent point drift, NIPS’2007 [pdf] TPAMI’2010 [pdf] [code]
  • [PFSD] Point set registration via particle filtering and stochastic dynamics, TPAMI’2010 [pdf]
  • [ECMPR] Rigid and articulated point registration with expectation conditional maximization, TPAMI’2011 [pdf] [project] [code]
  • [GMMReg/TPS-L2] Robust point set registration using gaussian mixture models, NIPS’2005 TPAMI’2011 [pdf] [code]
  • [TPRL] Topology preserving relaxation labeling for nonrigid point matching, TPAMI’2011 [pdf]
  • [OOH] Robust point set registration using EM-ICP with information-theoretically optimal outlier handling, CVPR’2011 [pdf]
  • [SGO] Stochastic global optimization for robust point set registration, CVIU’2011
  • [survey] 3D Shape Registration, 3DIAA’2012
  • [Multiview LM-ICP] Accurate and automatic alignment of range surfaces, 3DIMPVT’2012 [pdf] [code]
  • [ISC] Intrinsic shape context descriptors for deformable shapes, CVPR’2012 [pdf]
  • [RPM-Concave] Robust point matching revisited: A concave optimization approach, ECCV’2012 [pdf] [code]
  • [RINPSM] Rotation Invariant Nonrigid Point Set Matching in Cluttered Scenes, TIP’2012 [pdf] [code]
  • [RPM-L2E] Robust estimation of nonrigid transformation for point set registration, CVPR’2013 [pdf] [code]
  • [GO-ICP] Go-ICP: Solving 3D Registration Efficiently and Globally Optimally, ICCV’2013 [pdf] TPAMI’2016 [pdf] [code]
  • [Survey] Registration of 3D point clouds and meshes: a survey from rigid to nonrigid, TVCG’2013 [[pdf]](https://orca.cf.ac.uk/47333/1/ROSIN registration of 3d point clouds and meshes.pdf)
  • [NMM] Diffeomorphic Point Set Registration Using Non-Stationary Mixture Models, ISBI’2013 [pdf]
  • [Sparse-ICP] Sparse Iterative Closest Point, ESGP’2013 [pdf] [project] [code]
  • [JRMPC] A Generative Model for the Joint Registration of Multiple Point Sets, ECCV’2014 [pdf] [project] [code&data]
  • [RPM-VFC] Robust Point Matching via Vector Field Consensus, TIP’2014 [pdf] [code]
  • [GLTP] Non-rigid Point Set Registration with Global-Local Topology Preservation, CVPRW’2014 [pdf]
  • [color-GICP] Color supported generalized-ICP, VISAPP’2014 [pdf]
  • [RPM-Concave] Point Matching in the Presence of Outliers in Both Point Sets: A Concave Optimization Approach, CVPR’2014 [pdf] [code]
  • [super4PCS] Super 4pcs fast global pointcloud registration via smart indexing, CGF’2014 [pdf] [code] [OpenGR]
  • [SDTM] A Riemannian framework for matching point clouds represented by the Schrodinger distance transform, CVPR’2014 [pdf]
  • [GLMD-TPS] A robust global and local mixture distance based non-rigid point set registration, PR’2015 [pdf] [code]
  • [CSM] Non-rigid point set registration via coherent spatial mapping, SP’2015 [pdf]
  • [ADR] An Adaptive Data Representation for Robust Point-Set Registration and Merging, ICCV’2015 [pdf]
  • [MLMD] MLMD: Maximum likelihood mixture decoupling for fast and accurate point cloud registration, 3DV’2015 [pdf] [project]
  • [APSR] Non-rigid Articulated Point Set Registration for Human Pose Estimation, WACV’2015 [pdf]
  • [RegGF] Non-rigid visible and infrared face registration via regularized Gaussian fields criterion, PR’2015 [pdf] [code]
  • [LLT] Robust feature matching for remote sensing image registration via locally linear transforming, TGRS’2015 [pdf] [code]
  • [RPM-L2E] Robust L2E estimation of transformation for non-rigid registration, TSP’2015 [pdf] [code]
  • [GLR] Robust Nonrigid Point Set Registration Using Graph-Laplacian Regularization, WACV’2015 [pdf]
  • [FPPSR] Aligning the dissimilar: A probabilistic method for feature-based point set registration, ICPR’2016 [pdf]
  • [IPDA] Point Clouds Registration with Probabilistic Data Association, IROS’2016 [pdf] [code]
  • [CPPSR] A probabilistic framework for color-based point set registration, CVPR’2016 [pdf] [project]
  • [GOGMA] GOGMA: Globally-optimal gaussian mixture alignment, CVPR’2016 [pdf]
  • [GO-APM] An Efficient Globally Optimal Algorithm for Asymmetric Point Matching, TPAMI’2016 [pdf] [project] [code]
  • [PR-GLS] Non-Rigid Point Set Registration by Preserving Global and Local Structures, TIP’2016 [pdf] [code]
  • [conreg] Non-iterative rigid 2D/3D point-set registration using semidefinite programming, TIP’2016 [pdf]
  • [PM] Probabilistic Model for Robust Affine and Non-rigid Point Set Matching, TPAMI’2016 [pdf]
  • [SPSR] A Stochastic Approach to Diffeomorphic Point Set Registration With Landmark Constraints, TPAMI’2016 [pdf]
  • [FRSSP] Fast Rotation Search with Stereographic Projections for 3D Registration, TPAMI’2016 [pdf]
  • [VBPSM] Probabilistic Model for Robust Affine and Non-rigid Point Set Matching, TPAMI’2016 [pdf] [code]
  • [MFF] Image Correspondences Matching Using Multiple Features Fusion, ECCV’2016 [pdf] [code]
  • [FGR] Fast Global Registration, ECCV’2016 [pdf] [code]
  • [HMRF ICP] Hidden Markov Random Field Iterative Closest Point, arxiv’2017 [pdf] [code]
  • [SSFR] Global Registration of 3D LiDAR Point Clouds Based on Scene Features: Application to Structured Environments, RS’2017 [pdf]
  • [color-PCR] Colored point cloud registration revisited, ICCV’2017 [pdf]
  • [dpOptTrans] Efficient Globally Optimal Point Cloud Alignment using Bayesian Nonparametric Mixtures, CVPR’2017 [pdf] [code]
  • [GORE] Guaranteed Outlier Removal for Point Cloud Registration with Correspondences, TPAMI’2017 [pdf]
  • [CSGM] A systematic approach for cross-source point cloud registration by preserving macro and micro structures, TIP’2017 [pdf]
  • [FDCP] Fast descriptors and correspondence propagation for robust global point cloud registration, TIP’2017 [pdf]
  • [RSWD] Multiscale Nonrigid Point Cloud Registration Using Rotation-Invariant Sliced-Wasserstein Distance via Laplace-Beltrami Eigenmap, SIAM JIS’2017 [pdf]
  • [MR] Non-Rigid Point Set Registration with Robust Transformation Estimation under Manifold Regularization, AAAI’2017 [pdf] [code]
  • [LPM] Locality Preserving Matching, IJCAI’2017 [pdf] IJCV’2019 [pdf] [code]
  • [DARE] Density adaptive point set registration, CVPR’2018 [pdf] [code]
  • [GC-RANSAC] Graph-Cut RANSAC, CVPR’2018 [pdf] [code]
  • [3D-CODED] 3D-CODED: 3D correspondences by deep deformation, ECCV’2018 [pdf] [project] [code]
  • [3DFeat-NET] 3dfeat-net: Weakly supervised local 3d features for point cloud registration, ECCV’2018 [pdf] [code]
  • [MVDesc-RMBP] Learning and Matching Multi-View Descriptors for Registration of Point Clouds, ECCV’2018 [pdf]
  • [SWS] Nonrigid Points Alignment with Soft-weighted Selection, IJCAI’2018 [pdf]
  • [DLD] Dependent landmark drift: robust point set registration with a Gaussian mixture model and a statistical shape model, arXiv’2018 [pdf] [code]
  • [DeepMapping] DeepMapping: Unsupervised Map Estimation From Multiple Point Clouds, arXiv’2018 [pdf] [project]
  • [APSR] Adversarial point set registration, arXiv’2018 [pdf]
  • [3DIV] Fast and Globally Optimal Rigid Registration of 3D Point Sets by Transformation Decomposition, arXiv’2018 [pdf]
  • [Analysis] Analysis of Robust Functions for Registration Algorithms, arXiv’2018 [pdf]
  • [MVCNN] Learning Local Shape Descriptors from Part Correspondences with Multiview Convolutional Networks, TOG’2018 [pdf] [project]
  • [CSCIF] Cubature Split Covariance Intersection Filter-Based Point Set Registration, TIP’2018 [pdf]
  • [FPR] Efficient Registration of High-Resolution Feature Enhanced Point Clouds, TPAMI’2018 [pdf]
  • [DFMM-GLSP] Non-rigid point set registration using dual-feature finite mixture model and global-local structural preservation, PR’2018 [pdf]
  • [PR-Net] Non-Rigid Point Set Registration Networks, arXiv’2019 [pdf] [code]
  • [SDRSAC] SDRSAC: Semidefinite-Based Randomized Approach for Robust Point Cloud Registration without Correspondences, arXiv’2019 [pdf] [code]
  • [3DRegNet] 3DRegNet: A Deep Neural Network for 3D Point Registration, arXiv’2019 [pdf]
  • [PointNetLK] PointNetLK: Robust & Efficient Point Cloud Registration using PointNet, arXiv’2019 [pdf] [code]
  • [RPM-MR] Nonrigid Point Set Registration with Robust Transformation Learning under Manifold Regularization, TNNLS’2019 [pdf] [code]
  • [FGMM] Feature-guided Gaussian mixture model for image matching, PR’2019 [pdf]
  • [LSR-CFP] Least-squares registration of point sets over SE(d) using closed-form projections, CVIU’2019 [pdf]
  • [FilterReg] FilterReg: Robust and Efficient Probabilistic Point-Set Registration using Gaussian Filter and Twist Parameterization, CVPR’2019 [pdf] [project] [code]
  • [TEASER] A Polynomial-time Solution for Robust Registration with Extreme Outlier Rates, arXiv’2019 [pdf]
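
A reading aid for the list above: many of the classical entries refine the same ICP skeleton, alternating nearest-neighbour correspondence with a closed-form rigid update. Below is a minimal point-to-point ICP sketch in numpy/scipy (my own illustration, not the reference code of any paper listed; function names are made up):

```python
import numpy as np
from scipy.spatial import cKDTree

def best_fit_rigid(src, dst):
    """Closed-form R, t minimizing sum ||R @ src_i + t - dst_i||^2 (Kabsch/SVD)."""
    mu_s, mu_d = src.mean(0), dst.mean(0)
    H = (src - mu_s).T @ (dst - mu_d)
    U, _, Vt = np.linalg.svd(H)
    R = Vt.T @ U.T
    if np.linalg.det(R) < 0:       # guard against reflections
        Vt[-1] *= -1
        R = Vt.T @ U.T
    return R, mu_d - R @ mu_s

def icp(src, dst, n_iter=50, tol=1e-6):
    """Align src [n,3] to dst [m,3]; returns an accumulated 4x4 transform."""
    tree = cKDTree(dst)
    T, cur, prev_err = np.eye(4), src.copy(), np.inf
    for _ in range(n_iter):
        dist, idx = tree.query(cur)            # 1) data association
        R, t = best_fit_rigid(cur, dst[idx])   # 2) closed-form rigid update
        cur = cur @ R.T + t
        step = np.eye(4); step[:3, :3] = R; step[:3, 3] = t
        T = step @ T
        if abs(prev_err - dist.mean()) < tol:  # 3) stop when the error stalls
            break
        prev_err = dist.mean()
    return T
```

Robust variants such as [Sparse-ICP] swap the least-squares objective inside this loop for sparsity-inducing norms, while [GO-ICP] wraps the loop in a branch-and-bound search to escape local minima.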
Mismatch Removal Methods
  • [RANSAC] Random sample consensus: a paradigm for model fitting with applications to image analysis and automated cartography, 1981 [pdf] [wiki]
  • [MLESAC] MLESAC: A new robust estimator with application to estimating image geometry, CVIU’2000 [pdf] [code_pcl]
  • [PROSAC] Matching with PROSAC-progressive sample consensus, CVPR’2005 [pdf] [code_pcl]
  • [ICF/SVR] Rejecting mismatches by correspondence function, IJCV’2010 [pdf]
  • [GS] Common visual pattern discovery via spatially coherent correspondences, CVPR’2010 [[pdf]](http://www.jdl.ac.cn/project/faceId/paperreading/Paper/Common%20Visual%20Pattern%20Discovery%20via%20Spatially%20Coherent%20Correspondences.pdf) [code]
  • [VFC] A robust method for vector field learning with application to mismatch removing, CVPR’2011 [pdf] [code]
  • [DefRANSAC] In defence of RANSAC for outlier rejection in deformable registration, ECCV’2012 [[pdf]](https://media.adelaide.edu.au/acvt/Publications/2012/2012-In%20Defence%20of%20RANSAC%20for%20Outlier%20Rejection%20in%20Deformable%20Registration.pdf) [code]
  • [CM] Robust Non-parametric Data Fitting for Correspondence Modeling, ICCV’2013 [pdf] [code]
  • [AGMM] Asymmetrical Gauss Mixture Models for Point Sets Matching, CVPR’2014 [pdf]
  • [TC] Epipolar geometry estimation for wide baseline stereo by Clustering Pairing Consensus, PRL’2014 [pdf]
  • [BF] Bilateral Functions for Global Motion Modeling, ECCV’2014 [pdf] [project] [code]
  • [WxBS] WxBS: Wide Baseline Stereo Generalizations, BMVC’2015 [pdf] [project]
  • [RepMatch] RepMatch: Robust Feature Matching and Pose for Reconstructing Modern Cities, ECCV’2016 [pdf] [project] [code]
  • [SIM] The shape interaction matrix-based affine invariant mismatch removal for partial-duplicate image search, TIP’2017 [pdf] [code]
  • [DSAC] DSAC: differentiable RANSAC for camera localization, CVPR’2017 [pdf] [code]
  • [GMS] GMS: Grid-based Motion Statistics for Fast, Ultra-robust Feature Correspondence, CVPR’2017 [pdf] [code]
  • [LMI] Consensus Maximization with Linear Matrix Inequality Constraints, CVPR’2017 [pdf] [project] [code]
  • [LFGC] Learning to Find Good Correspondences, CVPR’2018 [pdf] [code]
  • [GC-RANSAC] Graph-Cut RANSAC, CVPR’2018 [pdf] [code]
  • [SRC] Consensus Maximization for Semantic Region Correspondences, CVPR’2018 [pdf] [code]
  • [CODE] Code: Coherence based decision boundaries for feature correspondence, TPAMI’2018 [pdf] [project]
  • [LPM] Locality preserving matching, IJCV’2019 [pdf] [code]
  • [LMR] LMR: Learning A Two-class Classifier for Mismatch Removal, TIP’2019 [pdf] [code]
  • [PFFM] Progressive Filtering for Feature Matching, ICASSP’2019 [pdf]
  • [NM-Net] NM-Net: Mining Reliable Neighbors for Robust Feature Correspondences, arXiv’2019 [pdf]
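
Almost everything in this group descends from the [RANSAC] hypothesize-and-verify loop above. As a concrete baseline, here is a bare-bones sketch for pruning putative 2D matches with an affine model (a hedged illustration only; the threshold, iteration count, and names are mine):

```python
import numpy as np

def fit_affine(src, dst):
    """Least-squares 2D affine A (2x3) with dst ≈ [src, 1] @ A.T."""
    X = np.hstack([src, np.ones((len(src), 1))])
    B, *_ = np.linalg.lstsq(X, dst, rcond=None)   # solves X @ B = dst
    return B.T

def ransac_affine(src, dst, thresh=3.0, n_iter=1000, seed=None):
    """Return (model, inlier_mask) for putative matches src -> dst, each [n,2]."""
    rng = np.random.default_rng(seed)
    n = len(src)
    X = np.hstack([src, np.ones((n, 1))])
    best_mask, best_count = None, 0
    for _ in range(n_iter):
        sample = rng.choice(n, size=3, replace=False)      # minimal sample (6 DoF)
        A = fit_affine(src[sample], dst[sample])           # hypothesize
        resid = np.linalg.norm(X @ A.T - dst, axis=1)      # verify
        mask = resid < thresh
        if mask.sum() > best_count:
            best_count, best_mask = mask.sum(), mask
    A = fit_affine(src[best_mask], dst[best_mask])         # refit on consensus set
    return A, best_mask
```

Methods like [PROSAC] and [GC-RANSAC] keep this structure but bias the sampling toward high-quality candidates ([PROSAC]) or replace the per-point inlier test with a spatially coherent graph-cut labeling ([GC-RANSAC]).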
Graph Matching Methods
  • [SM] A spectral technique for correspondence problems using pairwise constraints, ICCV’2005 [pdf] [code]
  • [SM-MAP] Efficient MAP approximation for dense energy functions, ICML’2006 [pdf] [code]
  • [SMAC] Balanced Graph Matching, NIPS’2006 [pdf] [code]
  • [FCGM] Feature correspondence via graph matching: Models and global optimization, ECCV’2008 [pdf]
  • [PM] Probabilistic Graph and Hypergraph Matching, CVPR’2008 [pdf]
  • [IPFP] An Integer Projected Fixed Point Method for Graph Matching and MAP Inference, NIPS’2009 [pdf] [code]
  • [RRWM] Reweighted Random Walks for Graph Matching, ECCV’2010 [pdf]
  • [FGM] Factorized graph matching, CVPR’2012 [pdf] [code]
  • [DGM] Deformable Graph Matching, CVPR’2013 [pdf] [code]
  • [MS] Progressive mode-seeking on graphs for sparse feature matching, ECCV’2014 [pdf] [code]
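
The common thread here is casting correspondence as a quadratic assignment problem over an assignment-affinity matrix, which is NP-hard in general, so each paper relaxes it differently. A compact sketch in the spirit of [SM] (principal eigenvector of the affinity matrix, then greedy discretization) follows; the distance-consistency affinity is my own simplification and only practical for small point sets:

```python
import numpy as np

def spectral_match(p1, p2, sigma=0.5):
    """Match two small 2D point sets by pairwise distance consistency."""
    n1, n2 = len(p1), len(p2)
    cand = [(i, j) for i in range(n1) for j in range(n2)]  # candidate assignments
    m = len(cand)
    # Affinity: assignments (i,j) and (k,l) agree if |d(p1_i,p1_k) - d(p2_j,p2_l)| is small
    M = np.zeros((m, m))
    for a, (i, j) in enumerate(cand):
        for b, (k, l) in enumerate(cand):
            if i == k or j == l:
                continue  # conflicting assignments get zero affinity
            d = abs(np.linalg.norm(p1[i] - p1[k]) - np.linalg.norm(p2[j] - p2[l]))
            M[a, b] = np.exp(-d**2 / sigma**2)
    # Principal eigenvector of the (symmetric, non-negative) affinity matrix
    _, V = np.linalg.eigh(M)
    x = np.abs(V[:, -1])
    # Greedy discretization under one-to-one constraints
    used1, used2, matches = set(), set(), []
    for a in np.argsort(-x):
        i, j = cand[a]
        if i not in used1 and j not in used2:
            matches.append((i, j))
            used1.add(i); used2.add(j)
    return matches
```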
Misc
  • [RootSIFT] Three things everyone should know to improve object retrieval, CVPR’2012 [pdf] [related code]
  • [DM-CNN] Descriptor Matching with Convolutional Neural Networks: a Comparison to SIFT, arXiv’2014 [pdf]
  • [DASC] DASC: Robust Dense Descriptor for Multi-modal and Multi-spectral Correspondence Estimation, TPAMI’2017 [pdf] [project]
  • [MODS] MODS: Fast and Robust Method for Two-View Matching, CVIU’2015 [pdf] [project] [code]
  • [Elastic2D3D] Efficient Globally Optimal 2D-to-3D Deformable Shape Matching, CVPR’2016 [pdf] [project]
  • [TCDCN] Facial Landmark Detection by Deep Multi-task Learning, ECCV’2014 [pdf] [project]
  • [LAI] Object matching using a locally affine invariant and linear programming techniques, TPAMI’2013 [pdf]
  • [GeoDesc] GeoDesc: Learning Local Descriptors by Integrating Geometry Constraints, ECCV’2018 [pdf] [code]
Deep Features
  • [TFeat] Learning local feature descriptors with triplets and shallow convolutional neural networks, BMVC’2016 [pdf] [code]
  • [L2-Net] L2-Net: Deep Learning of Discriminative Patch Descriptor in Euclidean Space, CVPR’2017 [pdf] [code]
  • [HardNet] Working hard to know your neighbor’s margins: Local descriptor learning loss, CVPR’2018 [pdf] [code]
  • [AffNet] Repeatability Is Not Enough: Learning Discriminative Affine Regions via Discriminability, ECCV’2018 [pdf] [code]
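
These patch descriptors differ mostly in the training loss: [TFeat] samples random triplets, while [HardNet] mines the hardest negative within each batch. A rough numpy illustration of that hardest-in-batch margin loss (loss computation only; no claim to match the authors' code):

```python
import numpy as np

def hardest_in_batch_loss(anchors, positives, margin=1.0):
    """anchors, positives: [n, d] L2-normalized descriptors of matching patches.
    For each pair, the negative is the closest non-matching descriptor in the batch."""
    n = len(anchors)
    D = np.linalg.norm(anchors[:, None, :] - positives[None, :, :], axis=2)  # [n, n]
    pos = np.diag(D)                       # distances of the true matches
    off = D + np.eye(n) * 1e6              # mask the diagonal (true matches)
    hardest_neg = np.minimum(off.min(axis=0), off.min(axis=1))  # hardest negative per pair
    return np.maximum(0.0, margin + pos - hardest_neg).mean()
```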

APPLICATIONS

Remote Sensing Image Registration
  • [GLPM] Guided Locality Preserving Feature Matching for Remote Sensing Image Registration, TGRS’2018 [pdf]
Retinal Image Registration
  • [DB-ICP] The dual-bootstrap iterative closest point algorithm with application to retinal image registration, TMI’2003 [pdf]
  • [GDB-ICP] Registration of Challenging Image Pairs: Initialization, Estimation, and Decision, TPAMI’2007 [pdf] [project]
  • [ED-DB-ICP] The edge-driven dual-bootstrap iterative closest point algorithm for registration of multimodal fluorescein angiogram sequence, TMI’2010 [pdf]
Palmprint Image Registration
  • Robust and efficient ridge-based palmprint matching, TPAMI’2012 [pdf]
  • Palmprint image registration using convolutional neural networks and Hough transform, arXiv’2019 [pdf]
Visual Homing Navigation
  • Visual Homing via Guided Locality Preserving Matching, ICRA’2018 [pdf]
HDR Imaging
  • Locally non-rigid registration for mobile HDR photography, CVPRW’2015 [pdf]
Misc
  • Hand Motion from 3D Point Trajectories and a Smooth Surface Model, ECCV’2004 [pdf] [project]
  • A robust hybrid method for nonrigid image registration, PR’2011 [pdf]
  • Aligning Images in the Wild, CVPR’2012 [pdf] [code]
  • Robust feature set matching for partial face recognition, CVPR’2013 [pdf]
  • Multi-modal and Multi-spectral Registration for Natural Images, ECCV’2014 [pdf] [project]
  • Articulated and Generalized Gaussian Kernel Correlation for Human Pose Estimation, TIP’2016 [pdf]
  • Infrared and visible image fusion via gradient transfer and total variation minimization, Information Fusion’2016 [pdf] [code]

DATABASES

General databases
Other databases

TOOLS