"""
RKPM shape function computation module.
This module computes Reproducing Kernel Particle Method (RKPM) shape functions
and their derivatives. The RKPM shape function for node I at point x is:
Ψ_I(x) = H^T(0) M^{-1}(x) H(x - x_I) φ(z)
where:
- H(x) is the polynomial basis vector [1, x/h, y/h, z/h]^T
- M(x) is the moment matrix (ensures reproduction of polynomials)
- φ(z) is the cubic B-spline kernel function
- z = ||x - x_I|| / a_I is the normalized distance
The module provides both original loop-based and optimized vectorized
implementations for performance.
"""
from common import np
# Try to import the vectorized versions if available
try:
from shape_function_vectorized import (
compute_phi_M_standard_sparse,
compute_phi_M_standard_vectorized,
)
VECTORIZED_PHI_AVAILABLE = True
SPARSE_PHI_AVAILABLE = True
except ImportError:
VECTORIZED_PHI_AVAILABLE = False
SPARSE_PHI_AVAILABLE = False
try:
from shape_grad_func_vectorized import shape_grad_shape_func_vectorized
VECTORIZED_GRAD_AVAILABLE = True
except ImportError:
VECTORIZED_GRAD_AVAILABLE = False
# Keep old name for compatibility
VECTORIZED_AVAILABLE = VECTORIZED_PHI_AVAILABLE
[docs]
def compute_phi_M_with_interface_method(
x_G,
Gauss_grain_id,
x_nodes,
nodes_grain_id,
a,
M,
M_P_x,
M_P_y,
num_interface_segments,
interface_nodes,
BxByCxCy,
M_P_z=None,
):
"""Compute phi_M using interface method (IM_RKPM=True and single_grain=False)."""
print(f"Compute phi_M using interface method", flush=True)
if M_P_z is None:
# Initialize with zeros or correct shape
M_P_z = np.zeros_like(M) # Adjust shape accordingly
phi_nonzero_index_row = []
phi_nonzero_index_column = []
phi_nonzerovalue_data = []
phi_P_x_nonzerovalue_data = []
phi_P_y_nonzerovalue_data = []
phi_P_z_nonzerovalue_data = []
z = []
z_P_x = []
z_P_y = []
z_P_z = []
phipz = []
print("get in", flush=True)
for i in range(np.shape(x_G)[0]):
"""
check the distance between point and segments, exact distance
"""
dx_distance = np.zeros(num_interface_segments)
# if x_nodes[j,:] not in interface_nodes:
# find the minimum distance of gauss point to interface
# if gauss point is A, boundary segment is BC, if (BA dot BC) times (CA dot CB) is negative, it means the vertical line from A to segment BC intersect BC on its extension,
# in this case, the distance from A to BC is min(|AB|, |AC|)
# if (BA dot BC) times (CA dot CB) is positive, there is a point D with BC that AD is normal to BC. the distance from A to BC is |BA vector - [vector BA dot (BC vector divided by |BC|) times (BC vector divided by |BC|) ]|
BA = x_G[i, :] - BxByCxCy[:, :2]
BC = BxByCxCy[:, 2:4] - BxByCxCy[:, :2]
CB = -BxByCxCy[:, 2:4] + BxByCxCy[:, :2]
CA = x_G[i, :] - BxByCxCy[:, 2:4]
BA_dot_BC = BA[:, 0] * BC[:, 0] + BA[:, 1] * BC[:, 1]
CA_dot_CB = CA[:, 0] * CB[:, 0] + CA[:, 1] * CB[:, 1]
sign_extension = BA_dot_BC * CA_dot_CB
positive_index = np.where(sign_extension > 0)[0]
negative_index = np.where(sign_extension < 0)[0]
zero_index = np.where(sign_extension == 0)[0]
BA_dot_unit_BC = BA_dot_BC / (((BC[:, 0]) ** 2 + (BC[:, 1]) ** 2) ** 0.5)
BA_dot_unit_BC_times_unit_BC = (
BC
/ (((BC[:, 0]) ** 2 + (BC[:, 1]) ** 2) ** 0.5)[:, None]
* BA_dot_unit_BC[:, None]
)
dx_distance[positive_index] = (
(BA[positive_index, 0] - BA_dot_unit_BC_times_unit_BC[positive_index, 0])
** 2
+ (BA[positive_index, 1] - BA_dot_unit_BC_times_unit_BC[positive_index, 1])
** 2
) ** 0.5
dx_distance[negative_index] = np.minimum(
((CA[negative_index, 0]) ** 2 + (CA[negative_index, 1]) ** 2) ** 0.5,
((BA[negative_index, 0]) ** 2 + (BA[negative_index, 1]) ** 2) ** 0.5,
)
if np.shape(zero_index)[0] != 0:
dx_distance[zero_index] = np.minimum(
((CA[zero_index, 0]) ** 2 + (CA[zero_index, 1]) ** 2) ** 0.5,
((BA[zero_index, 0]) ** 2 + (BA[zero_index, 1]) ** 2) ** 0.5,
)
min_distance = np.min(dx_distance)
min_index = np.argmin(dx_distance)
if (
min_index in positive_index
): # if the smallest distance is between AD, D in between BC
x_coor_min_point_segment = (
BA_dot_unit_BC_times_unit_BC[min_index, 0] + BxByCxCy[min_index, 0]
)
y_coor_min_point_segment = (
BA_dot_unit_BC_times_unit_BC[min_index, 1] + BxByCxCy[min_index, 1]
)
if (min_index in negative_index) or (
min_index in zero_index
): # if the smallest distance is AB or AC
if ((CA[min_index, 0]) ** 2 + (CA[min_index, 1]) ** 2) ** 0.5 < (
(BA[min_index, 0]) ** 2 + (BA[min_index, 1]) ** 2
) ** 0.5:
x_coor_min_point_segment = BxByCxCy[min_index, 2]
y_coor_min_point_segment = BxByCxCy[min_index, 3]
else:
x_coor_min_point_segment = BxByCxCy[min_index, 0]
y_coor_min_point_segment = BxByCxCy[min_index, 1]
d_distance_dx = (x_G[i, 0] - x_coor_min_point_segment) / min_distance
d_distance_dy = (x_G[i, 1] - y_coor_min_point_segment) / min_distance
heaviside_scaling_factor = 4.0e-7
heaviside = np.tanh((min_distance + 1.0e-15) / heaviside_scaling_factor)
heaviside_P_x = (
d_distance_dx
/ heaviside_scaling_factor
* (1.0 / np.cosh((min_distance + 1.0e-15) / heaviside_scaling_factor)) ** 2
) # (1-(np.tanh((min_distance+1.0e-15)/heaviside_scaling_factor))**2)
heaviside_P_y = (
d_distance_dy
/ heaviside_scaling_factor
* (1.0 / np.cosh((min_distance + 1.0e-15) / heaviside_scaling_factor)) ** 2
) # (1-(np.tanh((min_distance+1.0e-15)/heaviside_scaling_factor))**2)
for j in range(np.shape(x_nodes)[0]):
z_ij = (
((x_G[i, 0] - x_nodes[j, 0]) ** 2 + (x_G[i, 1] - x_nodes[j, 1]) ** 2) ** 0.5
) / a[j]
z_ij_P_x = (x_G[i, 0] - x_nodes[j, 0]) / (
a[j] * z_ij * a[j] + 2.220446049250313e-16
) # partial z partial x, add the small number to force the term with machine accuracy
z_ij_P_y = (x_G[i, 1] - x_nodes[j, 1]) / (
a[j] * z_ij * a[j] + 2.220446049250313e-16
) # partial z partial y
x_I = x_nodes[j]
H_scaling_factor = 1.0e-6
H_T = np.array(
[
1,
(x_G[i][0] - x_I[0]) / H_scaling_factor,
(x_G[i][1] - x_I[1]) / H_scaling_factor,
],
dtype=np.float64,
)
H = np.transpose(H_T)
HT_P_x = (
np.array([0, 1, 0], dtype=np.float64) / H_scaling_factor
) # partial H partial x
HT_P_y = (
np.array([0, 0, 1], dtype=np.float64) / H_scaling_factor
) # partial H partial y
H_P_x = np.transpose(HT_P_x)
H_P_y = np.transpose(HT_P_y)
if z_ij >= 0 and z_ij < 0.5:
phi_ij = 2.0 / 3 - 4 * z_ij**2 + 4 * z_ij**3
phi_P_z = -8.0 * z_ij + 12.0 * z_ij**2 # partial phi partial z
else:
if z_ij <= 1 and z_ij >= 0.5:
phi_ij = 4.0 / 3 - 4 * z_ij + 4 * z_ij**2 - 4.0 / 3 * z_ij**3
phi_P_z = -4 + 8 * z_ij - 4 * z_ij**2
if z_ij >= 0 and z_ij <= 1.0:
# print('yes')
# phi_nonzerovalue_data.append(phi_ij)
node_not_on_interface = "True"
for i_nodes in range(num_interface_segments * 2):
# print('yyy')
if (
abs(x_nodes[j, 0] - interface_nodes[i_nodes, 0]) < 1e-10
and abs(x_nodes[j, 1] - interface_nodes[i_nodes, 1]) < 1e-10
):
node_not_on_interface = "False"
if node_not_on_interface == "True":
if nodes_grain_id[j] == Gauss_grain_id[i]:
phi_nonzero_index_row.append(i)
phi_nonzero_index_column.append(j)
phi_nonzerovalue_data.append(phi_ij * heaviside)
phi_P_x_ij = phi_P_z * z_ij_P_x
phi_P_y_ij = phi_P_z * z_ij_P_y
phi_P_x_nonzerovalue_data.append(
phi_P_x_ij * heaviside + phi_ij * heaviside_P_x
) # partial phi partial x
phi_P_y_nonzerovalue_data.append(
phi_P_y_ij * heaviside + phi_ij * heaviside_P_y
) # partial phi partial y
z.append(z_ij)
z_P_x.append(z_ij_P_x)
z_P_y.append(z_ij_P_y)
phipz.append(phi_P_z)
# Note: For interface method with 2D (M shape is 3x3),
# phi_P_z_nonzerovalue_data is not populated
for ii in range(3):
for jj in range(3):
M[i][ii][jj] = (
M[i][ii][jj] + H[ii] * H_T[jj] * phi_ij * heaviside
)
M_P_x[i][ii][jj] = (
M_P_x[i][ii][jj]
+ H[ii]
* H_T[jj]
* (phi_P_x_ij * heaviside + phi_ij * heaviside_P_x)
+ H_P_x[ii] * H_T[jj] * phi_ij * heaviside
+ H[ii] * HT_P_x[jj] * phi_ij * heaviside
)
M_P_y[i][ii][jj] = (
M_P_y[i][ii][jj]
+ H[ii]
* H_T[jj]
* (phi_P_y_ij * heaviside + phi_ij * heaviside_P_y)
+ H_P_y[ii] * H_T[jj] * phi_ij * heaviside
+ H[ii] * HT_P_y[jj] * phi_ij * heaviside
)
return (
np.array(phi_nonzero_index_row),
np.array(phi_nonzero_index_column),
np.array(phi_nonzerovalue_data),
np.array(phi_P_x_nonzerovalue_data),
np.array(phi_P_y_nonzerovalue_data),
np.array(phi_P_z_nonzerovalue_data),
M,
M_P_x,
M_P_y,
M_P_z,
)
[docs]
def compute_z_and_H_2d(x_G_i, x_nodes_j, a_j, H_scaling_factor, eps):
"""Compute z values and H matrices for 2D case (M shape is 3x3)."""
x_I = x_nodes_j
z_ij = (
((x_G_i[0] - x_nodes_j[0]) ** 2 + (x_G_i[1] - x_nodes_j[1]) ** 2) ** 0.5
) / a_j
z_ij_P_x = (x_G_i[0] - x_nodes_j[0]) / (
a_j * z_ij * a_j + eps
) # partial z partial x
z_ij_P_y = (x_G_i[1] - x_nodes_j[1]) / (
a_j * z_ij * a_j + eps
) # partial z partial y
H_T = np.array(
[
1,
(x_G_i[0] - x_I[0]) / H_scaling_factor,
(x_G_i[1] - x_I[1]) / H_scaling_factor,
],
dtype=np.float64,
)
HT_P_x = (
np.array([0, 1, 0], dtype=np.float64) / H_scaling_factor
) # partial H partial x
HT_P_y = (
np.array([0, 0, 1], dtype=np.float64) / H_scaling_factor
) # partial H partial y
H = np.transpose(H_T)
H_P_x = np.transpose(HT_P_x)
H_P_y = np.transpose(HT_P_y)
return (
z_ij,
z_ij_P_x,
z_ij_P_y,
None,
H_T,
HT_P_x,
HT_P_y,
None,
H,
H_P_x,
H_P_y,
None,
)
[docs]
def compute_z_and_H_3d(x_G_i, x_nodes_j, a_j, H_scaling_factor, eps):
"""Compute z values and H matrices for 3D case (M shape is 4x4)."""
x_I = x_nodes_j
z_ij = (
(
(x_G_i[0] - x_nodes_j[0]) ** 2
+ (x_G_i[1] - x_nodes_j[1]) ** 2
+ (x_G_i[2] - x_nodes_j[2]) ** 2
)
** 0.5
) / a_j
z_ij_P_x = (x_G_i[0] - x_nodes_j[0]) / (
a_j * z_ij * a_j + eps
) # partial z partial x
z_ij_P_y = (x_G_i[1] - x_nodes_j[1]) / (
a_j * z_ij * a_j + eps
) # partial z partial y
z_ij_P_z = (x_G_i[2] - x_nodes_j[2]) / (
a_j * z_ij * a_j + eps
) # partial z partial z
H_T = np.array(
[
1,
(x_G_i[0] - x_I[0]) / H_scaling_factor,
(x_G_i[1] - x_I[1]) / H_scaling_factor,
(x_G_i[2] - x_I[2]) / H_scaling_factor,
],
dtype=np.float64,
)
HT_P_x = (
np.array([0, 1, 0, 0], dtype=np.float64) / H_scaling_factor
) # partial H partial x
HT_P_y = (
np.array([0, 0, 1, 0], dtype=np.float64) / H_scaling_factor
) # partial H partial y
HT_P_z = (
np.array([0, 0, 0, 1], dtype=np.float64) / H_scaling_factor
) # partial H partial z
H = np.transpose(H_T)
H_P_x = np.transpose(HT_P_x)
H_P_y = np.transpose(HT_P_y)
H_P_z = np.transpose(HT_P_z)
return (
z_ij,
z_ij_P_x,
z_ij_P_y,
z_ij_P_z,
H_T,
HT_P_x,
HT_P_y,
HT_P_z,
H,
H_P_x,
H_P_y,
H_P_z,
)
[docs]
def compute_phi_M_standard(
x_G,
Gauss_grain_id,
x_nodes,
nodes_grain_id,
a,
M,
M_P_x,
M_P_y,
num_interface_segments,
interface_nodes,
BxByCxCy,
M_P_z=None,
):
"""Compute phi_M using standard method (else case)."""
if M_P_z is None:
# Initialize with zeros or correct shape
M_P_z = np.zeros_like(M) # Adjust shape accordingly
phi_nonzero_index_row = []
phi_nonzero_index_column = []
phi_nonzerovalue_data = []
phi_P_x_nonzerovalue_data = []
phi_P_y_nonzerovalue_data = []
phi_P_z_nonzerovalue_data = []
z = []
z_P_x = []
z_P_y = []
z_P_z = []
phipz = []
H_scaling_factor = 1.0e-6
eps = 2.220446049250313e-16
for i in range(np.shape(x_G)[0]):
for j in range(np.shape(x_nodes)[0]):
# Call appropriate function based on dimension
if np.shape(M)[1] == 3:
# 2D case
(
z_ij,
z_ij_P_x,
z_ij_P_y,
z_ij_P_z,
H_T,
HT_P_x,
HT_P_y,
HT_P_z,
H,
H_P_x,
H_P_y,
H_P_z,
) = compute_z_and_H_2d(x_G[i], x_nodes[j], a[j], H_scaling_factor, eps)
elif np.shape(M)[1] == 4:
# 3D case
(
z_ij,
z_ij_P_x,
z_ij_P_y,
z_ij_P_z,
H_T,
HT_P_x,
HT_P_y,
HT_P_z,
H,
H_P_x,
H_P_y,
H_P_z,
) = compute_z_and_H_3d(x_G[i], x_nodes[j], a[j], H_scaling_factor, eps)
else:
raise ValueError(f"Unsupported M shape: {np.shape(M)[1]}")
if z_ij >= 0 and z_ij < 0.5:
phi_ij = 2.0 / 3 - 4 * z_ij**2 + 4 * z_ij**3
phi_P_z = -8.0 * z_ij + 12.0 * z_ij**2 # partial phi partial z
else:
if z_ij <= 1 and z_ij >= 0.5:
phi_ij = 4.0 / 3 - 4 * z_ij + 4 * z_ij**2 - 4.0 / 3 * z_ij**3
phi_P_z = -4 + 8 * z_ij - 4 * z_ij**2
if z_ij >= 0 and z_ij <= 1.0:
phi_nonzerovalue_data.append(phi_ij)
phi_nonzero_index_row.append(i)
phi_nonzero_index_column.append(j)
phi_P_x_ij = phi_P_z * z_ij_P_x
phi_P_y_ij = phi_P_z * z_ij_P_y
if np.shape(M)[1] == 4:
phi_P_z_ij = phi_P_z * z_ij_P_z
phi_P_z_nonzerovalue_data.append(phi_P_z_ij)
z_P_z.append(z_ij_P_z)
phi_P_x_nonzerovalue_data.append(phi_P_x_ij) # partial phi partial x
phi_P_y_nonzerovalue_data.append(phi_P_y_ij) # partial phi partial y
z.append(z_ij)
z_P_x.append(z_ij_P_x)
z_P_y.append(z_ij_P_y)
phipz.append(phi_P_z)
for ii in range(np.shape(M)[1]):
for jj in range(np.shape(M)[1]):
# if i==13:
# print(M[i])
M[i][ii][jj] = M[i][ii][jj] + H[ii] * H_T[jj] * phi_ij
M_P_x[i][ii][jj] = (
M_P_x[i][ii][jj]
+ H[ii] * H_T[jj] * phi_P_x_ij
+ H_P_x[ii] * H_T[jj] * phi_ij
+ H[ii] * HT_P_x[jj] * phi_ij
)
M_P_y[i][ii][jj] = (
M_P_y[i][ii][jj]
+ H[ii] * H_T[jj] * phi_P_y_ij
+ H_P_y[ii] * H_T[jj] * phi_ij
+ H[ii] * HT_P_y[jj] * phi_ij
)
if np.shape(M)[1] == 4:
M_P_z[i][ii][jj] = (
M_P_z[i][ii][jj]
+ H[ii] * H_T[jj] * phi_P_z_ij
+ H_P_z[ii] * H_T[jj] * phi_ij
+ H[ii] * HT_P_z[jj] * phi_ij
)
return (
np.array(phi_nonzero_index_row),
np.array(phi_nonzero_index_column),
np.array(phi_nonzerovalue_data),
np.array(phi_P_x_nonzerovalue_data),
np.array(phi_P_y_nonzerovalue_data),
np.array(phi_P_z_nonzerovalue_data),
M,
M_P_x,
M_P_y,
M_P_z,
)
[docs]
def compute_phi_M(
x_G,
Gauss_grain_id,
x_nodes,
nodes_grain_id,
a,
M,
M_P_x,
M_P_y,
num_interface_segments,
interface_nodes,
BxByCxCy,
IM_RKPM,
single_grain,
M_P_z=None,
use_vectorized=True,
dtype=np.float64,
):
"""
Compute RKPM kernel values φ and moment matrices M at all Gauss points.
This function evaluates the cubic B-spline kernel for all Gauss point-node
pairs within the support radius (z ≤ 1) and accumulates the moment matrices
needed for shape function computation.
The moment matrix at Gauss point x is:
M(x) = Σ_I H(x - x_I) H^T(x - x_I) φ(z_I)
where the sum is over all nodes I with z_I ≤ 1.
Parameters
----------
x_G : np.ndarray
Gauss point coordinates, shape (n_gauss, dim).
Gauss_grain_id : np.ndarray
Phase ID at each Gauss point, shape (n_gauss,).
x_nodes : np.ndarray
Node coordinates, shape (n_nodes, dim).
nodes_grain_id : np.ndarray
Phase ID at each node, shape (n_nodes,).
a : np.ndarray
Support radius for each node, shape (n_nodes,).
M : np.ndarray
Moment matrix array to accumulate, shape (n_gauss, basis, basis).
For 3D: basis=4 (linear), For 2D: basis=3.
M_P_x, M_P_y : np.ndarray
Partial derivatives of M w.r.t. x, y. Same shape as M.
num_interface_segments : int
Number of interface segments (for IM-RKPM method).
interface_nodes : np.ndarray
Coordinates of nodes on interfaces.
BxByCxCy : np.ndarray
Interface segment endpoint coordinates.
IM_RKPM : str
"True" for interface-modified RKPM, "False" otherwise.
single_grain : str
"True" for single grain, "False" for multi-phase.
M_P_z : np.ndarray, optional
Partial derivative of M w.r.t. z (3D only).
use_vectorized : bool, optional
If True, use optimized vectorized implementation. Default True.
dtype : numpy dtype, optional
Data type for computations. Default np.float64.
Returns
-------
phi_nonzero_index_row : np.ndarray
Gauss point indices for non-zero kernel values, shape (n_nnz,).
phi_nonzero_index_column : np.ndarray
Node indices for non-zero kernel values, shape (n_nnz,).
phi_nonzerovalue_data : np.ndarray
Kernel values φ(z) at non-zero pairs, shape (n_nnz,).
phi_P_x_nonzerovalue_data : np.ndarray
∂φ/∂x at non-zero pairs, shape (n_nnz,).
phi_P_y_nonzerovalue_data : np.ndarray
∂φ/∂y at non-zero pairs, shape (n_nnz,).
phi_P_z_nonzerovalue_data : np.ndarray
∂φ/∂z at non-zero pairs (3D), shape (n_nnz,).
M : np.ndarray
Updated moment matrix, shape (n_gauss, basis, basis).
M_P_x, M_P_y, M_P_z : np.ndarray
Updated moment matrix derivatives.
"""
if single_grain == "False" and IM_RKPM == "True":
# Use interface method
return compute_phi_M_with_interface_method(
x_G,
Gauss_grain_id,
x_nodes,
nodes_grain_id,
a,
M,
M_P_x,
M_P_y,
num_interface_segments,
interface_nodes,
BxByCxCy,
M_P_z,
)
else:
# Use standard method - with sparse optimization if available
if use_vectorized and SPARSE_PHI_AVAILABLE:
print(f"using SPARSE phi_m_standard with dtype={dtype}", flush=True)
return compute_phi_M_standard_sparse(
x_G,
Gauss_grain_id,
x_nodes,
nodes_grain_id,
a,
M,
M_P_x,
M_P_y,
num_interface_segments,
interface_nodes,
BxByCxCy,
M_P_z,
dtype,
)
else:
return compute_phi_M_standard(
x_G,
Gauss_grain_id,
x_nodes,
nodes_grain_id,
a,
M,
M_P_x,
M_P_y,
num_interface_segments,
interface_nodes,
BxByCxCy,
M_P_z,
)
[docs]
def shape_grad_shape_func(
x_G,
x_nodes,
num_non_zero_phi_a,
HT0,
M,
M_P_x,
M_P_y,
differential_method,
HT1,
HT2,
phi_nonzerovalue_data,
phi_P_x_nonzerovalue_data,
phi_P_y_nonzerovalue_data,
phi_nonzero_index_row,
phi_nonzero_index_column,
det_J_time_weight,
IM_RKPM,
M_P_z=None,
HT3=None,
phi_P_z_nonzerovalue_data=None,
use_vectorized=True,
dtype=np.float64,
):
"""
Compute RKPM shape functions and their gradients.
Evaluates the shape function Ψ_I(x) and its spatial derivatives for all
non-zero Gauss-node pairs. The shape function is:
Ψ_I(x) = H^T(0) M^{-1}(x) H(x - x_I) φ(z)
Gradients are computed using the direct differentiation method:
∂Ψ/∂x = H^T M^{-1} H ∂φ/∂x + H^T ∂M^{-1}/∂x H φ + H^T M^{-1} ∂H/∂x φ
Parameters
----------
x_G : np.ndarray
Gauss point coordinates, shape (n_gauss, dim).
x_nodes : np.ndarray
Node coordinates, shape (n_nodes, dim).
num_non_zero_phi_a : int
Number of non-zero kernel values.
HT0 : np.ndarray
Polynomial basis at origin [1, 0, 0, 0], shape (basis,).
M : np.ndarray
Moment matrices at Gauss points, shape (n_gauss, basis, basis).
M_P_x, M_P_y : np.ndarray
Moment matrix derivatives, same shape as M.
differential_method : str
"direct" for direct differentiation, "implicite" for implicit.
HT1, HT2 : np.ndarray
Basis vectors for implicit method derivatives.
phi_nonzerovalue_data : np.ndarray
Non-zero kernel values, shape (n_nnz,).
phi_P_*_nonzerovalue_data : np.ndarray
Kernel derivatives, shape (n_nnz,).
phi_nonzero_index_row, phi_nonzero_index_column : np.ndarray
Sparse indices for non-zero pairs, shape (n_nnz,).
det_J_time_weight : np.ndarray
Jacobian × Gauss weight at each Gauss point, shape (n_gauss,).
IM_RKPM : str
"True" for interface-modified RKPM.
M_P_z : np.ndarray, optional
Moment matrix z-derivative (3D).
HT3 : np.ndarray, optional
Basis vector for implicit z-derivative.
phi_P_z_nonzerovalue_data : np.ndarray, optional
Kernel z-derivative values.
use_vectorized : bool, optional
Use vectorized implementation. Default True.
dtype : numpy dtype, optional
Data type for computations. Default np.float64.
Returns
-------
shape_func_value : np.ndarray
Shape function values, shape (n_nnz,).
shape_func_times_det_J_time_weight_value : np.ndarray
Weighted shape functions, shape (n_nnz,).
grad_shape_func_x_value : np.ndarray
∂Ψ/∂x values, shape (n_nnz,).
grad_shape_func_y_value : np.ndarray
∂Ψ/∂y values, shape (n_nnz,).
grad_shape_func_z_value : np.ndarray
∂Ψ/∂z values (3D), shape (n_nnz,).
grad_shape_func_*_times_det_J_time_weight_value : np.ndarray
Weighted gradient values, shape (n_nnz,).
"""
# Use vectorized version if available and requested
if use_vectorized and VECTORIZED_GRAD_AVAILABLE:
print(f"using vectorized shape_grad_shape_func with dtype={dtype}", flush=True)
return shape_grad_shape_func_vectorized(
x_G,
x_nodes,
num_non_zero_phi_a,
HT0,
M,
M_P_x,
M_P_y,
differential_method,
HT1,
HT2,
phi_nonzerovalue_data,
phi_P_x_nonzerovalue_data,
phi_P_y_nonzerovalue_data,
phi_nonzero_index_row,
phi_nonzero_index_column,
det_J_time_weight,
IM_RKPM,
M_P_z,
HT3,
phi_P_z_nonzerovalue_data,
dtype,
)
# Otherwise use original implementation
shape_func_value = []
shape_func_times_det_J_time_weight_value = []
grad_shape_func_x_value = []
grad_shape_func_y_value = []
grad_shape_func_z_value = []
grad_shape_func_x_times_det_J_time_weight_value = []
grad_shape_func_y_times_det_J_time_weight_value = []
grad_shape_func_z_times_det_J_time_weight_value = []
for ii in range(num_non_zero_phi_a):
i = int(phi_nonzero_index_row[ii])
j = int(phi_nonzero_index_column[ii])
# compute the shape function and the gradient of shape function
x_I = x_nodes[j]
H_scaling_factor = 1.0e-6
if np.shape(M)[1] == 3:
H_T = np.array(
[
1,
(x_G[i][0] - x_I[0]) / H_scaling_factor,
(x_G[i][1] - x_I[1]) / H_scaling_factor,
],
dtype=np.float64,
)
HT_P_x = (
np.array([0, 1, 0], dtype=np.float64) / H_scaling_factor
) # partial H partial x
HT_P_y = (
np.array([0, 0, 1], dtype=np.float64) / H_scaling_factor
) # partial H partial y
if np.shape(M)[1] == 4:
H_T = np.array(
[
1,
(x_G[i][0] - x_I[0]) / H_scaling_factor,
(x_G[i][1] - x_I[1]) / H_scaling_factor,
(x_G[i][2] - x_I[2]) / H_scaling_factor,
],
dtype=np.float64,
)
HT_P_x = (
np.array([0, 1, 0, 0], dtype=np.float64) / H_scaling_factor
) # partial H partial x
HT_P_y = (
np.array([0, 0, 1, 0], dtype=np.float64) / H_scaling_factor
) # partial H partial y
HT_P_z = (
np.array([0, 0, 0, 1], dtype=np.float64) / H_scaling_factor
) # partial H partial y
H_P_z = np.transpose(HT_P_z)
H = np.transpose(H_T)
H_P_x = np.transpose(HT_P_x)
H_P_y = np.transpose(HT_P_y)
# print(i, x_G[i,:]) #13 [0.00000000e+00 2.02113249e-05 1.07886751e-05]
shape_func_ij = (
np.dot(
(
np.dot(
(HT0).astype(np.float64),
(np.linalg.inv(M[i])).astype(np.float64),
)
).astype(np.float64),
H.astype(np.float64),
)
* phi_nonzerovalue_data[ii]
)
if differential_method == "implicite" and IM_RKPM == "False":
grad_shape_func_x_ij = (
np.dot(
(
np.dot(
(HT1).astype(np.float64),
(np.linalg.inv(M[i])).astype(np.float64),
)
).astype(np.float64),
H.astype(np.float64),
)
* phi_nonzerovalue_data[ii]
)
grad_shape_func_y_ij = (
np.dot(
(
np.dot(
(HT2).astype(np.float64),
(np.linalg.inv(M[i])).astype(np.float64),
)
).astype(np.float64),
H.astype(np.float64),
)
* phi_nonzerovalue_data[ii]
)
if np.shape(M) == 4:
grad_shape_func_z_ij = (
np.dot(
(
np.dot(
(HT3).astype(np.float64),
(np.linalg.inv(M[i])).astype(np.float64),
)
).astype(np.float64),
H.astype(np.float64),
)
* phi_nonzerovalue_data[ii]
)
else:
if differential_method == "direct" or IM_RKPM == "True":
M_inv_P_x_i = -np.dot(
np.dot(
np.linalg.inv(M[i].astype(np.float64)).astype(np.float64),
M_P_x[i].astype(np.float64),
),
np.linalg.inv(M[i].astype(np.float64)).astype(np.float64),
)
M_inv_P_y_i = -np.dot(
np.dot(
np.linalg.inv(M[i].astype(np.float64)).astype(np.float64),
M_P_y[i].astype(np.float64),
),
np.linalg.inv(M[i].astype(np.float64)).astype(np.float64),
)
grad_shape_func_x_ij = (
np.dot(
(
np.dot(
(HT0).astype(np.float64),
(np.linalg.inv(M[i])).astype(np.float64),
)
).astype(np.float64),
H.astype(np.float64),
)
* phi_P_x_nonzerovalue_data[ii]
+ np.dot(
(
np.dot(
(HT0).astype(np.float64),
(M_inv_P_x_i).astype(np.float64),
)
).astype(np.float64),
H.astype(np.float64),
)
* phi_nonzerovalue_data[ii]
+ np.dot(
(
np.dot(
(HT0).astype(np.float64),
(np.linalg.inv(M[i])).astype(np.float64),
)
).astype(np.float64),
H_P_x.astype(np.float64),
)
* phi_nonzerovalue_data[ii]
)
grad_shape_func_y_ij = (
np.dot(
(
np.dot(
(HT0).astype(np.float64),
(np.linalg.inv(M[i])).astype(np.float64),
)
).astype(np.float64),
H.astype(np.float64),
)
* phi_P_y_nonzerovalue_data[ii]
+ np.dot(
(
np.dot(
(HT0).astype(np.float64),
(M_inv_P_y_i).astype(np.float64),
)
).astype(np.float64),
H.astype(np.float64),
)
* phi_nonzerovalue_data[ii]
+ np.dot(
(
np.dot(
(HT0).astype(np.float64),
(np.linalg.inv(M[i])).astype(np.float64),
)
).astype(np.float64),
H_P_y.astype(np.float64),
)
* phi_nonzerovalue_data[ii]
)
if np.shape(M)[1] == 4:
M_inv_P_z_i = -np.dot(
np.dot(
np.linalg.inv(M[i].astype(np.float64)).astype(np.float64),
M_P_z[i].astype(np.float64),
),
np.linalg.inv(M[i].astype(np.float64)).astype(np.float64),
)
grad_shape_func_z_ij = (
np.dot(
(
np.dot(
(HT0).astype(np.float64),
(np.linalg.inv(M[i])).astype(np.float64),
)
).astype(np.float64),
H.astype(np.float64),
)
* phi_P_z_nonzerovalue_data[ii]
+ np.dot(
(
np.dot(
(HT0).astype(np.float64),
(M_inv_P_z_i).astype(np.float64),
)
).astype(np.float64),
H.astype(np.float64),
)
* phi_nonzerovalue_data[ii]
+ np.dot(
(
np.dot(
(HT0).astype(np.float64),
(np.linalg.inv(M[i])).astype(np.float64),
)
).astype(np.float64),
H_P_z.astype(np.float64),
)
* phi_nonzerovalue_data[ii]
)
else:
print("differential method is not defined", flush=True)
shape_func_value.append(shape_func_ij)
grad_shape_func_x_value.append(grad_shape_func_x_ij)
grad_shape_func_y_value.append(grad_shape_func_y_ij)
shape_func_times_det_J_time_weight_value.append(
shape_func_ij * det_J_time_weight[i]
)
grad_shape_func_x_times_det_J_time_weight_value.append(
grad_shape_func_x_ij * det_J_time_weight[i]
)
grad_shape_func_y_times_det_J_time_weight_value.append(
grad_shape_func_y_ij * det_J_time_weight[i]
)
if np.shape(M)[1] == 4:
grad_shape_func_z_value.append(grad_shape_func_z_ij)
grad_shape_func_z_times_det_J_time_weight_value.append(
grad_shape_func_z_ij * det_J_time_weight[i]
)
return (
np.array(shape_func_value),
np.array(shape_func_times_det_J_time_weight_value),
np.array(grad_shape_func_x_value),
np.array(grad_shape_func_y_value),
np.array(grad_shape_func_z_value),
np.array(grad_shape_func_x_times_det_J_time_weight_value),
np.array(grad_shape_func_y_times_det_J_time_weight_value),
np.array(grad_shape_func_z_times_det_J_time_weight_value),
)
[docs]
def shape_func_n_nodes_by_n_nodes(
x_G,
x_nodes,
num_non_zero_phi_a,
HT0,
M,
phi_nonzerovalue_data,
phi_nonzero_index_row,
phi_nonzero_index_column,
use_vectorized=True,
):
"""Compute shape functions for non-zero phi values.
Args:
x_G: Gauss points
x_nodes: Node points
num_non_zero_phi_a: Number of non-zero phi values
HT0: Initial H transpose matrix
M: Moment matrices
phi_nonzerovalue_data: Non-zero phi values
phi_nonzero_index_row: Row indices for non-zero phi
phi_nonzero_index_column: Column indices for non-zero phi
use_vectorized: Whether to use vectorized implementation if available
Returns:
List of shape function values
"""
# Use vectorized version if available and requested
if use_vectorized:
print(f"Using vectorized shape function", flush=True)
return shape_func_n_nodes_by_n_nodes_vectorized(
x_G,
x_nodes,
num_non_zero_phi_a,
HT0,
M,
phi_nonzerovalue_data,
phi_nonzero_index_row,
phi_nonzero_index_column,
)
# Fall back to original implementation
shape_func_value = []
for ii in range(num_non_zero_phi_a):
i = int(phi_nonzero_index_row[ii])
j = int(phi_nonzero_index_column[ii])
# compute the shape function and the gradient of shape function
x_I = x_nodes[j]
H_scaling_factor = 1.0e-6
if np.shape(M)[1] == 3:
H_T = np.array(
[
1,
(x_G[i][0] - x_I[0]) / H_scaling_factor,
(x_G[i][1] - x_I[1]) / H_scaling_factor,
],
dtype=np.float64,
)
if np.shape(M)[1] == 4:
H_T = np.array(
[
1,
(x_G[i][0] - x_I[0]) / H_scaling_factor,
(x_G[i][1] - x_I[1]) / H_scaling_factor,
(x_G[i][2] - x_I[2]) / H_scaling_factor,
],
dtype=np.float64,
)
H = np.transpose(H_T)
shape_func_ij = (
np.dot(
(
np.dot(
(HT0).astype(np.float64),
(np.linalg.inv(M[i])).astype(np.float64),
)
).astype(np.float64),
H.astype(np.float64),
)
* phi_nonzerovalue_data[ii]
)
shape_func_value.append(shape_func_ij)
return shape_func_value
[docs]
def shape_func_n_nodes_by_n_nodes_vectorized(
x_G,
x_nodes,
num_non_zero_phi_a,
HT0,
M,
phi_nonzerovalue_data,
phi_nonzero_index_row,
phi_nonzero_index_column,
):
"""Vectorized computation of shape functions for non-zero phi values.
Args:
x_G: Gauss points array (n_gauss, dim)
x_nodes: Node points array (n_nodes, dim)
num_non_zero_phi_a: Number of non-zero phi values
HT0: Initial H transpose matrix
M: Moment matrices (n_gauss, basis_size, basis_size)
phi_nonzerovalue_data: Non-zero phi values
phi_nonzero_index_row: Row indices for non-zero phi
phi_nonzero_index_column: Column indices for non-zero phi
Returns:
numpy.ndarray: Array of shape function values of shape (num_non_zero_phi_a,)
"""
# Convert indices to integer arrays
row_indices = phi_nonzero_index_row[:num_non_zero_phi_a].astype(int)
col_indices = phi_nonzero_index_column[:num_non_zero_phi_a].astype(int)
# Get relevant points
x_G_sparse = x_G[row_indices] # (num_non_zero, dim)
x_nodes_sparse = x_nodes[col_indices] # (num_non_zero, dim)
# Compute differences
diff = x_G_sparse - x_nodes_sparse # (num_non_zero, dim)
# Determine dimension and basis size
dim = x_G.shape[1] if len(x_G.shape) > 1 else 1
basis_size = M.shape[1]
H_scaling_factor = 1.0e-6
# Build H_T matrices based on dimension
if basis_size == 3: # 2D case
# H_T shape: (num_non_zero, 3)
H_T = np.zeros((num_non_zero_phi_a, 3), dtype=np.float64)
H_T[:, 0] = 1.0
H_T[:, 1] = diff[:, 0] / H_scaling_factor
H_T[:, 2] = diff[:, 1] / H_scaling_factor
elif basis_size == 4: # 3D case
# H_T shape: (num_non_zero, 4)
H_T = np.zeros((num_non_zero_phi_a, 4), dtype=np.float64)
H_T[:, 0] = 1.0
H_T[:, 1] = diff[:, 0] / H_scaling_factor
H_T[:, 2] = diff[:, 1] / H_scaling_factor
H_T[:, 3] = diff[:, 2] / H_scaling_factor
else:
raise ValueError(f"Unsupported basis size: {basis_size}")
# In the original code, H = np.transpose(H_T) where H_T is a 1D array
# This doesn't change the shape for 1D arrays in numpy
# We need H to be shape (num_non_zero, basis_size) for vectorized dot product
H = H_T # Keep as (num_non_zero, basis_size)
# Get relevant M matrices and compute inverses
M_sparse = M[row_indices] # (num_non_zero, basis_size, basis_size)
# Compute M inverses for all relevant matrices at once
M_inv = np.linalg.inv(
M_sparse.astype(np.float64)
) # (num_non_zero, basis_size, basis_size)
# Compute shape function values using einsum for efficiency
# HT0 is (basis_size,) or (1, basis_size), M_inv is (num_non_zero, basis_size, basis_size)
# H is (num_non_zero, basis_size)
# Ensure HT0 is 2D with shape (1, basis_size)
if HT0.ndim == 1:
HT0_2d = HT0.reshape(1, -1) # Convert from (basis_size,) to (1, basis_size)
else:
HT0_2d = HT0 # Already 2D
# First compute HT0 @ M_inv for each matrix
# We need to broadcast HT0_2d to work with multiple M_inv matrices
# HT0_2d is (1, basis_size), M_inv is (num_non_zero, basis_size, basis_size)
# We want result shape: (num_non_zero, 1, basis_size)
HT0_M_inv = np.matmul(HT0_2d, M_inv)
# Then compute (HT0 @ M_inv) @ H for each pair
# We need to do element-wise dot product of HT0_M_inv and H
# HT0_M_inv is (num_non_zero, 1, basis_size), H is (num_non_zero, basis_size)
# Use einsum for clarity: 'nib,nb->n'
shape_func_base = np.einsum("nib,nb->n", HT0_M_inv, H)
# Get relevant phi values
phi_values = phi_nonzerovalue_data[:num_non_zero_phi_a]
# Compute final shape function values
shape_func_value = shape_func_base * phi_values
# Return the numpy array directly - no conversion needed
return shape_func_value