"""Vectorized version of compute_phi_M_standard and related functions.
This module provides vectorized implementations that operate on all Gauss points
and nodes simultaneously, eliminating the need for explicit loops.
"""
from common import np
# [docs] -- stray Sphinx "view source" artifact from the scraped docs page; commented out so the module stays importable
def compute_z_and_H_2d_vectorized(
    x_G, x_nodes, a, H_scaling_factor, eps, dtype=np.float64
):
    """Compute z values and H matrices for 2D case (vectorized).

    Args:
        x_G: Array of Gauss points, shape (n_gauss, 2)
        x_nodes: Array of node points, shape (n_nodes, 2)
        a: Array of per-node scaling factors (support radii), shape (n_nodes,)
        H_scaling_factor: Scaling factor applied to the linear terms of H
        eps: Small value to avoid division by zero at coincident points
        dtype: Data type for the H arrays (default: np.float64)

    Returns:
        Tuple (z, z_P_x, z_P_y, H_T, HT_P_x, HT_P_y, H, H_P_x, H_P_y):
        normalized distances z of shape (n_gauss, n_nodes), their partial
        derivatives, and the linear basis H_T = [1, dx/s, dy/s] of shape
        (n_gauss, n_nodes, 3) with its derivatives.  H, H_P_x, H_P_y alias
        H_T, HT_P_x, HT_P_y: a rank-1 basis stored along the last axis is
        its own transpose -- the row/column orientation is chosen by the
        caller's einsum/outer product.
    """
    # Broadcast Gauss points against nodes: diff has shape (n_gauss, n_nodes, 2).
    diff = x_G[:, np.newaxis, :] - x_nodes[np.newaxis, :, :]
    # Euclidean distances: (n_gauss, n_nodes)
    distances = np.sqrt(np.sum(diff**2, axis=2))
    # Normalize by the per-node support radius: (n_gauss, n_nodes)
    z = distances / a[np.newaxis, :]
    # d z / d x = diff_x / (a * |diff|); clamp |diff| by eps so coincident
    # points yield 0 instead of 0/0.
    denom = a[np.newaxis, :] * np.maximum(distances, eps)
    del distances  # free memory (consistent with the 3D version)
    z_P_x = diff[:, :, 0] / denom  # (n_gauss, n_nodes)
    z_P_y = diff[:, :, 1] / denom  # (n_gauss, n_nodes)
    del denom
    # Linear basis H_T = [1, dx/s, dy/s], shape (n_gauss, n_nodes, 3).
    H_T = np.zeros((x_G.shape[0], x_nodes.shape[0], 3), dtype=dtype)
    H_T[:, :, 0] = dtype(1.0)
    H_T[:, :, 1] = diff[:, :, 0] / H_scaling_factor
    H_T[:, :, 2] = diff[:, :, 1] / H_scaling_factor
    del diff  # free memory (consistent with the 3D version)
    # Derivatives of H_T w.r.t. x and y are constant.
    HT_P_x = np.zeros((x_G.shape[0], x_nodes.shape[0], 3), dtype=dtype)
    HT_P_x[:, :, 1] = dtype(1.0) / H_scaling_factor
    HT_P_y = np.zeros((x_G.shape[0], x_nodes.shape[0], 3), dtype=dtype)
    HT_P_y[:, :, 2] = dtype(1.0) / H_scaling_factor
    # The original np.transpose(..., (0, 1, 2)) was an identity permutation:
    # with the basis stored as a vector along the last axis, H and H_T hold
    # the same numbers, so plain aliases suffice.
    H = H_T
    H_P_x = HT_P_x
    H_P_y = HT_P_y
    return z, z_P_x, z_P_y, H_T, HT_P_x, HT_P_y, H, H_P_x, H_P_y
# [docs] -- stray Sphinx "view source" artifact from the scraped docs page; commented out so the module stays importable
def compute_z_and_H_3d_vectorized(
    x_G, x_nodes, a, H_scaling_factor, eps, dtype=np.float64
):
    """Compute z values and H matrices for 3D case (vectorized).

    Args:
        x_G: Array of Gauss points, shape (n_gauss, 3)
        x_nodes: Array of node points, shape (n_nodes, 3)
        a: Array of per-node scaling factors (support radii), shape (n_nodes,)
        H_scaling_factor: Scaling factor applied to the linear terms of H
        eps: Small value to avoid division by zero at coincident points
        dtype: Data type for the H arrays (default: np.float64)

    Returns:
        Tuple (z, z_P_x, z_P_y, z_P_z, H_T, HT_P_x, HT_P_y, HT_P_z,
        H, H_P_x, H_P_y, H_P_z): normalized distances z of shape
        (n_gauss, n_nodes), their partial derivatives, and the linear basis
        H_T = [1, dx/s, dy/s, dz/s] of shape (n_gauss, n_nodes, 4) with its
        derivatives.  H, H_P_x, H_P_y, H_P_z alias H_T, HT_P_x, HT_P_y,
        HT_P_z: a rank-1 basis stored along the last axis is its own
        transpose -- the row/column orientation is chosen by the caller's
        einsum/outer product.
    """
    # Broadcast Gauss points against nodes: diff has shape (n_gauss, n_nodes, 3).
    diff = x_G[:, np.newaxis, :] - x_nodes[np.newaxis, :, :]
    # Euclidean distances: (n_gauss, n_nodes)
    distances = np.sqrt(np.sum(diff**2, axis=2))
    # Normalize by the per-node support radius: (n_gauss, n_nodes)
    z = distances / a[np.newaxis, :]
    # d z / d x = diff_x / (a * |diff|); clamp |diff| by eps so coincident
    # points yield 0 instead of 0/0.
    denom = a[np.newaxis, :] * np.maximum(distances, eps)
    del distances  # free memory
    z_P_x = diff[:, :, 0] / denom  # (n_gauss, n_nodes)
    z_P_y = diff[:, :, 1] / denom  # (n_gauss, n_nodes)
    z_P_z = diff[:, :, 2] / denom  # (n_gauss, n_nodes)
    del denom
    # Linear basis H_T = [1, dx/s, dy/s, dz/s], shape (n_gauss, n_nodes, 4).
    H_T = np.zeros((x_G.shape[0], x_nodes.shape[0], 4), dtype=dtype)
    H_T[:, :, 0] = dtype(1.0)
    H_T[:, :, 1] = diff[:, :, 0] / H_scaling_factor
    H_T[:, :, 2] = diff[:, :, 1] / H_scaling_factor
    H_T[:, :, 3] = diff[:, :, 2] / H_scaling_factor
    del diff  # free memory
    # Derivatives of H_T w.r.t. x, y and z are constant.
    HT_P_x = np.zeros((x_G.shape[0], x_nodes.shape[0], 4), dtype=dtype)
    HT_P_x[:, :, 1] = dtype(1.0) / H_scaling_factor
    HT_P_y = np.zeros((x_G.shape[0], x_nodes.shape[0], 4), dtype=dtype)
    HT_P_y[:, :, 2] = dtype(1.0) / H_scaling_factor
    HT_P_z = np.zeros((x_G.shape[0], x_nodes.shape[0], 4), dtype=dtype)
    HT_P_z[:, :, 3] = dtype(1.0) / H_scaling_factor
    # The original np.transpose(..., (0, 1, 2)) was an identity permutation:
    # with the basis stored as a vector along the last axis, H and H_T hold
    # the same numbers, so plain aliases suffice.
    H = H_T
    H_P_x = HT_P_x
    H_P_y = HT_P_y
    H_P_z = HT_P_z
    return z, z_P_x, z_P_y, z_P_z, H_T, HT_P_x, HT_P_y, HT_P_z, H, H_P_x, H_P_y, H_P_z
# [docs] -- stray Sphinx "view source" artifact from the scraped docs page; commented out so the module stays importable
def compute_phi_kernel(z):
    """Evaluate the cubic-spline kernel and its derivative at distances z.

    Piecewise definition:
        0 <= z < 0.5 : phi = 2/3 - 4*z^2 + 4*z^3
        0.5 <= z <= 1: phi = 4/3 - 4*z + 4*z^2 - 4/3*z^3
        z > 1        : phi = 0

    Args:
        z: Array of normalized distances

    Returns:
        phi: Array of kernel values
        phi_P_z: Array of kernel derivatives w.r.t. z
    """
    phi = np.zeros_like(z)
    phi_P_z = np.zeros_like(z)
    # Partition the support [0, 1] into the two spline pieces.
    inner = (z >= 0) & (z < 0.5)
    outer = (z >= 0.5) & (z <= 1.0)
    # Inner piece: 0 <= z < 0.5
    zi = z[inner]
    phi[inner] = 2.0 / 3 - 4 * zi**2 + 4 * zi**3
    phi_P_z[inner] = -8.0 * zi + 12.0 * zi**2
    # Outer piece: 0.5 <= z <= 1
    zo = z[outer]
    phi[outer] = 4.0 / 3 - 4 * zo + 4 * zo**2 - 4.0 / 3 * zo**3
    phi_P_z[outer] = -4 + 8 * zo - 4 * zo**2
    # Everything beyond z = 1 stays at the zero initialization.
    return phi, phi_P_z
# [docs] -- stray Sphinx "view source" artifact from the scraped docs page; commented out so the module stays importable
def compute_phi_M_standard_vectorized(
    x_G,
    Gauss_grain_id,
    x_nodes,
    nodes_grain_id,
    a,
    M,
    M_P_x,
    M_P_y,
    num_interface_segments,
    interface_nodes,
    BxByCxCy,
    M_P_z=None,
    dtype=np.float64,
):
    """Vectorized version of compute_phi_M_standard.

    Computes the cubic-spline kernel phi for every Gauss-point/node pair at
    once via NumPy broadcasting, then accumulates the moment matrices M and
    their spatial derivatives.  Only the geometry/kernel stage is fully
    vectorized; the moment accumulation still loops over Gauss points and
    uses einsum per point.

    Args:
        x_G: Gauss-point coordinates, shape (n_gauss, dim).
        Gauss_grain_id: Unused in this function; presumably kept for
            signature compatibility with compute_phi_M_standard -- TODO
            confirm against callers.
        x_nodes: Node coordinates, shape (n_nodes, dim).
        nodes_grain_id: Unused in this function (see Gauss_grain_id).
        a: Per-node support radii, shape (n_nodes,).
        M: Moment matrices, shape (n_gauss, k, k); k == 4 selects the 3D
            path.  Contributions are ADDED in place to the array passed in.
        M_P_x: x-derivative of M, same shape; updated in place.
        M_P_y: y-derivative of M, same shape; updated in place.
        num_interface_segments: Unused in this function -- TODO confirm.
        interface_nodes: Unused in this function -- TODO confirm.
        BxByCxCy: Unused in this function -- TODO confirm.
        M_P_z: z-derivative of M (3D only); allocated as zeros when None.
        dtype: Data type for computations (default: np.float64).

    Returns:
        Tuple (phi_nonzero_index_row, phi_nonzero_index_column,
        phi_nonzerovalue_data, phi_P_x_nonzerovalue_data,
        phi_P_y_nonzerovalue_data, phi_P_z_nonzerovalue_data, M, M_P_x,
        M_P_y, M_P_z).  The first six entries are COO-style data for a
        sparse shape-function matrix; the phi_P_z data is an empty array
        in 2D.
    """
    print("begin: compute_phi_M_standard_vectorized", flush=True)
    if M_P_z is None:
        M_P_z = np.zeros_like(M)
    H_scaling_factor = dtype(1.0e-6)
    # Machine epsilon of float64; guards the 0/0 at coincident points.
    eps = dtype(2.220446049250313e-16)
    # Determine dimension from M shape: 4x4 moment matrices imply 3D.
    is_3d = np.shape(M)[1] == 4
    # Compute z and H matrices for all point pairs
    if is_3d:
        (
            z,
            z_P_x,
            z_P_y,
            z_P_z,
            H_T,
            HT_P_x,
            HT_P_y,
            HT_P_z,
            H,
            H_P_x,
            H_P_y,
            H_P_z,
        ) = compute_z_and_H_3d_vectorized(x_G, x_nodes, a, H_scaling_factor, eps, dtype)
    else:
        (z, z_P_x, z_P_y, H_T, HT_P_x, HT_P_y, H, H_P_x, H_P_y) = (
            compute_z_and_H_2d_vectorized(x_G, x_nodes, a, H_scaling_factor, eps, dtype)
        )
        z_P_z = None
        H_P_z = None
        HT_P_z = None
    # Compute phi kernel for all distances
    phi, phi_P_z = compute_phi_kernel(z)
    # Find valid pairs: nodes whose kernel support covers the Gauss point
    # (z <= 1; z is a distance so z >= 0 always holds).
    valid_mask = (z >= 0) & (z <= 1.0)
    # Extract indices of valid pairs
    valid_indices = np.where(valid_mask)
    gauss_indices = valid_indices[0]
    node_indices = valid_indices[1]
    # Store nonzero values for sparse matrix construction (as numpy arrays)
    phi_nonzero_index_row = gauss_indices
    phi_nonzero_index_column = node_indices
    phi_nonzerovalue_data = phi[valid_mask]
    # Chain rule: d phi / d x = (d phi / d z) * (d z / d x), computed on
    # the full dense (n_gauss, n_nodes) arrays.
    phi_P_x = phi_P_z * z_P_x
    phi_P_y = phi_P_z * z_P_y
    phi_P_x_nonzerovalue_data = phi_P_x[valid_mask]
    phi_P_y_nonzerovalue_data = phi_P_y[valid_mask]
    if is_3d:
        phi_P_z_val = phi_P_z * z_P_z
        phi_P_z_nonzerovalue_data = phi_P_z_val[valid_mask]
    else:
        phi_P_z_nonzerovalue_data = np.array([])
    # Update M matrices using vectorized operations
    # For each Gauss point, accumulate contributions from all valid nodes
    n_gauss = x_G.shape[0]
    print("Before: Find nodes that contribute to a Gauss pt", flush=True)
    for i in range(n_gauss):
        # Coarse progress reporting for large meshes.
        if i % 10000 == 0:
            print(f"i: {i}")
        # Find nodes that contribute to this Gauss point
        node_mask = valid_mask[i, :]
        if not np.any(node_mask):
            continue
        # Get relevant values for this Gauss point
        phi_i = phi[i, node_mask]
        phi_P_x_i = phi_P_x[i, node_mask]
        phi_P_y_i = phi_P_y[i, node_mask]
        H_i = H[i, node_mask, :]  # (n_valid_nodes, n_dim)
        H_T_i = H_T[i, node_mask, :]  # (n_valid_nodes, n_dim)
        H_P_x_i = H_P_x[i, node_mask, :]
        H_P_y_i = H_P_y[i, node_mask, :]
        HT_P_x_i = HT_P_x[i, node_mask, :]
        HT_P_y_i = HT_P_y[i, node_mask, :]
        # Compute outer products and accumulate
        # Using einsum for efficient tensor operations
        # M[i] += sum over valid nodes of (H * H_T * phi)
        M[i] += np.einsum("ni,nj,n->ij", H_i, H_T_i, phi_i)
        # M_P_x[i]: product rule over the three factors H, H_T and phi.
        M_P_x[i] += (
            np.einsum("ni,nj,n->ij", H_i, H_T_i, phi_P_x_i)
            + np.einsum("ni,nj,n->ij", H_P_x_i, H_T_i, phi_i)
            + np.einsum("ni,nj,n->ij", H_i, HT_P_x_i, phi_i)
        )
        # M_P_y[i]: same product rule w.r.t. y.
        M_P_y[i] += (
            np.einsum("ni,nj,n->ij", H_i, H_T_i, phi_P_y_i)
            + np.einsum("ni,nj,n->ij", H_P_y_i, H_T_i, phi_i)
            + np.einsum("ni,nj,n->ij", H_i, HT_P_y_i, phi_i)
        )
        if is_3d:
            phi_P_z_i = phi_P_z_val[i, node_mask]
            H_P_z_i = H_P_z[i, node_mask, :]
            HT_P_z_i = HT_P_z[i, node_mask, :]
            # M_P_z[i]: same product rule w.r.t. z (3D only).
            M_P_z[i] += (
                np.einsum("ni,nj,n->ij", H_i, H_T_i, phi_P_z_i)
                + np.einsum("ni,nj,n->ij", H_P_z_i, H_T_i, phi_i)
                + np.einsum("ni,nj,n->ij", H_i, HT_P_z_i, phi_i)
            )
    print(" end: compute_phi_M_standard_vectorized", flush=True)
    return (
        phi_nonzero_index_row,
        phi_nonzero_index_column,
        phi_nonzerovalue_data,
        phi_P_x_nonzerovalue_data,
        phi_P_y_nonzerovalue_data,
        phi_P_z_nonzerovalue_data,
        M,
        M_P_x,
        M_P_y,
        M_P_z,
    )
# [docs] -- stray Sphinx "view source" artifact from the scraped docs page; commented out so the module stays importable
def compute_phi_M_standard_sparse(
    x_G,
    Gauss_grain_id,
    x_nodes,
    nodes_grain_id,
    a,
    M,
    M_P_x,
    M_P_y,
    num_interface_segments,
    interface_nodes,
    BxByCxCy,
    M_P_z=None,
    dtype=np.float64,
):
    """Optimized sparse version of compute_phi_M_standard using vectorized operations.

    Eliminates the loop over Gauss points by extracting only the
    (Gauss point, node) pairs inside the kernel support with advanced
    indexing and accumulating their outer-product contributions with
    np.add.at.  Intended to be faster than the per-Gauss-point loop in
    compute_phi_M_standard_vectorized -- the speedup is workload-dependent
    and not verified here.

    NOTE(review): unlike compute_phi_M_standard_vectorized, which ADDS into
    the M arrays it receives, this function RESETS M, M_P_x, M_P_y (and
    M_P_z in 3D) to zero before accumulating.  Callers relying on
    pre-populated moment matrices should use the vectorized variant.

    Args:
        x_G: Gauss-point coordinates, shape (n_gauss, dim).
        Gauss_grain_id: Unused in this function; presumably kept for
            signature compatibility with compute_phi_M_standard -- TODO
            confirm against callers.
        x_nodes: Node coordinates, shape (n_nodes, dim).
        nodes_grain_id: Unused in this function (see Gauss_grain_id).
        a: Per-node support radii, shape (n_nodes,).
        M: Moment matrices, shape (n_gauss, k, k); k == 4 selects the 3D
            path.  Zeroed and rebuilt in place.
        M_P_x: x-derivative of M, same shape; zeroed and rebuilt in place.
        M_P_y: y-derivative of M, same shape; zeroed and rebuilt in place.
        num_interface_segments: Unused in this function -- TODO confirm.
        interface_nodes: Unused in this function -- TODO confirm.
        BxByCxCy: Unused in this function -- TODO confirm.
        M_P_z: z-derivative of M (3D only); allocated as zeros when None.
        dtype: Data type for computations (default: np.float64).

    Returns:
        Tuple (phi_nonzero_index_row, phi_nonzero_index_column,
        phi_nonzerovalue_data, phi_P_x_nonzerovalue_data,
        phi_P_y_nonzerovalue_data, phi_P_z_nonzerovalue_data, M, M_P_x,
        M_P_y, M_P_z).  The first six entries are COO-style data for a
        sparse shape-function matrix; the phi_P_z data is an empty array
        in 2D.
    """
    print("begin: compute_phi_M_standard_sparse", flush=True)
    if M_P_z is None:
        M_P_z = np.zeros_like(M)
    H_scaling_factor = dtype(1.0e-6)
    # Machine epsilon of float64; guards the 0/0 at coincident points.
    eps = dtype(2.220446049250313e-16)
    # Determine dimension from M shape: 4x4 moment matrices imply 3D.
    is_3d = np.shape(M)[1] == 4
    # Compute z and H matrices for all point pairs
    if is_3d:
        (
            z,
            z_P_x,
            z_P_y,
            z_P_z,
            H_T,
            HT_P_x,
            HT_P_y,
            HT_P_z,
            H,
            H_P_x,
            H_P_y,
            H_P_z,
        ) = compute_z_and_H_3d_vectorized(x_G, x_nodes, a, H_scaling_factor, eps, dtype)
    else:
        (z, z_P_x, z_P_y, H_T, HT_P_x, HT_P_y, H, H_P_x, H_P_y) = (
            compute_z_and_H_2d_vectorized(x_G, x_nodes, a, H_scaling_factor, eps, dtype)
        )
        z_P_z = None
        H_P_z = None
        HT_P_z = None
    # Compute phi kernel for all distances
    phi, phi_P_z = compute_phi_kernel(z)
    # Find valid pairs: nodes whose kernel support covers the Gauss point
    # (z <= 1; z is a distance so z >= 0 always holds).
    valid_mask = (z >= 0) & (z <= 1.0)
    # Extract indices of valid pairs
    gauss_indices, node_indices = np.where(valid_mask)
    # Store nonzero values for sparse matrix construction
    phi_nonzero_index_row = gauss_indices
    phi_nonzero_index_column = node_indices
    phi_nonzerovalue_data = phi[gauss_indices, node_indices]
    # Chain rule d phi / d x = (d phi / d z) * (d z / d x), evaluated only
    # on the valid pairs instead of the full dense arrays.
    phi_P_x_vals = (
        phi_P_z[gauss_indices, node_indices] * z_P_x[gauss_indices, node_indices]
    )
    phi_P_y_vals = (
        phi_P_z[gauss_indices, node_indices] * z_P_y[gauss_indices, node_indices]
    )
    phi_P_x_nonzerovalue_data = phi_P_x_vals
    phi_P_y_nonzerovalue_data = phi_P_y_vals
    if is_3d:
        phi_P_z_vals = (
            phi_P_z[gauss_indices, node_indices] * z_P_z[gauss_indices, node_indices]
        )
        phi_P_z_nonzerovalue_data = phi_P_z_vals
    else:
        phi_P_z_nonzerovalue_data = np.array([])
        phi_P_z_vals = None
    # Get dimensions
    n_gauss = x_G.shape[0]
    print(
        f"Sparse computation: {len(gauss_indices)} non-zero interactions out of {n_gauss * x_nodes.shape[0]} total",
        flush=True,
    )
    # Extract all relevant H values for valid pairs using advanced indexing
    H_vals = H[gauss_indices, node_indices, :]  # Shape: (n_interactions, n_dim)
    H_T_vals = H_T[gauss_indices, node_indices, :]
    H_P_x_vals = H_P_x[gauss_indices, node_indices, :]
    H_P_y_vals = H_P_y[gauss_indices, node_indices, :]
    HT_P_x_vals = HT_P_x[gauss_indices, node_indices, :]
    HT_P_y_vals = HT_P_y[gauss_indices, node_indices, :]
    # Extract phi values for valid pairs
    phi_vals = phi_nonzerovalue_data
    # Reset the M matrices: this variant rebuilds them from scratch rather
    # than accumulating onto the caller's values (see docstring NOTE).
    M[:] = 0
    M_P_x[:] = 0
    M_P_y[:] = 0
    if is_3d:
        M_P_z[:] = 0
    # Compute all outer products at once (vectorized)
    # Shape: (n_interactions, n_dim, n_dim)
    print("Computing outer products for M matrices", flush=True)
    # For M: H @ H_T * phi
    outer_HH = H_vals[:, :, np.newaxis] * H_T_vals[:, np.newaxis, :]
    weighted_HH = outer_HH * phi_vals[:, np.newaxis, np.newaxis]
    # np.add.at (unbuffered ufunc) accumulates correctly even when the same
    # Gauss index appears many times in gauss_indices.
    np.add.at(M, gauss_indices, weighted_HH)
    # For M_P_x: product rule over the three factors H, H_T and phi.
    # Term 1: H @ H_T * phi_P_x
    weighted_HH_phi_P_x = outer_HH * phi_P_x_vals[:, np.newaxis, np.newaxis]
    # Term 2: H_P_x @ H_T * phi
    outer_HPxH = H_P_x_vals[:, :, np.newaxis] * H_T_vals[:, np.newaxis, :]
    weighted_HPxH = outer_HPxH * phi_vals[:, np.newaxis, np.newaxis]
    # Term 3: H @ HT_P_x * phi
    outer_HHPx = H_vals[:, :, np.newaxis] * HT_P_x_vals[:, np.newaxis, :]
    weighted_HHPx = outer_HHPx * phi_vals[:, np.newaxis, np.newaxis]
    # Accumulate all three terms for M_P_x
    np.add.at(M_P_x, gauss_indices, weighted_HH_phi_P_x + weighted_HPxH + weighted_HHPx)
    # For M_P_y: three terms (similar structure)
    # Term 1: H @ H_T * phi_P_y
    weighted_HH_phi_P_y = outer_HH * phi_P_y_vals[:, np.newaxis, np.newaxis]
    # Term 2: H_P_y @ H_T * phi
    outer_HPyH = H_P_y_vals[:, :, np.newaxis] * H_T_vals[:, np.newaxis, :]
    weighted_HPyH = outer_HPyH * phi_vals[:, np.newaxis, np.newaxis]
    # Term 3: H @ HT_P_y * phi
    outer_HHPy = H_vals[:, :, np.newaxis] * HT_P_y_vals[:, np.newaxis, :]
    weighted_HHPy = outer_HHPy * phi_vals[:, np.newaxis, np.newaxis]
    # Accumulate all three terms for M_P_y
    np.add.at(M_P_y, gauss_indices, weighted_HH_phi_P_y + weighted_HPyH + weighted_HHPy)
    if is_3d:
        H_P_z_vals = H_P_z[gauss_indices, node_indices, :]
        HT_P_z_vals = HT_P_z[gauss_indices, node_indices, :]
        # For M_P_z: three terms (similar structure)
        # Term 1: H @ H_T * phi_P_z
        weighted_HH_phi_P_z = outer_HH * phi_P_z_vals[:, np.newaxis, np.newaxis]
        # Term 2: H_P_z @ H_T * phi
        outer_HPzH = H_P_z_vals[:, :, np.newaxis] * H_T_vals[:, np.newaxis, :]
        weighted_HPzH = outer_HPzH * phi_vals[:, np.newaxis, np.newaxis]
        # Term 3: H @ HT_P_z * phi
        outer_HHPz = H_vals[:, :, np.newaxis] * HT_P_z_vals[:, np.newaxis, :]
        weighted_HHPz = outer_HHPz * phi_vals[:, np.newaxis, np.newaxis]
        # Accumulate all three terms for M_P_z
        np.add.at(
            M_P_z, gauss_indices, weighted_HH_phi_P_z + weighted_HPzH + weighted_HHPz
        )
    print(" end: compute_phi_M_standard_sparse", flush=True)
    return (
        phi_nonzero_index_row,
        phi_nonzero_index_column,
        phi_nonzerovalue_data,
        phi_P_x_nonzerovalue_data,
        phi_P_y_nonzerovalue_data,
        phi_P_z_nonzerovalue_data,
        M,
        M_P_x,
        M_P_y,
        M_P_z,
    )