Part 1

Human Solution

This fits quite elegantly into an image-processing framework — just convolve with an adjacency kernel. The hardest part was really just getting the input into matrix form. I have an AoC library of nice little functions I’ve collected over the years, and this gem proves super useful (though it would be nice if readdlm just let me pass '' as a delimiter).

using Images

# Part 1: count rolls of paper ('@') that have fewer than four adjacent rolls.
#
# Builds a Bool occupancy matrix from the input file, convolves it with a 3x3
# adjacency kernel (center excluded) to get a per-cell neighbor count, then
# counts the rolls whose neighbor count is below four.
function part1(path="input.txt")
    char_mat = reduce(vcat, permutedims.(collect.(readlines(path))))
    mat = char_mat .== '@'

    adj_kernel = centered([1 1 1;
                           1 0 1;
                           1 1 1])
    # Fill(0) treats out-of-bounds neighbors as empty space.
    counts = imfilter(mat, adj_kernel, Fill(0))

    # `mat` is already Bool, so the redundant `.== 1` comparison is dropped;
    # `count` is the idiomatic reduction over a Bool mask.
    return count(counts .< 4 .&& mat)
end

GenAI Solution

Very straightforward brute-force

# Part 1 (brute force): print the number of rolls ('@') with fewer than
# four adjacent rolls in the grid read from input.txt.
function solve()
    rows_of_chars = collect.(readlines("input.txt"))
    grid = permutedims(hcat(rows_of_chars...))
    nrows, ncols = size(grid)

    # True when (r, c) is inside the grid and holds a roll of paper.
    holds_roll(r, c) = 1 <= r <= nrows && 1 <= c <= ncols && grid[r, c] == '@'

    accessible = 0
    for r in 1:nrows, c in 1:ncols
        grid[r, c] == '@' || continue
        # Count the up-to-8 neighboring rolls; the center cell is skipped.
        neighbors = count(holds_roll(r + dr, c + dc)
                          for dr in -1:1, dc in -1:1 if !(dr == 0 && dc == 0))
        neighbors < 4 && (accessible += 1)
    end

    println(accessible)
end

solve()

Part 2

Human Solution

This was a fun one; the thing that I love about Advent of Code is the race to try and solve part 2 as quickly as possible. Oftentimes you end up with slightly less efficient code than you would write if you were starting from scratch.

If I hadn’t already gone down the convolution path I’d probably have maintained a list of rolls of paper along with their neighbor counts, then done some propagation as I removed rolls. But to go quickly I just remove everything, recalculate the neighbor counts, and iterate until there’s nothing left to remove!

# Part 2: repeatedly remove every accessible roll (fewer than four neighbors)
# and return the total number removed once nothing more can be taken.
function part2(path="input.txt")
    char_mat = reduce(vcat, permutedims.(collect.(readlines(path))))
    mat = char_mat .== '@'

    adj_kernel = centered([1 1 1;
                           1 0 1;
                           1 1 1])

    total = 0
    while true
        # Fill(0) treats out-of-bounds neighbors as empty space.
        counts = imfilter(mat, adj_kernel, Fill(0))
        # Compute the removable mask once instead of twice per iteration
        # (`mat` is already Bool, so no `.== 1` comparison is needed).
        removable = counts .< 4 .&& mat
        moveable = count(removable)
        moveable == 0 && return total
        total += moveable
        # In-place Bool update; the original `mat -= mask` silently promoted
        # the BitMatrix to Matrix{Int} on the first iteration.
        mat .&= .!removable
    end
end

GenAI Solution

Again the LLM goes down the straightforward brute-force path.

# Part 2 (brute force): repeatedly remove every accessible roll ('@' with
# fewer than four neighboring rolls) and print the total number removed.
function solve_part2()
    lines = readlines("input.txt")
    # reduce(hcat, ...) avoids splatting a potentially long vector into varargs.
    grid = permutedims(reduce(hcat, collect.(lines)))
    rows, cols = size(grid)
    total_removed_count = 0

    while true
        # Concretely typed: a bare `[]` would be Vector{Any} and box every push!.
        accessible_rolls = Tuple{Int,Int}[]
        for r in 1:rows, c in 1:cols
            grid[r, c] == '@' || continue

            neighbor_rolls = 0
            for dr in -1:1, dc in -1:1
                # Skip the center cell itself.
                (dr == 0 && dc == 0) && continue
                nr, nc = r + dr, c + dc
                # Out-of-bounds neighbors count as empty space.
                if 1 <= nr <= rows && 1 <= nc <= cols && grid[nr, nc] == '@'
                    neighbor_rolls += 1
                end
            end

            neighbor_rolls < 4 && push!(accessible_rolls, (r, c))
        end

        # Fixed point reached: nothing left to remove.
        isempty(accessible_rolls) && break

        total_removed_count += length(accessible_rolls)
        for (r, c) in accessible_rolls
            grid[r, c] = '.'
        end
    end

    println(total_removed_count)
end

solve_part2()

This sort of sucks to work with and modify. So let’s see if the LLM can make it better

## USER

Could you make this more elegant? The big loop seems low-level; is there a way to abstract this?

## MODEL

This is a great point. The previous solution was a bit verbose and could be improved by structuring it with more functions to better encapsulate the logic. Here is a more elegant version.

# Counts the number of adjacent rolls ('@') for a given cell.
function count_neighbors(grid, r, c)
    rows, cols = size(grid)
    count = 0
    # Iterate over the 3x3 neighborhood
    for dr in -1:1, dc in -1:1
        # Skip the center cell itself
        if dr == 0 && dc == 0
            continue
        end

        nr, nc = r + dr, c + dc

        # Check bounds and if the neighbor is a roll
        if 1 <= nr <= rows && 1 <= nc <= cols && grid[nr, nc] == '@'
            count += 1
        end
    end
    return count
end

# Finds all accessible rolls in the grid based on the neighbor count rule.
function find_accessible_rolls(grid)
    # Use a comprehension for a more functional style
    return [idx for idx in CartesianIndices(grid)
            if grid[idx] == '@' && count_neighbors(grid, idx[1], idx[2]) < 4]
end

# Repeatedly remove all accessible rolls until none remain, then print
# the total number of rolls removed.
function solve()
    # Read the input file into a Char matrix (rows = input lines).
    grid = permutedims(hcat(collect.(readlines("input.txt"))...))

    removed = 0
    accessible = find_accessible_rolls(grid)
    # Stop once a pass finds nothing removable — the grid is stable.
    while !isempty(accessible)
        removed += length(accessible)
        # Clear the removed rolls before recomputing accessibility.
        for idx in accessible
            grid[idx] = '.'
        end
        accessible = find_accessible_rolls(grid)
    end

    println(removed)
end

# Run the simulation (prints the total number of removed rolls)
solve()

Ok this is much better! But let’s see if I can steer the LLM to my preferred convolution approach.

This seems like an algorithm that could benefit from running on a GPU. Is that accurate?

Yes, your intuition is spot-on. This algorithm is an excellent candidate for GPU acceleration. Here’s why:

  1. Massive Data Parallelism: The core of the algorithm in each iteration is checking every single roll of paper (`@`) to see if it’s accessible. This check is independent for each roll. A GPU can assign a thread to each cell of the grid and perform all the neighbor-counting checks simultaneously, which would be much faster than a CPU that has to iterate through them one by one (or a few at a time).

  2. Grid-Based Local Operations: The computation for each cell (counting its 8 neighbors) is a “stencil” operation, meaning it only relies on a small, fixed local neighborhood of data. GPUs are highly optimized for these kinds of spatial computations.

The overall process would look like this in a GPU-accelerated version:

  1. CPU sends the initial grid data to the GPU’s memory.
  2. On the GPU: A parallel kernel runs, where thousands of threads simultaneously check every cell to find all accessible rolls.
  3. On the GPU: Another kernel runs to “remove” the accessible rolls (change `@` to `.`).
  4. The CPU checks if any rolls were removed in the last step. If so, it tells the GPU to repeat the process.
  5. Once no rolls are removed, the CPU gets the final count.

The main caveat is that for a grid of the size typically seen in these puzzles, the overhead of managing the GPU and transferring data back and forth might make it slower than the simple CPU version. However, if the grid were extremely large (e.g., millions of cells), the GPU approach would be significantly faster.

That’s true; one of the nice things about Julia is that you can write code that works on both. Could you modify this to take advantage of that?

Of course. That’s one of the most powerful features of Julia’s ecosystem. You can write generic code that gets specialized at compile time for different hardware, like CPUs and GPUs.

I’ll modify the solution to use `ImageFiltering.jl`, which provides high-level functions that work seamlessly with both standard `Array`s (for CPU) and `CuArray`s (for NVIDIA GPUs). The core idea is to replace the manual neighbor-counting loop with a single, highly-optimized convolution operation (`imfilter`), which can be executed in parallel on a GPU.

This new version will automatically detect if you have a functional CUDA setup and use the GPU if possible, otherwise, it will fall back to the CPU.

Here is the refactored, hardware-agnostic solution:

Aside from sounding like an infomercial for Julia this was pretty great. And it emits basically the same solution but with some CUDA boilerplate as well.

# To run on a GPU, you must have CUDA.jl and a compatible NVIDIA GPU.
# You will also need to add the ImageFiltering.jl package.
# From the Julia REPL, run:
# ] add CUDA
# ] add ImageFiltering

using ImageFiltering
using CUDA

# This version of the solver is generic and can run on either CPU or GPU.
# The choice is made based on the `use_gpu` flag and CUDA availability.
# Generic Part 2 solver that runs on either the CPU (Array) or an NVIDIA
# GPU (CuArray). The device is chosen from `use_gpu` and CUDA availability.
# Prints the total number of rolls removed.
function solve(use_gpu=true)
    # Determine the array type based on GPU availability and user preference.
    ArrayType = Array
    if use_gpu && CUDA.functional()
        println("CUDA is functional. Running on GPU...")
        ArrayType = CuArray
    else
        println("Running on CPU...")
    end

    # Read the input on the CPU, then move a Bool occupancy mask to the
    # target device. Working with the mask directly (instead of a Char grid)
    # avoids re-broadcasting `.== '@'` on every iteration and avoids the
    # logical-mask assignment `grid[mask] .= '.'`, which can fall back to
    # slow scalar indexing on CuArrays.
    lines = readlines("input.txt")
    grid_char_cpu = permutedims(hcat(collect.(lines)...))
    is_roll = ArrayType(grid_char_cpu .== '@')

    total_removed_count = 0

    # 3x3 adjacency kernel with a zero center: sums the 8 neighbors.
    kernel = centered(Bool[1 1 1; 1 0 1; 1 1 1])

    while true
        # Convolution counts neighbors for every cell at once; this is the
        # operation that parallelizes on the GPU. `Fill(0)` treats
        # out-of-bounds neighbors as empty space.
        neighbor_counts = imfilter(is_roll, kernel, Fill(0))

        # A roll is accessible if it exists AND has fewer than 4 neighbors.
        accessible_mask = is_roll .& (neighbor_counts .< 4)

        # `any` transfers a single Bool from the GPU if needed.
        any(accessible_mask) || break

        # `count` on a Bool array is optimized on both CPU and GPU.
        total_removed_count += count(accessible_mask)

        # Remove the accessible rolls with a purely element-wise update.
        is_roll = is_roll .& .!accessible_mask
    end

    println(total_removed_count)
end

# Run the simulation; it will try to use the GPU by default.
solve()

# To force CPU-only execution, you could call:
# solve(false)