Part 1

Human Solution

Pretty straightforward. Seems like folks would go to regex initially especially those who don’t work with math that often. But all that translating back and forth from strings is messy. Fortunately it’s pretty straightforward to represent repeated digits as multiples of 11, 101, 1001, 10001 and the like.

function is_invalid(num::Int)
    length = floor(Int, log10(num)) + 1
    length % 2 != 0 && return false
    midpoint = div(length, 2)
    base = 10 ^ midpoint + 1
    return num % base == 0
end

function part1(path = "input.txt")
    count = 0
    for range in split(readline(path), ",")
        first, last = parse.(Int, split(range, "-"))
        count += sum(filter(is_invalid, first:last))
    end
    return count
end

GenAI Solution

It’s pretty clever: instead of iterating through the ranges it just goes once through and generates all the invalid numbers and sums them based upon whether they appear in range. This can be more efficient depending on the sparsity of the ranges. But there’s an issue: it makes the assumption that the ranges are not overlapping (look for the comment “Once found in a range, no need to check other ranges”). It’s a bit painful to read through but it’s relatively easy enough so I certainly caught that [though the answer is correct so the ranges don’t actually overlap].

using Printf

function solve()
    # 1. Read input and parse ranges
    input_str = ""
    try
        input_str = read("input.txt", String)
    catch e
        println(stderr, "Error reading input.txt: ", e)
        return
    end

    range_strs = split(strip(input_str), ',')

    if isempty(range_strs) || (length(range_strs) == 1 && isempty(range_strs[1]))
        println("Input is empty. Total sum is 0.")
        return
    end

    ranges = []
    max_id = BigInt(0)
    for r_str in range_strs
        if isempty(r_str) continue end
        parts = split(r_str, '-')
        if length(parts) != 2
            println(stderr, "Skipping invalid range format: ", r_str)
            continue
        end
        start_r = parse(BigInt, parts[1])
        stop_r = parse(BigInt, parts[2])
        push!(ranges, (start=start_r, stop=stop_r))
        if stop_r > max_id
            max_id = stop_r
        end
    end

    total_sum = BigInt(0)
    found_invalids = Set{BigInt}()

    # 2. Generate invalid numbers and check if they are in ranges
    l = 1
    while true
        # Smallest invalid number with 2*l digits
        # This is formed by the smallest part of length l, which is 10^(l-1)
        first_part_val = BigInt(10)^(l-1)
        s_part = string(first_part_val)

        # If the generated number string would be longer than the max_id's string, we can stop.
        if 2*l > length(string(max_id)) + 1
            break
        end

        # Iterate through all possible parts of length l
        # from 10^(l-1) to 10^l - 1
        start_n = first_part_val
        end_n = BigInt(10)^l - 1

        for n in start_n:end_n
            s = string(n)
            invalid_num_str = s * s
            invalid_num = parse(BigInt, invalid_num_str)

            if invalid_num > max_id
                break
            end

            for r in ranges
                if r.start <= invalid_num <= r.stop
                    if !(invalid_num in found_invalids)
                        total_sum += invalid_num
                        push!(found_invalids, invalid_num)
                    end
                    # Once found in a range, no need to check other ranges
                    break
                end
            end
        end
        l += 1
    end

    println(total_sum)
end

solve()

To fix this initially I pointed out the ranges, but upon hitting return thought better, cancelled it, and just responded with “it’s incorrect; can you fix it?”. This did not work: it did some “merging” of the ranges which while a little cleaner still didn’t solve the problem. Thus I got more directive:

Me: No; your goal should be to sum up each invalid ID as many times as it appears in each range.

LLM: Thank you for the clarification. That is a significant change to the logic.

Based on your instruction, I will modify the code to sum up an invalid ID each time it falls within one of the ranges. If an ID is present in three different ranges, it will be added to the sum three times.

I will remove the logic that prevents this multiple counting (the `Set` and the `break` statement). I will also not merge the ranges.

And lo it works!

using Printf

function solve()
    # 1. Read input and parse ranges
    input_str = ""
    try
        input_str = read("input.txt", String)
    catch e
        println(stderr, "Error reading input.txt: ", e)
        return
    end

    range_strs = split(strip(input_str), ',')

    if isempty(range_strs) || (length(range_strs) == 1 && isempty(range_strs[1]))
        println("Input is empty. Total sum is 0.")
        return
    end

    ranges = []
    max_id = BigInt(0)
    for r_str in range_strs
        if isempty(r_str) continue end
        parts = split(r_str, '-')
        if length(parts) != 2
            println(stderr, "Skipping invalid range format: ", r_str)
            continue
        end
        start_r = parse(BigInt, parts[1])
        stop_r = parse(BigInt, parts[2])
        push!(ranges, (start=start_r, stop=stop_r))
        if stop_r > max_id
            max_id = stop_r
        end
    end

    total_sum = BigInt(0)

    # 2. Generate invalid numbers and check if they are in ranges
    l = 1
    while true
        # Smallest invalid number with 2*l digits
        # This is formed by the smallest part of length l, which is 10^(l-1)
        first_part_val = BigInt(10)^(l-1)

        # If the generated number string would be longer than the max_id's string, we can stop.
        if 2*l > length(string(max_id)) + 1
            break
        end

        # Iterate through all possible parts of length l
        # from 10^(l-1) to 10^l - 1
        start_n = first_part_val
        end_n = BigInt(10)^l - 1

        for n in start_n:end_n
            s = string(n)
            invalid_num_str = s * s
            invalid_num = parse(BigInt, invalid_num_str)

            if invalid_num > max_id
                break
            end

            # Per user instruction, count ID for each range it appears in
            for r in ranges
                if r.start <= invalid_num <= r.stop
                    total_sum += invalid_num
                end
            end
        end
        l += 1
    end

    println(total_sum)
end

Again this took like 8 minutes all told which seems quite a bit faster. Again I had solved it first so I knew what to look for, but even still this was a relatively straightforward implementation. The style is very wordy and makes it hard to parse. The checks are probably good for production code but it tends to clutter this sort of puzzle code. But still well done for solving.

Part 2

Human Solution

This part benefitted from taking my morning run in between solving part 1 and part 2. The obvious extension that I’d do if I was solving this for time is just add in additional factors (specifically all the prime factors of the length).

But it seems so wasteful to iterate over all those numbers. Like we know it works since the first solution returns quickly and at most we’re adding like 2x-4x more work per number. But elegance!!

So instead let’s think about a cumulant function cumulant(n) = sum(filter(f,1:n)). This is a pretty standard Project Euler technique: if you want to calculate sum(filter(f, lo:hi)) it’s often easier to do this as cumulant(hi) - cumulant(lo - 1) as you can often get nice closed form solutions.

And indeed it’s decently straightforward to get the cumulant function here: just go through all of your potential patterns and sum up their multiples. The only tricky bit is the inclusion-exclusion weights: you end up double counting numbers such as 111111 which is a length 3, length 2, and length 1 repetition. Fortunatelly this too is pretty easy to workout as these all nest and you can get a recursive definition for the weights where you know each pattern has to be counted once so its weight is 1 minus the sum of all the other times it has shown up.

using Primes
using Memoize

@memoize function weight(length, max_length)
    length == max_length && return 0
    max_length % length != 0 && return 0
    return 1 - sum(weight(parent, max_length) for parent in (2*length):length:max_length)
end

function get_mult(digits::Int, pattern_length::Int)
    mult = 0
    for _ in 1:div(digits, pattern_length)
        mult *= 10^pattern_length
        mult += 1
    end
    return mult
end

function cumulant(num::Int)
    # sum(filter(is_invalid, 1:num))
    num <= 0 && return 0

    tot = BigInt(0)

    max_digits = floor(Int, log10(num)) + 1
    for n_digits in 2:(max_digits - 1)
        for pattern_length in divisors(n_digits)
            pattern_length == n_digits && continue
            mult = get_mult(n_digits, pattern_length)
            tot += (weight(pattern_length, n_digits)) * mult * sum((10^(pattern_length-1)):(10^pattern_length - 1))
        end
    end

    for pattern_length in divisors(max_digits)
        pattern_length == max_digits && continue
        mult = get_mult(max_digits, pattern_length)
        tot += (weight(pattern_length, max_digits)) * mult * sum((10^(pattern_length - 1)):div(num, mult))
    end

    return tot
end

function part2(path = "input.txt")
    count = 0
    for range in split(readline(path), ",")
        first, last = parse.(Int, split(range, "-"))
        count += cumulant(last) - cumulant(first - 1)
    end
    return count
  end

GenAI Solution

Part 2 returned incredibly quickly: 30 seconds at the most. And it indeed works. It’s basically the same strategy as before just with more patterns being generated. It even correctly detected that certain numbers will show up multiple times and maintains a set (the correct one this time!) to avoid overcounting. Style is still meh but hard to argue with results here.

using Printf

function solve()
    # 1. Read input and parse ranges
    input_str = ""
    try
        input_str = read("input.txt", String)
    catch e
        println(stderr, "Error reading input.txt: ", e)
        return
    end

    range_strs = split(strip(input_str), ',')

    if isempty(range_strs) || (length(range_strs) == 1 && isempty(range_strs[1]))
        println("Input is empty. Total sum is 0.")
        return
    end

    ranges = []
    max_id = BigInt(0)
    for r_str in range_strs
        if isempty(r_str) continue end
        parts = split(r_str, '-')
        if length(parts) != 2
            println(stderr, "Skipping invalid range format: ", r_str)
            continue
        end
        start_r = parse(BigInt, parts[1])
        stop_r = parse(BigInt, parts[2])
        push!(ranges, (start=start_r, stop=stop_r))
        if stop_r > max_id
            max_id = stop_r
        end
    end

    total_sum = BigInt(0)
    processed_invalids = Set{BigInt}()

    # 2. Generate invalid numbers (sequence repeated k>=2 times)
    l = 1
    while true
        # Length of the base sequence
        first_part_val = BigInt(10)^(l-1)

        # Optimization: if the smallest possible invalid number (l-part repeated twice)
        # is already bigger than max_id, we can stop.
        if parse(BigInt, repeat(string(first_part_val), 2)) > max_id
            break
        end

        start_n = first_part_val
        end_n = BigInt(10)^l - 1

        for n in start_n:end_n
            s = string(n)

            # Repeat the base sequence k times, k >= 2
            k = 2
            while true
                repeated_s = repeat(s, k)
                invalid_num = parse(BigInt, repeated_s)

                if invalid_num > max_id
                    break # This s won't produce any more valid numbers
                end

                # Avoid re-processing numbers that can be generated in multiple ways
                # (e.g., 12121212 is 12*4 and 1212*2)
                if invalid_num in processed_invalids
                    k += 1
                    continue
                end

                push!(processed_invalids, invalid_num)

                # Count the ID for each range it appears in
                for r in ranges
                    if r.start <= invalid_num <= r.stop
                        total_sum += invalid_num
                    end
                end

                k += 1
            end
        end
        l += 1
    end

    println(total_sum)
end