llm

`gpu_energy(model_active_parameter_count, output_token_count, gpu_energy_alpha, gpu_energy_beta)`

Compute energy consumption of a single GPU.

Parameters:

Name	Type	Description	Default
`model_active_parameter_count`	`float`	Number of active parameters of the model.	required
`output_token_count`	`float`	Number of generated tokens.	required
`gpu_energy_alpha`	`float`	Alpha parameter of the GPU linear power consumption profile.	required
`gpu_energy_beta`	`float`	Beta parameter of the GPU linear power consumption profile.	required

Returns:

Type	Description
`float`	The energy consumption of a single GPU.

Source code in ecologits/impacts/llm.py

@dag.asset
def gpu_energy(
    model_active_parameter_count: float,
    output_token_count: float,
    gpu_energy_alpha: float,
    gpu_energy_beta: float
) -> float:
    """
    Estimate the energy consumed by one GPU while generating the output tokens.

    The per-token energy follows a linear profile in the number of active
    parameters (`alpha * parameters + beta`); the result is that per-token
    value multiplied by the number of generated tokens.

    Args:
        model_active_parameter_count: Number of active parameters of the model.
        output_token_count: Number of generated tokens.
        gpu_energy_alpha: Slope (alpha) of the GPU linear power consumption profile.
        gpu_energy_beta: Intercept (beta) of the GPU linear power consumption profile.

    Returns:
        The energy consumption of a single GPU.
    """
    # Linear profile evaluated for a single generated token.
    energy_per_token = gpu_energy_alpha * model_active_parameter_count + gpu_energy_beta
    return output_token_count * energy_per_token

`generation_latency(model_active_parameter_count, output_token_count, gpu_latency_alpha, gpu_latency_beta, request_latency)`

Compute the token generation latency in seconds.

Parameters:

Name	Type	Description	Default
`model_active_parameter_count`	`float`	Number of active parameters of the model.	required
`output_token_count`	`float`	Number of generated tokens.	required
`gpu_latency_alpha`	`float`	Alpha parameter of the GPU linear latency profile.	required
`gpu_latency_beta`	`float`	Beta parameter of the GPU linear latency profile.	required
`request_latency`	`float`	Measured request latency (upper bound) in seconds.	required

Returns:

Type	Description
`float`	The token generation latency in seconds.

Source code in ecologits/impacts/llm.py

@dag.asset
def generation_latency(
    model_active_parameter_count: float,
    output_token_count: float,
    gpu_latency_alpha: float,
    gpu_latency_beta: float,
    request_latency: float,
) -> float:
    """
    Estimate how long token generation took, in seconds.

    The modelled latency is the per-token linear profile
    (`alpha * parameters + beta`) multiplied by the number of generated
    tokens. The measured request latency acts as an upper bound: the smaller
    of the two values is returned.

    Args:
        model_active_parameter_count: Number of active parameters of the model.
        output_token_count: Number of generated tokens.
        gpu_latency_alpha: Slope (alpha) of the GPU linear latency profile.
        gpu_latency_beta: Intercept (beta) of the GPU linear latency profile.
        request_latency: Measured request latency (upper bound) in seconds.

    Returns:
        The token generation latency in seconds.
    """
    latency_per_token = gpu_latency_alpha * model_active_parameter_count + gpu_latency_beta
    modelled_latency = output_token_count * latency_per_token
    # The measured latency only wins when it is strictly smaller than the
    # modelled one; on a tie the modelled value is returned.
    if request_latency < modelled_latency:
        return request_latency
    return modelled_latency

`model_required_memory(model_total_parameter_count, model_quantization_bits)`

Compute the required memory to load the model on GPU.

Parameters:

Name	Type	Description	Default
`model_total_parameter_count`	`float`	Number of parameters of the model.	required
`model_quantization_bits`	`int`	Number of bits used to represent the model weights.	required

Returns:

Type	Description
`float`	The amount of required GPU memory to load the model.

Source code in ecologits/impacts/llm.py

@dag.asset
def model_required_memory(
    model_total_parameter_count: float,
    model_quantization_bits: int,
) -> float:
    """
    Estimate the GPU memory needed to load the model.

    The parameter count is multiplied by the number of bits per weight and
    divided by 8 (bits to bytes), with a constant 1.2 multiplier applied.

    Args:
        model_total_parameter_count: Number of parameters of the model.
        model_quantization_bits: Number of bits used to represent the model weights.

    Returns:
        The amount of required GPU memory to load the model.
    """
    # NOTE(review): 1.2 looks like a headroom factor on top of the raw weight
    # size - confirm against the methodology.
    scaled_parameter_count = 1.2 * model_total_parameter_count
    total_bits = scaled_parameter_count * model_quantization_bits
    return total_bits / 8

`gpu_required_count(model_required_memory, gpu_memory)`

Compute the number of GPUs required to store the model.

Parameters:

Name	Type	Description	Default
`model_required_memory`	`float`	Required memory to load the model on GPU.	required
`gpu_memory`	`float`	Amount of memory available on a single GPU.	required

Returns:

Type	Description
`int`	The number of required GPUs to load the model.

Source code in ecologits/impacts/llm.py

@dag.asset
def gpu_required_count(
    model_required_memory: float,
    gpu_memory: float
) -> int:
    """
    Determine how many GPUs are needed to hold the model in memory.

    The required memory is divided by the memory of one GPU and the quotient
    is rounded up to the next whole number.

    Args:
        model_required_memory: Required memory to load the model on GPU.
        gpu_memory: Amount of memory available on a single GPU.

    Returns:
        The number of required GPUs to load the model.
    """
    # A partially filled GPU still counts as a whole device, hence the ceiling.
    memory_ratio = model_required_memory / gpu_memory
    return ceil(memory_ratio)

`server_energy(generation_latency, server_power, server_gpu_count, gpu_required_count)`

Compute the energy consumption of the server.

Parameters:

Name	Type	Description	Default
`generation_latency`	`float`	Token generation latency in seconds.	required
`server_power`	`float`	Power consumption of the server.	required
`server_gpu_count`	`int`	Number of available GPUs in the server.	required
`gpu_required_count`	`int`	Number of required GPUs to load the model.	required

Returns:

Type	Description
`float`	The energy consumption of the server (GPUs are not included).

Source code in ecologits/impacts/llm.py

@dag.asset
def server_energy(
    generation_latency: float,
    server_power: float,
    server_gpu_count: int,
    gpu_required_count: int
) -> float:
    """
    Estimate the energy consumed by the server during the generation.

    The latency is converted from seconds to hours, multiplied by the server
    power, and scaled by the share of the server's GPUs used by the model.

    Args:
        generation_latency: Token generation latency in seconds.
        server_power: Power consumption of the server.
        server_gpu_count: Number of available GPUs in the server.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The energy consumption of the server (GPUs are not included).
    """
    latency_hours = generation_latency / 3600
    # Only the share of the server matching the GPUs in use is attributed.
    gpu_share = gpu_required_count / server_gpu_count
    return latency_hours * server_power * gpu_share

`request_energy(datacenter_pue, server_energy, gpu_required_count, gpu_energy)`

Compute the energy consumption of the request.

Parameters:

Name	Type	Description	Default
`datacenter_pue`	`float`	PUE of the datacenter.	required
`server_energy`	`float`	Energy consumption of the server.	required
`gpu_required_count`	`int`	Number of required GPUs to load the model.	required
`gpu_energy`	`float`	Energy consumption of a single GPU.	required

Returns:

Type	Description
`float`	The energy consumption of the request.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_energy(
    datacenter_pue: float,
    server_energy: float,
    gpu_required_count: int,
    gpu_energy: float
) -> float:
    """
    Estimate the total energy consumed to serve the request.

    The energy of all required GPUs is added to the server energy, and the
    sum is multiplied by the datacenter PUE.

    Args:
        datacenter_pue: PUE of the datacenter.
        server_energy: Energy consumption of the server.
        gpu_required_count: Number of required GPUs to load the model.
        gpu_energy: Energy consumption of a single GPU.

    Returns:
        The energy consumption of the request.
    """
    all_gpus_energy = gpu_required_count * gpu_energy
    hardware_energy = server_energy + all_gpus_energy
    return datacenter_pue * hardware_energy

`request_usage_gwp(request_energy, if_electricity_mix_gwp)`

Compute the Global Warming Potential (GWP) usage impact of the request.

Parameters:

Name	Type	Description	Default
`request_energy`	`float`	Energy consumption of the request.	required
`if_electricity_mix_gwp`	`float`	GWP impact factor of electricity consumption.	required

Returns:

Type	Description
`float`	The GWP usage impact of the request.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_usage_gwp(
    request_energy: float,
    if_electricity_mix_gwp: float
) -> float:
    """
    Derive the Global Warming Potential (GWP) usage impact of the request.

    The request energy is multiplied by the GWP impact factor of the
    electricity mix.

    Args:
        request_energy: Energy consumption of the request.
        if_electricity_mix_gwp: GWP impact factor of electricity consumption.

    Returns:
        The GWP usage impact of the request.
    """
    usage_gwp = request_energy * if_electricity_mix_gwp
    return usage_gwp

`request_usage_adpe(request_energy, if_electricity_mix_adpe)`

Compute the Abiotic Depletion Potential for Elements (ADPe) usage impact of the request.

Parameters:

Name	Type	Description	Default
`request_energy`	`float`	Energy consumption of the request.	required
`if_electricity_mix_adpe`	`float`	ADPe impact factor of electricity consumption.	required

Returns:

Type	Description
`float`	The ADPe usage impact of the request.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_usage_adpe(
    request_energy: float,
    if_electricity_mix_adpe: float
) -> float:
    """
    Derive the Abiotic Depletion Potential for Elements (ADPe) usage impact of the request.

    The request energy is multiplied by the ADPe impact factor of the
    electricity mix.

    Args:
        request_energy: Energy consumption of the request.
        if_electricity_mix_adpe: ADPe impact factor of electricity consumption.

    Returns:
        The ADPe usage impact of the request.
    """
    usage_adpe = request_energy * if_electricity_mix_adpe
    return usage_adpe

`request_usage_pe(request_energy, if_electricity_mix_pe)`

Compute the Primary Energy (PE) usage impact of the request.

Parameters:

Name	Type	Description	Default
`request_energy`	`float`	Energy consumption of the request.	required
`if_electricity_mix_pe`	`float`	PE impact factor of electricity consumption.	required

Returns:

Type	Description
`float`	The PE usage impact of the request.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_usage_pe(
    request_energy: float,
    if_electricity_mix_pe: float
) -> float:
    """
    Derive the Primary Energy (PE) usage impact of the request.

    The request energy is multiplied by the PE impact factor of the
    electricity mix.

    Args:
        request_energy: Energy consumption of the request.
        if_electricity_mix_pe: PE impact factor of electricity consumption.

    Returns:
        The PE usage impact of the request.
    """
    usage_pe = request_energy * if_electricity_mix_pe
    return usage_pe

`server_gpu_embodied_gwp(server_embodied_gwp, server_gpu_count, gpu_embodied_gwp, gpu_required_count)`

Compute the Global Warming Potential (GWP) embodied impact of the server and its GPUs.

Parameters:

Name	Type	Description	Default
`server_embodied_gwp`	`float`	GWP embodied impact of the server.	required
`server_gpu_count`	`float`	Number of available GPUs in the server.	required
`gpu_embodied_gwp`	`float`	GWP embodied impact of a single GPU.	required
`gpu_required_count`	`int`	Number of required GPUs to load the model.	required

Returns:

Type	Description
`float`	The GWP embodied impact of the server and the GPUs.

Source code in ecologits/impacts/llm.py

@dag.asset
def server_gpu_embodied_gwp(
    server_embodied_gwp: float,
    server_gpu_count: float,
    gpu_embodied_gwp: float,
    gpu_required_count: int
) -> float:
    """
    Compute the Global Warming Potential (GWP) embodied impact of the server and its GPUs.

    The server's embodied impact is attributed in proportion to the share of
    its GPUs used by the model; the embodied impact of each required GPU is
    then added on top.

    Args:
        server_embodied_gwp: GWP embodied impact of the server.
        server_gpu_count: Number of available GPUs in the server.
        gpu_embodied_gwp: GWP embodied impact of a single GPU.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The GWP embodied impact of the server and the GPUs.
    """
    server_share = gpu_required_count / server_gpu_count
    server_part = server_share * server_embodied_gwp
    gpus_part = gpu_required_count * gpu_embodied_gwp
    return server_part + gpus_part

`server_gpu_embodied_adpe(server_embodied_adpe, server_gpu_count, gpu_embodied_adpe, gpu_required_count)`

Compute the Abiotic Depletion Potential for Elements (ADPe) embodied impact of the server and its GPUs.

Parameters:

Name	Type	Description	Default
`server_embodied_adpe`	`float`	ADPe embodied impact of the server.	required
`server_gpu_count`	`float`	Number of available GPUs in the server.	required
`gpu_embodied_adpe`	`float`	ADPe embodied impact of a single GPU.	required
`gpu_required_count`	`int`	Number of required GPUs to load the model.	required

Returns:

Type	Description
`float`	The ADPe embodied impact of the server and the GPUs.

Source code in ecologits/impacts/llm.py

@dag.asset
def server_gpu_embodied_adpe(
    server_embodied_adpe: float,
    server_gpu_count: float,
    gpu_embodied_adpe: float,
    gpu_required_count: int
) -> float:
    """
    Compute the Abiotic Depletion Potential for Elements (ADPe) embodied impact of the server and its GPUs.

    The server's embodied impact is attributed in proportion to the share of
    its GPUs used by the model; the embodied impact of each required GPU is
    then added on top.

    Args:
        server_embodied_adpe: ADPe embodied impact of the server.
        server_gpu_count: Number of available GPUs in the server.
        gpu_embodied_adpe: ADPe embodied impact of a single GPU.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The ADPe embodied impact of the server and the GPUs.
    """
    server_share = gpu_required_count / server_gpu_count
    server_part = server_share * server_embodied_adpe
    gpus_part = gpu_required_count * gpu_embodied_adpe
    return server_part + gpus_part

`server_gpu_embodied_pe(server_embodied_pe, server_gpu_count, gpu_embodied_pe, gpu_required_count)`

Compute the Primary Energy (PE) embodied impact of the server and its GPUs.

Parameters:

Name	Type	Description	Default
`server_embodied_pe`	`float`	PE embodied impact of the server.	required
`server_gpu_count`	`float`	Number of available GPUs in the server.	required
`gpu_embodied_pe`	`float`	PE embodied impact of a single GPU.	required
`gpu_required_count`	`int`	Number of required GPUs to load the model.	required

Returns:

Type	Description
`float`	The PE embodied impact of the server and the GPUs.

Source code in ecologits/impacts/llm.py

@dag.asset
def server_gpu_embodied_pe(
    server_embodied_pe: float,
    server_gpu_count: float,
    gpu_embodied_pe: float,
    gpu_required_count: int
) -> float:
    """
    Compute the Primary Energy (PE) embodied impact of the server and its GPUs.

    The server's embodied impact is attributed in proportion to the share of
    its GPUs used by the model; the embodied impact of each required GPU is
    then added on top.

    Args:
        server_embodied_pe: PE embodied impact of the server.
        server_gpu_count: Number of available GPUs in the server.
        gpu_embodied_pe: PE embodied impact of a single GPU.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The PE embodied impact of the server and the GPUs.
    """
    server_share = gpu_required_count / server_gpu_count
    server_part = server_share * server_embodied_pe
    gpus_part = gpu_required_count * gpu_embodied_pe
    return server_part + gpus_part

`request_embodied_gwp(server_gpu_embodied_gwp, server_lifetime, generation_latency)`

Compute the Global Warming Potential (GWP) embodied impact of the request.

Parameters:

Name	Type	Description	Default
`server_gpu_embodied_gwp`	`float`	GWP embodied impact of the server and the GPUs.	required
`server_lifetime`	`float`	Lifetime duration of the server.	required
`generation_latency`	`float`	Token generation latency in seconds.	required

Returns:

Type	Description
`float`	The GWP embodied impact of the request.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_embodied_gwp(
    server_gpu_embodied_gwp: float,
    server_lifetime: float,
    generation_latency: float
) -> float:
    """
    Attribute a share of the hardware's embodied Global Warming Potential (GWP) to the request.

    The share is the fraction of the server lifetime taken up by the
    generation latency.

    Args:
        server_gpu_embodied_gwp: GWP embodied impact of the server and the GPUs.
        server_lifetime: Lifetime duration of the server.
        generation_latency: Token generation latency in seconds.

    Returns:
        The GWP embodied impact of the request.
    """
    # NOTE(review): presumably server_lifetime is in the same unit as the
    # latency (seconds) - confirm against the constant's definition.
    lifetime_fraction = generation_latency / server_lifetime
    return lifetime_fraction * server_gpu_embodied_gwp

`request_embodied_adpe(server_gpu_embodied_adpe, server_lifetime, generation_latency)`

Compute the Abiotic Depletion Potential for Elements (ADPe) embodied impact of the request.

Parameters:

Name	Type	Description	Default
`server_gpu_embodied_adpe`	`float`	ADPe embodied impact of the server and the GPUs.	required
`server_lifetime`	`float`	Lifetime duration of the server.	required
`generation_latency`	`float`	Token generation latency in seconds.	required

Returns:

Type	Description
`float`	The ADPe embodied impact of the request.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_embodied_adpe(
    server_gpu_embodied_adpe: float,
    server_lifetime: float,
    generation_latency: float
) -> float:
    """
    Attribute a share of the hardware's embodied Abiotic Depletion Potential for Elements (ADPe) to the request.

    The share is the fraction of the server lifetime taken up by the
    generation latency.

    Args:
        server_gpu_embodied_adpe: ADPe embodied impact of the server and the GPUs.
        server_lifetime: Lifetime duration of the server.
        generation_latency: Token generation latency in seconds.

    Returns:
        The ADPe embodied impact of the request.
    """
    # NOTE(review): presumably server_lifetime is in the same unit as the
    # latency (seconds) - confirm against the constant's definition.
    lifetime_fraction = generation_latency / server_lifetime
    return lifetime_fraction * server_gpu_embodied_adpe

`request_embodied_pe(server_gpu_embodied_pe, server_lifetime, generation_latency)`

Compute the Primary Energy (PE) embodied impact of the request.

Parameters:

Name	Type	Description	Default
`server_gpu_embodied_pe`	`float`	PE embodied impact of the server and the GPUs.	required
`server_lifetime`	`float`	Lifetime duration of the server.	required
`generation_latency`	`float`	Token generation latency in seconds.	required

Returns:

Type	Description
`float`	The PE embodied impact of the request.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_embodied_pe(
    server_gpu_embodied_pe: float,
    server_lifetime: float,
    generation_latency: float
) -> float:
    """
    Attribute a share of the hardware's embodied Primary Energy (PE) to the request.

    The share is the fraction of the server lifetime taken up by the
    generation latency.

    Args:
        server_gpu_embodied_pe: PE embodied impact of the server and the GPUs.
        server_lifetime: Lifetime duration of the server.
        generation_latency: Token generation latency in seconds.

    Returns:
        The PE embodied impact of the request.
    """
    # NOTE(review): presumably server_lifetime is in the same unit as the
    # latency (seconds) - confirm against the constant's definition.
    lifetime_fraction = generation_latency / server_lifetime
    return lifetime_fraction * server_gpu_embodied_pe

`compute_llm_impacts_dag(model_active_parameter_count, model_total_parameter_count, output_token_count, request_latency, model_quantization_bits=MODEL_QUANTIZATION_BITS, gpu_energy_alpha=GPU_ENERGY_ALPHA, gpu_energy_beta=GPU_ENERGY_BETA, gpu_latency_alpha=GPU_LATENCY_ALPHA, gpu_latency_beta=GPU_LATENCY_BETA, gpu_memory=GPU_MEMORY, gpu_embodied_gwp=GPU_EMBODIED_IMPACT_GWP, gpu_embodied_adpe=GPU_EMBODIED_IMPACT_ADPE, gpu_embodied_pe=GPU_EMBODIED_IMPACT_PE, server_gpu_count=SERVER_GPUS, server_power=SERVER_POWER, server_embodied_gwp=SERVER_EMBODIED_IMPACT_GWP, server_embodied_adpe=SERVER_EMBODIED_IMPACT_ADPE, server_embodied_pe=SERVER_EMBODIED_IMPACT_PE, server_lifetime=HARDWARE_LIFESPAN, datacenter_pue=DATACENTER_PUE, if_electricity_mix_gwp=IF_ELECTRICITY_MIX_GWP, if_electricity_mix_adpe=IF_ELECTRICITY_MIX_ADPE, if_electricity_mix_pe=IF_ELECTRICITY_MIX_PE)`

Compute the impacts dag of an LLM generation request.

Parameters:

Name	Type	Description	Default
`model_active_parameter_count`	`float`	Number of active parameters of the model.	required
`model_total_parameter_count`	`float`	Number of parameters of the model.	required
`output_token_count`	`float`	Number of generated tokens.	required
`request_latency`	`float`	Measured request latency in seconds.	required
`model_quantization_bits`	`Optional[int]`	Number of bits used to represent the model weights.	`MODEL_QUANTIZATION_BITS`
`gpu_energy_alpha`	`Optional[float]`	Alpha parameter of the GPU linear power consumption profile.	`GPU_ENERGY_ALPHA`
`gpu_energy_beta`	`Optional[float]`	Beta parameter of the GPU linear power consumption profile.	`GPU_ENERGY_BETA`
`gpu_latency_alpha`	`Optional[float]`	Alpha parameter of the GPU linear latency profile.	`GPU_LATENCY_ALPHA`
`gpu_latency_beta`	`Optional[float]`	Beta parameter of the GPU linear latency profile.	`GPU_LATENCY_BETA`
`gpu_memory`	`Optional[float]`	Amount of memory available on a single GPU.	`GPU_MEMORY`
`gpu_embodied_gwp`	`Optional[float]`	GWP embodied impact of a single GPU.	`GPU_EMBODIED_IMPACT_GWP`
`gpu_embodied_adpe`	`Optional[float]`	ADPe embodied impact of a single GPU.	`GPU_EMBODIED_IMPACT_ADPE`
`gpu_embodied_pe`	`Optional[float]`	PE embodied impact of a single GPU.	`GPU_EMBODIED_IMPACT_PE`
`server_gpu_count`	`Optional[int]`	Number of available GPUs in the server.	`SERVER_GPUS`
`server_power`	`Optional[float]`	Power consumption of the server.	`SERVER_POWER`
`server_embodied_gwp`	`Optional[float]`	GWP embodied impact of the server.	`SERVER_EMBODIED_IMPACT_GWP`
`server_embodied_adpe`	`Optional[float]`	ADPe embodied impact of the server.	`SERVER_EMBODIED_IMPACT_ADPE`
`server_embodied_pe`	`Optional[float]`	PE embodied impact of the server.	`SERVER_EMBODIED_IMPACT_PE`
`server_lifetime`	`Optional[float]`	Lifetime duration of the server.	`HARDWARE_LIFESPAN`
`datacenter_pue`	`Optional[float]`	PUE of the datacenter.	`DATACENTER_PUE`
`if_electricity_mix_gwp`	`Optional[float]`	GWP impact factor of electricity consumption.	`IF_ELECTRICITY_MIX_GWP`
`if_electricity_mix_adpe`	`Optional[float]`	ADPe impact factor of electricity consumption.	`IF_ELECTRICITY_MIX_ADPE`
`if_electricity_mix_pe`	`Optional[float]`	PE impact factor of electricity consumption.	`IF_ELECTRICITY_MIX_PE`

Returns:

Type	Description
`dict[str, float]`	The impacts dag with all intermediate states.

Source code in ecologits/impacts/llm.py

def compute_llm_impacts_dag(
    model_active_parameter_count: float,
    model_total_parameter_count: float,
    output_token_count: float,
    request_latency: float,
    model_quantization_bits: Optional[int] = MODEL_QUANTIZATION_BITS,
    gpu_energy_alpha: Optional[float] = GPU_ENERGY_ALPHA,
    gpu_energy_beta: Optional[float] = GPU_ENERGY_BETA,
    gpu_latency_alpha: Optional[float] = GPU_LATENCY_ALPHA,
    gpu_latency_beta: Optional[float] = GPU_LATENCY_BETA,
    gpu_memory: Optional[float] = GPU_MEMORY,
    gpu_embodied_gwp: Optional[float] = GPU_EMBODIED_IMPACT_GWP,
    gpu_embodied_adpe: Optional[float] = GPU_EMBODIED_IMPACT_ADPE,
    gpu_embodied_pe: Optional[float] = GPU_EMBODIED_IMPACT_PE,
    server_gpu_count: Optional[int] = SERVER_GPUS,
    server_power: Optional[float] = SERVER_POWER,
    server_embodied_gwp: Optional[float] = SERVER_EMBODIED_IMPACT_GWP,
    server_embodied_adpe: Optional[float] = SERVER_EMBODIED_IMPACT_ADPE,
    server_embodied_pe: Optional[float] = SERVER_EMBODIED_IMPACT_PE,
    server_lifetime: Optional[float] = HARDWARE_LIFESPAN,
    datacenter_pue: Optional[float] = DATACENTER_PUE,
    if_electricity_mix_gwp: Optional[float] = IF_ELECTRICITY_MIX_GWP,
    if_electricity_mix_adpe: Optional[float] = IF_ELECTRICITY_MIX_ADPE,
    if_electricity_mix_pe: Optional[float] = IF_ELECTRICITY_MIX_PE,
) -> dict[str, float]:
    """
    Run the impacts DAG for a single LLM generation request.

    Every argument is forwarded by name to `dag.execute`, and its result is
    returned unchanged.

    Args:
        model_active_parameter_count: Number of active parameters of the model.
        model_total_parameter_count: Number of parameters of the model.
        output_token_count: Number of generated tokens.
        request_latency: Measured request latency in seconds.
        model_quantization_bits: Number of bits used to represent the model weights.
        gpu_energy_alpha: Alpha parameter of the GPU linear power consumption profile.
        gpu_energy_beta: Beta parameter of the GPU linear power consumption profile.
        gpu_latency_alpha: Alpha parameter of the GPU linear latency profile.
        gpu_latency_beta: Beta parameter of the GPU linear latency profile.
        gpu_memory: Amount of memory available on a single GPU.
        gpu_embodied_gwp: GWP embodied impact of a single GPU.
        gpu_embodied_adpe: ADPe embodied impact of a single GPU.
        gpu_embodied_pe: PE embodied impact of a single GPU.
        server_gpu_count: Number of available GPUs in the server.
        server_power: Power consumption of the server.
        server_embodied_gwp: GWP embodied impact of the server.
        server_embodied_adpe: ADPe embodied impact of the server.
        server_embodied_pe: PE embodied impact of the server.
        server_lifetime: Lifetime duration of the server.
        datacenter_pue: PUE of the datacenter.
        if_electricity_mix_gwp: GWP impact factor of electricity consumption.
        if_electricity_mix_adpe: ADPe impact factor of electricity consumption.
        if_electricity_mix_pe: PE impact factor of electricity consumption.

    Returns:
        The impacts dag with all intermediate states.
    """
    # Inputs of the DAG, keyed by the name each one is passed under.
    dag_inputs = {
        # Model and request description.
        "model_active_parameter_count": model_active_parameter_count,
        "model_total_parameter_count": model_total_parameter_count,
        "model_quantization_bits": model_quantization_bits,
        "output_token_count": output_token_count,
        "request_latency": request_latency,
        # GPU profile.
        "gpu_energy_alpha": gpu_energy_alpha,
        "gpu_energy_beta": gpu_energy_beta,
        "gpu_latency_alpha": gpu_latency_alpha,
        "gpu_latency_beta": gpu_latency_beta,
        "gpu_memory": gpu_memory,
        "gpu_embodied_gwp": gpu_embodied_gwp,
        "gpu_embodied_adpe": gpu_embodied_adpe,
        "gpu_embodied_pe": gpu_embodied_pe,
        # Server profile.
        "server_gpu_count": server_gpu_count,
        "server_power": server_power,
        "server_embodied_gwp": server_embodied_gwp,
        "server_embodied_adpe": server_embodied_adpe,
        "server_embodied_pe": server_embodied_pe,
        "server_lifetime": server_lifetime,
        # Datacenter and electricity mix.
        "datacenter_pue": datacenter_pue,
        "if_electricity_mix_gwp": if_electricity_mix_gwp,
        "if_electricity_mix_adpe": if_electricity_mix_adpe,
        "if_electricity_mix_pe": if_electricity_mix_pe,
    }
    return dag.execute(**dag_inputs)

`compute_llm_impacts(model_active_parameter_count, model_total_parameter_count, output_token_count, request_latency=None, **kwargs)`

Compute the impacts of an LLM generation request.

Parameters:

Name	Type	Description	Default
`model_active_parameter_count`	`ValueOrRange`	Number of active parameters of the model.	required
`model_total_parameter_count`	`ValueOrRange`	Number of total parameters of the model.	required
`output_token_count`	`float`	Number of generated tokens.	required
`request_latency`	`Optional[float]`	Measured request latency in seconds.	`None`
`**kwargs`	`Any`	Any other optional parameter.	`{}`

Returns:

Type	Description
`Impacts`	The impacts of an LLM generation request.

Source code in ecologits/impacts/llm.py

def compute_llm_impacts(
    model_active_parameter_count: ValueOrRange,
    model_total_parameter_count: ValueOrRange,
    output_token_count: float,
    request_latency: Optional[float] = None,
    **kwargs: Any
) -> Impacts:
    """
    Compute the impacts of an LLM generation request.

    When neither parameter count is a `Range`, the impacts DAG is evaluated
    once. When at least one of them is a `Range`, the DAG is evaluated twice
    (first with the minimum bounds, then with the maximum bounds; a plain
    value is used for both runs) and every reported field becomes a `Range`
    built from the first and second run.

    Args:
        model_active_parameter_count: Number of active parameters of the model.
        model_total_parameter_count: Number of total parameters of the model.
        output_token_count: Number of generated tokens.
        request_latency: Measured request latency in seconds. When omitted,
            an infinite latency is used.
        **kwargs: Any other optional parameter, forwarded to `compute_llm_impacts_dag`.

    Returns:
        The impacts of an LLM generation request.
    """
    latency_bound = math.inf if request_latency is None else request_latency

    active_is_range = isinstance(model_active_parameter_count, Range)
    total_is_range = isinstance(model_total_parameter_count, Range)

    if not (active_is_range or total_is_range):
        # Plain values on both sides: a single evaluation is enough.
        active_params = [model_active_parameter_count]
        total_params = [model_total_parameter_count]
    else:
        # At least one range: evaluate the lower bounds, then the upper bounds.
        # A plain value is repeated so both lists have two entries.
        if active_is_range:
            active_params = [model_active_parameter_count.min, model_active_parameter_count.max]
        else:
            active_params = [model_active_parameter_count] * 2
        if total_is_range:
            total_params = [model_total_parameter_count.min, model_total_parameter_count.max]
        else:
            total_params = [model_total_parameter_count] * 2

    fields = ["request_energy", "request_usage_gwp", "request_usage_adpe", "request_usage_pe",
              "request_embodied_gwp", "request_embodied_adpe", "request_embodied_pe"]
    results = {}
    for act_param, tot_param in zip(active_params, total_params):
        run = compute_llm_impacts_dag(
            model_active_parameter_count=act_param,
            model_total_parameter_count=tot_param,
            output_token_count=output_token_count,
            request_latency=latency_bound,
            **kwargs
        )
        for name in fields:
            value = run[name]
            if name not in results:
                # First run: keep the plain value.
                results[name] = value
            else:
                # Second run: pair the first run's value with this one.
                results[name] = Range(min=results[name], max=value)

    energy = Energy(value=results["request_energy"])
    gwp_usage = GWP(value=results["request_usage_gwp"])
    adpe_usage = ADPe(value=results["request_usage_adpe"])
    pe_usage = PE(value=results["request_usage_pe"])
    gwp_embodied = GWP(value=results["request_embodied_gwp"])
    adpe_embodied = ADPe(value=results["request_embodied_adpe"])
    pe_embodied = PE(value=results["request_embodied_pe"])

    # Totals are the sum of the usage and embodied phases.
    gwp_total = gwp_usage + gwp_embodied
    adpe_total = adpe_usage + adpe_embodied
    pe_total = pe_usage + pe_embodied

    usage = Usage(energy=energy, gwp=gwp_usage, adpe=adpe_usage, pe=pe_usage)
    embodied = Embodied(gwp=gwp_embodied, adpe=adpe_embodied, pe=pe_embodied)
    return Impacts(
        energy=energy,
        gwp=gwp_total,
        adpe=adpe_total,
        pe=pe_total,
        usage=usage,
        embodied=embodied
    )