import sys
import struct

########################################################################################
# Compression only
#
# python lz_78_rle.py <path_to_the_original_file>
#
#
# The generated files are written next to the original file
# (<path>_compressed and <path>.bin)
########################################################################################

def rle_transform(bitstream):
    """Run-length encode ``bitstream`` as a list of alternating run lengths.

    The first count always refers to ``'0'`` pixels, so it is 0 when the
    stream starts with any other symbol. Every symbol that differs from the
    current one closes the current run and starts a new run of length 1.

    Each count is capped at 255. A longer run is emitted as 255, followed by
    a zero-length run of the other pixel, and then counting resumes for the
    same pixel.

    An empty ``bitstream`` yields ``[0]``.
    """
    runs = []
    current = '0'  # The encoding always starts by counting '0' pixels
    length = 0

    for symbol in bitstream:
        if symbol != current:
            # Pixel changed: close the current run and start the next one
            runs.append(length)
            current = symbol
            length = 1
            continue

        if length < 255:
            length += 1
            continue

        # The run is full: flush it and insert an empty run of the other
        # pixel so the alternation is preserved while we stay on this pixel
        runs.extend((255, 0))
        length = 1

    runs.append(length)
    return runs

def lz78_compression(image_data):
    """Compress a sequence of symbols with LZ78.

    Args:
        image_data: iterable of hashable symbols (for example the run
            lengths produced by the RLE stage, or the characters of a string).

    Returns:
        A list of ``[index, symbol]`` pairs. ``index`` identifies the longest
        phrase already seen (0 is the empty phrase) and ``symbol`` is the
        item that extends it into a new phrase. If the input ends in the
        middle of an already known phrase, the last pair is ``[index, -1]``.
    """
    # Maps (prefix_index, symbol) -> index of the phrase "prefix + symbol".
    # Looking phrases up by hash avoids rebuilding and scanning the list of
    # all phrases for every input symbol.
    phrases = {}
    output = []
    prefix = 0  # Index of the phrase matched so far; 0 means "nothing yet"

    for symbol in image_data:
        key = (prefix, symbol)
        known = phrases.get(key)
        if known is not None:
            # The extended phrase already exists: keep growing the match
            prefix = known
            continue

        output.append([prefix, symbol])
        # Index 0 is reserved for the empty phrase, so numbering starts at 1
        phrases[key] = len(phrases) + 1
        prefix = 0

    if prefix:
        # Input ended inside a known phrase: emit it with the end marker
        output.append([prefix, -1])
    return output

def clean_image_data(image_data):
    """Concatenate the pixel characters of the given lines into one string.

    All whitespace is removed from every line (spaces, tabs, line endings),
    so only pixel characters reach the encoder. Lines that are blank or that
    start with ``#`` once whitespace is removed are skipped.

    Args:
        image_data: iterable of text lines.

    Returns:
        A single string with the remaining characters in their original order.
    """
    pieces = []

    for line in image_data:
        # split() with no arguments drops every kind of whitespace, not just
        # spaces, so a tab between pixels is not mistaken for a pixel
        compact = "".join(line.split())
        if not compact or compact.startswith('#'):
            continue
        pieces.append(compact)

    # Join once at the end instead of growing a string inside the loop
    return "".join(pieces)

def chunkstring(string, length):
    """Split ``string`` into consecutive slices of at most ``length`` items.

    The last slice is shorter when ``len(string)`` is not a multiple of
    ``length``. An empty ``string`` yields an empty list.
    """
    pieces = []
    for start in range(0, len(string), length):
        pieces.append(string[start:start + length])
    return pieces


# Read the whole plain-PBM file up front so the input handle is released
# before any output file is created.
with open(sys.argv[1], "r") as image:
    lines = image.readlines()

# The magic number is expected alone on the first line. Checking it before
# touching any other line makes an empty file fail with the intended error.
if not lines or lines[0].strip() != 'P1':
    raise ValueError("This is not a pbm file")

# The dimensions are on the first line after the magic number that is neither
# blank nor a '#' comment; image editors commonly insert a comment there.
size_index = next(
    (
        position
        for position, line in enumerate(lines[1:], start=1)
        if line.strip() and not line.strip().startswith('#')
    ),
    None,
)
if size_index is None:
    raise ValueError("Missing image dimensions in pbm header")

image_size = lines[size_index].split()
if len(image_size) < 2:
    raise ValueError("Missing image dimensions in pbm header")

# Parse the dimensions before writing anything, so a malformed header does
# not leave a partial set of output files behind.
largura = int(image_size[0])
altura = int(image_size[1])

# Strip whitespace and comments so that only pixel characters are compressed
clean_text = clean_image_data(lines[size_index + 1:])

clean_text = rle_transform(clean_text)

output = lz78_compression(clean_text)

print(f"Output: {output}\n")

# Text output (human-readable)
with open(sys.argv[1] + "_compressed", "w") as save_file:
    save_file.write(f"{output} \n{image_size[0]} {image_size[1]}")

# Binary output (the actual compressed file): width and height as two
# unsigned ints, followed by one (unsigned short index, unsigned byte symbol)
# record per LZ78 pair. The formats have no byte-order prefix, so struct uses
# the native byte order of the machine running the script.
header = struct.pack('II', largura, altura)

# The end marker -1 is stored as 255.
# NOTE(review): rle_transform can emit a genuine run length of 255, which this
# encoding cannot tell apart from the end marker in the last record — confirm
# how the decoder resolves it.
# Packing everything before opening the file means an index that does not fit
# in an unsigned short fails here instead of leaving a truncated .bin file.
records = [
    struct.pack('HB', indice, 255 if val == -1 else val)
    for indice, val in output
]

with open(sys.argv[1] + ".bin", "wb") as save_file:
    save_file.write(header + b"".join(records))
