import ast
import struct
import sys

########################################################################################
# Usage: python lz_78.py <d/c> <path_to_the_original_file>
# 
# c: compress
# d: decompress
# 
# The generated files are written next to the original file (its path plus a suffix)
########################################################################################

def lz78_compression(image):
    """Encode *image* with the LZ78 algorithm.

    Args:
        image: Iterable of string symbols (normally a string, iterated
            character by character).

    Returns:
        A tuple ``(output, dictionary)``. ``output`` is a list of
        ``[index, symbol]`` pairs, where ``index`` refers to a previously
        seen phrase (0 is the empty phrase) and ``symbol`` is the symbol that
        extends it; the final pair has ``symbol == ""`` when the input ends in
        the middle of an already known phrase. ``dictionary`` maps every
        phrase index to its phrase string.
    """
    dictionary = {
        0: ""
    }
    # Reverse map (phrase -> index). Phrases are unique, so this gives the same
    # answers as scanning dictionary.values() but in constant time per lookup.
    phrase_index = {"": 0}

    output = []

    symbol = ""

    for i in image:
        candidate = symbol + i
        if candidate in phrase_index:
            # Still inside a known phrase: keep extending it.
            symbol = candidate
            continue

        # Emit (longest known prefix, new symbol) and register the new phrase.
        output.append([phrase_index[symbol], i])
        new_index = len(dictionary)
        dictionary[new_index] = candidate
        phrase_index[candidate] = new_index
        symbol = ""

    if symbol != "":
        # Input ended while matching a known phrase: flush it with no symbol.
        output.append([phrase_index[symbol], ""])

    return output, dictionary

def lz78_decompression(compressed_image):
    """Rebuild the original string from LZ78 ``[index, symbol]`` pairs.

    Args:
        compressed_image: Iterable of two-item sequences ``[index, symbol]``
            in the order they were emitted by the compressor.

    Returns:
        The decoded string.

    Raises:
        ValueError: If a pair references a phrase index that has not been
            defined yet (corrupt or truncated input).
    """
    dictionary = {
        0: ""
    }

    # Collect decoded phrases and join once at the end instead of growing a
    # string with += on every step.
    pieces = []

    for entry in compressed_image:
        index, symbol = entry[0], entry[1]
        prefix = dictionary.get(index)
        if prefix is None:
            raise ValueError(f"Invalid phrase index {index!r} in compressed data")
        phrase = prefix + symbol
        pieces.append(phrase)
        # The decoder mirrors the encoder: every pair defines the next phrase.
        dictionary[len(dictionary)] = phrase

    return "".join(pieces)

def clean_image_data(image_data):
    """Concatenate the pixel rows of an image into a single string.

    Each line is stripped of surrounding whitespace and has its spaces
    removed; lines that end up empty or that start with '#' are dropped.

    Args:
        image_data: Iterable of text lines.

    Returns:
        The remaining lines joined together with no separator.
    """
    kept_rows = []

    for raw_line in image_data:
        compact = raw_line.strip().replace(" ", "")
        if compact and not compact.startswith('#'):
            kept_rows.append(compact)

    return "".join(kept_rows)

def chunkstring(string, length):
    """Split *string* into consecutive pieces of *length* characters.

    The last piece is shorter when len(string) is not a multiple of *length*.
    """
    pieces = []
    for start in range(0, len(string), length):
        stop = start + length
        pieces.append(string[start:stop])
    return pieces

# Compress
if sys.argv[1] == "c":

    with open(sys.argv[2], "r") as image:
        lines = image.readlines()

    # Check the magic number first so an empty or non-PBM file fails with the
    # intended error instead of an IndexError on a missing second line.
    if not lines or lines[0].strip() != 'P1':
        raise ValueError("This is not a pbm file")

    # The size line is the first line after the magic number that is neither
    # blank nor a '#' comment; it is not necessarily the second line.
    size_index = None
    for position in range(1, len(lines)):
        candidate = lines[position].strip()
        if candidate and not candidate.startswith('#'):
            size_index = position
            break
    if size_index is None:
        raise ValueError("This pbm file has no size line")

    image_size = lines[size_index]

    # Parse the dimensions before creating any output file, so a malformed
    # header does not leave half-written results behind.
    largura = int(image_size.split()[0])
    altura = int(image_size.split()[1])

    # Strip whitespace from the pixel data so it compresses better
    clean_text = clean_image_data(lines[size_index + 1:])

    output, dic = lz78_compression(clean_text)

    print(f"Output: {output}\n")
    print(f"Dictionary: {dic}")

    # Text output (human-readable): the pair list followed by the size line
    with open(sys.argv[2] + "_compressed", "w") as save_file:
        save_file.write(f"{output} \n{image_size}")

    # Binary output (the actual compression)
    with open(sys.argv[2] + ".bin", "wb") as save_file:
        save_file.write(struct.pack('II', largura, altura))

        # NOTE: 'H' stores the phrase index as an unsigned 16-bit value, so
        # struct.pack raises struct.error once an index exceeds 65535.
        # NOTE: the formats have no byte-order prefix, so the file uses the
        # native byte order of the machine that wrote it.
        for indice, simbolo in output:
            # An empty symbol (end-of-input flush) is stored as byte 0.
            simbolo_byte = ord(simbolo) if simbolo != "" else 0
            save_file.write(struct.pack('HB', indice, simbolo_byte))
# Decompress
elif sys.argv[1] == "d":

    # Decompress from the text file
    with open(sys.argv[2] + "_compressed", "r") as image:
        lines = image.readlines()

    # The first line is the repr() of the pair list. literal_eval only accepts
    # Python literals, so a tampered file cannot run arbitrary code the way
    # eval() would allow.
    compressed_image = ast.literal_eval(lines[0].strip())
    image_size = lines[1]
    # split() without arguments tolerates tabs and repeated/leading spaces.
    length = int(image_size.split()[0])

    output = lz78_decompression(compressed_image)

    chuncked_output = chunkstring(output, length)

    with open(sys.argv[2] + "_decompressed", "w") as save_file:
        save_file.write(f"P1\n{image_size}")
        for line in chuncked_output:
            save_file.write(f"{line}\n")

    # Decompress from the binary file
    with open(sys.argv[2] + ".bin", "rb") as image:
        # 8-byte header: width and height as two unsigned ints
        header = image.read(8)
        largura, altura = struct.unpack('II', header)

        # Every record is 3 bytes: 16-bit phrase index + 1 symbol byte
        compressed_image_bin = []
        while True:
            chunk = image.read(3)
            if not chunk:
                break
            indice, simbolo_byte = struct.unpack('HB', chunk)
            # Byte 0 is how the compressor encodes "no symbol".
            simbolo = chr(simbolo_byte) if simbolo_byte != 0 else ""
            compressed_image_bin.append([indice, simbolo])

    output = lz78_decompression(compressed_image_bin)

    chuncked_output = chunkstring(output, largura)

    with open(sys.argv[2]+ "_bin_decompressed", "w") as save_file:
        save_file.write(f"P1\n{largura} {altura}\n")
        for line in chuncked_output:
            save_file.write(f"{line}\n")
