import sys
import struct

########################################################################################
# Compression only
#
# python lz_78_2x2.py <path_to_the_original_file>
#
# The generated files (<input>_compressed and <input>.bin) are written next to
# the input file, because their paths are built by appending to the given path.
########################################################################################

def lz78_compression(image):
    """Encode a sequence of string symbols with LZ78.

    Args:
        image: Iterable of strings. Each item is treated as one input symbol
            (for a plain string, that means one character at a time).

    Returns:
        A tuple ``(output, dictionary)``:

        * ``output`` is a list of ``[index, symbol]`` pairs. ``index`` is the
          dictionary key of the longest phrase already known and ``symbol``
          is the item that followed it. If the input ends while still inside
          a known phrase, a final pair with ``symbol == ""`` is appended.
        * ``dictionary`` maps every index to its phrase; index 0 is the
          empty phrase.
    """
    dictionary = {0: ""}
    # Reverse mapping (phrase -> index). A phrase is only ever added when it
    # is not yet known, so phrases are unique and this lookup returns exactly
    # what a linear search through dictionary.values() would, in O(1).
    phrase_index = {"": 0}
    output = []
    symbol = ""

    for i in image:
        candidate = symbol + i
        if candidate in phrase_index:
            # Still inside a known phrase: keep extending it.
            symbol = candidate
            continue

        # Longest known prefix is `symbol` (index 0 when it is empty);
        # emit it together with the item that broke the match.
        output.append([phrase_index[symbol], i])
        new_index = len(dictionary)
        dictionary[new_index] = candidate
        phrase_index[candidate] = new_index
        symbol = ""

    if symbol != "":
        # Input ended in the middle of a known phrase: flush it with an
        # empty trailing symbol.
        output.append([phrase_index[symbol], ""])

    return output, dictionary

def clean_image_data(image_data):
    """Collapse the raster lines of a plain PBM file into one pixel string.

    Args:
        image_data: Iterable of text lines (typically the lines that follow
            the PBM header).

    Returns:
        A single string containing the characters of every line with all
        whitespace removed. Blank lines and comments (everything from a
        ``#`` to the end of its line) contribute nothing.
    """
    pieces = []
    for line in image_data:
        # Drop a comment, whether it fills the whole line or trails the data.
        content = line.split('#', 1)[0]
        # split() with no argument breaks on any whitespace (spaces, tabs,
        # CR/LF), so nothing but pixel characters reaches the bitstream.
        pieces.append("".join(content.split()))
    return "".join(pieces)

def chunkstring(string, length):
    """Split ``string`` into consecutive pieces of at most ``length`` items.

    The last piece is shorter when ``len(string)`` is not a multiple of
    ``length``. Returns the pieces as a list, in order.
    """
    pieces = []
    for start in range(0, len(string), length):
        stop = start + length
        pieces.append(string[start:stop])
    return pieces

def get_blocks(bitstream, largura, altura):
    """Cut a row-major pixel string into 2x2 blocks.

    Args:
        bitstream: String holding the pixels row after row.
        largura: Image width (characters per row).
        altura: Image height (number of rows).

    Returns:
        A tuple ``(blocos, largura, altura)`` where ``blocos`` is the list of
        4-character blocks (top-left, top-right, bottom-left, bottom-right),
        scanned left to right and top to bottom, and ``largura``/``altura``
        are the dimensions after padding to even values.
    """
    rows = []
    for row_number in range(altura):
        start = row_number * largura
        rows.append(bitstream[start:start + largura])

    # Pad with '0' so that both dimensions are even and every 2x2 block is full.
    if largura % 2:
        rows = [row + '0' for row in rows]
        largura += 1
    if altura % 2:
        rows.append('0' * largura)
        altura += 1

    blocos = [
        rows[top][left] + rows[top][left + 1] + rows[top + 1][left] + rows[top + 1][left + 1]
        for top in range(0, altura, 2)
        for left in range(0, largura, 2)
    ]
    return blocos, largura, altura


# Script body: compress the plain PBM (P1) file named by the first
# command-line argument and write two outputs next to it.
with open(sys.argv[1], "r") as image:
    lines = image.readlines()

# Check the magic number before reading anything else, so an empty or
# non-PBM file yields this error instead of an IndexError.
if not lines or lines[0].strip() != 'P1':
    raise ValueError("This is not a pbm file")

# The dimensions are on the first line after the magic number that is neither
# blank nor a comment (header comments such as "# Created by ..." are common).
size_index = next(
    (
        idx
        for idx, line in enumerate(lines[1:], start=1)
        if line.strip() and not line.strip().startswith('#')
    ),
    None,
)
if size_index is None:
    raise ValueError("PBM header is missing the image dimensions")

image_size = lines[size_index].split()
if len(image_size) < 2:
    raise ValueError("PBM header is missing the image dimensions")

largura_orig = int(image_size[0])
altura_orig = int(image_size[1])

# Everything after the dimensions line is raster data.
clean_text = clean_image_data(lines[size_index + 1:])

image_blocks, largura_pad, altura_pad = get_blocks(clean_text, largura_orig, altura_orig)

output, dic = lz78_compression(image_blocks)

# Human-readable dump: the pair list followed by original and padded sizes.
with open(sys.argv[1] + "_compressed", "w") as save_file:
    save_file.write(f"{output}\n{largura_orig} {altura_orig} {largura_pad} {altura_pad}")

# Binary output: four unsigned ints (sizes) followed by one (index, symbol)
# record per pair. No byte-order prefix is given, so struct uses the
# platform's native byte order.
with open(sys.argv[1] + ".bin", "wb") as save_file:
    save_file.write(struct.pack('IIII', largura_orig, altura_orig, largura_pad, altura_pad))
    for indice, simbolo in output:
        # A 4-bit block encodes to 0..15; 16 marks the empty trailing symbol.
        simbolo_val = int(simbolo, 2) if simbolo != "" else 16
        # NOTE(review): 'H' holds at most 65535, so struct.error is raised if
        # the dictionary grows beyond that; widening it would change the
        # file format expected by the reader.
        save_file.write(struct.pack('HB', indice, simbolo_val))