From: VGoncalo Date: Fri, 27 Mar 2026 15:39:34 +0000 (+0000) Subject: second version delivered to teacher X-Git-Url: https://vgcfreebox.myrthtech.pt/gitweb/ue-ccd-compressaodeimagensbinarias.git/commitdiff_plain/4ca611e71ede7b14953b104c7f0d0fe76e897002?ds=inline second version delivered to teacher --- 4ca611e71ede7b14953b104c7f0d0fe76e897002 diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..92c1a9b Binary files /dev/null and b/.DS_Store differ diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..b58b603 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,5 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/ccd-projecto-61609&57098&70323.iml b/.idea/ccd-projecto-61609&57098&70323.iml new file mode 100644 index 0000000..1314836 --- /dev/null +++ b/.idea/ccd-projecto-61609&57098&70323.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..e4e6bff --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,12 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..aaf65ba --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..a15c890 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/arithmetic.py b/arithmetic.py new file mode 100644 index 0000000..d9b1968 --- /dev/null +++ b/arithmetic.py @@ -0,0 +1,16 @@ +import utils +import sys + +_opt_type = sys.argv[1] +_file_path = sys.argv[2] + +_file = utils.get_file(_file_path) + + +if _opt_type == "c": + _pbm_content = utils.BitMapFile(_file) + _, _result_option_2 = utils.arithmetic_encode_image(_pbm_content) + print(_result_option_2) +elif _opt_type == "d": + _result = utils.arithmetic_decode_file(_file_path) + print(_result) \ No newline at end of file diff --git a/calculo_entropia.py b/calculo_entropia.py new file mode 100644 index 0000000..321cd44 --- /dev/null +++ b/calculo_entropia.py @@ -0,0 +1,60 @@ +import math +import sys + +def calcular_entropia(bitstream): + n = len(bitstream) + if n == 0: return 0 + + p0 = bitstream.count('0') / n + p1 = bitstream.count('1') / n + + entropia = 0 + for p in [p0, p1]: + if p > 0: + entropia -= p * math.log2(p) + return entropia + +def calcular_entropia_condicional(bitstream): + transicoes = {'00': 0, '01': 0, '10': 0, '11': 0} + contagem_base = {'0': 0, '1': 0} + + for i in range(len(bitstream) - 1): + par = bitstream[i:i+2] + transicoes[par] += 1 + contagem_base[bitstream[i]] += 1 + + h_condicional = 0 + for base in ['0', '1']: + p_base = contagem_base[base] / (len(bitstream) - 1) + if p_base > 0: + h_local = 0 + for prox in ['0', '1']: + p_transicao = transicoes[base + prox] / contagem_base[base] + if p_transicao > 0: + h_local -= p_transicao * math.log2(p_transicao) + h_condicional += p_base * h_local + + return h_condicional + +def clean_image_data(image_data): + + clean_text = "" + + for line in image_data: + line = line.strip() + line = line.replace(" ", "") + if not line or line.startswith('#'): + continue + clean_text += line + + return clean_text + +with open(sys.argv[1], "r") as image: + lines = image.readlines() + image_data = clean_image_data(lines[2:]) + + h_x = calcular_entropia(image_data) + h_condicional = calcular_entropia_condicional(image_data) + + print(f"Entropia: {h_x:.4f} bits/pixel") + print(f"Entropia Condicional: {h_condicional:.4f} bits/pixel") \ No newline at end of file diff --git a/huffman.py b/huffman.py new file mode 100644 index 0000000..c4442f5 --- /dev/null +++ b/huffman.py @@ -0,0 +1,18 @@ +import utils +import sys + +_opt_type = sys.argv[1] +_file_path = sys.argv[2] + + +if _opt_type == "c": + _file = utils.get_file(_file_path) + _pbm_content = utils.BitMapFile(_file) + _result = utils.huffman_encode_image(_pbm_content) + with open(sys.argv[2]+".bin", "wb") as file: + file.write(_result) + print(_result) +elif _opt_type == "d": + _result = utils.huffman_decode_file(_file_path) + with open(sys.argv[2].strip('.bin'), "w") as file: + file.write(_result) \ No newline at end of file diff --git a/lz78/lz_78.py b/lz78/lz_78.py new file mode 100644 index 0000000..28dd34c --- /dev/null +++ b/lz78/lz_78.py @@ -0,0 +1,140 @@ +import sys +import struct + +######################################################################################## +# python lz_78.py +# +# c: comprimir +# d: descomprimir +# +# Os ficheiros gerados ficarão na mesma pasta que o script +######################################################################################## + +def lz78_compression(image): + dictionary = { + 0: "" + } + + output = [] + + symbol = "" + + for i in image: + if (symbol + i) in dictionary.values(): + symbol += i + else: + if symbol == "": + output.append([0, i]) + dictionary[len(dictionary)] = i + else: + output.append([list(dictionary.keys())[list(dictionary.values()).index(symbol)], i]) + dictionary[len(dictionary)] = symbol + i + symbol = "" + + if symbol != "": + idx = list(dictionary.keys())[list(dictionary.values()).index(symbol)] + output.append([idx, ""]) + + return output, dictionary + +def lz78_decompression(compressed_image): + dictionary = { + 0: "" + } + + output = "" + + for i in compressed_image: + output += dictionary.get(i[0]) + i[1] + dictionary[len(dictionary)] = dictionary.get(i[0]) + i[1] + + return output + +def clean_image_data(image_data): + + clean_text = "" + + for line in image_data: + line = line.strip() + line = line.replace(" ", "") + if not line or line.startswith('#'): + continue + clean_text += line + + return clean_text + +def chunkstring(string, length): + return [string[i : length+i] for i in range(0, len(string), length)] + +#Comprimir +if sys.argv[1] == "c": + + with open(sys.argv[2], "r") as image: + lines = image.readlines() + image_size = lines[1] + + if lines[0].strip() != 'P1': + raise ValueError("This is not a pbm file") + + clean_text = clean_image_data(lines[2:]) #Limpa os espaços em branco para comprimir melhor + + output, dic = lz78_compression(clean_text) + + print(f"Output: {output}\n") + print(f"Dictionary: {dic}") + + #Compressão em texto (human-readable) + with open(sys.argv[2] + "_compressed", "w") as save_file: + save_file.write(f"{output} \n{image_size}") + + #Compressão em binário (compressão "a sério") + with open(sys.argv[2] + ".bin", "wb") as save_file: + largura = int(image_size.split()[0]) + altura = int(image_size.split()[1]) + save_file.write(struct.pack('II', largura, altura)) + + for indice, simbolo in output: + simbolo_byte = ord(simbolo) if simbolo != "" else 0 + save_file.write(struct.pack('HB', indice, simbolo_byte)) +#Descomprimir +elif sys.argv[1] == "d": + + #Descomprimir do ficheiro de texto + with open(sys.argv[2] + "_compressed", "r") as image: + lines = image.readlines() + + compressed_image = eval(lines[0]) + image_size = lines[1] + length = int(lines[1].split(" ")[0]) + + output = lz78_decompression(compressed_image) + + chuncked_output = chunkstring(output, length) + + with open(sys.argv[2] + "_decompressed", "w") as save_file: + save_file.write(f"P1\n{image_size}") + for line in chuncked_output: + save_file.write(f"{line}\n") + + #Descomprimir do binário + with open(sys.argv[2] + ".bin", "rb") as image: + header = image.read(8) + largura, altura = struct.unpack('II', header) + + compressed_image_bin = [] + while True: + chunk = image.read(3) + if not chunk: + break + indice, simbolo_byte = struct.unpack('HB', chunk) + simbolo = chr(simbolo_byte) if simbolo_byte != 0 else "" + compressed_image_bin.append([indice, simbolo]) + + output = lz78_decompression(compressed_image_bin) + + chuncked_output = chunkstring(output, largura) + + with open(sys.argv[2]+ "_bin_decompressed", "w") as save_file: + save_file.write(f"P1\n{largura} {altura}\n") + for line in chuncked_output: + save_file.write(f"{line}\n") diff --git a/lz78/lz_78_2x2.py b/lz78/lz_78_2x2.py new file mode 100644 index 0000000..47aa2c8 --- /dev/null +++ b/lz78/lz_78_2x2.py @@ -0,0 +1,90 @@ +import sys +import struct + +######################################################################################## +# Apenas comprimir +# +# python lz_78_2x2.py +# +# +# Os ficheiros gerados ficarão na mesma pasta que o script +######################################################################################## + +def lz78_compression(image): + dictionary = {0: ""} + output = [] + symbol = "" + + for i in image: + if (symbol + i) in dictionary.values(): + symbol += i + else: + if symbol == "": + output.append([0, i]) + dictionary[len(dictionary)] = i + else: + idx = list(dictionary.keys())[list(dictionary.values()).index(symbol)] + output.append([idx, i]) + dictionary[len(dictionary)] = symbol + i + symbol = "" + + if symbol != "": + idx = list(dictionary.keys())[list(dictionary.values()).index(symbol)] + output.append([idx, ""]) + + return output, dictionary + +def clean_image_data(image_data): + clean_text = "" + for line in image_data: + line = line.strip().replace(" ", "") + if not line or line.startswith('#'): + continue + clean_text += line + return clean_text + +def chunkstring(string, length): + return [string[i : length+i] for i in range(0, len(string), length)] + +def get_blocks(bitstream, largura, altura): + matriz = [bitstream[i*largura:(i+1)*largura] for i in range(altura)] + # Padding + if largura % 2 != 0: + matriz = [linha + '0' for linha in matriz] + largura += 1 + if altura % 2 != 0: + matriz.append('0' * largura) + altura += 1 + + blocos = [] + for r in range(0, altura, 2): + for c in range(0, largura, 2): + bloco = matriz[r][c] + matriz[r][c+1] + matriz[r+1][c] + matriz[r+1][c+1] + blocos.append(bloco) + return blocos, largura, altura + + +with open(sys.argv[1], "r") as image: + lines = image.readlines() + image_size = lines[1].split() + + if lines[0].strip() != 'P1': + raise ValueError("This is not a pbm file") + + largura_orig = int(image_size[0]) + altura_orig = int(image_size[1]) + + clean_text = clean_image_data(lines[2:]) + + image_blocks, largura_pad, altura_pad = get_blocks(clean_text, largura_orig, altura_orig) + + output, dic = lz78_compression(image_blocks) + + with open(sys.argv[1] + "_compressed", "w") as save_file: + save_file.write(f"{output}\n{largura_orig} {altura_orig} {largura_pad} {altura_pad}") + + with open(sys.argv[1] + ".bin", "wb") as save_file: + save_file.write(struct.pack('IIII', largura_orig, altura_orig, largura_pad, altura_pad)) + for indice, simbolo in output: + simbolo_val = int(simbolo, 2) if simbolo != "" else 16 + save_file.write(struct.pack('HB', indice, simbolo_val)) \ No newline at end of file diff --git a/lz78/lz_78_rle.py b/lz78/lz_78_rle.py new file mode 100644 index 0000000..261a376 --- /dev/null +++ b/lz78/lz_78_rle.py @@ -0,0 +1,102 @@ +import sys +import struct + +######################################################################################## +# Apenas comprimir +# +# python lz_78_rle.py +# +# +# Os ficheiros gerados ficarão na mesma pasta que o script +######################################################################################## + +def rle_transform(bitstream): + sequencia = [] + pixel_atual = '0' # Começa sempre com o pixel 0 conforme enunciado + contador = 0 + + for bit in bitstream: + if bit == pixel_atual: + if contador == 255: # Limite de 8 bits + sequencia.append(255) + sequencia.append(0) # Troço de 0 do outro pixel para continuar no mesmo [cite: 41] + contador = 1 + else: + contador += 1 + else: + sequencia.append(contador) + pixel_atual = bit + contador = 1 + sequencia.append(contador) + return sequencia + +def lz78_compression(image_data): + dictionary = {0: []} + output = [] + symbol = [] + + for i in image_data: + temp = symbol + [i] + dict_values = list(dictionary.values()) + if temp in dict_values: + symbol = temp + else: + if not symbol: + output.append([0, i]) + dictionary[len(dictionary)] = [i] + else: + idx = dict_values.index(symbol) + output.append([idx, i]) + dictionary[len(dictionary)] = symbol + [i] + symbol = [] + + if symbol: + idx = list(dictionary.values()).index(symbol) + output.append([idx, -1]) + return output + +def clean_image_data(image_data): + + clean_text = "" + + for line in image_data: + line = line.strip() + line = line.replace(" ", "") + if not line or line.startswith('#'): + continue + clean_text += line + + return clean_text + +def chunkstring(string, length): + return [string[i : length+i] for i in range(0, len(string), length)] + + +with open(sys.argv[1], "r") as image: + lines = image.readlines() + image_size = lines[1].split() + + if lines[0].strip() != 'P1': + raise ValueError("This is not a pbm file") + + clean_text = clean_image_data(lines[2:]) #Limpa os espaços em branco para comprimir melhor + + clean_text = rle_transform(clean_text) + + output = lz78_compression(clean_text) + + print(f"Output: {output}\n") + + #Compressão em texto (human-readable) + with open(sys.argv[1] + "_compressed", "w") as save_file: + save_file.write(f"{output} \n{image_size[0]} {image_size[1]}") + + #Compressão em binário (compressão "a sério") + with open(sys.argv[1] + ".bin", "wb") as save_file: + largura = int(image_size[0]) + altura = int(image_size[1]) + save_file.write(struct.pack('II', largura, altura)) + + for indice, val in output: + simbolo_byte = val if val != -1 else 255 + save_file.write(struct.pack('HB', indice, simbolo_byte)) diff --git a/lz78/lz_78_xor.py b/lz78/lz_78_xor.py new file mode 100644 index 0000000..0d3ee90 --- /dev/null +++ b/lz78/lz_78_xor.py @@ -0,0 +1,189 @@ +import sys +import struct + +######################################################################################## +# python lz_78_xor.py +# +# c: comprimir +# d: descomprimir +# +# Os ficheiros gerados ficarão na mesma pasta que o script +######################################################################################## + +def xor(bitstream, largura, altura): + matriz = [] + for i in range(altura): + linha = [int(b) for b in bitstream[i * largura : (i + 1) * largura]] + matriz.append(linha) + + nova_imagem = "" + # A primeira linha mantém-se igual (não tem linha anterior) + nova_imagem += "".join(map(str, matriz[0])) + + for i in range(1, altura): + nova_linha = [] + for j in range(largura): + res = matriz[i][j] ^ matriz[i-1][j] + nova_linha.append(str(res)) + nova_imagem += "".join(nova_linha) + + return nova_imagem + +def descodificar_xor(bitstream_transformado, largura, altura): + matriz_temp = [] + for i in range(altura): + linha = [int(b) for b in bitstream_transformado[i * largura : (i + 1) * largura]] + matriz_temp.append(linha) + + matriz_original = [] + + matriz_original.append(matriz_temp[0]) + + for i in range(1, altura): + linha_recuperada = [] + for j in range(largura): + pixel_original = matriz_temp[i][j] ^ matriz_original[i-1][j] + linha_recuperada.append(pixel_original) + matriz_original.append(linha_recuperada) + + bitstream_final = "" + for linha in matriz_original: + bitstream_final += "".join(map(str, linha)) + + return bitstream_final + +def lz78_compression(image): + dictionary = { + 0: "" + } + + output = [] + + symbol = "" + + for i in image: + if (symbol + i) in dictionary.values(): + symbol += i + else: + if symbol == "": + output.append([0, i]) + dictionary[len(dictionary)] = i + else: + output.append([list(dictionary.keys())[list(dictionary.values()).index(symbol)], i]) + dictionary[len(dictionary)] = symbol + i + symbol = "" + + if symbol != "": + idx = list(dictionary.keys())[list(dictionary.values()).index(symbol)] + output.append([idx, ""]) + + return output, dictionary + +def lz78_decompression(compressed_image): + dictionary = { + 0: "" + } + + output = "" + + for i in compressed_image: + output += dictionary.get(i[0]) + i[1] + dictionary[len(dictionary)] = dictionary.get(i[0]) + i[1] + + return output + +def clean_image_data(image_data): + + clean_text = "" + + for line in image_data: + line = line.strip() + line = line.replace(" ", "") + if not line or line.startswith('#'): + continue + clean_text += line + + return clean_text + +def chunkstring(string, length): + return [string[i : length+i] for i in range(0, len(string), length)] + +#Comprimir +if sys.argv[1] == "c": + + with open(sys.argv[2], "r") as image: + lines = image.readlines() + image_size = lines[1].split() + + if lines[0].strip() != 'P1': + raise ValueError("This is not a pbm file") + + clean_text = clean_image_data(lines[2:]) #Limpa os espaços em branco para comprimir melhor + + clean_text = xor(clean_text, int(image_size[0]), int(image_size[1])) + + output, dic = lz78_compression(clean_text) + + print(f"Output: {output}\n") + print(f"Dictionary: {dic}") + + #Compressão em texto (human-readable) + with open(sys.argv[2] + "_compressed", "w") as save_file: + save_file.write(f"{output} \n{image_size[0]} {image_size[1]}") + + #Compressão em binário (compressão "a sério") + with open(sys.argv[2] + ".bin", "wb") as save_file: + largura = int(image_size[0]) + altura = int(image_size[1]) + save_file.write(struct.pack('II', largura, altura)) + + for indice, simbolo in output: + simbolo_byte = ord(simbolo) if simbolo != "" else 0 + save_file.write(struct.pack('HB', indice, simbolo_byte)) +#Descomprimir +elif sys.argv[1] == "d": + + #Descomprimir do ficheiro de texto + with open(sys.argv[2] + "_compressed", "r") as image: + lines = image.readlines() + + compressed_image = eval(lines[0]) + image_size = lines[1] + largura = int(lines[1].split(" ")[0]) + altura = int(lines[1].split(" ")[1]) + + output = lz78_decompression(compressed_image) + + output = descodificar_xor(output, largura, altura) + + chuncked_output = chunkstring(output, largura) + + with open(sys.argv[2] + "_decompressed", "w") as save_file: + save_file.write(f"P1\n{image_size}\n") + for line in chuncked_output: + save_file.write(f"{line}\n") + + #Descomprimir do binário + with open(sys.argv[2] + ".bin", "rb") as image: + header = image.read(8) + largura, altura = struct.unpack('II', header) + + compressed_image_bin = [] + while True: + chunk = image.read(3) + if not chunk: + break + indice, simbolo_byte = struct.unpack('HB', chunk) + simbolo = chr(simbolo_byte) if simbolo_byte != 0 else "" + compressed_image_bin.append([indice, simbolo]) + + output = lz78_decompression(compressed_image_bin) + + output = descodificar_xor(output, largura, altura) + + chuncked_output = chunkstring(output, largura) + + with open(sys.argv[2]+ "_bin_decompressed", "w") as save_file: + save_file.write(f"P1\n{largura} {altura}\n") + for line in chuncked_output: + save_file.write(f"{line}\n") diff --git a/main.py b/main.py new file mode 100644 index 0000000..df1449e --- /dev/null +++ b/main.py @@ -0,0 +1,42 @@ +import utils + +if __name__ == '__main__': + _original = utils.get_file('pixel_character.pbm') + #_original = utils.get_file('tetris_example.pbm') + _pbm_content = utils.BitMapFile(_original) + + while True: + print("\n1) print file info") + print("2) Preview arithmetic codification info ") + print("3) Encode with arithmetic style ") + print("4) Decode with arithmetic style ") + print("5) Apply XOR to image before compression \n") + _user_input = input("your option --> ") + + if _user_input == "1": + print("\n*** PORTABLE BITMAP FILE INFO ***") + print(f"magic number:{_pbm_content.magic_number},\nimage width:{_pbm_content.image_width},image height:{_pbm_content.image_height}") + print(f"Zeros ---> {_pbm_content.zeros}") + print(f"Uns ---> {_pbm_content.ones}") + print("***** \t ******** \t *****\n") + input("") + if _user_input == "2": + utils.arithmetic_cod_preview(_pbm_content) + if _user_input == "3": + _result_option_1, _result_option_2 = utils.arithmetic_encode_image(_pbm_content) + with open('arithmetic_output_opt1.txt', 'w') as f: + f.write(_result_option_1) + with open('arithmetic_output_opt2.txt', 'w') as f: + f.write(_result_option_2) + if _user_input == "4": + _result = utils.arithmetic_decode_file('arithmetic_output_opt2.txt') + with open('arithmetic_decode_output.pbm', 'w') as f: + f.write(_result) + if _user_input == "5": + _new_pbm = _pbm_content + _xor_on_image = utils.xor(_new_pbm.image_array,int(_new_pbm.image_width),int(_new_pbm.image_height)) + _new_pbm.image_arra = _xor_on_image + _, _result = utils.arithmetic_encode_image(_new_pbm) + print(_result) + with open('arithmetic_output_wxor.txt', 'w') as f: + f.write(_result) \ No newline at end of file diff --git a/pixel_character.pbm b/pixel_character.pbm new file mode 100644 index 0000000..30de542 --- /dev/null +++ b/pixel_character.pbm @@ -0,0 +1,50 @@ +P1 +48 48 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/readme.txt b/readme.txt new file mode 100644 index 0000000..ddff7a7 --- /dev/null +++ b/readme.txt @@ -0,0 +1,24 @@ +Universidade de Évora +Projeto da disciplina de Compressão de Dados - 2025/2026 +Compressão de Imagens Binárias + + +LZ78 - André Moleirinho (61609) +- lz_78.py +- lz_78_xor.py +- lz_78_2x2.py +- lz_78_rle.py + +Huffman - Fábio Macarrão (57098) +- huffman.py +- utils.py + +Aritmética - Vitor Costa (70323) +- arithmetic.py +- utils.py + + +Executar os scripts em ficheiros pbm: + > python + + diff --git "a/relat\303\263rio.pdf" "b/relat\303\263rio.pdf" new file mode 100644 index 0000000..a3cf56e Binary files /dev/null and "b/relat\303\263rio.pdf" differ diff --git "a/taxa_compress\303\243o.py" "b/taxa_compress\303\243o.py" new file mode 100644 index 0000000..b9bdbd8 --- /dev/null +++ "b/taxa_compress\303\243o.py" @@ -0,0 +1,40 @@ +import os +import sys + +def calcular_taxa_compressao(caminho_original, caminho_comprimido, total_pixeis): + + tamanho_orig = os.path.getsize(caminho_original) + tamanho_comp = os.path.getsize(caminho_comprimido) + + poupanca = (1 - (tamanho_comp / tamanho_orig)) * 100 + + bpp = (tamanho_comp * 8) / total_pixeis + + return poupanca, bpp, tamanho_orig, tamanho_comp + +def clean_image_data(image_data): + + clean_text = "" + + for line in image_data: + line = line.strip() + line = line.replace(" ", "") + if not line or line.startswith('#'): + continue + clean_text += line + + return clean_text + +with open(sys.argv[1], "r") as ficheiro: + lines = ficheiro.readlines() + image_data = clean_image_data(lines[2:]) + + largura, altura = lines[1].split(" ") + + total_pixeis = int(largura) * int(altura) + p, b, t_o, t_c = calcular_taxa_compressao(sys.argv[1], sys.argv[1] + "_compressed", len(image_data)) + +print(f"Tamanho Original: {t_o} bytes") +print(f"Tamanho Comprimido: {t_c} bytes") +print(f"Redução: {p:.2f}%") +print(f"Rácio: {b:.4f} bits/pixel") \ No newline at end of file diff --git a/tetris_example.pbm b/tetris_example.pbm new file mode 100644 index 0000000..07ce596 --- /dev/null +++ b/tetris_example.pbm @@ -0,0 +1,7 @@ +P1 +4 5 +0 0 0 0 +0 0 1 0 +0 1 1 0 +0 1 0 0 +0 0 0 0 diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..3b0ab7a --- /dev/null +++ b/utils.py @@ -0,0 +1,392 @@ +import os +import math +from collections import Counter +import heapq +import numpy as np +import struct + + +class BitMapFile: + def __init__(self, text): + self._text = text.split() + self.magic_number = self._text[0] + self.image_width = self._text[1] + self.image_height = self._text[2] + self.image_array = self._text[3:] + self.zeros = 0 + self.ones = 0 + self.size = int(self.image_width) * int(self.image_height) + + for _ in range(len(self.image_array)): + if self.image_array[_] == "0": + self.zeros += 1 + elif self.image_array[_] == "1": + self.ones += 1 + else: + print("ignore") + + +""" +To read files +""" +def get_file(file_path): + with open(file_path, 'r', encoding='utf-8') as file: + _file_content = file.read() + return _file_content + +""" +To evaluate comprimento médio do código - l(c) + l(all_pixels) = ceil(log2P(all_pixels))+1 +""" +def arithmetic_cod_preview(_pbm): + print("\n*** Arithmetic Preview ***") + pixels = [int(pixel) for pixel in _pbm.image_array] + _P0 = _pbm.zeros/_pbm.size + _P1 = _pbm.ones/_pbm.size + _PT01_int = 1 + + for pixel in pixels: + if pixel == 0: + _PT01_int *= _P0 + elif pixel == 1: + _PT01_int *= _P1 + + _arithmetic_encode_lenght = np.ceil(-1*np.log2(_PT01_int)) + 1 + print(f"L(image) --> {_arithmetic_encode_lenght} bits") + _entropy = calcular_entropia(_pbm.image_array) + print(f"entropy --> {_entropy} bits") + _datastream = clean_image_data(_pbm.image_array) + _entropia_conditional = calcular_entropia_condicional(_datastream) + print(f"entropy condicional --> {_entropia_conditional} bits") + return int(_arithmetic_encode_lenght) + +""" +Arithmetic Encoding approach + +it uses static probabilities provided on output file + +c(x)=|interval_low_val +(interval_high_val).F(x)_low + =|interval_low_val +(interval_high_val).F(x)_high +_to_encode_val = avg(C(all)_low,C(all)_high) +_output = bin(_to_encode_val)) +""" +def arithmetic_encode_image(_pbm): + pixels = [int(pixel) for pixel in _pbm.image_array] + _P0 = np.float64(_pbm.zeros/_pbm.size) + _P1 = np.float64(_pbm.ones/_pbm.size) + _interval = [0,1] + # probabilidades acomuladas orderdanas por ordem crescente + _Fx = {1: (0,_P1), + 0: (_P1,1)} + _output_opt_1 = f"{_pbm.zeros} {_pbm.ones} {_pbm.image_width} {_pbm.image_height} " + + for pixel in pixels: + if pixel == 0: + _low_val = np.float64(_interval[0] + (_interval[1]-_interval[0]) * _Fx.get(0)[0]) + _high_val = np.float64(_interval[0] + (_interval[1]-_interval[0])*_Fx.get(0)[1]) + if pixel == 1: + _low_val = np.float64(_interval[0] + (_interval[1] - _interval[0]) * _Fx.get(1)[0]) + _high_val = np.float64(_interval[0] + (_interval[1] - _interval[0]) * _Fx.get(1)[1]) + _interval[0] = _low_val + _interval[1] = _high_val + + _to_encode_value = np.float64((_interval[0]+_interval[1])/2) + print(f"to encode --> {_to_encode_value}") + _output_opt_2 = _output_opt_1 + f"{_to_encode_value}" + + _size = arithmetic_cod_preview(_pbm) + _res = _to_encode_value + for _ in range(_size): + _res *=2 + if _res < 1: + _output_opt_1 += "0" + else: + _output_opt_1 += "1" + _res = _res-1 + + + return _output_opt_1,_output_opt_2 + +""" +To decode encoded pbm files with arithmetic_encode_image() + amount_of_zeros amount_of_ones image_width image_height encoded_image +""" +def arithmetic_decode_file(_txt): + _encoded_pbm_file = get_file(_txt).split() + + amount_of_zeros = int(_encoded_pbm_file[0]) + amount_of_ones = int(_encoded_pbm_file[1]) + image_width = int(_encoded_pbm_file[2]) + image_height = int(_encoded_pbm_file[3]) + encoded_image = np.float64(_encoded_pbm_file[4]) + + _output = f"P1\n{image_width} {image_height}\n" + _size = int(image_width) * int(image_height) + _P0 = np.float64(amount_of_zeros/_size) + _P1 = np.float64(amount_of_ones/_size) + _Fx = {1:(0,_P1), + 0:(_P1,1)} + + _constructed_image = "" + for h in range(image_height): + for w in range(image_width): + if _P1 < encoded_image < 1: + _constructed_image += "0 " + encoded_image = (encoded_image-_P1)/_P0 + elif 0 < encoded_image < _P1: + _constructed_image += "1 " + encoded_image = (encoded_image - 0) / _P1 + + if w == image_width-1: + _constructed_image += "\n" + + _output += _constructed_image + return _output + + +def calcular_entropia(bitstream): + n = len(bitstream) + if n == 0: return 0 + + p0 = bitstream.count('0') / n + p1 = bitstream.count('1') / n + + entropia = 0 + for p in [p0, p1]: + if p > 0: + entropia -= p * math.log2(p) + return entropia + + +def calcular_entropia_condicional(bitstream): + transicoes = {'00': 0, '01': 0, '10': 0, '11': 0} + contagem_base = {'0': 0, '1': 0} + + for i in range(len(bitstream) - 1): + par = bitstream[i:i + 2] + transicoes[par] += 1 + contagem_base[bitstream[i]] += 1 + + h_condicional = 0 + for base in ['0', '1']: + p_base = contagem_base[base] / (len(bitstream) - 1) + if p_base > 0: + h_local = 0 + for prox in ['0', '1']: + p_transicao = transicoes[base + prox] / contagem_base[base] + if p_transicao > 0: + h_local -= p_transicao * math.log2(p_transicao) + h_condicional += p_base * h_local + + return h_condicional + + +def clean_image_data(image_data): + clean_text = "" + + for line in image_data: + line = line.strip() + line = line.replace(" ", "") + if not line or line.startswith('#'): + continue + clean_text += line + + return clean_text + + +def calcular_taxa_compressao(caminho_original, caminho_comprimido, total_pixeis): + tamanho_orig = os.path.getsize(caminho_original) + tamanho_comp = os.path.getsize(caminho_comprimido) + + poupanca = (1 - (tamanho_comp / tamanho_orig)) * 100 + + bpp = (tamanho_comp * 8) / total_pixeis + + return poupanca, bpp, tamanho_orig, tamanho_comp + + +def xor(bitstream, largura, altura): + matriz = [] + for i in range(altura): + linha = [int(b) for b in bitstream[i * largura: (i + 1) * largura]] + matriz.append(linha) + + nova_imagem = "" + # A primeira linha mantém-se igual (não tem linha anterior) + nova_imagem += "".join(map(str, matriz[0])) + + for i in range(1, altura): + nova_linha = [] + for j in range(largura): + res = matriz[i][j] ^ matriz[i - 1][j] + nova_linha.append(str(res)) + nova_imagem += "".join(nova_linha) + + return nova_imagem + + +def descodificar_xor(bitstream_transformado, largura, altura): + matriz_temp = [] + for i in range(altura): + linha = [int(b) for b in bitstream_transformado[i * largura: (i + 1) * largura]] + matriz_temp.append(linha) + + matriz_original = [] + + matriz_original.append(matriz_temp[0]) + + for i in range(1, altura): + linha_recuperada = [] + for j in range(largura): + pixel_original = matriz_temp[i][j] ^ matriz_original[i - 1][j] + linha_recuperada.append(pixel_original) + matriz_original.append(linha_recuperada) + + bitstream_final = "" + for linha in matriz_original: + bitstream_final += "".join(map(str, linha)) + + return bitstream_final + + +# ------------------------------------------------------------------------- +# HUFFMAN IMPLEMENTATION +# ------------------------------------------------------------------------- + +class HuffmanNode: + def __init__(self, char, freq): + self.char = char # 0 ou 1 que está no pixel + self.freq = freq # Quantas vezes aparece + self.left = None # Filho à esquerda + self.right = None # Filho à direita + + # Nó com menor frequencia + def __lt__(self, other): + return self.freq < other.freq + + +# Constroi a árvore +def build_huffman_tree(pixels): + frequency = Counter(pixels) + heap = [HuffmanNode(char, freq) for char, freq in frequency.items()] + heapq.heapify(heap) + + # Cria a árvore de baixo para cima / frequencia menor para maior + while len(heap) > 1: + node1 = heapq.heappop(heap) + node2 = heapq.heappop(heap) + merged = HuffmanNode(None, node1.freq + node2.freq) + merged.left = node1 + merged.right = node2 + heapq.heappush(heap, merged) + + return heap[0] if heap else None + + +# Cria os códigos (0 para a esquerda, 1 para a direira) +def make_codes(node, current_code="", codes=None): + if codes is None: + codes = {} + if node is None: + return codes + if node.char is not None: + codes[node.char] = current_code + return codes + + make_codes(node.left, current_code + "0", codes) + make_codes(node.right, current_code + "1", codes) + return codes + + +# Faz encoding do ficheiro +def huffman_encode_image(_pbm): + print("\n*** Huffman Encoding ***") + pixels = _pbm.image_array + + # Erro caso não encontre a imagem + if not pixels: + print("Empty image.") + return None + + # Constroi a árvore e os códigos + root = build_huffman_tree(pixels) + codes = make_codes(root) + + print(f"Codes: {codes}") + + # Faz encode dos pixeis + encoded_str = "".join([codes[p] for p in pixels]) + + # Garante que o total é multiplo de 8 + extra_padding = 8 - len(encoded_str) % 8 + encoded_str = encoded_str + "0" * extra_padding + + # Reduz o tamanho convertendo de byte para bit + b = bytearray() + for i in range(0, len(encoded_str), 8): + byte = encoded_str[i:i + 8] + b.append(int(byte, 2)) + + # Cabeçalho + width = int(_pbm.image_width) + height = int(_pbm.image_height) + freq0 = _pbm.zeros + freq1 = _pbm.ones + header = struct.pack('>IIIIB', width, height, freq0, freq1, extra_padding) + + return header + b + + +# Faz decode do ficheiro +def huffman_decode_file(filename): + print(f"\n*** Huffman Decoding from {filename} ***") + + # Lê o ficheiro + with open(filename, 'rb') as f: + file_content = f.read() + + # Separa o cabeçalho + header_size = struct.calcsize('>IIIIB') + width, height, freq0, freq1, extra_padding = struct.unpack('>IIIIB', file_content[:header_size]) + encoded_data = file_content[header_size:] + + print(f"Header Info -> W:{width}, H:{height}, Zeros:{freq0}, Ones:{freq1}") + + # Constroi a árvore + fake_pixels = ['0'] * freq0 + ['1'] * freq1 + root = build_huffman_tree(fake_pixels) + codes = make_codes(root) + + # Faz decode dos pixeis + reverse_codes = {v: k for k, v in codes.items()} + + # Converte os bytes em string + bit_string = "" + for byte in encoded_data: + bit_string += f"{byte:08b}" + + # Desencripta + decoded_pixels = [] + current_code = "" + for bit in bit_string: + current_code += bit + if current_code in reverse_codes: + decoded_pixels.append(reverse_codes[current_code]) + current_code = "" + + # Limita o tamanho width * height + expected_pixels = width * height + decoded_pixels = decoded_pixels[:expected_pixels] + + pbm_content = f"P1\n{width} {height}\n" + + # Formata as linhas para não ficar sequencial + row_str = "" + for i, p in enumerate(decoded_pixels): + row_str += p + " " + if (i + 1) % width == 0: + pbm_content += row_str.strip() + "\n" + row_str = "" + + print("Decoding finished.") + return pbm_content