# vgcfreebox.myrthtech.pt Git - ue-rnap-aerossol.git/blob - torch-randomdata-example-Aerossol.py
# 2f5189bb461b82810a94f206639743bc68f0bdfa
import os

import cv2  # OpenCV for image handling
import pandas as pd
# ----------------------------------------------
# - load data from files
# - validate the existence of all images listed for the folder
# - create NumPy arrays to store the data attributes
# ----------------------------------------------
# Define your file paths
csv_path = 'data/data/train.csv'
image_folder_path = './data/data/'  # <-- UPDATE THIS PATH!

# Load the metadata table. Every later step of the script reads `df`, so a
# missing CSV is treated as fatal instead of letting the script continue and
# fail later with a NameError on `df`.
try:
    df = pd.read_csv(csv_path)
except FileNotFoundError:
    print("Error: train.csv not found.")
    # NOTE(review): the source listing skips the lines that followed this
    # handler; exiting here is the conservative reconstruction - confirm.
    raise SystemExit(1)
else:
    print("Metadata loaded successfully.")
def check_image_paths(df, image_folder_path):
    """Check that every image listed in ``df`` exists in the target folder.

    Args:
        df: DataFrame with an ``img_name`` column holding image file names.
        image_folder_path: Directory that is expected to contain the images.

    Returns:
        ``df`` unchanged when every listed image is present; otherwise a
        filtered DataFrame containing only the rows whose image was found.

    Raises:
        FileNotFoundError: If ``image_folder_path`` does not exist
            (propagated from ``os.listdir``).
        KeyError: If ``df`` has no ``img_name`` column.
    """
    print("\n--- Running Image Path Check ---")

    # Get a set of all names actually present in the folder
    available_files = set(os.listdir(image_folder_path))

    # The CSV names. They are compared exactly as written: no case
    # normalization is applied here, so standardize them beforehand if the
    # CSV and the file system disagree on casing.
    required_names = set(df['img_name'])

    # Check for missing files
    missing_files = required_names - available_files

    if missing_files:
        print(f"🚨 WARNING: {len(missing_files)} images are missing! Examples: {list(missing_files)[:5]}")
        # Keep only the rows that have an image on disk
        df = df[~df['img_name'].isin(missing_files)]
        print(f"Cleaned DataFrame size: {len(df)}")
    else:
        print("✅ All required images were found in the directory.")

    return df
# ---> Execute the check
# Rebind `df` to the returned frame so later steps only see rows with an image.
df = check_image_paths(df, image_folder_path)
def load_image_data(df, image_folder_path):
    """Load each image listed in ``df`` together with its feature metadata.

    Args:
        df: DataFrame with an ``img_name`` column plus the feature columns
            read below (``ozone``, ``elevation``).
        image_folder_path: Directory that contains the image files.

    Returns:
        A list with one dict per successfully loaded row. Each dict holds the
        decoded image under ``'image'`` and the row's feature values under
        ``'features'``. Rows whose image cannot be read, or whose processing
        raises, are reported on stdout and skipped.
    """
    processed_data = []

    print("\n--- Loading Images and Features (This may take time) ---")

    for index, row in df.iterrows():
        try:
            img_name = row['img_name']

            # Construct the full path to the image file
            full_path = os.path.join(image_folder_path, img_name)

            # Load the image using OpenCV; imread signals failure by
            # returning None rather than raising.
            image = cv2.imread(full_path)
            if image is None:
                print(f"Skipping row {index}: Could not load image at {full_path}")
                continue

            # Extract the pollutant/feature metadata
            # NOTE(review): the source listing skips lines between 'ozone' and
            # 'elevation'; further feature columns may belong here - confirm
            # against the original file / train.csv.
            features = {
                'ozone': row['ozone'],
                'elevation': row['elevation'],
            }

            # Store the combination
            # NOTE(review): the 'features' key name is reconstructed (its line
            # is absent from the listing) - confirm against consumers.
            processed_data.append({
                'image': image,  # The actual image NumPy array
                'features': features,
            })
        except Exception as e:
            # Best effort: one bad row must not abort the whole load.
            print(f"An error occurred processing row {index}: {e}")

    print("✅ Data loading complete.")
    return processed_data
# ---> Execute the full data loading
# `combined_dataset` receives whatever load_image_data returns for the checked rows.
combined_dataset = load_image_data(df, image_folder_path)