initial commit
This commit is contained in:
142
ac_specs_extraction.py
Normal file
142
ac_specs_extraction.py
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
import base64
|
||||||
|
import vertexai
|
||||||
|
from vertexai.generative_models import GenerativeModel, Part, SafetySetting
|
||||||
|
import io
|
||||||
|
from PIL import Image
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
def image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The file's bytes, base64-encoded and decoded to a UTF-8 ``str``.
    """
    with open(image_path, "rb") as handle:
        return base64.b64encode(handle.read()).decode("utf-8")
|
||||||
|
|
||||||
|
def generate(image_path):
    """Ask Gemini (Vertex AI) to extract AC label fields from one image.

    The image is sent together with a fixed extraction prompt, and the model
    is configured to answer with ``application/json``.

    Args:
        image_path: Path to the image file on disk.

    Returns:
        The model's reply parsed with ``json.loads``. The prompt requests a
        JSON object keyed by the field names it lists, but the shape is
        whatever the model actually returns.

    Raises:
        OSError: If the image file cannot be opened or read.
        json.JSONDecodeError: If the reply text is not valid JSON.
        Errors raised by the Vertex AI SDK are propagated unchanged.
    """
    # Imported locally so this function does not depend on a new
    # module-level import being added to the file.
    import mimetypes

    vertexai.init(project="lt-dev-prj-3d3b5f2a", location="asia-south1")
    model = GenerativeModel(
        "gemini-1.5-pro-002",
    )

    generation_config = {
        "max_output_tokens": 8192,
        "temperature": 0.3,
        "top_p": 0.95,
        "response_mime_type": "application/json",
    }

    # Every listed harm category is set to BLOCK_NONE.
    unblocked_categories = (
        SafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        SafetySetting.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        SafetySetting.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        SafetySetting.HarmCategory.HARM_CATEGORY_HARASSMENT,
    )
    safety_settings = [
        SafetySetting(
            category=category,
            threshold=SafetySetting.HarmBlockThreshold.BLOCK_NONE,
        )
        for category in unblocked_categories
    ]

    # Read the raw bytes directly; encoding to base64 only to decode it
    # again straight away adds nothing.
    with open(image_path, "rb") as image_file:
        image_bytes = image_file.read()

    # Declare the real MIME type instead of always claiming JPEG, so PNG/GIF
    # inputs are not mislabelled. Fall back to JPEG (the previous behaviour)
    # when the extension is unknown or does not map to an image type.
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    image = Part.from_data(
        mime_type=mime_type,
        data=image_bytes,
    )

    # The prompt is runtime input to the model: keep its wording unchanged.
    prompt = """Extract the following information from the image in JSON format.
STAR RATING --> INTEGER
ISEER --> FLOAT
EER (W/W) --> FLOAT
Label Period Start Date --> DATE
Label Period End Date --> DATE
ApplianceType --> STRING
Brand --> STRING
Model --> STRING
Year --> DATE (YYYY)
Cooling Capacity (100%) (W) --> INTEGER
Cooling Capacity (50%)(W) --> INTEGER
Cooling Capacity (W) --> INTEGER
Electricity Consumption (UNITS) --> FLOAT
Power Consumption (W) --> FLOAT
Compressor Type --> STRING
Variable Speed Compressor --> BOOLEAN(YES/NO)
Heat Pump --> BOOLEAN(YES/NO)
MODEL NO. --> STRING
MODEL NAME. --> STRING
INPUT POWER --> INTEGER
REFRIGERANT --> STRING
ISO --> STRING
OPERATING HOURS PER ANNUM --> INTEGER

Note: STAR RATING is usually highlighted in Red. SO all the starts with background in RED are considered as STAR RATING. EG.
If only one star has background RED then the STAR RATING WILL BE 1. If all 5 starts have Background RED then the STAR RATING
will be 5.

Provide blank value "" for fields that are not applicable/found in the image.
The value for boolean fields should be "YES" or "NO".

There should strictly no other keys apart from the mentioned above. Even the naming conventions should not change.
"""

    responses = model.generate_content(
        [image, prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
    )

    return json.loads(responses.text)
|
||||||
|
|
||||||
|
def process_files(directory_path):
    """Run ``generate`` on every image in a directory and tabulate the results.

    Only regular files directly inside ``directory_path`` whose names end in
    .jpg, .jpeg, .png or .gif (case-insensitive) are processed.

    Args:
        directory_path: Directory to scan for image files.

    Returns:
        A ``pandas.DataFrame`` with one row per image. A successful row holds
        the extracted fields plus "File Name"; a failed row holds "File Name"
        and "Error". The frame is empty when no image files are found.
    """
    print(f"Processing files in directory: {directory_path}")

    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif')
    image_files = []
    for entry in os.listdir(directory_path):
        if not entry.lower().endswith(image_suffixes):
            continue
        if os.path.isfile(os.path.join(directory_path, entry)):
            image_files.append(entry)

    records = []
    if len(image_files) == 0:
        print("No image files found in the directory.")
        return pd.DataFrame(records)

    for filename in tqdm(image_files, desc="Processing images"):
        filepath = os.path.join(directory_path, filename)
        try:
            row = generate(filepath)
            row["File Name"] = str(filename)
            records.append(row)
        except Exception as e:
            # Best effort: one bad image must not abort the whole batch, so
            # the failure is recorded as its own row instead.
            print(f"Error processing file {filename}: {e}")
            records.append({"File Name": str(filename), "Error": str(e)})

    return pd.DataFrame(records)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: extract data from every image in the folder below
    # and save the resulting table as a CSV file.
    directory_path = 'FOLDER4'  # Make sure this folder exists and contains images

    if os.path.isdir(directory_path):
        extracted_data = process_files(directory_path)

        if extracted_data.empty:
            print("\nNo data was extracted.")
        else:
            print("\n--- Extracted Data (First 5 rows) ---")
            print(extracted_data.head())

            output_csv_path = 'extracted_image_data.csv'
            try:
                extracted_data.to_csv(output_csv_path, index=False)
                print(f"\nSuccessfully saved extracted data to {output_csv_path}")
            except Exception as e:
                # Report the failure rather than crash after extraction has
                # already finished.
                print(f"\nError saving data to CSV: {e}")
    else:
        print(f"Error: Directory '{directory_path}' not found. Please create it or provide the correct path.")
|
||||||
Reference in New Issue
Block a user