efficiency and time complexity managed
Hk669 committed Jul 14, 2024
1 parent eb7706f commit 3824a9d
Showing 3 changed files with 102 additions and 115 deletions.
21 changes: 11 additions & 10 deletions src/api.py
@@ -165,30 +165,31 @@ async def get_recommendations(request: Request, current_user: dict = Depends(get
languages = body.get("languages", [])

urls = []
user = User(username=current_user["username"], access_token=current_user["access_token"],extra_topics=extra_topics, extra_languages=languages)
user = User(username=current_user["username"], access_token=current_user["access_token"],extra_topics=extra_topics, languages=languages)

user_details, language_topics = await get_repos(user)
if not user_details:
logger.info("No repos found for user")
logger.info("Generating topic-based recommendations")
return get_topic_based_recommendations(user)
return await get_topic_based_recommendations(user)

try:
fetched_repos = await main(language_topics, access_token=user.access_token, extra_topics=extra_topics, extra_languages=languages)
logger.info(f"Fetched {len(fetched_repos)} repositories")
except Exception as e:
logger.error(f"Error fetching repositories: {str(e)}")
raise ValueError("Error fetching repositories")
# try:
# fetched_repos = await main(language_topics, access_token=user.access_token, extra_topics=extra_topics, extra_languages=languages)
# logger.info(f"Fetched {len(fetched_repos)} repositories")
# except Exception as e:
# logger.error(f"Error fetching repositories: {str(e)}")
# raise ValueError("Error fetching repositories")

try:
urls = recommend(user_details, language_topics)
urls = await recommend(user_details=user_details, languages_topics=language_topics)
except Exception as e:
logger.error(f"Error generating recommendations: {str(e)}")
logger.info("Generating topic-based recommendations")
return get_topic_based_recommendations(user)

if urls and len(urls) < 10:
logger.info("Fewer than 10 recommendations found, fetching more repositories based on topics")
fetched_repos = await main(language_topics, access_token=user.access_token, extra_topics=extra_topics, extra_languages=languages)
urls = recommend(user_details, language_topics)

seen_full_names = set()
@@ -203,7 +204,7 @@ async def get_recommendations(request: Request, current_user: dict = Depends(get
if not unique_recommendations:
return {'recommendations': [], 'message': 'No recommendations found'}

return {'recommendations': unique_recommendations}
return {'recommendations': unique_recommendations[::-1]}
except Exception as e:
logger.error(f"Error: {str(e)}")
raise HTTPException(status_code=500, detail="An error occurred while generating recommendations")
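The collapsed hunk above dedupes on full_name before the endpoint returns the reversed list; a minimal sketch of that step, assuming urls is a list of recommendation dicts shaped like the ones built in recommend() (the helper name is illustrative, not part of the commit):

def dedupe_and_order(urls):
    # Keep the first occurrence of each repository, keyed by full_name,
    # then reverse so the most recently appended results surface first
    # (matching the unique_recommendations[::-1] return above).
    seen_full_names = set()
    unique_recommendations = []
    for rec in urls:
        full_name = rec.get("full_name")
        if full_name and full_name not in seen_full_names:
            seen_full_names.add(full_name)
            unique_recommendations.append(rec)
    return unique_recommendations[::-1]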
188 changes: 83 additions & 105 deletions src/db.py
@@ -13,129 +13,107 @@
logger = logging.getLogger(__name__)


def recommend(user_details,
languages_topics) -> List[RepositoryRecommendation]:
"""generate recommendations for the user"""

async def recommend(user_details=None,
languages_topics=None,
topics=None,
max_recommendations=15) -> List[RepositoryRecommendation]:
"""Generate recommendations for users based on projects or topics."""

recommendations = []
recommended_repos = set()

collection = get_chromadb_collection()
lang_topics = languages_topics["languages"] + languages_topics["topics"]
for user_proj in user_details:
new_doc = f"{user_proj['project_name']} : {user_proj['description']} : {lang_topics}"

# print(f"Querying ChromaDB for project: {user_proj}")
embeddings = generate_embeddings(new_doc)
# print(f"Embeddings: {embeddings}")

if user_details:
lang_topics = languages_topics["languages"] + languages_topics["topics"]
for user_proj in user_details:
new_doc = f"{user_proj['project_name']} : {user_proj['description']} : {lang_topics}"
embeddings = generate_embeddings(new_doc)

results = collection.query(
query_embeddings=[embeddings],
n_results=5,
include=["metadatas", "distances"]
)

print(f"UserProject: {user_proj} : language topics : {languages_topics}, Repositories: {results}")
if results['metadatas'][0]:
metadatas = results["metadatas"][0]

for metadata in metadatas:
repo_name = metadata.get("full_name")
print('----------------\n Repo Name:', repo_name)
print('\n------------------')
if '/' in repo_name:
repo_url = f"https://github.com/{repo_name}"
if repo_url not in recommended_repos:
recommendations.append({
"repo_url": repo_url,
"full_name": metadata.get("full_name"),
"description": metadata.get("description"),
"stargazers_count": metadata.get("stargazers_count"),
"forks_count": metadata.get("forks_count"),
"open_issues_count": metadata.get("open_issues_count"),
"avatar_url": metadata.get("avatar_url"),
"language": metadata.get("language"),
"updated_at": metadata.get("updated_at"),
"topics": metadata.get("topics", [])
})
recommended_repos.add(repo_url)
print('\n--------------\nRecommendations:', recommendations)
if len(recommendations) >= max_recommendations:
break
else:
logger.info(f"No recommendations found for project {user_proj['project_name']}")

if topics:
logger.info(f"Querying ChromaDB for topics: {topics}")
embeddings = [generate_embeddings(topic) for topic in topics]

results = collection.query(
query_embeddings = [embeddings],
n_results = 10,
query_embeddings=embeddings,
n_results=8, # Get more results to allow for filtering
include=["ids", "metadatas"]
)

logging.info(f"UserProject: {user_proj}, Repositories: {results}")
# print(f"UserProject: {user_proj}, Repositories: {results}")
logger.info(f"Recommendation results: {results}")
if results['documents'][0]:
# Extract repository names and construct GitHub URLs
ids = results["ids"][0]
i = 0
for doc in results['documents'][0]:
repo_name = doc.split('\n')[0] # Get the part before the first newline
if '/' in repo_name: # Ensure it's a valid repo name
repo_url = f"https://github.com/{repo_name}"
if repo_url not in recommended_repos:
try:
repo_details = collection.get(ids=[ids[i]])
metadata = repo_details.get("metadatas")[0]
except Exception as e:
logging.error(f"Error getting repo details: {str(e)}")
metadatas = results["metadatas"][0]

if metadata:
for metadata in metadatas:
repo_name = metadata.get("full_name")
if '/' in repo_name:
repo_url = f"https://github.com/{repo_name}"
if repo_url not in recommended_repos:
recommendations.append({
"repo_url": repo_url,
"full_name": metadata.get("full_name"),
"description": metadata.get("description"),
"stargazers_count": metadata.get("stargazers_count"),
"forks_count": metadata.get("forks_count"),
"open_issues_count": metadata.get("open_issues_count"),
"avatar_url": metadata.get("owner", {}).get("avatar_url"),
"language": metadata.get("language"),
"updated_at": metadata.get("updated_at"),
"topics": metadata.get("topics", [])
})
recommended_repos.add(repo_url)
i+=1
if len(recommendations) >= 15:
break
else:
logger.error(f"Repository details not found for {repo_name}")
"repo_url": repo_url,
"full_name": metadata.get("full_name"),
"description": metadata.get("description"),
"stargazers_count": metadata.get("stargazers_count"),
"forks_count": metadata.get("forks_count"),
"open_issues_count": metadata.get("open_issues_count"),
"avatar_url": metadata.get("avatar_url"),
"language": metadata.get("language"),
"updated_at": metadata.get("updated_at"),
"topics": metadata.get("topics", [])
})
recommended_repos.add(repo_url)
if len(recommendations) >= max_recommendations:
break
else:
logger.info(f"No recommendations found for project {user_proj['project_name']}")


return recommendations
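For context, the rewritten recommend() folds the old project-based and topic-based paths into one async entry point; a rough usage sketch under that assumption (the driver function and sample data are illustrative, not part of the commit):

import asyncio

from src.db import recommend  # assumed import path

async def demo():
    # Project-based path, as called from the /recommendations endpoint.
    user_details = [{"project_name": "gitmatch", "description": "repo recommender"}]
    language_topics = {"languages": ["Python"], "topics": ["llm"]}
    project_recs = await recommend(user_details=user_details,
                                   languages_topics=language_topics)

    # Topic-only path, as called from get_topic_based_recommendations().
    topic_recs = await recommend(topics=["agentic-ai", "openai"])

    print(len(project_recs), len(topic_recs))

asyncio.run(demo())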


def recommend_by_topics(topics: List[str],
max_recommendations: int = 7) -> List[RepositoryRecommendation]:
"""Generate recommendations based on given topics"""
recommendations = []
recommended_repos = set()
collection = get_chromadb_collection()

logger.info(f"Querying ChromaDB for topics: {topics}")
embeddings = [generate_embeddings(topic) for topic in topics]

results = collection.query(
query_embeddings=embeddings,
n_results=max_recommendations * 2, # Get more results to allow for filtering
where={"related_language_or_topic": {"$in": topics}}
)
logger.info(f"Recommendation results: {results}")
if results['documents'][0]:
ids = results["ids"][0]
i = 0
for doc in results['documents'][0]:
repo_name = doc.split('\n')[0] # Get the part before the first newline
if '/' in repo_name: # Ensure it's a valid repo name
repo_url = f"https://github.com/{repo_name}"
if repo_url not in recommended_repos:
try:
repo_details = collection.get(ids=[ids[i]])
metadata = repo_details.get("metadatas")[0]
except Exception as e:
logging.error(f"Error getting repo details: {str(e)}")

if metadata:
recommendations.append({
"repo_url": repo_url,
"full_name": metadata.get("full_name"),
"description": metadata.get("description"),
"stargazers_count": metadata.get("stargazers_count"),
"forks_count": metadata.get("forks_count"),
"open_issues_count": metadata.get("open_issues_count"),
"avatar_url": metadata.get("owner", {}).get("avatar_url"),
"language": metadata.get("language"),
"updated_at": convert_to_readable_format(metadata.get("updated_at")),
"topics": metadata.get("topics", [])
})
recommended_repos.add(repo_url)
i+=1
if len(recommendations) >= 15:
break
else:
logger.error(f"Repository details not found for {repo_name}")

return recommendations


def get_topic_based_recommendations(user):
async def get_topic_based_recommendations(user):
all_topics = user.languages + user.extra_topics
if not all_topics:
raise ValueError("Please provide at least one language or topic")

# Get recommendations based on topics
try:
urls = recommend_by_topics(all_topics)
urls = await recommend(topics=all_topics)
except Exception as e:
logger.error(f"Error generating topic-based recommendations: {str(e)}")
return {'recommendations': [], 'message': 'Error generating recommendations'}
@@ -252,12 +230,12 @@ def convert_to_readable_format(time_str):
]

languages_topics = {
'languages': ['Python', 'JavaScript'],
'topics': ['agentic-ai', 'openai']
'languages': ['Python', 'typescript'],
'topics': ['agentic-ai', 'openai', "GPT", "llm"]
}
try:
recommendations = recommend(user_details, languages_topics)
logger.info(recommendations)
# logger.info(recommendations)
print('--------')
print(recommendations)
except Exception as e:
8 changes: 8 additions & 0 deletions src/user_data.py
@@ -34,8 +34,14 @@ async def get_repos(user):
url = f'/users/{user.username}/repos'
repos_data = await octokit.request('GET', url)

repo_limit = 15
cnt = 0

# iterates through every repository of the user data
for repo in repos_data:
if cnt >= repo_limit:
break

if not repo['fork'] and (repo['description'] or repo['language'] or len(repo['topics'])>0):
language_url = repo['languages_url'].replace('https://api.github.com', '')
languages_data = await octokit.request('GET', language_url)
@@ -52,6 +58,8 @@

for topic in repo['topics']:
topics_map[topic] = topics_map.get(topic, 0) + 1

cnt += 1

# return the top5 languages
top5_languages = user.languages + sorted(languages_map, key=languages_map.get, reverse=True)[:5] if user.languages else sorted(languages_map, key=languages_map.get, reverse=True)[:5]
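The top-5 selection above is a dense conditional expression; an equivalent, more explicit sketch of the intended logic, assuming languages_map maps language names to accumulated counts (the function name is illustrative):

def top_languages(languages_map, user_languages, k=5):
    # Rank languages by their accumulated weight, highest first.
    top_k = sorted(languages_map, key=languages_map.get, reverse=True)[:k]
    # Prepend any languages the user supplied explicitly; otherwise
    # return only the top k detected from the user's repositories.
    return user_languages + top_k if user_languages else top_k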
