mirror of
https://github.com/microsoft/autogen.git
synced 2026-04-20 03:02:16 -04:00
* big bang gitub workflows * add missing settings in local.settings.json * config refactor * fix devlead plan response * swap cosmos to table storage for metadata storage * unify config via options * azd-ify WIP * add qdrant bicep WIP * working azd provision setup * consolidate SK version in projects * replace localhost :) * add fqdn to options * httpclient fixes * add managed identity to the function and assign contrib role * qdrant endpoint setting * add container instances cleanup code + wait on termination to upload to Github * formatting fixes * add tables in bicep * local getting started WIP * add azure setup instructions * add the load-waf bits * docs WIP --------- Co-authored-by: Kosta Petan <Kosta.Petan@microsoft.com>
64 lines
2.8 KiB
C#
64 lines
2.8 KiB
C#
using UglyToad.PdfPig;
|
|
using UglyToad.PdfPig.DocumentLayoutAnalysis.TextExtractor;
|
|
using Microsoft.SemanticKernel;
|
|
using Microsoft.SemanticKernel.Text;
|
|
using Microsoft.Extensions.Logging;
|
|
using System.Text;
|
|
using Microsoft.SemanticKernel.Connectors.Memory.Qdrant;
|
|
using Microsoft.SemanticKernel.Connectors.AI.OpenAI.TextEmbedding;
|
|
using Microsoft.SemanticKernel.Memory;
|
|
using System.Reflection;
|
|
|
|
/// <summary>
/// Console entry point that loads the Azure Well-Architected Framework PDF,
/// extracts the text of every page and stores each page in the "waf-pages"
/// semantic-memory collection backed by Qdrant.
/// </summary>
class Program
{
    // Name of the PDF to import; it is resolved relative to the directory of the executing assembly.
    private const string WafFileName = "azure-well-architected.pdf";

    // Maximum number of leading characters of a page used in its memory description.
    private const int DescriptionLength = 100;

    /// <summary>
    /// Builds the kernel (logging, Azure chat completion, Qdrant-backed semantic memory)
    /// from <c>KernelSettings</c> and imports the WAF document into memory.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static async Task Main(string[] args)
    {
        var kernelSettings = KernelSettings.LoadSettings();
        var kernelConfig = new KernelConfig();

        using ILoggerFactory loggerFactory = LoggerFactory.Create(builder =>
        {
            builder
                .SetMinimumLevel(kernelSettings.LogLevel ?? LogLevel.Warning)
                .AddConsole()
                .AddDebug();
        });

        // NOTE(review): 1536 is presumably the embedding vector size of the configured
        // embedding model — confirm it matches the deployment in use.
        var memoryStore = new QdrantMemoryStore(new QdrantVectorDbClient(kernelSettings.QdrantEndpoint, 1536));
        var embeddingGeneration = new AzureTextEmbeddingGeneration(
            kernelSettings.EmbeddingDeploymentOrModelId,
            kernelSettings.Endpoint,
            kernelSettings.ApiKey);
        var semanticTextMemory = new SemanticTextMemory(memoryStore, embeddingGeneration);

        var kernel = new KernelBuilder()
            .WithLogger(loggerFactory.CreateLogger<IKernel>())
            .WithAzureChatCompletionService(kernelSettings.DeploymentOrModelId, kernelSettings.Endpoint, kernelSettings.ApiKey, true, kernelSettings.ServiceId, true)
            .WithMemory(semanticTextMemory)
            .WithConfiguration(kernelConfig)
            .Build();

        await ImportDocumentAsync(kernel, WafFileName);
    }

    /// <summary>
    /// Reads the given PDF page by page and saves the text of each non-empty page
    /// into the "waf-pages" memory collection of <paramref name="kernel"/>.
    /// A failure on one page is written to the console and does not stop the import.
    /// </summary>
    /// <param name="kernel">Kernel whose <c>Memory</c> receives the page texts.</param>
    /// <param name="filename">PDF file name, relative to the executing assembly's directory.</param>
    /// <exception cref="ArgumentNullException"><paramref name="kernel"/> is null.</exception>
    public static async Task ImportDocumentAsync(IKernel kernel, string filename)
    {
        ArgumentNullException.ThrowIfNull(kernel);

        // Assembly.Location can be empty (e.g. single-file deployments), in which case
        // GetDirectoryName yields null/empty; fall back to the application base directory.
        var currentDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
        if (string.IsNullOrEmpty(currentDirectory))
        {
            currentDirectory = AppContext.BaseDirectory;
        }

        var filePath = Path.Combine(currentDirectory, filename);

        // Own the stream explicitly so the file handle is released when the import ends.
        using var pdfStream = File.OpenRead(filePath);
        using var pdfDocument = PdfDocument.Open(pdfStream);

        foreach (var page in pdfDocument.GetPages())
        {
            try
            {
                var text = ContentOrderTextExtractor.GetText(page);

                // Nothing to embed for blank pages.
                if (string.IsNullOrWhiteSpace(text))
                {
                    continue;
                }

                // Use the leading characters of the page as its description. (Interpolating
                // the result of Enumerable.Take would print the iterator's type name.)
                var descr = text.Length > DescriptionLength ? text[..DescriptionLength] : text;

                await kernel.Memory.SaveInformationAsync(
                    collection: "waf-pages",
                    text: text,
                    id: $"{Guid.NewGuid()}",
                    description: $"Document: {descr}");
            }
            catch (Exception ex)
            {
                // Best effort: report the failing page and continue with the next one.
                Console.WriteLine(ex.Message);
            }
        }
    }
}